jsp頁面上傳Word,用POI讀取word裏的內容

工作需要完成一個Word的上傳,然後讀取數據,對數據進行比對得到需要的信息,然後把比對後的數據存入到數據庫裏。這個真是難倒我了,經過我很長時間的查找才實現了一個Word的讀取,可以實現兩個讀取,一:直接讀取全文信息,二:讀取每一段的信息。數據的比對等我做出了接着更新。代碼有參考別人的,出處忘了。。。



此程序只能讀取已經放在項目根目錄下固定位置的Word文件,並不是真正的上傳;從其他位置上傳Word的做法研究了一陣,還沒有找到獲取文件路徑的辦法。


import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.PushbackInputStream;


import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;


import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;






/*
 *讀取項目目錄下的 test.docx 內容,以流的形式返回給頁面 
 * */
public class ReadWord extends HttpServlet {


/**
* The doGet method of the servlet. <br>
*
* This method is called when a form has its tag value method equals to get.

* @param request the request send by the client to the server
* @param response the response send by the server to the client
* @throws ServletException if an error occurred
* @throws IOException if an error occurred
*/
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {


doPost(request,response);
}


/**
* The doPost method of the servlet. <br>
*
* This method is called when a form has its tag value method equals to post.

* @param request the request send by the client to the server
* @param response the response send by the server to the client
* @throws ServletException if an error occurred
* @throws IOException if an error occurred
*/
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {


 request.setCharacterEncoding("gbk");
 response.setContentType("text/plain;charset=gbk");
 String file = request.getParameter("boy");
//  獲得項目根目錄地址
 String rootPath = this.getServletConfig().getServletContext().getRealPath("/");
//  獲得文件完整的地址
 String path=rootPath+file; 
 String root = getServletContext().getRealPath("/");   
 InputStream in = new FileInputStream(path);
 
 System.out.println(path);
 String bodyText=""; 
 try {
//  轉換成  PushbackinputStream
  if (!in.markSupported()) {
           in = new PushbackInputStream(in, 8);
       } 
//  其他word版本
 if(POIFSFileSystem.hasPOIFSHeader(in))
 {
 HWPFDocument document = new HWPFDocument(in);
 WordExtractor extractor = new WordExtractor(document);
 bodyText = extractor.getText(); 
 response.getWriter().write(bodyText);
 return ;
 }
//   07 版本
  XWPFDocument document = new XWPFDocument(in);
  XWPFWordExtractor extractor =new XWPFWordExtractor(document);
  bodyText = extractor.getText(); 
  response.getWriter().write(bodyText);
 
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
 
}


}





下面是按照段落讀取Word裏的信息,參考文章: http://blog.csdn.net/robinliu2010/article/details/7584173




import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.PushbackInputStream;


import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;


import org.apache.poi.hslf.model.textproperties.ParagraphFlagsTextProp;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;






/*
 *讀取項目目錄下的 test.docx 內容,以流的形式返回給頁面 
 * */
public class Read2 extends HttpServlet {


/**
* The doGet method of the servlet. <br>
*
* This method is called when a form has its tag value method equals to get.

* @param request the request send by the client to the server
* @param response the response send by the server to the client
* @throws ServletException if an error occurred
* @throws IOException if an error occurred
*/
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {


doPost(request,response);
}


/**
* The doPost method of the servlet. <br>
*
* This method is called when a form has its tag value method equals to post.

* @param request the request send by the client to the server
* @param response the response send by the server to the client
* @throws ServletException if an error occurred
* @throws IOException if an error occurred
*/
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {


 request.setCharacterEncoding("gbk");
 response.setContentType("text/plain;charset=gbk");
 String file = request.getParameter("boy");
//  獲得項目根目錄地址
 String rootPath = this.getServletConfig().getServletContext().getRealPath("/");
//  獲得文件完整的地址
 String path=rootPath+file; 
 String root = getServletContext().getRealPath("/");   
 InputStream in = new FileInputStream(path);
 
 
 String bodyText=""; 
 try {
//  轉換成  PushbackinputStream
  if (!in.markSupported()) {
           in = new PushbackInputStream(in, 8);
       } 
//  其他word版本
 if(POIFSFileSystem.hasPOIFSHeader(in))
 {
 HWPFDocument doc = new HWPFDocument(in);
 Range r = doc.getRange();
 for (int x = 0; x < r.numSections(); x++) {
      Section s = r.getSection(x);
      String[] textString=new String[s.numParagraphs()];
      for (int y = 0; y < s.numParagraphs(); y++) {
             Paragraph p = s.getParagraph(y);
             for (int z = 0; z < p.numCharacterRuns(); z++) {
                    CharacterRun run = p.getCharacterRun(z);
                    //字符串文本
                    textString[y] = run.text();
                    System.out.println(textString[y]);
             }
      }
}
 return ;
 }
//   07 版本
  XWPFDocument document = new XWPFDocument(in);
  XWPFWordExtractor extractor =new XWPFWordExtractor(document);
  bodyText = extractor.getText(); 
  response.getWriter().write(bodyText);
 
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
 
}


}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章