【utils】Word2HtmlUtil

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;

/**
 * Utility methods that download a Word document from a URL and convert it to an HTML string.
 *
 * <p>Both converters are best-effort: any failure while opening, parsing or serializing the
 * document is printed to standard error via {@code printStackTrace()} and reported to the
 * caller as a {@code null} result.
 */
public class Word2HtmlUtil {

    /** Encoding requested from the XML serializer; also used to decode the bytes it produces. */
    private static final String HTML_ENCODING = "utf-8";

    /**
     * Converts a Word 97-2003 ({@code .doc}) document to HTML.
     *
     * <p>The three arguments are concatenated as-is ({@code wordPath + wordName + suffix}) and
     * the result is opened as a {@link URL}.
     *
     * @param wordPath
     *            URL prefix of the document (must already end with a separator)
     * @param wordName
     *            document file name without its extension
     * @param suffix
     *            document file extension, including the leading dot
     * @return the generated HTML, or {@code null} if the document could not be loaded or
     *         converted
     */
    public static String Word2003ToHtml(String wordPath, String wordName,
                                        String suffix) {
        // Full location of the source Word document.
        final String file = wordPath + wordName + suffix;

        String content = null;
        // try-with-resources guarantees the URL stream is closed even when conversion fails.
        try (InputStream input = new BufferedInputStream(new URL(file).openStream())) {
            HWPFDocument wordDocument = new HWPFDocument(input);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder()
                            .newDocument());
            // Parse the Word document into an in-memory HTML DOM.
            wordToHtmlConverter.processDocument(wordDocument);
            Document htmlDocument = wordToHtmlConverter.getDocument();

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");

            // Serialize straight into the byte array: an extra buffered wrapper would have to
            // be flushed before the bytes are read back, otherwise output could be truncated.
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(baos));

            // Decode with the same encoding the serializer was told to write; the platform
            // default charset would corrupt non-ASCII text on non-UTF-8 systems.
            content = baos.toString(HTML_ENCODING);
        } catch (IOException | ParserConfigurationException | TransformerException e) {
            // Best-effort contract: report the failure and fall through to return null.
            e.printStackTrace();
        }

        return content;
    }

    /**
     * Converts a Word 2007+ ({@code .docx}) document to an HTML fragment.
     *
     * <p>The three arguments are concatenated as-is ({@code wordPath + wordName + suffix}) and
     * the result is opened as a {@link URL}.
     *
     * @param wordPath
     *            URL prefix of the document (must already end with a separator)
     * @param wordName
     *            document file name without its extension
     * @param suffix
     *            document file extension, including the leading dot
     * @return the generated HTML fragment, or {@code null} if the document could not be loaded
     *         or converted
     */
    public static String Word2007ToHtml(String wordPath, String wordName, String suffix) {
        // Full location of the source Word document.
        final String file = wordPath + wordName + suffix;

        String content = null;
        // try-with-resources guarantees the URL stream is closed even when conversion fails.
        try (InputStream in = new BufferedInputStream(new URL(file).openStream())) {
            XWPFDocument document = new XWPFDocument(in);

            XHTMLOptions options = XHTMLOptions.create();
            options.setIgnoreStylesIfUnused(false);
            options.setFragment(true);

            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            XHTMLConverter.getInstance().convert(document, baos, options);
            // NOTE(review): decoded with the platform default charset, unchanged from before,
            // because the charset XHTMLConverter writes is not visible here - confirm and pin
            // it explicitly if the converter guarantees one.
            content = baos.toString();
        } catch (IOException e) {
            // Best-effort contract: report the failure and fall through to return null.
            e.printStackTrace();
        }

        return content;
    }

    /**
     * Manual smoke test: converts a sample {@code .docx} from a fixed URL and prints the result.
     *
     * @param args unused
     * @throws Exception declared for convenience; the converters themselves report failures by
     *                   returning {@code null}
     */
    public static void main(String[] args) throws Exception {
        // For the .doc variant call:
        // Word2003ToHtml("http://10.103.7.195/doc/", "help", ".doc")
        System.out.println(Word2007ToHtml("http://10.103.7.195/doc/", "help", ".docx"));
    }

}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章