package cn.wgd.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.AbstractWordUtils;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.util.XMLHelper;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import fr.opensagres.poi.xwpf.converter.core.IXWPFConverter;
import fr.opensagres.poi.xwpf.converter.core.ImageManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
/**
 * Converts Word documents to HTML, e.g. so that attachments can be previewed
 * in a browser. (The original header also mentioned PDF, but this class only
 * contains Word handling.)
 *
 * <p>Image handling reference: https://www.cnblogs.com/feiruo/p/5924514.html
 *
 * <p>Besides the POI (3.17) jars, this example needs:
 * <ul>
 *   <li>fr.opensagres.poi.xwpf.converter.core-2.0.1.jar</li>
 *   <li>fr.opensagres.poi.xwpf.converter.xhtml-2.0.1.jar</li>
 *   <li>fr.opensagres.xdocreport.core-2.0.1.jar</li>
 *   <li>ooxml-schemas-1.3.jar, etc.</li>
 * </ul>
 *
 * <p>Note: this is a simple implementation; documents that need richer style
 * handling require additional work.
 *
 * @author Kevin 2018-3-14
 */
public class ConvertWord2HtmlUtil {

    /** File-name suffix (matched case-insensitively) that selects the .docx conversion path. */
    private static final String DOCX_SUFFIX = ".docx";

    /**
     * Command-line entry point.
     *
     * <p>Optional arguments override the built-in sample paths:
     * {@code args[0]} = source Word file, {@code args[1]} = target HTML file,
     * {@code args[2]} = image output directory. Any argument that is not
     * supplied falls back to the original hard-coded sample value.
     *
     * @param args optional path overrides, may be empty or {@code null}
     * @throws IOException if reading the source or writing the output fails
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException if serializing the HTML DOM fails
     * @throws SAXException declared by {@link #word2007ToHtml(String, String, String)}
     */
    public static void main(String[] args)
            throws IOException, ParserConfigurationException, TransformerException, SAXException {
        String path = argOrDefault(args, 0, "D:\\testfile2html\\test.docx");
        String descPath = argOrDefault(args, 1, "D:\\testfile2html\\test.html");
        String imagePath = argOrDefault(args, 2, "D:\\testfile2html");
        word2007ToHtml(path, descPath, imagePath);
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /** Tells whether {@code path} ends with ".docx", ignoring case (so ".Docx" also matches). */
    private static boolean isDocx(String path) {
        int suffixLength = DOCX_SUFFIX.length();
        // regionMatches returns false for a negative offset, so short paths are safe.
        return path.regionMatches(true, path.length() - suffixLength, DOCX_SUFFIX, 0, suffixLength);
    }

    /**
     * Converts a binary .doc file to HTML. Modelled on
     * {@code org.apache.poi.hwpf.converter.WordToHtmlConverter.main()}.
     *
     * @param path     path of the source .doc file
     * @param descPath path of the HTML file to write
     * @throws NullPointerException if {@code path} or {@code descPath} is {@code null}
     * @throws IOException if the source cannot be read
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException if serializing the HTML DOM fails
     */
    public static void word95T2007ToHtml(String path, String descPath)
            throws IOException, ParserConfigurationException, TransformerException {
        if (path == null) {
            throw new NullPointerException("路徑不能爲空!");
        }
        if (descPath == null) {
            // Fail before doing any conversion work instead of with a
            // message-less NPE from new File(null) afterwards.
            throw new NullPointerException("descPath must not be null");
        }
        System.out.println( "Converting " + path );
        System.out.println( "Saving output to " + descPath );

        Document doc = process(new File(path));

        DOMSource domSource = new DOMSource(doc);
        StreamResult streamResult = new StreamResult(new File(descPath));
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        // TODO set encoding from a command argument
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
    }

    /**
     * Loads a binary Word document and converts it to an HTML DOM. Taken from
     * {@code org.apache.poi.hwpf.converter.WordToHtmlConverter}.
     *
     * @param docFile the Word file to load
     * @return the DOM document produced by the converter
     * @throws IOException if the file cannot be read
     * @throws ParserConfigurationException if no DOM builder can be created
     */
    static Document process( File docFile ) throws IOException, ParserConfigurationException
    {
        // Close the loaded Word document once the DOM has been built; the
        // returned DOM is a separate object created by the DocumentBuilder.
        try (HWPFDocumentCore wordDocument = AbstractWordUtils.loadDoc( docFile )) {
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    XMLHelper.getDocumentBuilderFactory().newDocumentBuilder()
                            .newDocument() );
            wordToHtmlConverter.processDocument( wordDocument );
            return wordToHtmlConverter.getDocument();
        }
    }

    /**
     * Converts a Word file to HTML. Files whose name ends in ".docx" (any
     * letter case) go through the XHTML converter; everything else is handed
     * to {@link #word95T2007ToHtml(String, String)}.
     *
     * <p>If the source file does not exist a message is printed and the
     * method returns without writing anything.
     *
     * @param path      path of the source file (doc or docx)
     * @param descPath  path of the HTML file to write
     * @param imagePath directory where extracted images are stored (per the
     *                  original note, expected to be the directory of the HTML
     *                  file); only used for .docx input
     * @throws NullPointerException if {@code path} is {@code null}, or if
     *         {@code descPath} (or, for .docx input, {@code imagePath}) is
     *         {@code null} when conversion starts
     * @throws IOException if reading the source or writing the output fails
     * @throws ParserConfigurationException see {@link #word95T2007ToHtml(String, String)}
     * @throws TransformerException see {@link #word95T2007ToHtml(String, String)}
     * @throws SAXException kept in the signature for existing callers
     */
    public static void word2007ToHtml(String path, String descPath, String imagePath)
            throws IOException, ParserConfigurationException, TransformerException, SAXException {
        if (path == null) {
            throw new NullPointerException("路徑不能爲空!");
        }
        File sourceFile = new File(path);
        if (!sourceFile.exists()) {
            System.out.println("用戶文件不存在!");
            return;
        }
        if (!isDocx(path)) {
            word95T2007ToHtml(path, descPath);
            return;
        }

        if (descPath == null) {
            throw new NullPointerException("descPath must not be null");
        }
        if (imagePath == null) {
            throw new NullPointerException("imagePath must not be null");
        }

        // HTML converter
        IXWPFConverter<XHTMLOptions> converter = XHTMLConverter.getInstance();
        // HTML conversion options
        XHTMLOptions options = XHTMLOptions.create();
        // Image handling. Original note: the second argument is a directory
        // next to the HTML file, otherwise the images will not be found.
        ImageManager imageManager = new ImageManager(new File(imagePath), "image");
        options.setImageManager(imageManager);

        // try-with-resources releases the input stream, the document and the
        // output stream even when conversion throws. The output file is only
        // opened after the source document has been loaded successfully.
        try (FileInputStream in = new FileInputStream(sourceFile);
             XWPFDocument document = new XWPFDocument(in);
             FileOutputStream out = new FileOutputStream(descPath)) {
            converter.convert(document, out, options);
        }
    }
}
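// Source article title: POI實現Word轉HTML文件 (Converting Word to HTML with POI)
// 發表評論 (Post a comment)
// 所有評論 (All comments)
// 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布. (No comments yet; web-page boilerplate from the original post.)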