引入jar包,這裏給出對應的Maven依賴配置(加入pom.xml的<dependencies>節點中):
<!-- Apache POI artifacts used by the example; all three are pinned to the same version (3.17). -->
<!-- Apache POI core artifact. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<!-- Apache POI OOXML artifact; presumably what provides .docx support (confirm against the POI component overview). -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<!-- Apache POI scratchpad artifact; presumably what provides legacy .doc support (confirm against the POI component overview). -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.17</version>
</dependency>
代碼:
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.OLE2ExtractorFactory;
import java.io.FileInputStream;
import java.io.IOException;
/**
 * Small demo that extracts the plain text of a Word document with Apache POI
 * and prints it to standard output.
 */
public class WordTest {

    /** Document that is read when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "D:\\inputFile\\1.docx";

    /**
     * Reads a Word document and prints its text content.
     *
     * <p>An {@link IOException} raised while opening, reading or closing the
     * document is reported via its stack trace; the program then ends normally.
     *
     * @param args optional; {@code args[0]}, when present, is the path of the
     *             document to read, otherwise {@link #DEFAULT_INPUT_PATH} is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

        // try-with-resources closes the extractor first and then the stream,
        // even when createExtractor() or getText() throws, so no file handle leaks.
        try (FileInputStream input = new FileInputStream(path);
             POITextExtractor extractor = OLE2ExtractorFactory.createExtractor(input)) {
            System.out.println(extractor.getText());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
doc和docx格式的文檔都可以用這種方式讀取(讀取docx時需要classpath中有poi-ooxml,此時OLE2ExtractorFactory會自動委託給支持OOXML的ExtractorFactory)。
注意:這種格式的文件暫不支持讀取。