POI讀取PPT的例子:不只是讀取PPT裏面的文字,還要有文字的樣式、佈局、圖片等。POI包從官網下載即可。
HSLF是POI讀寫PPT的API,例子見官方文檔。
http://poi.apache.org/slideshow/quick-guide.html
文檔格式
http://poi.apache.org/slideshow/ppt-file-format.html
操作Shape的API
http://poi.apache.org/slideshow/how-to-shapes.html
1 提取PPT文字
public class PPTReader {
/**
 * Extracts the text of "2003.ppt" (resolved against the working directory)
 * and prints it to standard output.
 *
 * @param args command-line arguments; not used
 * @throws Exception if the file cannot be opened or read
 */
public static void main(String[] args) throws Exception {
    InputStream is = new FileInputStream(new File("2003.ppt"));
    try {
        PowerPointExtractor extractor = new PowerPointExtractor(is);
        String ppString = extractor.getText();
        System.out.println(ppString);
    } finally {
        // Release the file handle even when extraction fails.
        is.close();
    }
}
PPT如下:
2 抽取PPT圖片
/**
 * Writes every picture stored in "2003.ppt" to the working directory as
 * {@code pic_<index><ext>}, where the extension is chosen from the picture type.
 * Pictures whose type is not JPEG, PNG, WMF, EMF or PICT are skipped.
 * An {@link IOException} is not propagated; its stack trace is printed instead.
 */
public void findpIC() {
    try {
        SlideShow ppt = new SlideShow(new HSLFSlideShow("2003.ppt"));
        // All pictures of the presentation, independent of the slide they are used on.
        PictureData[] pDatas = ppt.getPictureData();
        for (int i = 0; i < pDatas.length; i++) {
            PictureData pict = pDatas[i];
            // Raw bytes of the picture.
            byte[] data = pict.getData();
            // Picture type decides the output file extension.
            int type = pict.getType();
            String ext;
            switch (type) {
                case Picture.JPEG: ext = ".jpg"; break;
                case Picture.PNG: ext = ".png"; break;
                case Picture.WMF: ext = ".wmf"; break;
                case Picture.EMF: ext = ".emf"; break;
                case Picture.PICT: ext = ".pict"; break;
                default: continue; // unknown type: move on to the next picture
            }
            // Output file name pattern: pic_<i><ext>
            FileOutputStream out = new FileOutputStream("pic_" + i + ext);
            try {
                out.write(data);
            } finally {
                // Close the file even if the write fails, so the handle is not leaked.
                out.close();
            }
        }
    } catch (IOException e) {
        // Reached on any I/O failure inside the try block (for example while
        // writing an image file), not only when the input is not a PowerPoint file.
        e.printStackTrace();
    }
}
3/ 添加一個新的圖片插入到新的幻燈片中,保存
/**
 * Opens "2003.ppt", appends a new slide containing "cat.jpg", exports the
 * JPEG and PNG pictures found on the first slide as {@code slide0_<i>.jpg} /
 * {@code slide0_<i>.png}, and saves the presentation as "slideshow.ppt".
 *
 * @throws IOException if reading the inputs or writing any output file fails
 */
public void addSlide() throws IOException {
    SlideShow ppt = new SlideShow(new HSLFSlideShow("2003.ppt"));

    // Register the image with the presentation; idx identifies it afterwards.
    int idx = ppt.addPicture(new File("cat.jpg"), Picture.JPEG);

    // Size and position of the picture on the new slide.
    Picture pict = new Picture(idx);
    pict.setAnchor(new Rectangle(100, 100, 300, 200));
    Slide slide = ppt.createSlide();
    slide.addShape(pict);

    // Export pictures from the FIRST slide (index 0), which is not
    // necessarily the slide that was just created above.
    slide = ppt.getSlides()[0];
    Shape[] sh = slide.getShapes();
    for (int i = 0; i < sh.length; i++) {
        if (!(sh[i] instanceof Picture)) {
            continue;
        }
        PictureData pictData = ((Picture) sh[i]).getPictureData();
        if (pictData == null) {
            // NOTE(review): guards against a picture shape without backing data — confirm POI can return null here.
            continue;
        }
        int type = pictData.getType();
        String ext;
        if (type == Picture.JPEG) {
            ext = ".jpg";
        } else if (type == Picture.PNG) {
            ext = ".png";
        } else {
            continue; // only JPEG and PNG are exported
        }
        FileOutputStream imageOut = new FileOutputStream("slide0_" + i + ext);
        try {
            imageOut.write(pictData.getData());
        } finally {
            imageOut.close();
        }
    }

    // Save the modified presentation.
    FileOutputStream out = new FileOutputStream("slideshow.ppt");
    try {
        ppt.write(out);
    } finally {
        out.close();
    }
}
4/插入表格
/**
 * Opens "slideshow.ppt", adds a slide holding a bordered two-column table
 * filled from a fixed data set, and saves the result as "hslf-table.ppt".
 *
 * @throws IOException if the input cannot be read or the output cannot be written
 */
@Test
public void createTable() throws IOException {
    // Table content: header row followed by the data rows.
    String[][] data = {
        {"INPUT FILE", "NUMBER OF RECORDS"},
        {"Item File", "11,559"},
        {"Vendor File", "300"},
        {"Purchase History File", "10,000"},
        {"Total # of requisitions", "10,200,038"} };
    SlideShow ppt = new SlideShow(new HSLFSlideShow("slideshow.ppt"));
    Slide slide = ppt.createSlide();

    // Size the table from the data so the two cannot drift apart.
    int rows = data.length;
    int cols = data[0].length;
    Table table = new Table(rows, cols);
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < data[i].length; j++) {
            TableCell cell = table.getCell(i, j);
            cell.setText(data[i][j]);
            // Format the first rich-text run of the cell.
            RichTextRun rt = cell.getTextRun().getRichTextRuns()[0];
            rt.setFontName("Arial");
            rt.setFontSize(10);
            cell.setVerticalAlignment(TextBox.AnchorMiddle);
            cell.setHorizontalAlignment(TextBox.AlignCenter);
        }
    }

    // Black border of width 1.0 applied to all borders of the table.
    Line border = table.createBorder();
    border.setLineColor(Color.black);
    border.setLineWidth(1.0);
    table.setAllBorders(border);

    // Widths of the first and second column.
    table.setColumnWidth(0, 300);
    table.setColumnWidth(1, 150);

    slide.addShape(table);
    table.moveTo(100, 100);

    // Save the presentation.
    FileOutputStream out = new FileOutputStream("hslf-table.ppt");
    try {
        ppt.write(out);
    } finally {
        out.close();
    }
}
5 去掉PPT中的形狀圖形,並提取PPT中的聲音
/**
 * Removes every shape from every slide of "bullets.ppt" in memory and prints
 * a message for each shape that was removed.
 *
 * The original version called {@code ppt.createSlide()} and removed shapes
 * from that new slide, so the shapes already in the file were never touched.
 *
 * NOTE(review): the modified presentation is not written anywhere, so the
 * removal is not persisted — confirm whether a save step is wanted.
 *
 * @throws IOException if "bullets.ppt" cannot be read
 */
public void removeShapes() throws IOException {
    SlideShow ppt = new SlideShow(new HSLFSlideShow("bullets.ppt"));
    Slide[] slides = ppt.getSlides();
    for (int s = 0; s < slides.length; s++) {
        Slide slide = slides[s];
        // Snapshot the shapes first, then remove them one by one.
        Shape[] shape = slide.getShapes();
        for (int i = 0; i < shape.length; i++) {
            boolean ok = slide.removeShape(shape[i]);
            if (ok) {
                System.out.println("you are successful remove the shape");
            }
        }
    }
}
/**
 * Reads "bullets.ppt" and writes each embedded sound whose type is ".WAV"
 * to a file named after the sound.
 *
 * @throws IOException if the presentation cannot be read or a sound file cannot be written
 */
public void retrieveSound() throws IOException {
    SlideShow ppt;
    FileInputStream is = new FileInputStream("bullets.ppt");
    try {
        ppt = new SlideShow(is);
    } finally {
        // The input stream is closed right after loading, also when loading fails.
        is.close();
    }

    SoundData[] sound = ppt.getSoundData();
    for (int i = 0; i < sound.length; i++) {
        // Only .WAV sounds are saved; constant-first equals avoids an NPE on a null type.
        if (".WAV".equals(sound[i].getSoundType())) {
            // NOTE(review): the file name comes from the presentation itself;
            // for untrusted input it should be sanitised before use as a path.
            FileOutputStream out = new FileOutputStream(sound[i].getSoundName());
            try {
                out.write(sound[i].getData());
            } finally {
                out.close();
            }
        }
    }
}
自己畫了主要類圖,不全,個人理解
輸入流由SlideShow讀入;輸出時使用文件輸出流(FileOutputStream)寫入以保存修改,寫完後關閉。
SlideShow指整個幻燈片,而Slide 指的是單張幻燈片。SlideMaster 是單張幻燈片的管理類。對於文本信息、超鏈接信息、聲音修改使用TextBox、TextRun、SoundData等類對單張幻燈片進行修改。而背景修改需要使用SlideMaster類調用Fill類進行修改設置。