1、POI讀取百萬級數據工具

1、SheetHandler

SheetHandler 類負責處理從 Excel 讀取到的數據。官方文檔中 SheetHandler 以內部類的形式實現;爲避免更新代碼時漏打包內部類的 class 文件,這裏改爲普通的 Java 類。

package com.lee.poi.util;


import java.util.LinkedHashMap;

import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX content handler that collects the cell values of a worksheet XML stream into a map
 * keyed by cell reference (for example {@code "B7"}), in the order the cells are encountered.
 *
 * <p>The official POI example declares this handler as an inner class. It is a top-level class
 * here so that its class file cannot be forgotten when packaging an update.
 *
 * <p>Cells in row 1 are treated as a header row and are not stored.
 */
public class SheetHandler extends DefaultHandler {

    /** Shared-strings table used to resolve cells whose type attribute is {@code "s"}. */
    private SharedStringsTable sst;

    /** Character data accumulated since the most recent opening tag. */
    private final StringBuilder lastContents = new StringBuilder();

    /** True while the current cell's value is an index into the shared-strings table. */
    private boolean nextIsString;

    /** Reference (the {@code r} attribute) of the cell currently being read. */
    private String cellPosition;

    /** Cell reference to cell text, in encounter order. */
    private LinkedHashMap<String, String> rowContents = new LinkedHashMap<String, String>();

    /**
     * Creates a handler that resolves shared strings against the given table.
     *
     * @param sst the workbook's shared-strings table
     */
    public SheetHandler(SharedStringsTable sst) {
        this.sst = sst;
    }

    /**
     * Returns the collected cells.
     *
     * @return the live map of cell reference to cell text (not a copy)
     */
    public LinkedHashMap<String, String> getRowContents() {
        return rowContents;
    }

    /**
     * Replaces the map that collected cells are written to.
     *
     * @param rowContents the map to store subsequent cells in
     */
    public void setRowContents(LinkedHashMap<String, String> rowContents) {
        this.rowContents = rowContents;
    }

    /**
     * Records the reference and type of each {@code c} (cell) element and resets the text buffer.
     */
    @Override
    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        if ("c".equals(name)) {
            cellPosition = attributes.getValue("r");
            // t="s" means the <v> text is an index into the shared-strings table.
            nextIsString = "s".equals(attributes.getValue("t"));
        }
        // Clear the buffered content so each element starts with an empty buffer.
        lastContents.setLength(0);
    }

    /**
     * Stores the value of a cell when its {@code v} element closes.
     *
     * <p>Only {@code v} elements are processed, so a string-typed cell without a value, or with
     * other child elements, no longer triggers a shared-string lookup on unrelated text.
     */
    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        if (!"v".equals(name)) {
            return;
        }

        String value = lastContents.toString();
        if (nextIsString) {
            int idx = Integer.parseInt(value);
            value = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
            nextIsString = false;
        }

        // Once the value has been read, store it under its cell reference, skipping row 1.
        if (cellPosition != null && !isFirstRow(cellPosition)) {
            rowContents.put(cellPosition, value);
        }
    }

    /** Appends a chunk of character data to the buffer; SAX may deliver text in several chunks. */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        lastContents.append(ch, start, length);
    }

    /**
     * Tells whether a cell reference points into row 1, for any column width
     * ({@code "A1"}, {@code "AA1"}, ...).
     *
     * @param cellRef a cell reference made of column letters followed by a row number
     * @return {@code true} when the row part of the reference is exactly {@code "1"}
     */
    private static boolean isFirstRow(String cellRef) {
        int rowStart = 0;
        while (rowStart < cellRef.length() && !Character.isDigit(cellRef.charAt(rowStart))) {
            rowStart++;
        }
        return "1".equals(cellRef.substring(rowStart));
    }
}

 

 

 

2、 LargeExcelFileReadUtil

 用於解析數據量較大(8 萬條以上)的 Excel 文件:將文件內容解析爲「單元格座標 → 值」的形式存入 map 中。此方式速度快、內存消耗小,但只能讀取 Excel(.xlsx)文件。
 提供處理單個 sheet 的方法 processOneSheet(String filename),以及處理多個 sheet 的方法 processAllSheets(String filename)。
 只需傳入「文件路徑 + 文件名」即可;處理方法調用結束後,在同一個 LargeExcelFileReadUtil 實例上調用 getRowContents(),其返回值即爲解析後的數據。

package com.lee.poi.util;

import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.junit.Test;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;


/**
 * Streaming reader for large (80,000+ row) Excel workbooks.
 *
 * <p>The workbook is parsed with POI's event API and stored as a map from cell reference
 * (for example {@code "B7"}) to cell text. This is fast and uses little memory, but it can only
 * read Excel files.
 *
 * <p>Call {@link #processOneSheet(String)} or {@link #processAllSheets(String)} with the full
 * path of the file, then read the result from {@link #getRowContents()}.
 */
public class LargeExcelFileReadUtil {

    /** Notice that is cut off the end of the column-D value in {@link #test()}. */
    private static final String APPROVAL_NOTICE = "【依法須經批准的項目,經相關部門批准後方可開展經營活動】";

    /** Cell reference to cell text, filled by the most recent process call. */
    private LinkedHashMap<String, String> rowContents = new LinkedHashMap<String, String>();

    /** Handler installed by the most recent {@link #fetchSheetParser(SharedStringsTable)} call. */
    private SheetHandler sheetHandler;

    /**
     * Returns the parsed cells.
     *
     * @return the live map of cell reference to cell text (not a copy)
     */
    public LinkedHashMap<String, String> getRowContents() {
        return rowContents;
    }

    /**
     * Replaces the parsed-cells map.
     *
     * @param rowContents the map to expose through {@link #getRowContents()}
     */
    public void setRowContents(LinkedHashMap<String, String> rowContents) {
        this.rowContents = rowContents;
    }

    /**
     * Returns the handler created by the most recent parser setup.
     *
     * @return the current handler, or {@code null} if no parser has been created yet
     */
    public SheetHandler getSheetHandler() {
        return sheetHandler;
    }

    /**
     * Misspelled alias kept so existing callers keep compiling.
     *
     * @return the current handler, or {@code null} if no parser has been created yet
     * @deprecated use {@link #getSheetHandler()}
     */
    @Deprecated
    public SheetHandler getSheetHandlexr() {
        return getSheetHandler();
    }

    /**
     * Replaces the handler that receives parse events.
     *
     * @param sheetHandler the handler to use
     */
    public void setSheetHandler(SheetHandler sheetHandler) {
        this.sheetHandler = sheetHandler;
    }

    /**
     * Parses the sheet with relationship id {@code rId1} and publishes its cells through
     * {@link #getRowContents()}.
     *
     * @param filename full path of the workbook
     * @throws Exception if the workbook cannot be opened or parsed
     */
    public void processOneSheet(String filename) throws Exception {
        // NOTE(review): open(String) is called without an explicit access mode — confirm the
        // default is acceptable for a read-only use like this one.
        OPCPackage pkg = OPCPackage.open(filename);
        try {
            XSSFReader reader = new XSSFReader(pkg);
            XMLReader parser = fetchSheetParser(reader.getSharedStringsTable());
            InputStream sheetStream = reader.getSheet("rId1");
            try {
                parser.parse(new InputSource(sheetStream));
            } finally {
                // Close the stream before the package it was read from.
                sheetStream.close();
            }
            setRowContents(sheetHandler.getRowContents());
        } finally {
            pkg.close();
        }
    }

    /**
     * Parses every sheet of the workbook and publishes the cells through
     * {@link #getRowContents()}.
     *
     * <p>All sheets feed the same handler and the map is keyed by cell reference only, so a cell
     * in a later sheet replaces the value stored for the same reference in an earlier sheet.
     *
     * @param filename full path of the workbook
     * @throws Exception if the workbook cannot be opened or parsed
     */
    public void processAllSheets(String filename) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        try {
            XSSFReader reader = new XSSFReader(pkg);
            XMLReader parser = fetchSheetParser(reader.getSharedStringsTable());
            Iterator<InputStream> sheets = reader.getSheetsData();
            while (sheets.hasNext()) {
                System.out.println("Processing new sheet:\n");
                InputStream sheetStream = sheets.next();
                try {
                    parser.parse(new InputSource(sheetStream));
                } finally {
                    // Close every sheet stream, not only the last one.
                    sheetStream.close();
                }
            }
            // Publish the result, as processOneSheet does.
            setRowContents(sheetHandler.getRowContents());
        } finally {
            pkg.close();
        }
    }

    /**
     * Creates an XML reader whose content handler is a fresh {@link SheetHandler}; the handler is
     * also stored on this instance.
     *
     * @param sst the workbook's shared-strings table, passed to the new handler
     * @return the configured reader
     * @throws SAXException if the parser class cannot be instantiated
     */
    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        // NOTE(review): this names a JDK-internal parser class explicitly — confirm it is
        // available on every target runtime.
        XMLReader parser =
            XMLReaderFactory.createXMLReader(
                    "com.sun.org.apache.xerces.internal.parsers.SAXParser"
            );
        setSheetHandler(new SheetHandler(sst));
        parser.setContentHandler(sheetHandler);
        return parser;
    }

    /**
     * Manual smoke test: parses a workbook from a fixed local path, builds one {@code Entity} per
     * row from columns A-D, and inserts it when column D is reached.
     *
     * @throws Exception if parsing or inserting fails
     */
    @Test
    public void test() throws Exception {
        long startTime = System.currentTimeMillis();
        LargeExcelFileReadUtil example = new LargeExcelFileReadUtil();

        example.processOneSheet("C:\\Users\\Never Give Up\\Desktop\\導入文件\\2、24萬企業數據\\24萬企業數據.xlsx");
        long endTime = System.currentTimeMillis();

        int count = 0;
        String previousRow = "";
        Entity entity = null;

        for (Map.Entry<String, String> entry : example.getRowContents().entrySet()) {
            String pos = entry.getKey();
            // Split "AB12" into column "AB" and row "12" so multi-letter columns are handled.
            int rowStart = indexOfRowNumber(pos);
            String column = pos.substring(0, rowStart);
            String row = pos.substring(rowStart);
            String value = entry.getValue();

            if (!row.equals(previousRow)) {
                previousRow = row;
                count++;
                entity = new Entity();
            }

            if ("A".equals(column)) {
                entity.setCompanyName(value);
            } else if ("B".equals(column)) {
                entity.setMemberName(value);
            } else if ("C".equals(column)) {
                entity.setPhone(value);
            } else if ("D".equals(column)) {
                // Keep only the text in front of the approval notice, when it is present.
                int noticeAt = value.indexOf(APPROVAL_NOTICE);
                entity.setMainProducts(noticeAt >= 0 ? value.substring(0, noticeAt) : value);
                // Column D is the last one consumed, so the row is inserted here.
                ImportExcel.insertMember_2(entity);
            }
        }
        System.out.println("解析數據"+count+"條;耗時"+(endTime-startTime)/1000+"秒");
    }

    /**
     * Finds where the row number starts inside a cell reference.
     *
     * @param cellRef a cell reference made of column letters followed by a row number
     * @return index of the first digit, or the length of {@code cellRef} if it has none
     */
    private static int indexOfRowNumber(String cellRef) {
        int i = 0;
        while (i < cellRef.length() && !Character.isDigit(cellRef.charAt(i))) {
            i++;
        }
        return i;
    }
}

 

 

 

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章