使用 SAX 解析 XML

import java.io.FileInputStream;
import java.io.OutputStreamWriter;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * SAX content handler that prints the table of contents of a book XML document.
 *
 * <p>Chapter titles and topic names are read from element attributes and printed
 * with running numbers. Non-blank text found between tags is echoed to standard
 * output and accumulated in an in-memory buffer. The buffer is emptied whenever
 * appending the next chunk would push it past {@code MAX_BUFFERED_CHARS}; that is
 * the point where an application could persist the accumulated text in segments
 * (for example to a database) so that very large files do not exhaust memory.
 */
public class MySaxXml extends DefaultHandler {

	/** Upper bound, in characters, for the text kept in {@link #content}. */
	private static final int MAX_BUFFERED_CHARS = 30 * 1000;

	/** File parsed by {@link #main(String[])} when no path is given. */
	private static final String DEFAULT_XML_PATH = "D:\\book2.xml";

	/** Preferred parser implementation; only used when it is on the classpath. */
	private static final String VENDOR_PARSER_CLASS = "org.apache.xerces.parsers.SAXParser";

	// Stored by setDocumentLocator; not read anywhere else in this class.
	private Locator locator;
	// Running chapter number.
	private int index1 = 0;
	// Running topic number; reset to 0 at the end of every chapter.
	private int index2 = 0;
	// Test storage for the character data seen so far.
	private final StringBuffer content = new StringBuffer();

	/**
	 * Handles text between tags, e.g. {@code <xxx>tttt</xxx>}.
	 *
	 * <p>Chunks that are blank after trimming are ignored. Any other chunk is
	 * printed untrimmed between the prefix and suffix markers and then appended
	 * to the buffer.
	 *
	 * @param ch     character array supplied by the parser
	 * @param start  index of the first character of the chunk
	 * @param length number of characters in the chunk
	 * @throws SAXException never thrown here; declared by the overridden method
	 */
	@Override
	public void characters(char[] ch, int start, int length)
			throws SAXException {
		String text = new String(ch, start, length);
		if (text.trim().length() == 0) {
			return;
		}
		// Print through System.out only, so prefix, text and suffix share one
		// encoder and cannot be written with different charsets.
		System.out.print("本書<<");
		System.out.print(text);
		// Compare the used length, not the capacity: capacity never shrinks after
		// a reset, so a capacity test would discard every later chunk.
		if (content.length() + length > MAX_BUFFERED_CHARS) {
			content.setLength(0);
		}
		// Always keep the current chunk, including the one that caused a reset.
		content.append(ch, start, length);
		System.out.println(">>的目錄");
	}

	/** Prints a marker when the parser reports the end of the document. */
	@Override
	public void endDocument() throws SAXException {
		System.out.println("解析結束:");
	}

	/**
	 * Handles a closing tag. A closing tag marks a complete record, so further
	 * processing of that record could be done here.
	 *
	 * @param uri       namespace URI of the element
	 * @param localName local name of the element
	 * @param qName     qualified name of the element
	 */
	@Override
	public void endElement(String uri, String localName, String qName)
			throws SAXException {
		// Topic numbering restarts in each chapter.
		if (qName.equalsIgnoreCase("chapter")) {
			this.index2 = 0;
		}
	}

	/** Stores the locator handed over by the parser. */
	@Override
	public void setDocumentLocator(Locator locator) {
		this.locator = locator;
	}

	/** Prints a marker when the parser reports the start of the document. */
	@Override
	public void startDocument() throws SAXException {
		System.out.println("解析開始: ");
	}

	/**
	 * Handles an opening tag: numbers {@code chapter} and {@code topic} elements
	 * and prints their {@code title} / {@code name} attribute.
	 *
	 * @param uri       namespace URI of the element
	 * @param localName local name of the element
	 * @param qName     qualified name of the element, matched case-insensitively
	 * @param atts      attributes of the element
	 */
	@Override
	public void startElement(String uri, String localName, String qName,
			Attributes atts) throws SAXException {
		if (qName.equalsIgnoreCase("chapter")) {
			index1++;
			printAttribute(atts, "title", "第" + index1 + "章:");
		}

		if (qName.equalsIgnoreCase("topic")) {
			index2++;
			printAttribute(atts, "name", "     第" + index2 + "部分:");
		}
	}

	/** Prints {@code label + value} for every attribute whose qualified name matches, ignoring case. */
	private static void printAttribute(Attributes atts, String wanted, String label) {
		for (int i = 0; i < atts.getLength(); i++) {
			if (wanted.equalsIgnoreCase(atts.getQName(i))) {
				System.out.println(label + atts.getValue(i));
			}
		}
	}

	/** Creates the preferred vendor parser, or the platform default if it cannot be loaded. */
	private static XMLReader createReader() throws SAXException {
		try {
			return XMLReaderFactory.createXMLReader(VENDOR_PARSER_CLASS);
		} catch (SAXException vendorUnavailable) {
			return XMLReaderFactory.createXMLReader();
		}
	}

	/**
	 * Parses an XML file with this handler.
	 *
	 * @param args optional; {@code args[0]} is the path of the XML file, otherwise
	 *             the built-in default path is used
	 * @throws SAXException if the document cannot be parsed
	 * @throws Exception    if the file cannot be opened or read
	 */
	public static void main(String[] args) throws Exception {
		String xmlPath = (args != null && args.length > 0) ? args[0] : DEFAULT_XML_PATH;
		XMLReader reader = createReader();
		reader.setContentHandler(new MySaxXml());
		FileInputStream in = new FileInputStream(xmlPath);
		try {
			// Parsing calls back into the overridden handler methods above.
			reader.parse(new InputSource(in));
		} finally {
			in.close();
		}
	}
}

下面是上述程式所解析的 XML 文件(book2.xml):

<?xml version="1.0"?>
<book>
    <title>Java and XML</title> 
    <contents>
        <chapter title="Introduction" number="1">
            <topic name="XML Matters"/> 
            <topic name="What's Important"/>
            <topic name="The Essentials"/>
            <topic name="What&apos;s Next?"/>
        </chapter>
        <chapter title="Nuts and Bolts" number="2">
            <topic name="The Basics"/>
            <topic name="Constraints"/>
            <topic name="Transformations"/>
            <topic name="And More..."/>
            <topic name="What&apos;s Next?"/>
        </chapter>
        <chapter title="SAX" number="3">
            <topic name="Getting Prepared"/>
            <topic name="SAX Readers"/>
            <topic name="Content Handlers"/>
            <topic name="Gotcha!"/>
            <topic name="What&apos;s Next?"/>
        </chapter>       
    </contents>
</book>

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章