import java.io.FileInputStream;
import java.io.OutputStreamWriter;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * SAX content handler that prints the chapter/topic outline of a book XML
 * document to standard output and buffers the non-blank text it encounters.
 *
 * <p>The buffered text is exposed through {@link #getContent()} so it can be
 * processed further (for example, inserted into a database). The buffer is
 * capped at roughly {@code MAX_BUFFERED_CHARS} characters: when appending a
 * new chunk would exceed the cap, the buffer is emptied first, so very large
 * documents do not exhaust memory.
 */
public class MySaxXml extends DefaultHandler {

    /** Upper bound on the number of characters kept in {@link #content}. */
    private static final int MAX_BUFFERED_CHARS = 30 * 1000;

    /** Locator handed over by the parser; stored but not read by this class. */
    private Locator locator;

    /** Number of {@code chapter} start tags seen so far. */
    private int index1 = 0;

    /** Number of {@code topic} start tags seen since the last {@code chapter} end tag. */
    private int index2 = 0;

    /** Accumulates the non-blank character data reported by the parser. */
    private StringBuffer content = new StringBuffer();

    /**
     * Handles text between tags, e.g. the {@code tttt} in {@code <xxx>tttt</xxx>}.
     *
     * <p>Whitespace-only chunks are ignored. Any other chunk is echoed to
     * standard output (untrimmed) between the fixed prefix and suffix strings
     * and appended to the internal buffer.
     *
     * @param ch     character array supplied by the parser
     * @param start  index of the first character of the chunk in {@code ch}
     * @param length number of characters in the chunk
     * @throws SAXException declared by the overridden method; not thrown here
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        String text = new String(ch, start, length);
        if (text.trim().length() == 0) {
            return;
        }
        // Print through System.out directly so the text is encoded the same
        // way as the surrounding literals.
        System.out.print("本書<<");
        System.out.print(text);

        // Bound the buffer by its length (capacity never shrinks after a
        // delete, so it cannot be used as the guard). The current chunk is
        // always kept; a single chunk larger than the cap is stored whole.
        if (content.length() + length > MAX_BUFFERED_CHARS) {
            content.setLength(0);
        }
        content.append(ch, start, length);

        System.out.println(">>的目錄");
    }

    /**
     * Returns the text currently held in the buffer.
     *
     * @return a snapshot of the buffered character data; never {@code null}
     */
    public String getContent() {
        return content.toString();
    }

    /**
     * Prints the end-of-parse message.
     *
     * @throws SAXException declared by the overridden method; not thrown here
     */
    @Override
    public void endDocument() throws SAXException {
        System.out.println("解析結束:");
    }

    /**
     * Handles an end tag. When a {@code chapter} element closes, the topic
     * counter is reset so topics of the next chapter are numbered from 1.
     *
     * @param uri       namespace URI of the element (unused)
     * @param localName local name of the element (unused)
     * @param qName     qualified name of the element, compared case-insensitively
     * @throws SAXException declared by the overridden method; not thrown here
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if (qName.equalsIgnoreCase("chapter")) {
            this.index2 = 0;
        }
    }

    /**
     * Stores the locator supplied by the parser.
     *
     * @param locator the document locator
     */
    @Override
    public void setDocumentLocator(Locator locator) {
        this.locator = locator;
    }

    /**
     * Prints the start-of-parse message.
     *
     * @throws SAXException declared by the overridden method; not thrown here
     */
    @Override
    public void startDocument() throws SAXException {
        System.out.println("解析開始: ");
    }

    /**
     * Handles a start tag. For {@code chapter} elements the chapter counter is
     * incremented and every {@code title} attribute is printed; for
     * {@code topic} elements the topic counter is incremented and every
     * {@code name} attribute is printed. Element and attribute names are
     * compared case-insensitively.
     *
     * @param uri       namespace URI of the element (unused)
     * @param localName local name of the element (unused)
     * @param qName     qualified name of the element
     * @param atts      attributes attached to the element
     * @throws SAXException declared by the overridden method; not thrown here
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes atts) throws SAXException {
        if (qName.equalsIgnoreCase("chapter")) {
            index1++;
            for (int i = 0; i < atts.getLength(); i++) {
                String attName = atts.getQName(i);
                if (attName.equalsIgnoreCase("title")) {
                    System.out.println("第" + index1 + "章:" + atts.getValue(i));
                }
            }
        }
        if (qName.equalsIgnoreCase("topic")) {
            index2++;
            for (int i = 0; i < atts.getLength(); i++) {
                String attName = atts.getQName(i);
                if (attName.equalsIgnoreCase("name")) {
                    System.out.println(" 第" + index2 + "部分:"
                            + atts.getValue(i));
                }
            }
        }
    }

    /**
     * Parses an XML file with a {@code MySaxXml} handler, which prints the
     * outline to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the XML file to
     *             parse. Defaults to {@code D:\book2.xml} when absent.
     * @throws SAXException if no XML reader can be created or parsing fails
     * @throws Exception    if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String vendorParserClass = "org.apache.xerces.parsers.SAXParser";
        String xmlURI = (args != null && args.length > 0) ? args[0] : "D:\\book2.xml";

        XMLReader reader;
        try {
            reader = XMLReaderFactory.createXMLReader(vendorParserClass);
        } catch (SAXException vendorUnavailable) {
            // The vendor parser could not be loaded or instantiated; use the
            // default reader instead of failing.
            reader = XMLReaderFactory.createXMLReader();
        }
        reader.setContentHandler(new MySaxXml());

        FileInputStream in = new FileInputStream(xmlURI);
        try {
            // Parsing triggers the callbacks overridden above.
            reader.parse(new InputSource(in));
        } finally {
            in.close();
        }
    }
}
/*
 * The XML document that corresponds to the handler above (book2.xml):
 *
 * <?xml version="1.0"?>
 * <book>
 *   <title>Java and XML</title>
 *   <contents>
 *     <chapter title="Introduction" number="1">
 *       <topic name="XML Matters"/>
 *       <topic name="What's Important"/>
 *       <topic name="The Essentials"/>
 *       <topic name="What's Next?"/>
 *     </chapter>
 *     <chapter title="Nuts and Bolts" number="2">
 *       <topic name="The Basics"/>
 *       <topic name="Constraints"/>
 *       <topic name="Transformations"/>
 *       <topic name="And More..."/>
 *       <topic name="What's Next?"/>
 *     </chapter>
 *     <chapter title="SAX" number="3">
 *       <topic name="Getting Prepared"/>
 *       <topic name="SAX Readers"/>
 *       <topic name="Content Handlers"/>
 *       <topic name="Gotcha!"/>
 *       <topic name="What's Next?"/>
 *     </chapter>
 *   </contents>
 * </book>
 */