使用高亮顯示時,需要單獨引入lucene-highlighter-3.0.0.jar。這個jar包在目錄lucene-3.0.0\contrib\highlighter中,把它複製到項目的lib文件夾,並在Java Build Path中添加其引用。
需要導入的類(位於org.apache.lucene.search.highlight包)有:
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
使用Highlighter的搜索代碼:
/**
 * Interactive search demo with highlighted summaries.
 *
 * <p>Reads one line of keywords from standard input, parses it against the
 * "content" field with a {@code SmartChineseAnalyzer}, fetches the top 10 hits
 * from the index at {@code strIndexPath}, and for each hit prints the stored
 * "URL" and "title" fields followed by a highlighted fragment of the page body.
 * The page body is not read from the index; it is looked up in the {@code page}
 * table by the MD5 of the URL.
 *
 * <p>Any failure (index access, query parsing, database access, highlighting)
 * is reported via a stack trace and ends the search; nothing is thrown to the
 * caller.
 */
public void searchProgram() {
    System.out.println("請輸入關鍵字: ");
    // Deliberately not closed: closing the Scanner would also close System.in.
    Scanner scanner = new Scanner(System.in);
    if (!scanner.hasNextLine()) {
        // End of input reached before a keyword was typed.
        return;
    }
    String queryStr = scanner.nextLine();
    System.out.println("您輸入的關鍵字是: " + queryStr);
    if (queryStr.trim().length() == 0) {
        // A blank line carries no search terms, so there is nothing to look up.
        return;
    }

    IndexSearcher searcher = null;
    try {
        searcher = new IndexSearcher(FSDirectory.open(new File(strIndexPath)));
        String strField = "content";
        // One analyzer instance serves both query parsing and highlighting.
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, strField, analyzer);
        Query query = parser.parse(queryStr);
        System.out.println(query.toString());
        TopDocs hits = searcher.search(query, 10);
        ScoreDoc[] scoreDoc = hits.scoreDocs;

        // Highlighter setup. The fragment size bounds the length of the context
        // shown around the matched keywords, because returning the whole page
        // body is not practical.
        int fragmentSize = 100;
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
                "<read>", "</read>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));

        // NOTE(review): the connection opened here is never released in this
        // method; DBConfigure's close API is not visible from here - confirm
        // and release it once the loop is done.
        DBConfigure dbConfigure = new DBConfigure();
        dbConfigure.OpenConn();

        for (int index = 0; index < scoreDoc.length; index++) {
            Document doc = searcher.doc(scoreDoc[index].doc);
            String url = doc.getField("URL").stringValue();
            System.out.println(url);
            System.out.println(doc.getField("title").stringValue());

            // Look up the page body in the database by the MD5 of the URL.
            String URL_MD5 = UrlToFilePath.strMd5(url.trim(), 1);
            String sql = "select content from page where url_md5='"
                    + URL_MD5 + "'";

            // Reset per hit so a missing row never reuses the previous hit's text.
            String content = "";
            ResultSet rs = dbConfigure.executeQuery(sql);
            try {
                if (rs != null && rs.next()) {
                    String stored = rs.getString("content");
                    if (stored != null) {
                        // A SQL NULL would otherwise break StringReader below.
                        content = stored;
                    }
                }
            } finally {
                if (rs != null) {
                    rs.close();
                }
            }

            // Highlighted summary.
            TokenStream tokenStream = analyzer.tokenStream("token",
                    new StringReader(content));
            String fragment = highlighter.getBestFragment(tokenStream, content);
            if (fragment == null) {
                // No query term occurs in the body: show its beginning instead
                // of printing the literal "null".
                fragment = content.substring(0, Math.min(fragmentSize, content.length()));
            }
            System.out.println(fragment);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (searcher != null) {
            try {
                // Releases the index reader the searcher opened.
                searcher.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
// NOTE(review): the statements below repeat the body of searchProgram() shown
// above, but without the method signature line; the closing brace at the end
// therefore has no matching opener within this fragment.
// Prompt for a keyword line on standard input and echo it back.
System.out.println("請輸入關鍵字: ");
Scanner scanner = new Scanner(System.in);
String queryStr = scanner.nextLine();
System.out.println("您輸入的關鍵字是: " + queryStr);
try {
// Open the index at strIndexPath and parse the input against the "content" field.
IndexSearcher searcher = new IndexSearcher(FSDirectory
.open(new File(strIndexPath)));
String strField = "content";
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,
strField, new SmartChineseAnalyzer(Version.LUCENE_CURRENT));
Query query = parser.parse(queryStr);
System.out.println(query.toString());
// Keep only the top 10 hits.
TopDocs hits = searcher.search(query, 10);
ScoreDoc[] scoreDoc = hits.scoreDocs;
// Highlighter setup
Highlighter highlighter = null;
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
"<read>", "</read>");
highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
// The 100 sets the length of the context kept around the keywords; adjust it as needed, since returning the whole page body is not practical
highlighter.setTextFragmenter(new SimpleFragmenter(100));
Document doc;
DBConfigure dbConfigure = new DBConfigure();
dbConfigure.OpenConn();
ResultSet rs = null;
// Declared outside the loop: when a hit has no matching row, the value from the previous hit is still in this variable.
String content = "";
for (int index = 0; index < scoreDoc.length; index++) {
doc = searcher.doc(scoreDoc[index].doc);
System.out.println(doc.getField("URL").stringValue());
System.out.println(doc.getField("title").stringValue());
// Fetch the page body from the database by the MD5 of the URL
String URL_MD5 = UrlToFilePath.strMd5(doc.getField("URL")
.stringValue().trim(), 1);
String sql = "select content from page where url_md5='"
+ URL_MD5 + "'";
rs = dbConfigure.executeQuery(sql);
if (rs.next()) {
content = rs.getString("content");
}
// Highlighted summary
TokenStream tokenStream = new SmartChineseAnalyzer(Version.LUCENE_CURRENT).tokenStream("token",
new StringReader(content));
System.out.println(highlighter.getBestFragment(tokenStream, content));
}
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}