// Build and run full-text search queries against the Lucene index.
@Repository
@Transactional
public class SearcherJpa implements Serializable, SearcherDAO {
private static final long serialVersionUID = 1L;
public List<Attachment> do_search(String path, String keyword,
String type, int start, int end) {
List<Attachment> list = null;
Attachment att = null;
PropertiesHelper p = null;
try {
p = new PropertiesHelper(path);
String IndexDir = p.getValue("IndexDir");
Directory dir = FSDirectory.open(new File(IndexDir));
IndexReader reader = IndexReader.open(dir);
// TermEnum termEnum = reader.terms();
IndexSearcher is = new IndexSearcher(reader);
is.setDefaultFieldSortScoring(true, false);
Query query = null;
if (type != "" && type != null) {
String[] fields = { LuceneUtil.FILE_CONTENT,
LuceneUtil.FILE_TYPE };
String[] fields_value = { keyword, type };
// MultiFieldQueryParser mquery = new
// MultiFieldQueryParser(LuceneUtil.VERSION_CURRENT, fields,
// LuceneUtil.ANALYZER_CURRENT);
// QueryFilter qf = new QueryFilter(tq);
query = MultiFieldQueryParser
.parse(LuceneUtil.VERSION_CURRENT, fields_value,
fields, new BooleanClause.Occur[] {
Occur.MUST, Occur.MUST },
LuceneUtil.ANALYZER_CURRENT);
} else {
QueryParser queryParser = new QueryParser(
LuceneUtil.VERSION_CURRENT,
LuceneUtil.FILE_CONTENT,
LuceneUtil.ANALYZER_CURRENT);
query = queryParser.parse(keyword);
}
// QueryParser queryParser = new
// QueryParser(LuceneUtil.VERSION_CURRENT,
// LuceneUtil.FILE_CONTENT, LuceneUtil.ANALYZER_CURRENT);
// Term tm = new Term(LuceneUtil.FILE_TYPE,type);
// TermQuery tq = new TermQuery(tm);
// Filter f = new Filter();
TopDocs hits = is.search(query, 10000, Sort.RELEVANCE);
SimpleHTMLFormatter shf = new SimpleHTMLFormatter(
"<em><strong>", "</strong></em>");
Highlighter hl = new Highlighter(shf, new QueryScorer(query));
// hl.setTextFragmenter(new
// SimpleFragmenter(Integer.MAX_VALUE));
hl.setTextFragmenter(new SimpleFragmenter(200));
// TopScoreDocCollector results =
// TopScoreDocCollector.create(11, false);
// is.search(query, results);
// TopDocs tds = results.topDocs(1, 10);
// ScoreDoc[] sd = tds.scoreDocs;
list = new ArrayList<Attachment>();
for (int i = start; i < end && i < hits.scoreDocs.length; i++) {
System.out.println("start: " + start + " end: " + end);
att = new Attachment();
ScoreDoc sdoc = hits.scoreDocs[i];
Document doc = is.doc(sdoc.doc);
System.out.println(doc.get(LuceneUtil.FILE_NAME));
System.out.println(doc.get(LuceneUtil.FILE_TYPE));
System.out.println(doc.get(LuceneUtil.FILE_PATH));
System.out.println(doc.get(LuceneUtil.FILE_DATE));
att.setFileid(sdoc.doc);
att.setFilename(doc.get(LuceneUtil.FILE_NAME));
att.setFiletype(doc.get(LuceneUtil.FILE_TYPE));
att.setFilepath(doc.get(LuceneUtil.FILE_PATH));
att.setFiledate(doc.get(LuceneUtil.FILE_DATE));
String str = hl.getBestFragment(
LuceneUtil.ANALYZER_CURRENT,
LuceneUtil.FILE_CONTENT,
doc.get(LuceneUtil.FILE_CONTENT));
att.setHitword(str);
att.setFilescore(sdoc.score);
list.add(att);
// TokenStream tokenStream =
// LuceneUtil.ANALYZER_CURRENT.tokenStream(fieldName,new
// StringReader(text));
// TermPositionVector tpv = (TermPositionVector)
// IndexReader.getTermFreqVector(id, "content");
// TokenStream tokenStream =
// TokenSources.getTokenStream(tpv);
// tokenStream.startOffset();
// tokenStream.next().endOffset();
System.out.println(doc.get(LuceneUtil.FILE_CONTENT));
System.out.println(doc.toString());
System.out.println(doc.getBoost());
System.out.println(sdoc.toString());
System.out.println("doc: " + sdoc.doc + " score: "
+ sdoc.score + " shardIndex: " + sdoc.shardIndex);
}
} catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
} catch (InvalidTokenOffsetsException e) {
e.printStackTrace();
}
return list;
}
@Override
public int getCount(String path, String keyword, String type) {
PropertiesHelper p = null;
TopDocs hits = null;
try {
p = new PropertiesHelper(path);
String IndexDir = p.getValue("IndexDir");
Directory dir = FSDirectory.open(new File(IndexDir));
IndexReader reader = IndexReader.open(dir);
IndexSearcher is = new IndexSearcher(reader);
is.setDefaultFieldSortScoring(true, false);
Query query = null;
if (type != "" && type != null) {
String[] fields = { LuceneUtil.FILE_CONTENT,
LuceneUtil.FILE_TYPE };
String[] fields_value = { keyword, type };
// MultiFieldQueryParser mquery = new
// MultiFieldQueryParser(LuceneUtil.VERSION_CURRENT, fields,
// LuceneUtil.ANALYZER_CURRENT);
// QueryFilter qf = new QueryFilter(tq);
query = MultiFieldQueryParser
.parse(LuceneUtil.VERSION_CURRENT, fields_value,
fields, new BooleanClause.Occur[] {
Occur.MUST, Occur.MUST },
LuceneUtil.ANALYZER_CURRENT);
} else {
QueryParser queryParser = new QueryParser(
LuceneUtil.VERSION_CURRENT,
LuceneUtil.FILE_CONTENT,
LuceneUtil.ANALYZER_CURRENT);
query = queryParser.parse(keyword);
}
// QueryParser queryParser = new
// QueryParser(LuceneUtil.VERSION_CURRENT,
// LuceneUtil.FILE_CONTENT, LuceneUtil.ANALYZER_CURRENT);
// Query query = queryParser.parse(keyword);
hits = is.search(query, 10000, Sort.RELEVANCE);
} catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
}
return hits.scoreDocs.length;
}
}
// NOTE(review): the following lines are blog-page residue (article title and
// comment-section boilerplate, translated below) that was pasted in after the
// class; left as comments so the file compiles.
// "Research and use of the full-text search engine Lucene (part 2)"
// "Post a comment" / "All comments"
// "No comments yet — want to be the first? Type in the box above and click publish."