lucene.net 2.0 中文分詞後語法高亮問題

 

lucene.net 2.0 中文分詞後語法高亮問題

文/birdshover  出處/博客園

Lucene.Net 2.0 的 src 包裏自帶了 Highlighter.Net-2.0.0,可以用來實現語法高亮(即在搜索結果中高亮顯示命中的關鍵詞)。

    //定義多條件搜索分析器
  BooleanQuery bquery = new BooleanQuery();
  //定義分詞器
  Analyzer analyzer  = new StandardAnalyzer();

  //多條件搜索拆分器
            MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "title", "content" }, analyzer);
            query = parser.Parse(key);
            bquery.Add(query, BooleanClause.Occur.MUST);
                
            DateTime now = DateTime.Now;
  //初始化搜索器
  //實現分佈式搜索
  List<IndexReader> indexreaders = new List<IndexReader>();
  string[] dirs = Directory.GetDirectories(dir);
  if (searchtype == SearchType.None)
  {
    foreach (string item in dirs)
    {
    //System.Web.HttpContext.Current.Response.Write(item);
    indexreaders.Add(IndexReader.Open(Path.Combine(Path.Combine(dir, item), "Save")));
    }
  }
  else
  {
    //System.Web.HttpContext.Current.Response.Write(searchtype.ToString());
    indexreaders.Add(IndexReader.Open(Path.Combine(Path.Combine(dir, searchtype.ToString()), "Save")));
  }

  MultiReader reader = new MultiReader(indexreaders.ToArray());
  indexSearcher = new IndexSearcher(reader);

  Hits hits = null;
  hits = indexSearcher.Search(bquery);
  timer = (DateTime.Now - now).TotalMilliseconds;

int count = hits.Length();

// Work out which hits belong to the requested page (10 hits per page).
// Both bounds are clamped so that a pageNo below 1 produces an empty page
// instead of a negative index being handed to hits.Doc in the display loop.
int start = Math.Max(0, (pageNo - 1) * 10);
int end = Math.Max(0, Math.Min(pageNo * 10, count));

// Highlighter setup: fragments are scored against the parsed query and the
// text is split by a SimpleFragmenter configured with a fragment size of 100.
Highlighter highlighter = new Highlighter(new QueryScorer(query));
highlighter.SetTextFragmenter(new SimpleFragmenter(100));

// Sentinel appended to every title before highlighting and stripped again afterwards;
// per the original author it keeps trailing special characters of the title from being dropped.
const string titleSentinel = "+aaaaaaaaa";

for (int i = start; i < end; i++)
{
    Lucene.Net.Documents.Document doc = hits.Doc(i);

    // Document.Get returns null when the document has no such field; fall back to an
    // empty string so that StringReader and the Length checks below cannot throw.
    string text = doc.Get("content") ?? string.Empty;
    string title = doc.Get("title") + titleSentinel;

    Lucene.Net.Analysis.TokenStream tokenStream = analyzer.TokenStream("content", new System.IO.StringReader(text));
    Lucene.Net.Analysis.TokenStream titleStream = analyzer.TokenStream("title", new System.IO.StringReader(title));

    // Body excerpt: up to 2 best fragments joined by "...".
    string result = highlighter.GetBestFragments(tokenStream, text, 2, "...");
    // NOTE(review): 0 is passed as the maximum fragment count for the title --
    // confirm the highlighter treats this as "at least one fragment".
    string tresult = highlighter.GetBestFragments(titleStream, title, 0, "..");

    // Strip the sentinel, but only when the highlighted title really ends with it.
    // Blindly cutting the last 10 characters would eat real title text whenever the
    // returned fragment does not reach the end of the title.
    if (!string.IsNullOrEmpty(tresult) && tresult.EndsWith(titleSentinel, StringComparison.Ordinal))
    {
        tresult = tresult.Substring(0, tresult.Length - titleSentinel.Length);
    }

    // Nothing was highlighted in the title: use the plain title without the sentinel.
    if (string.IsNullOrEmpty(tresult))
    {
        tresult = title.Substring(0, title.Length - titleSentinel.Length);
    }

    // Nothing was highlighted in the body: fall back to the first 100 characters.
    if (string.IsNullOrEmpty(result))
    {
        result = text.Length > 100 ? text.Substring(0, 100) : text;
    }

    // Mark the excerpt as truncated when it is shorter than the full text.
    if (result.Length < text.Length)
    {
        result = result + "...";
    }

    // result (body excerpt) and tresult (title) are ready for display here;
    // the visible snippet does not show how they are consumed.
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章