IKAnalyzer 中文分詞器:擴充中文詞庫
/**
 * Demo entry point: initializes the IK Analyzer dictionary from the default configuration,
 * prints the values returned by the configuration's main-dictionary and quantifier-dictionary
 * getters, checks whether one sample word matches in the main dictionary, and prints the
 * segmentation of a sample sentence.
 *
 * @param args command-line arguments (unused)
 * @throws IOException propagated from {@code queryWords}
 */
public static void main(String[] args) throws IOException {
    String s = "中文分詞工具包";
    // Class name corrected from the misspelled "DefualtConfig"; IK Analyzer ships DefaultConfig.
    Configuration cfg = DefaultConfig.getInstance(); // load the dictionary configuration
    cfg.setUseSmart(true); // enable smart segmentation
    Dictionary.initial(cfg);
    Dictionary dictionary = Dictionary.getSingleton();
    // Optional: register custom words at runtime instead of editing the dictionary files.
    // List<String> words = new ArrayList<String>();
    // words.add("基礎班");
    // words.add("高級會計實務");
    // dictionary.addWords(words); // add the custom words to the loaded dictionary
    System.out.println(cfg.getMainDictionary()); // system default dictionary
    // NOTE(review): "Dicionary" appears to be the spelling used by the IK Analyzer API itself;
    // confirm against the library version in use before renaming.
    System.out.println(cfg.getQuantifierDicionary());
    Hit hit = dictionary.matchInMainDict("基礎班".toCharArray());
    System.out.println(hit.isMatch());
    System.out.println(queryWords(s));
}
/**
 * Segments the given text with IK Analyzer and collects the text of each lexeme.
 *
 * @param query the text to segment; it is trimmed before being handed to the segmenter
 * @return the lexeme texts in the order the segmenter produced them
 * @throws IOException propagated from the segmenter
 */
public static List<String> queryWords(String query) throws IOException {
    // Second constructor argument: true selects smart segmentation, false fine-grained.
    IKSegmenter segmenter = new IKSegmenter(new StringReader(query.trim()), true);
    List<String> tokens = new ArrayList<String>();
    Lexeme current;
    while ((current = segmenter.next()) != null) {
        tokens.add(current.getLexemeText());
    }
    return tokens;
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人嗎?請在上方評論欄輸入並點擊發布。