# 選擇喜歡的小說,統計出場人物詞頻排名
from collections import Counter

import jieba
# Words removed from the tally before ranking. Presumably these are frequent
# tokens that are not character names (titles, places, common verbs, and
# segmentations such as "孔明曰"); extend the set as needed.
excludes = {
    "人馬", "都督", "後主", "軍馬", "主公", "孔明曰", "左右", "東吳",
    "於是", "知道", "衆將", "大喜", "二人", "玄德曰", "天下",
    "軍士", "引兵", "陛下", "次日", "丞相", "如此", "商議", "魏兵",
    "只見", "今日", "卻說", "不是", "將軍", "不可", "不能", "荊州",
    "不知", "這個", "如何", "一人", "漢中", "蜀兵", "不敢", "大叫",
}


def count_words(words, excluded=()):
    """Tally how often each word occurs in *words*.

    Args:
        words: Iterable of string tokens (e.g. the list from ``jieba.lcut``).
        excluded: Iterable of words to drop from the result.

    Returns:
        A ``Counter`` mapping word -> occurrences. Tokens whose length is
        exactly 1 are skipped, and every word in *excluded* is removed.
    """
    # Skip single-character segmentation results.
    counts = Counter(word for word in words if len(word) != 1)
    for word in excluded:
        # pop() with a default instead of del: an excluded word may simply
        # not occur in the text, which must not raise KeyError.
        counts.pop(word, None)
    return counts


def top_words(counts, limit=15):
    """Return up to *limit* ``(word, count)`` pairs, most frequent first.

    Ties keep the order in which the words appear in *counts* (the sort is
    stable). Fewer than *limit* pairs are returned when *counts* is small.
    """
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    # Slicing, unlike indexing 0..limit-1, tolerates short or empty input.
    return ranked[:limit]


def main():
    """Read the novel, segment it with jieba and print the 15 top words."""
    # NOTE(review): assumes the text file is saved as UTF-8; change the
    # encoding argument if the file uses a different one.
    with open("三國演義.txt", "r", encoding="utf-8") as f:
        txt = f.read()
    words = jieba.lcut(txt)
    for word, count in top_words(count_words(words, excludes)):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()
Python統計小說中出場人物詞頻
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.