# 一、对中国十九大报告做词频分析
from collections import Counter


def count_words(tokens):
    """Count how often each token occurs, skipping single-character tokens.

    Args:
        tokens: Iterable of strings, e.g. the list returned by ``jieba.lcut``.

    Returns:
        A ``Counter`` mapping each kept token to its number of occurrences,
        in first-seen order.
    """
    # Tokens of length 1 are dropped, exactly as the original loop did
    # (presumably to filter punctuation and particles).
    return Counter(token for token in tokens if len(token) != 1)


def format_top_words(counts, limit=15):
    """Return one display line per word for the ``limit`` most frequent words.

    Args:
        counts: Mapping of word -> occurrence count.
        limit: Maximum number of lines to return.

    Returns:
        A list of strings, most frequent word first. Words with equal counts
        keep the order in which they were first counted. If there are fewer
        than ``limit`` words, all of them are returned instead of raising
        ``IndexError``.
    """
    ranked = Counter(counts).most_common(limit)
    # Word left-aligned in 10 columns, count right-aligned in 5 columns.
    return ["{0:<10}{1:>5}".format(word, count) for word, count in ranked]


def print_top_words(path="中国十九大报告. txt.txt", limit=15):
    """Segment the UTF-8 text file at ``path`` and print its top words.

    Args:
        path: File to analyse.
            NOTE(review): the default is the original literal, kept
            byte-for-byte; the ". txt.txt" suffix looks garbled -- confirm
            the real file name.
        limit: How many of the most frequent words to print.
    """
    # Imported here so the counting helpers above stay usable (and testable)
    # on machines where the third-party jieba package is not installed.
    import jieba

    # The context manager closes the file even if reading fails.
    with open(path, "r", encoding="utf-8") as report:
        text = report.read()

    for line in format_top_words(count_words(jieba.lcut(text)), limit):
        print(line)


if __name__ == "__main__":
    print_top_words()
# 二、根据词频制作词云
import jieba
import wordcloud
from imageio import imread


def build_wordcloud(
    text_path="中共十九大报告词频. txt",
    mask_path="wujiaoxing.png",
    output_path="grwordcloud.png",
    font_path="msyh.ttc",
):
    """Render a word cloud image from a UTF-8 text file.

    The text is segmented with jieba, drawn inside the shape given by the
    mask image, and written to ``output_path``.

    Args:
        text_path: Text file to read.
            NOTE(review): the default is the original literal, kept
            byte-for-byte; the space before "txt" looks garbled -- confirm
            the real file name.
        mask_path: Image file passed to ``WordCloud`` as its ``mask``.
        output_path: Where the rendered image is saved.
        font_path: Font file handed to ``WordCloud``.

    Returns:
        The ``WordCloud`` object after generation.
    """
    mask = imread(mask_path)

    # The context manager closes the file even if reading fails.
    with open(text_path, "r", encoding="utf-8") as source:
        text = source.read()

    # Join the jieba tokens with spaces; presumably this gives
    # WordCloud.generate() the word boundaries that Chinese text lacks.
    spaced_text = " ".join(jieba.lcut(text))

    cloud = wordcloud.WordCloud(
        font_path=font_path,
        mask=mask,
        # NOTE(review): the wordcloud docs say width/height are ignored when
        # a mask is supplied -- kept as in the original; confirm and drop.
        width=1000,
        height=800,
        background_color="black",
    )
    cloud.generate(spaced_text)
    cloud.to_file(output_path)
    return cloud


if __name__ == "__main__":
    build_wordcloud()
来源: http://www.bubuko.com/infodetail-3497367.html