|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
__author__='Hanxiaoyang'
import jieba#分词包
import numpy#numpy计算包
import codecs#codecs提供的open方法来指定打开的文件的语言编码,它会在读取的时候自动转换为内部unicode
import pandas#数据分析包
import matplotlib.pyplot as plt
from wordcloud import WordCloud#词云包
#2、导入文本
file=codecs.open(u"123.txt",'r')
content=file.read()
file.close()
segment=[]
segs=jieba.cut(content)#切词,“么么哒”才能出现
for seg in segs:
if len(seg)>1 and seg!='rn':
segment.append(seg)
#3、去停词(“多喝热水”等这类小细节可以在这里抹去)
words_df=pandas.DataFrame({'segment':segment})
words_df.head()
stopwords=pandas.read_csv("stopwords.txt",index_col=False,quoting=3,sep="t",names=['stopword'],encoding="utf8")
words_df=words_df[~words_df.segment.isin(stopwords.stopword)]
#4、统计词频(对话日常高频词)
words_stat = words_df.groupby(by='segment')['segment'].agg({"count":numpy.size})
words_stat = words_stat.reset_index().sort_values("count",ascending=False)
words_stat#打印统计结果
#5、%matplotlib inline
wordcloud=WordCloud(font_path="simhei.ttf",background_color="black")
#wordcloud=wordcloud.fit_words(words_stat.head(1000).itertuples(index=False))
wordcloud = wordcloud.fit_words(words_stat.head(1000).itertuples)
plt.imshow(wordcloud)
plt.show()
就在这一步报错了,帮忙看看什么原因导致。谢谢 错误信息如下:(使用的事python 2.7.14,使用工具:ipython notebook)
AttributeError Traceback (most recent call last)
<ipython-input-18-9ef3b33eba24> in <module>()
2 wordcloud=WordCloud(font_path="simhei.ttf",background_color="black")
3 #wordcloud=wordcloud.fit_words(words_stat.head(1000).itertuples(index=False))
----> 4 wordcloud = wordcloud.fit_words(words_stat.head(1000).itertuples)
5
6 plt.imshow(wordcloud)
c:\python\python27\lib\site-packages\wordcloud\wordcloud.pyc in fit_words(self, frequencies)
329 self
330 """
--> 331 return self.generate_from_frequencies(frequencies)
332
333 def generate_from_frequencies(self, frequencies, max_font_size=None):
c:\python\python27\lib\site-packages\wordcloud\wordcloud.pyc in generate_from_frequencies(self, frequencies, max_font_size)
348 """
349 # make sure frequencies are sorted and normalized
--> 350 frequencies = sorted(frequencies.items(), key=item1, reverse=True)
351 if len(frequencies) <= 0:
352 raise ValueError("We need at least 1 word to plot a word cloud, "
AttributeError: 'function' object has no attribute 'items'
|
|