| 
 | 
 
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册  
 
x
 
先说问题: 
一直报错, 
ImportError: cannot import name 'Lda2Vec' from 'gensim.models' (C:\ProgramData\anaconda3\lib\site-packages\gensim\models\__init__.py) 
ModuleNotFoundError: No module named 'pyLDAvis' 
 
具体经过: 
最近想通过python做主题分析,一开始使用的LDA进行分析,发现结果很差不是自己想要的。代码和结果如下: 
 
import jieba 
from gensim import corpora, models 
from gensim.models import CoherenceModel 
import matplotlib.pyplot as plt 
 
# Load the stop-word list (one entry per line) into a set so that the
# per-token membership test below is O(1). 'utf-8-sig' decodes plain UTF-8
# identically but also drops a leading BOM, which would otherwise corrupt the
# first stop word.
stopwords_path = 'C:\\Users\\11564\\Desktop\\实战\\scu_stopwords.txt'
with open(stopwords_path, 'r', encoding='utf-8-sig') as f:
    stopwords = {line.strip() for line in f}

# Read the raw text.
text_path = 'C:\\Users\\11564\\Desktop\\实战\\滑雪场.txt'
with open(text_path, 'r', encoding='utf-8-sig') as f:
    text = f.read()

# Segment the text and remove noise.
# LDA learns topics from how words co-occur across *documents*; feeding the
# whole file as a single document makes every topic look the same. Each
# non-empty line is therefore treated as one document
# (assumes one review per line -- TODO confirm against the data file).
# Tokens are stripped and must contain at least one letter/digit/CJK character,
# which removes "\n", " " and punctuation that jieba emits as tokens.
documents = []
for line in text.splitlines():
    tokens = [
        token
        for token in (raw.strip() for raw in jieba.cut(line))
        if token
        and token not in stopwords
        and any(ch.isalnum() for ch in token)
    ]
    if tokens:
        documents.append(tokens)

if not documents:
    raise ValueError('No tokens left after preprocessing: ' + text_path)

# Flat token list, kept under its original name.
words = [token for doc in documents for token in doc]

# Convert the tokenised documents to a bag-of-words corpus.
dictionary = corpora.Dictionary(documents)
corpus = [dictionary.doc2bow(doc) for doc in documents]

# Pick the number of topics by c_v coherence (higher is better).
# Note: this is a coherence score, not perplexity.
topic_counts = range(1, 20)
coherence_values = []
model_list = []
for num_topics in topic_counts:
    # A fixed random_state makes repeated runs comparable.
    model = models.ldamodel.LdaModel(
        corpus=corpus,
        id2word=dictionary,
        num_topics=num_topics,
        random_state=42,
    )
    model_list.append(model)
    coherence_model_lda = CoherenceModel(model=model, texts=documents, dictionary=dictionary, coherence='c_v')
    coherence_lda = coherence_model_lda.get_coherence()
    coherence_values.append(coherence_lda)

# Select the model with the highest coherence. The topic count is read back
# from the range itself so it stays correct if the range start changes.
best_coherence = max(coherence_values)
best_index = coherence_values.index(best_coherence)
best_model = model_list[best_index]
best_num_topics = topic_counts[best_index]

# Plot coherence against the number of topics.
x = topic_counts
plt.plot(x, coherence_values)
plt.xlabel('Number of Topics')
plt.ylabel('Coherence Score')
plt.show()

# Print the best topic count and the top words of each topic.
print('The best number of topics: ', best_num_topics)
topics = best_model.print_topics(num_words=10)
for topic in topics:
    print(topic)
 
输出结果: 
The best number of topics:  14 
(0, '0.046*"," + 0.020*"\n" + 0.019*"的" + 0.013*"。" + 0.007*"去" + 0.007*"不错" + 0.006*"好" + 0.006*"玩" + 0.005*"有" + 0.005*"!"') 
(1, '0.097*"," + 0.043*"的" + 0.036*"。" + 0.033*"\n" + 0.013*"不错" + 0.013*"去" + 0.012*"好" + 0.012*"有" + 0.011*"玩" + 0.009*"是"') 
(2, '0.070*"," + 0.028*"的" + 0.024*"\n" + 0.015*"。" + 0.007*"也" + 0.007*"好" + 0.007*"去" + 0.006*"有" + 0.006*"!" + 0.006*"人"') 
(3, '0.065*"," + 0.032*"的" + 0.026*"\n" + 0.018*"。" + 0.010*"不错" + 0.009*"也" + 0.009*"好" + 0.009*"去" + 0.007*"有" + 0.006*"玩"') 
(4, '0.108*"," + 0.050*"的" + 0.028*"\n" + 0.027*"。" + 0.011*"好" + 0.010*"去" + 0.009*"!" + 0.009*"玩" + 0.009*"也" + 0.008*"教练"') 
(5, '0.071*"," + 0.029*"的" + 0.026*"\n" + 0.021*"。" + 0.009*"去" + 0.008*"不错" + 0.008*"玩" + 0.007*"也" + 0.007*"好" + 0.006*"!"') 
(6, '0.077*"," + 0.041*"的" + 0.036*"\n" + 0.027*"。" + 0.011*"不错" + 0.010*"好" + 0.009*"人" + 0.009*"去" + 0.009*"有" + 0.009*"!"') 
(7, '0.048*"," + 0.020*"的" + 0.016*"\n" + 0.015*"。" + 0.007*"去" + 0.006*"好" + 0.005*"教练" + 0.005*"也" + 0.005*"玩" + 0.004*" "') 
(8, '0.068*"," + 0.032*"的" + 0.021*"\n" + 0.017*"。" + 0.007*"不错" + 0.007*"去" + 0.006*"好" + 0.006*"玩" + 0.006*"也" + 0.005*"都"') 
(9, '0.037*"," + 0.015*"的" + 0.012*"\n" + 0.009*"。" + 0.005*"好" + 0.005*"也" + 0.004*"不错" + 0.004*"去" + 0.004*"!" + 0.004*"有"') 
(10, '0.135*"," + 0.057*"的" + 0.043*"\n" + 0.031*"。" + 0.017*"好" + 0.013*"不错" + 0.013*"!" + 0.013*"去" + 0.013*"也" + 0.012*"玩"') 
(11, '0.123*"," + 0.040*"\n" + 0.035*"的" + 0.033*"。" + 0.016*"不错" + 0.011*"好" + 0.011*"也" + 0.010*"玩" + 0.009*"!" + 0.009*"去"') 
(12, '0.053*"," + 0.035*"的" + 0.022*"\n" + 0.019*"。" + 0.008*"去" + 0.007*"玩" + 0.007*"不错" + 0.007*"好" + 0.006*"也" + 0.006*"是"') 
(13, '0.065*"," + 0.030*"\n" + 0.029*"的" + 0.024*"。" + 0.010*"去" + 0.008*"有" + 0.007*"玩" + 0.007*"好" + 0.007*"不错" + 0.007*"是"') 
 
 
 
结果很差,然后可视化也不是自己想要的图。于是,换成了: 
import gensim 
import os 
from gensim.models import Lda2Vec 
from gensim.models.ldamulticore import LdaMulticore 
from gensim.models.word2vec import LineSentence 
from gensim.utils import simple_preprocess 
from gensim.parsing.preprocessing import STOPWORDS 
from gensim.corpora import Dictionary 
from gensim.models.coherencemodel import CoherenceModel 
import pyLDAvis.gensim_models 
import logging 
 
# Log INFO-level messages with a timestamp and level prefix.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Input locations: the folder whose files are read line by line, and the
# stop-word list.
# NOTE(review): the first script in this post reads 'scu_stopwords.txt' (with
# an 's'); confirm which file name actually exists on disk.
input_folder = "C:/Users/11564/Desktop/实战/滑雪场"
stopwords_file = "C:/Users/11564/Desktop/实战/scu_stopword.txt"

# Load the stop words (one per line) into a set: they are only used for
# membership tests, which are O(1) on a set. Entries are stripped so trailing
# whitespace cannot prevent a match, and 'utf-8-sig' drops a leading BOM that
# would otherwise corrupt the first entry.
with open(stopwords_file, 'r', encoding='utf-8-sig') as f:
    stopwords = {line.strip() for line in f}
 
# Tokenise one piece of text and drop stop words, punctuation and
# single-character tokens.
def preprocess(text):
    """Return the content words of *text* as a list of strings.

    The text is segmented with jieba because gensim's ``simple_preprocess``
    only splits on non-word characters: Chinese has no spaces between words,
    so it would return whole clauses as single "tokens". The previous
    ``len(token) > 3`` filter also discarded almost every Chinese word, since
    most are one or two characters long.

    A token is kept when it
    * is at least two characters long after stripping whitespace,
    * is not in the module-level ``stopwords`` collection, and
    * contains at least one letter, digit or CJK character (this drops
      punctuation-only tokens).

    Tokens are lower-cased, as ``simple_preprocess`` did.
    """
    # Local import: the import header of this script does not bring in jieba.
    import jieba

    result = []
    for raw in jieba.cut(text):
        token = raw.strip().lower()
        if len(token) < 2 or token in stopwords:
            continue
        if any(ch.isalnum() for ch in token):
            result.append(token)
    return result
 
# Stream tokenised documents from the files of a folder.
class TextIterator:
    """Re-iterable stream of token lists, one per non-empty line.

    Every regular file directly inside ``folder_path`` is opened as UTF-8 and
    each of its lines is passed through ``preprocess``. The object can be
    iterated any number of times; every pass re-reads the files.
    """

    def __init__(self, folder_path):
        # Folder whose files are read on each iteration.
        self.folder_path = folder_path

    def __iter__(self):
        # Sort the listing: os.listdir() returns entries in arbitrary order,
        # and the stream must be identical on every pass.
        for file_name in sorted(os.listdir(self.folder_path)):
            file_path = os.path.join(self.folder_path, file_name)
            # Skip sub-directories; open() would fail on them.
            if not os.path.isfile(file_path):
                continue
            # The context manager closes the handle even if the consumer
            # abandons the generator part-way through.
            with open(file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    tokens = preprocess(line)
                    # Lines that are blank or contain only filtered tokens
                    # would become empty documents; leave them out.
                    if tokens:
                        yield tokens
 
# Train, save, evaluate and visualise the topic model.
#
# gensim does not provide an ``Lda2Vec`` class, so the model is built with
# ``LdaMulticore``, which this script already imports. The line
# ``from gensim.models import Lda2Vec`` in the import header must be deleted by
# hand, otherwise the script still stops there with ImportError.
#
# The ``__main__`` guard is required on Windows: LdaMulticore and the c_v
# coherence computation start worker processes, and each worker re-imports
# this file. Without the guard every worker would start training again.
if __name__ == "__main__":
    # Read and tokenise the files once, then reuse the result. The original
    # code re-read every file for each of the three corpus constructions.
    sentences = TextIterator(input_folder)
    texts = list(sentences)
    if not texts:
        raise ValueError("No documents found in " + input_folder)

    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(doc) for doc in texts]

    # ``batch_size`` was removed: it is not an LdaMulticore argument.
    model = LdaMulticore(
        corpus=corpus,
        num_topics=50,
        id2word=dictionary,
        chunksize=5000,
        passes=10,
        alpha=0.5,
        eta=0.5,
        iterations=200,
        random_state=42,
        workers=4,
    )

    # Save the model (file name kept unchanged from the original script).
    model.save("lda2vec.model")

    # log_perplexity() returns the per-word likelihood bound (a negative
    # number, closer to zero is better), not the perplexity itself.
    perplexity = model.log_perplexity(corpus)
    coh_score = CoherenceModel(model=model, texts=texts, dictionary=dictionary, coherence='c_v').get_coherence()

    # Print the five highest-weighted words of every topic. gensim models have
    # no ``topic_word_`` attribute; show_topic() returns (word, weight) pairs.
    for topic_id in range(model.num_topics):
        words = ", ".join(word for word, _ in model.show_topic(topic_id, topn=5))
        print(words)

    print("Perplexity: ", perplexity)
    print("Coherence score: ", coh_score)

    # Visualise last: pyLDAvis.show() serves the page and blocks until it is
    # interrupted, so the textual results are printed before it.
    vis_data = pyLDAvis.gensim_models.prepare(model, corpus, dictionary)
    pyLDAvis.show(vis_data)
 
 
问题就来了,一直报错, 
ImportError: cannot import name 'Lda2Vec' from 'gensim.models' (C:\ProgramData\anaconda3\lib\site-packages\gensim\models\__init__.py) 
ModuleNotFoundError: No module named 'pyLDAvis' 
明明已经pip install 上面的模块和模型了,就是不行,也upgrade最新版了,甚至把gensim也upgrade了,就是不行,不管怎么尝试都不行。有没有大佬能出出主意啊。
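这里其实是两个不同的问题。第一个报错(ImportError)是因为 gensim 并没有提供 Lda2Vec 这个类,无论安装或升级到哪个版本都无法从 gensim.models 导入它;可以改用 gensim 自带的 LdaModel / LdaMulticore,或者使用第三方的 lda2vec 实现。第二个报错(ModuleNotFoundError)通常说明 pyLDAvis 没有安装到当前运行代码的这个 Python 环境中(例如 pip 对应的解释器与 Anaconda 的解释器不是同一个),建议在 Anaconda Prompt 中用 python -m pip install pyLDAvis 安装。此外,这个问题也可能是由于使用的gensim版本与pyLDAvis版本不兼容导致的。您可以尝试卸载已安装的pyLDAvis和gensim,并重新安装旧一些的版本: 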
```
pip uninstall pyldavis gensim

pip install pyLDAvis==2.1.2 gensim==3.8.3 -i https://mirrors.aliyun.com/pypi/simple
```
 
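然后再尝试运行代码。注意:pyLDAvis 2.1.2 中对应的模块名是 pyLDAvis.gensim(pyLDAvis.gensim_models 是 3.x 版本才改用的名称),降级后需要相应修改 import 语句。另外,确保在安装这些包之前,先关闭所有Python解释器和编辑器,并在重新打开它们之前重启计算机。  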
 
 
 |   
 
 
 
 |