爬取b站视频标题并做成词云
需要安装的第三方库有 requests、lxml、wordcloud、jieba,代码如下:
# -*- coding: utf-8 -*-
# author:xubai
import requests
from lxml import etree
import wordcloud
import jieba
import urllib.parse
savestr = ""  # accumulates the text that will be analysed
enter =''  # holds the user's input; used to build the URL and the image file name
def get_url():
    """Ask for a search keyword and parse the requested number of result pages.

    The keyword is stored in the module-level ``enter`` (it is reused later as
    the output image name). The first result page is fetched to read the total
    page count from the "last page" button, the user is asked how many pages
    to parse, and ``parse_html`` is called once per page.

    If the page count cannot be read, the request fails, or the user enters
    something that is not an integer, the failure message is printed and the
    function returns without parsing anything.
    """
    global enter
    enter = input("输入想要搜索的内容: ")
    enter_str = urllib.parse.quote(enter)
    base_url = "https://search.bilibili.com/all?keyword=%s&from_source=nav_suggest_new&page" % enter_str
    try:
        matches = request_html(base_url).xpath('//li[@class="page-item last"]/button/text()')
        # xpath() returns a list of text nodes; the original called .strip()
        # on the list itself, which always failed.
        pages = matches[0].strip()
        page = int(input("当前结果共%s页,请输入需要解析的页数:" % pages))
    except (requests.RequestException, etree.LxmlError,
            AttributeError, IndexError, ValueError):
        print("未查找到该内容,或者由于查询次数过多被反爬,请稍后再次查询...........")
        # Without a valid page count there is nothing to iterate over.
        return
    for i in range(1, page + 1):
        # base_url ends with "...&page", so "=<n>" completes the query string.
        url = base_url + "=" + str(i)
        parse_html(url)
        print('第',i,'页已解析。。')
def request_html(url, timeout=10):
    """Download *url* and return the response parsed by ``etree.HTML``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection raises instead of blocking
            forever. Defaults to 10.

    Returns:
        The result of ``etree.HTML`` applied to the response text.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Desktop Chrome User-Agent string; presumably sent so the site serves
    # the regular search page to the script - verify if results look wrong.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36"}
    response = requests.get(url, headers=headers, timeout=timeout)
    return etree.HTML(response.text)
def parse_html(url):
    """Append the video titles found on the page at *url* to ``savestr``.

    Titles are read from the ``title`` attribute of the links inside
    ``li.video-item.matrix`` elements and joined with single spaces.

    Args:
        url: Address of one search-result page.
    """
    global savestr
    titles = request_html(url).xpath('//li[@class="video-item matrix"]/a/@title')
    if titles:
        # The trailing space keeps the last title of this page from being
        # glued to the first title of the next page when pages accumulate.
        savestr += " ".join(titles) + " "
def make_wc(font_path=r"D:\Python_program\wordcloud\font\msyh.ttc"):
    """Segment the collected titles and save them as a word-cloud PNG.

    The text in the module-level ``savestr`` is cut into words with
    ``jieba.lcut``, joined with spaces and rendered by ``wordcloud.WordCloud``.
    The image is written to ``"<enter>.png"``, where ``enter`` is the
    module-level search keyword. The function then waits for Enter.

    Args:
        font_path: Font file handed to ``WordCloud``. Defaults to the path
            that was previously hard-coded; pass another font file when that
            location does not exist on the current machine.
    """
    print("=" * 30)
    print("开始解析数据")
    if not savestr.strip():
        # Generating a cloud from empty text fails inside wordcloud, so stop
        # with a readable message instead.
        print("没有可用于生成词云的标题数据,未生成图片。")
    else:
        textlist = jieba.lcut(savestr)
        textstr = " ".join(textlist)
        # Render the word-cloud image.
        w = wordcloud.WordCloud(width=1000,
                                height=700,
                                background_color="white",
                                font_path=font_path,
                                scale=15)
        w.generate(textstr)
        w.to_file("%s.png" % enter)
        print("词云:"+ "%s.png" % enter + "已生成!")
    # Block until Enter is pressed so the messages above stay on screen.
    input()
def main():
    """Run the two stages of the script in order: scrape, then render."""
    for stage in (get_url, make_wc):
        stage()


if __name__ == '__main__':
    main()
为什么报错了,第三方库也安装了啊 zedi 发表于 2020-4-19 15:16
为什么报错了,第三方库也安装了啊
报的什么错?{:10_285:} 没有错,效果很好。感谢。
页:
[1]