|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
需要安装的第三方库有:requests、lxml、wordcloud、jieba(可通过 pip install requests lxml wordcloud jieba 安装)。
代码如下:
- # -*- coding: utf-8 -*-
- # author:xubai
- import requests
- from lxml import etree
- import wordcloud
- import jieba
- import urllib.parse
# Accumulated text that will be segmented and turned into the word cloud.
savestr = ""
# Raw user input; reused to build the search URL and the image file name.
enter = ""
def get_url():
    """Prompt for a search keyword and parse the requested result pages.

    The keyword read from stdin is stored in the module-level ``enter`` and
    URL-quoted into a bilibili search URL. The text of the "last page" button
    is shown to the user as the total page count, the user is asked how many
    pages to parse, and each page URL is handed to ``parse_html``.

    If the page count cannot be read (request failure, element missing) or
    the entered page number is not an integer, a message is printed and the
    function returns without parsing anything.
    """
    global enter
    enter = input("输入想要搜索的内容: ")
    enter_str = urllib.parse.quote(enter)
    # The URL deliberately ends in "&page"; "=<n>" is appended per page below.
    base_url = "https://search.bilibili.com/all?keyword=%s&from_source=nav_suggest_new&page" % enter_str
    try:
        pages = request_html(base_url).xpath('//li[@class="page-item last"]/button/text()')
        pages = pages[0].strip()
        page = int(input("当前结果共%s页,请输入需要解析的页数:" % pages))
    except (requests.RequestException, etree.LxmlError,
            AttributeError, IndexError, ValueError):
        # AttributeError: request_html() returned None (unparseable response);
        # IndexError: the pagination element was not found;
        # ValueError: the user did not type an integer.
        print("未查找到该内容,或者由于查询次数过多被反爬,请稍后再次查询...........")
        # Without this return the loop below would hit an unbound ``page``.
        return
    for i in range(1, page + 1):
        url = base_url + "=" + str(i)
        parse_html(url)
        print('第', i, '页已解析。。')
def request_html(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and parse it as HTML.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``;
            without one, a stalled connection would block indefinitely.

    Returns:
        The value of ``etree.HTML`` applied to the response text.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` (for
            example on a connection error or when the timeout expires).
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36"}
    response = requests.get(url, headers=headers, timeout=timeout)
    return etree.HTML(response.text)
def parse_html(url):
    """Append the video titles found on one result page to ``savestr``.

    Args:
        url: Address of a search-result page; fetched via ``request_html``.

    The ``title`` attributes of the matching video links are joined with
    spaces and appended to the module-level ``savestr``.
    """
    global savestr
    titles = request_html(url).xpath('//li[@class="video-item matrix"]/a/@title')
    if titles:
        # The trailing space keeps the last title of this page from fusing
        # with the first title of the next page before word segmentation.
        savestr += " ".join(titles) + " "
def make_wc(font_path=r"D:\Python_program\wordcloud\font\msyh.ttc"):
    """Segment the collected text and save it as a word-cloud PNG.

    The module-level ``savestr`` is segmented with ``jieba.lcut``, rendered
    with ``wordcloud.WordCloud`` and written to ``<keyword>.png`` in the
    current directory, where the keyword is the module-level ``enter`` with
    characters that are illegal in file names replaced by ``_``. The function
    then waits for Enter so the console output stays visible.

    Args:
        font_path: Font file handed to ``WordCloud``. Defaults to the
            original hard-coded path; pass another path on machines where
            that file does not exist.
    """
    print("="*30)
    print("开始解析数据")
    words = jieba.lcut(savestr)
    text = " ".join(words)
    if not text.strip():
        # Nothing was collected, so there is no text to render; bail out
        # with a message rather than calling generate() on empty input.
        print("未获取到任何标题,无法生成词云。")
        input()
        return
    # Generate the word-cloud image.
    cloud = wordcloud.WordCloud(
        width=1000,
        height=700,
        background_color="white",
        font_path=font_path,
        scale=15,
    )
    cloud.generate(text)
    # The keyword is free-form user input; path separators and other
    # characters Windows forbids in file names would make to_file() fail.
    safe_name = enter.translate(str.maketrans({ch: "_" for ch in '\\/:*?"<>|'}))
    out_file = "%s.png" % safe_name
    cloud.to_file(out_file)
    print("词云:" + out_file + "已生成!")
    input()
def main():
    """Run the two stages in order: collect titles, then build the image."""
    for stage in (get_url, make_wc):
        stage()
# Only run the scraper when executed as a script, not when imported.
if __name__ == '__main__':
    main()
复制代码 |
-
-
|