请各位大咖帮我看看,纠正
本帖最后由 heroking146 于 2020-12-17 21:33 编辑import requests
from bs4 import BeautifulSoup
import re
import json
classCornoaVirusSpider(object):
def __init__(self):
self.home_url = "https://ncov.dxy.cn/ncovh5/view/pneumonia"
def get_content_from_url(self,url):
'''
根据URL,获取响应内容的字符串数据
:param url:请求url
:return:响应内容的字符串
'''
response = requests.get(url)
returnresponse.content.decode()
def parse_home_page(self,home_page):
'''
解析首页内容,获取解析后的python的数据
:param home_page:首页内容
:return: 解析后的python数据
'''
soup = BeautifulSoup(home_page,'html.parser')
script = soup.find(id='getListByCountryTypeService2true')
#text = script.text
json_str= re.findall(r'\[.+\]',script)
data = json.loads(json_str)
return data
def save(self,data,path):
with open(path,'w') as fp:
json.dump(data,fp,ensure_ascii=False)
def crawl_last_day_corona_virus(self):
'''
采集最近一天的各国疫情信息
:return:
'''
home_page = self.get_content_from_url(self.home_url)
last_day_corona_virus = self.parse_home_page(home_page)
self.save(last_day_corona_virus,'jsonfile/last_day_corona_virus')
def run(self):
self.crawl_last_day_corona_virus()
if __name__ == '__main__':
spider = CornoaVirusSpider()
spider.run()
显示如下:
TypeError: expected string or bytes-like object
import requests
from bs4 import BeautifulSoup
import re
import json
classCornoaVirusSpider(object):
def __init__(self):
self.home_url = "https://ncov.dxy.cn/ncovh5/view/pneumonia"
def get_content_from_url(self,url):
'''
根据URL,获取响应内容的字符串数据
:param url:请求url
:return:响应内容的字符串
'''
response = requests.get(url)
returnresponse.content.decode()
def parse_home_page(self,home_page):
'''
解析首页内容,获取解析后的python的数据
:param home_page:首页内容
:return: 解析后的python数据
'''
soup = BeautifulSoup(home_page,'html.parser')
script = soup.find(id='getListByCountryTypeService2true')
#print (type(script)) #在这里加一个强制转换格式
script =str(script )
#text = script.text
json_str= re.findall(r'\[.+\]',script)
data = json.loads(json_str)
return data
def save(self,data,path):
with open(path,'w') as fp:
json.dump(data,fp,ensure_ascii=False)
def crawl_last_day_corona_virus(self):
'''
采集最近一天的各国疫情信息
:return:
'''
home_page = self.get_content_from_url(self.home_url)
last_day_corona_virus = self.parse_home_page(home_page)
self.save(last_day_corona_virus,'jsonfile/last_day_corona_virus')
def run(self):
self.crawl_last_day_corona_virus()
if __name__ == '__main__':
spider = CornoaVirusSpider()
spider.run()
re.findall需要传输字符串,而script 是 soup对象所以报错,你可以这样: json_str= re.findall(r'\[.+\]',script.text)
另外requests本身支持直接取text,不需要decode,简单写了一个:
import requests
import re
def main():
url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia'
headers = {'user-agent': 'firefox'}
r = requests.get(url, headers=headers)
r.encoding = 'utf-8'
result = re.findall(r'\[(.*?)\]', r.text)
with open('data.json', 'w', encoding='utf-8') as f:
f.write(result)
if __name__ == '__main__':
main() def main():
url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia'
headers = {'user-agent': 'firefox'}
r = requests.get(url, headers=headers)
r.encoding = 'utf-8'
print(r.text)
# result = re.findall(r'\[(.*?)\]', r.text)
result = re.find(id = 'getListByCountryTypeService2true', r.text)##这样通过属性查找????报错:SyntaxError: positional argument follows keyword argument!!!!!
#print(result) import matplotlib.pyplot as plt
import numpy as np
x = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
y1 =
y2 =
y3 =
plt.bar(x, y1, label="label1", color='red')
plt.bar(x, y2, label="label2",color='orange')
plt.bar(x, y3, label="label3", color='lightgreen')
plt.xticks(np.arange(len(x)), x, rotation=0, fontsize=10)# 数量多可以采用270度,数量少可以采用340度,得到更好的视图
plt.legend(loc="upper left")# 防止label和图像重合显示不出来
plt.rcParams['font.sans-serif'] = ['SimHei']# 用来正常显示中文标签
plt.ylabel('数量')
plt.xlabel('name')
plt.rcParams['savefig.dpi'] = 300# 图片像素
plt.rcParams['figure.dpi'] = 300# 分辨率
plt.rcParams['figure.figsize'] = (15.0, 8.0)# 尺寸
plt.title("title")
plt.savefig('D:\\result.png')
plt.show()
报错如下:OSError: %1 不是有效的 Win32 应用程序。 如何处置??
页:
[1]