|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- #爬取世界各国疫情数据
- import urllib.request as request
- import io
def download(url: str) -> str:
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address to open; passed straight to ``urllib.request.urlopen``.

    Returns:
        The whole response body as a single string.

    Raises:
        urllib.error.URLError: If the resource cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Both context managers guarantee the connection is released even when
    # reading or decoding fails; previously the response was never closed.
    with request.urlopen(url) as response:
        with io.TextIOWrapper(buffer=response, encoding="UTF-8") as reader:
            return reader.read()
# Report page holding the per-country epidemic figures to scrape.
url = "https://wp.m.163.com/163/page/news/virus_report/index.html?_nw_=1&_anw_=1"
html = download(url=url)

from bs4 import BeautifulSoup

# Identifiers of the columns to scrape; each one is appended to
# "overseas_list_" to form the CSS class looked up by bs_scraper.
FIELDS = (
    'name',
    'today_confirm',
    'confirm',
    'dead',
    'heal',
)
def bs_scraper(html):
    """Extract the per-nation figures from the report page markup.

    Every ``<div class="overseas_list_nation">`` is treated as one nation.
    Inside it, the text of the ``<div class="overseas_list_<field>">`` element
    is read for each entry of ``FIELDS``. The list of matched nation divs and
    each nation's record are printed as they are found.

    Args:
        html: Markup of the report page.

    Returns:
        A dict mapping each field in ``FIELDS`` to its text for the last
        nation found, or an empty dict when no nation div is present. A field
        whose div is missing maps to ``None``.

    NOTE(review): records of earlier nations are only printed, not returned;
    the return shape is kept as-is so existing callers are unaffected.
    """
    soup = BeautifulSoup(html, features='html.parser')

    # find() returns None when the markup has no <body>; search the whole
    # document in that case instead of failing with AttributeError.
    body = soup.find(name='body')
    if body is None:
        body = soup

    nations = body.find_all(name='div', attrs={'class': 'overseas_list_nation'})
    print(nations)

    results = {}
    for nation in nations:
        # Start a fresh record per nation so a missing field can never
        # silently inherit the previous nation's value.
        results = {}
        for field in FIELDS:
            cell = nation.find(name='div', attrs={'class': 'overseas_list_%s' % field})
            results[field] = cell.text if cell is not None else None
        print(results)
    return results
- bs_scraper(html)
复制代码
|
|