leoqs
发表于 2020-2-15 18:40:16
看楼主装X
Kate_M
发表于 2020-2-15 20:32:38
回复看代码
yellow_rain
发表于 2020-2-15 20:40:07
学习一下
supergzx
发表于 2020-2-15 21:19:08
学习学习
wt9503
发表于 2020-2-15 21:50:06
1
DavidCT
发表于 2020-2-15 22:12:39
想看代码,支持楼主
Carry2020
发表于 2020-2-16 09:34:49
python
云犀
发表于 2020-2-16 09:51:55
新小白,来看看
人间洪荒镜
发表于 2020-2-16 10:15:11
厉害
python-yjl
发表于 2020-2-16 11:01:11
高大上,好好学习下。
蒙哥-斧王
发表于 2020-2-16 11:02:08
爬取肺炎疫情最新动态
windsboy
发表于 2020-2-16 11:44:29
学习
暗冬序曲
发表于 2020-2-16 12:30:53
666
PhycoCycle
发表于 2020-2-16 13:34:25
我想看看
MichaelBach
发表于 2020-2-16 13:52:44
666
ski_magicboy
发表于 2020-2-16 17:12:17
from selenium import webdriver
import re
import pandas as pd
import openpyxl as op
import time
class feiyan():
    """Scrape headline outbreak figures from a web page and log them to Excel.

    Intended call order is ``FindWeb(url)`` -> ``FindData()`` -> ``save()``.

    After ``FindData`` succeeds, ``confirmed``, ``suspected``, ``cure``,
    ``died`` and ``timess`` each hold an ``re.Match`` object; call
    ``.group()`` on them to obtain the matched text.
    """

    # Workbook that save() updates when no explicit path is passed.
    DEFAULT_TARGET = r'E:\Python\肺炎数据\肺炎数据.xlsx'

    # Column label used in the workbook for each scraped field.
    # NOTE(review): only '时间' is visible in the original code; the other four
    # labels were lost when the script was pasted (the `[num, '<column>']`
    # subscripts after `excel.loc` were stripped). Confirm these against the
    # real workbook header, or override them via save(columns=...).
    COLUMNS = {
        'timess': '时间',
        'confirmed': '确诊',
        'died': '死亡',
        'cure': '治愈',
        'suspected': '疑似',
    }

    def __init__(self, html=None, source=None, confirmed=None, suspected=None,
                 cure=None, died=None, timess=None):
        """Store optional initial values; every field defaults to ``None``."""
        self.html = html
        self.source = source
        self.confirmed = confirmed
        self.suspected = suspected
        self.cure = cure
        self.died = died
        self.timess = timess

    def FindWeb(self, html=None):
        """Load *html* (a URL) in headless Chrome and return the rendered HTML.

        Args:
            html: URL to open. Falls back to ``self.html`` when omitted.

        Returns:
            The serialised document as a string; also stored on ``self.source``.

        Raises:
            ValueError: if no URL was given here or at construction time.
        """
        if html is None:
            html = self.html
        if not html:
            raise ValueError('FindWeb() needs a URL (argument or self.html)')

        option = webdriver.ChromeOptions()
        option.add_argument('--headless')
        driver = webdriver.Chrome(options=option)
        try:
            driver.get(html)
            # Fixed pause before reading the DOM -- presumably to let the
            # page's scripts fill in the figures.
            time.sleep(5)
            # Serialise the live DOM (i.e. after the page's JavaScript has
            # run) instead of relying on the raw page source.
            self.source = driver.execute_script(
                'return document.documentElement.outerHTML')
        finally:
            # Always shut the browser down; otherwise each run leaves a
            # headless Chrome / chromedriver process behind.
            driver.quit()
        return self.source

    def _find(self, outer, inner, label):
        """Return the match of *inner* within the first match of *outer*.

        Raises:
            ValueError: naming *label* when either pattern is not found.
        """
        block = re.search(outer, self.source)
        hit = re.search(inner, block.group()) if block else None
        if hit is None:
            raise ValueError(
                'could not locate {} in the page source'.format(label))
        return hit

    def FindData(self):
        """Extract the four counts and the timestamp from ``self.source``.

        Each count is the first run of digits inside the text that runs from
        a ``number`` marker to the corresponding Chinese caption. The results
        are stored as ``re.Match`` objects and a summary is printed.

        Raises:
            ValueError: if ``self.source`` is unset or a figure is not found.
                Nothing on the instance is modified in that case.
        """
        if self.source is None:
            raise ValueError('FindData() called before FindWeb()')

        # Parse everything first so a failure cannot leave the instance
        # holding a mix of fresh and stale figures.
        confirmed = self._find(r'number.*\S\s*.*累计确诊', r'\d+', 'confirmed count')
        suspected = self._find(r'number.*\S\s*.*现有疑似', r'\d+', 'suspected count')
        cure = self._find(r'number.*\S\s*.*治愈人数', r'\d+', 'cured count')
        died = self._find(r'number.*\S\s*.*死亡人数', r'\d+', 'death count')
        timess = self._find(r'统计截至.*?</span>',
                            r'\d+-\d+-\d+\s+\d+:\d+:\d+', 'timestamp')

        self.confirmed = confirmed
        self.suspected = suspected
        self.cure = cure
        self.died = died
        self.timess = timess

        print('截止{}:\n全国确诊人数为:{}人\n'.format(self.timess.group(), self.confirmed.group()))
        print('疑似病例为:{}人\n'.format(self.suspected.group()))
        print('治愈人数为:{}人\n'.format(self.cure.group()))
        print('死亡人数为:{}人\n'.format(self.died.group()))

    def save(self, target=None, columns=None):
        """Append the scraped figures as a new row and write sheet 'alldata'.

        Reads the existing workbook (previous days' data), adds today's row
        and saves the result back into the same file.

        Args:
            target: Path of the ``.xlsx`` workbook; defaults to
                ``DEFAULT_TARGET``. The file must already exist.
            columns: Optional mapping that overrides entries of ``COLUMNS``
                (field name -> column label).

        Raises:
            ValueError: if ``FindData`` has not populated the figures yet.
        """
        if target is None:
            target = self.DEFAULT_TARGET
        labels = dict(self.COLUMNS)
        if columns:
            labels.update(columns)

        # Same field order as the original assignments.
        found = {
            'timess': self.timess,
            'confirmed': self.confirmed,
            'died': self.died,
            'cure': self.cure,
            'suspected': self.suspected,
        }
        if any(match is None for match in found.values()):
            raise ValueError('save() called before FindData()')

        excel = pd.read_excel(target)
        num = len(excel)  # index label for the appended row
        row = {labels[key]: match.group() for key, match in found.items()}
        # concat (rather than cell-by-cell .loc enlargement) lets pandas pick
        # a common dtype when the text values meet existing numeric columns.
        excel = pd.concat([excel, pd.DataFrame([row], index=[num])])

        # Append mode keeps the workbook's other sheets; 'replace' rewrites
        # 'alldata' in place (if_sheet_exists needs pandas >= 1.3). The
        # context manager saves and closes the file exactly once.
        # NOTE(review): to_excel writes the index as an extra column while
        # read_excel above does not consume one; if the sheet read back is
        # this same 'alldata' sheet, each run adds an 'Unnamed: N' column --
        # confirm and add index=False / index_col=0 as appropriate.
        with pd.ExcelWriter(target, engine='openpyxl', mode='a',
                            if_sheet_exists='replace') as writer:
            excel.to_excel(writer, sheet_name='alldata')
if __name__ == '__main__':
    # Script entry point: fetch the Tencent News outbreak page, parse and
    # print the figures, then append them to the Excel log.
    tracker = feiyan()
    tracker.FindWeb(html='https://news.qq.com//zt2020/page/feiyan.htm')
    tracker.FindData()
    tracker.save()
Spx12138
发表于 2020-2-16 17:28:01
爬取
jmfsw
发表于 2020-2-16 17:51:47
武汉加油!
挣扎起飞
发表于 2020-2-16 18:10:01
牛逼
0404
发表于 2020-2-16 19:59:22
6