import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.by import By
import bs4
import csv
import datetime
import time
# Wall-clock start time; read again at the end of the script to report the total run time.
time_start=time.time()
# Generate the date data
def create_assist_date(datestart=None, dateend=None):
    """Return every calendar day from *datestart* to *dateend*, inclusive.

    Args:
        datestart: First day as a ``'%Y-%m-%d'`` string. Defaults to
            ``'2016-01-01'``.
        dateend: Last day as a ``'%Y-%m-%d'`` string. Defaults to today's
            date.

    Returns:
        A list of zero-padded ``'YYYY-MM-DD'`` strings in ascending order.
        The list is empty when *datestart* is later than *dateend*.

    Raises:
        ValueError: If either argument does not match ``'%Y-%m-%d'``.
    """
    if datestart is None:
        datestart = '2016-01-01'
    if dateend is None:
        dateend = datetime.datetime.now().strftime('%Y-%m-%d')

    # Parse to plain dates; strptime also accepts non-padded input such as '2006-6-30'.
    first_day = datetime.datetime.strptime(datestart, '%Y-%m-%d').date()
    last_day = datetime.datetime.strptime(dateend, '%Y-%m-%d').date()

    # A negative span (start after end) gives an empty range, hence an empty
    # list, instead of returning a start date that lies outside the range.
    span_days = (last_day - first_day).days
    return [
        (first_day + datetime.timedelta(days=offset)).isoformat()
        for offset in range(span_days + 1)
    ]
# Page that is queried once per date, and the ids of the controls used on it.
_PAGE_URL = 'http://61.163.88.227:8006/hwsq.aspx?sr=0nkRxv6s9CTRMlwRgmfFF6jTpJPtAv87'
_DATE_BOX_ID = 'ContentLeft_menuDate1_TextBox11'
_QUERY_BUTTON_ID = 'ContentLeft_Button1'


def _fetch_tables(browser, day):
    """Submit *day* on the query page and return the parsed 'mainTxt' tables."""
    browser.get(_PAGE_URL)
    # The date box is read-only in the page; drop the attribute so it can be typed into.
    js = 'document.getElementById("ContentLeft_menuDate1_TextBox11").removeAttribute("readonly");'
    browser.execute_script(js)
    browser.find_element(By.ID, _DATE_BOX_ID).clear()  # remove the pre-filled value
    browser.find_element(By.ID, _DATE_BOX_ID).send_keys(day)
    browser.find_element(By.ID, _QUERY_BUTTON_ID).click()  # submit the query
    # Fixed pause after the click; this is not a wait on any page condition.
    sleep(2)
    soup = BeautifulSoup(browser.page_source, 'html.parser')
    return soup.find_all('table', 'mainTxt')


def _table_rows(table):
    """Yield the first five cell strings of each data row in *table*'s first <tbody>."""
    bodies = table('tbody')
    if not bodies:
        return
    for row in bodies[0].children:
        # .children also yields text nodes between rows; only tags are rows.
        if not isinstance(row, bs4.element.Tag):
            continue
        cells = row('td')
        # Rows with fewer than five <td> cells cannot fill a CSV record; skip
        # them instead of failing with IndexError.
        if len(cells) < 5:
            continue
        yield [cell.string for cell in cells[:5]]


if __name__ == '__main__':
    dates = create_assist_date("2006-04-05", '2006-6-30')

    # Run Chrome in headless mode.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')

    # One browser serves every date and is always shut down in the `finally`
    # below, so Chrome processes no longer pile up (one per date, never closed).
    # NOTE(review): passing the driver path positionally depends on the installed
    # Selenium version - confirm against the version in use.
    browser = webdriver.Chrome("D:/技能/chromedriver.exe", options=chrome_options)
    try:
        with open('data2006(4).csv', 'a+', newline="", encoding='gbk') as f:
            writer = csv.writer(f)
            for day in dates:
                tables = _fetch_tables(browser, day)
                # Tables at index 1 and 2 are the ones written out; without
                # them there is nothing to record for this day.
                if len(tables) < 3:
                    print("找不到")
                    continue
                for table in tables[1:3]:
                    for cells in _table_rows(table):
                        writer.writerow([day, *cells])
    finally:
        browser.quit()

    time_end = time.time()
    print('共运行{}s'.format(time_end - time_start))