|

楼主 |
发表于 2023-3-2 17:05:22
|
显示全部楼层
这是代码
# Standard library
import csv
import datetime
import time
from time import sleep

# Third-party
import bs4
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Wall-clock start of the run; compared against a second time.time() reading
# once scraping has finished to report the total elapsed seconds.
time_start = time.time()
def create_assist_date(datestart=None, dateend=None):
    """Build the list of consecutive calendar days from *datestart* to *dateend*.

    Args:
        datestart: First day as a ``'%Y-%m-%d'`` string (non-padded months and
            days such as ``'2006-6-30'`` are accepted by ``strptime``).
            Defaults to ``'2016-01-01'``.
        dateend: Last day (inclusive) in the same format. Defaults to today.

    Returns:
        A list of zero-padded ``'YYYY-MM-DD'`` strings, one per day, in
        ascending order. The list always contains *datestart*; when *dateend*
        is earlier than *datestart* it contains *datestart* only.

    Raises:
        ValueError: If either argument does not match ``'%Y-%m-%d'``.
    """
    date_format = '%Y-%m-%d'
    if datestart is None:
        datestart = '2016-01-01'
    if dateend is None:
        dateend = datetime.datetime.now().strftime(date_format)

    first_day = datetime.datetime.strptime(datestart, date_format)
    last_day = datetime.datetime.strptime(dateend, date_format)

    # Both values are at midnight, so the difference is a whole number of
    # days. Clamp at zero so a reversed range still yields the start day.
    extra_days = max((last_day - first_day).days, 0)
    return [
        (first_day + datetime.timedelta(days=offset)).strftime(date_format)
        for offset in range(extra_days + 1)
    ]
def _table_rows(table, day):
    """Return one CSV row ``[day, c0..c4]`` per element row in *table*'s first <tbody>."""
    bodies = table('tbody')
    if not bodies:
        return []
    rows = []
    # table('tbody') is a list, so the first match is taken explicitly.
    for tr in bodies[0].children:
        if not isinstance(tr, bs4.element.Tag):
            continue  # skip text nodes that sit between the row tags
        cells = tr('td')
        if len(cells) < 5:
            continue  # rows without five <td> cells cannot fill the CSV columns
        rows.append([day] + [cell.string for cell in cells[:5]])
    return rows


def _scrape_day(browser, day):
    """Submit *day* in the page's date form; return its CSV rows, or None if the tables are absent."""
    browser.get('http://61.163.88.227:8006/hwsq.aspx?sr=0nkRxv6s9CTRMlwRgmfFF6jTpJPtAv87')
    # The date box carries a "readonly" attribute; remove it so the date can
    # be typed in directly.
    js = 'document.getElementById("ContentLeft_menuDate1_TextBox11").removeAttribute("readonly");'
    browser.execute_script(js)
    date_box = browser.find_element(By.ID, 'ContentLeft_menuDate1_TextBox11')
    date_box.clear()  # drop the pre-filled value
    date_box.send_keys(day)
    browser.find_element(By.ID, 'ContentLeft_Button1').click()  # submit the query
    # Fixed pause before reading the page source; presumably gives the page
    # time to refresh with the selected day's data.
    sleep(2)

    soup = BeautifulSoup(browser.page_source, 'html.parser')
    tables = soup.find_all('table', 'mainTxt')
    # The data is read from the second and third "mainTxt" tables.
    if len(tables) < 3:
        return None
    return _table_rows(tables[1], day) + _table_rows(tables[2], day)


if __name__ == '__main__':
    dates = create_assist_date("2006-04-05", '2006-6-30')

    # Run Chrome in headless mode through selenium.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    # One browser is shared by every date and always shut down afterwards, so
    # no Chrome process is left behind per iteration.
    browser = webdriver.Chrome("D:/技能/chromedriver.exe", options=chrome_options)
    try:
        for day in dates:
            rows = _scrape_day(browser, day)
            if rows is None:
                print("找不到")
                continue
            # Append day by day so finished days are already on disk if a
            # later day fails.
            with open('data2006(4).csv', 'a+', newline="", encoding='gbk') as f:
                csv.writer(f).writerows(rows)
    finally:
        browser.quit()

    time_end = time.time()
    print('共运行{}s'.format(time_end - time_start))
复制代码 |
|