# --- forum-page boilerplate captured with the paste; commented out so the script runs ---
# 马上注册,结交更多好友,享用更多功能^_^  ("Register now to make friends and unlock features")
# 您需要 登录 才可以下载或查看,没有账号?立即注册  ("You must log in to view; no account? Register")
import requests
from bs4 import BeautifulSoup
import csv
header = ('城市', '高温', '低温')
def parse_page(url):
    """Fetch one weather.com.cn regional forecast page and parse its tables.

    Args:
        url: A ``textFC`` region page, e.g. ``.../textFC/hb.shtml``.

    Returns:
        A list of dicts with keys '城市' (city), '高温' (high), '低温' (low),
        one dict per city row across every province table on the page.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/108.0.1462.54 Safari/537.36',
    }
    response = requests.get(url, headers=headers)
    text = response.content.decode('utf-8')
    # html5lib tolerates the malformed markup on some pages (notably
    # gat.shtml) that stricter parsers mis-handle.
    soup = BeautifulSoup(text, 'html5lib')
    conMidtab = soup.find('div', class_='conMidtab')
    tables = conMidtab.find_all('table')
    lst = []
    for table in tables:
        trs = table.find_all('tr')[2:]  # first two <tr> are column headers
        for index, tr in enumerate(trs):
            tds = tr.find_all('td')
            # Robustness: skip malformed/short rows so tds[-5] / tds[-2]
            # below cannot raise IndexError on an unexpected row.
            if len(tds) < 5:
                continue
            # In each table's first data row, tds[0] holds the province
            # name and the city sits in tds[1]; later rows start with it.
            city_td = tds[1] if index == 0 else tds[0]
            city = list(city_td.stripped_strings)[0]          # text only
            temp_max = list(tds[-5].stripped_strings)[0]      # daily high
            temp_min = list(tds[-2].stripped_strings)[0]      # daily low
            lst.append({
                '城市': city,
                '高温': temp_max + '℃',
                '低温': temp_min + '℃',
            })
    # NOTE: return sits outside BOTH loops on purpose — indented one level
    # deeper it would silently return after the first table only.
    return lst
def writedata(lst):
    """Write all collected rows to ``weather.csv``.

    Must be called exactly ONCE, after every page has been scraped:
    mode 'w' truncates the file, so invoking this inside the URL loop
    would overwrite earlier regions and leave only the last page's data
    in the CSV (the classic "exported file is missing rows" symptom).

    Args:
        lst: List of dicts whose keys match the module-level ``header``.
    """
    # utf-8-sig writes a BOM so Excel auto-detects UTF-8 and renders the
    # Chinese headers/cities correctly; newline='' is required by csv.
    with open('weather.csv', 'w', encoding='utf-8-sig', newline='') as file_obj:
        writer = csv.DictWriter(file_obj, fieldnames=header)
        writer.writeheader()
        writer.writerows(lst)
def main():
    """Scrape every regional forecast page, then export the rows to CSV."""
    region_urls = (
        'http://www.weather.com.cn/textFC/hb.shtml',   # North China
        'http://www.weather.com.cn/textFC/db.shtml',   # Northeast
        'http://www.weather.com.cn/textFC/hd.shtml',   # East China
        'http://www.weather.com.cn/textFC/hz.shtml',   # Central China
        'http://www.weather.com.cn/textFC/hn.shtml',   # South China
        'http://www.weather.com.cn/textFC/xn.shtml',   # Southwest
        'http://www.weather.com.cn/textFC/xb.shtml',   # Northwest
        'http://www.weather.com.cn/textFC/gat.shtml',  # HK / Macau / Taiwan
    )
    all_rows = []
    for page_url in region_urls:
        all_rows.extend(parse_page(page_url))
    writedata(all_rows)


if __name__ == '__main__':
    main()
# NOTE (original poster's question, translated): "the data is complete when I
# don't export it, but the exported CSV only contains part of the rows" —
# typically caused by calling writedata() (mode 'w', which truncates) inside
# the URL loop instead of once after all pages have been collected, or by a
# `return` indented inside the table loop in parse_page().