马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
输入城市名称和年份,获取2011~2019年中任意一年的历史天气并保存在Excel中
感觉用正则表达式有点繁杂了
城市和年份太多,只测试了几个城市,不保证每个城市每个年份都能查到
import requests
from bs4 import BeautifulSoup
import openpyxl
import re
# Regex for the date: captures the text between the last slash of a
# lishi.tianqi.com link and its "html" suffix.
# NOTE(review): the "." before "html" is unescaped, so it matches any character.
find_date = re.compile(r'<div><a href="//lishi.tianqi.com/.*?/(.*?).html"',re.S)
# Regex for the highest temperature: a 1-2 digit number, optionally negative,
# inside a div styled "width: 100px".
find_htemp = re.compile(r'<div style="width: 100px">(\d{1,2}|-\d{1,2})</div>',re.S)
# Regex for the lowest temperature: a 1-2 digit number, optionally negative,
# inside a plain div.
find_dtemp = re.compile(r'<div>(\d{1,2}|-\d{1,2})</div>',re.S)
# Regex for the weather description: a plain div whose text contains no digits.
find_tianqi = re.compile(r'<div>([^0-9]*?)</div>',re.S)
# Regex for the wind: the text of a div styled "width:200px;".
find_fengli = re.compile(r'<div style="width:200px;">(.*?)</div>',re.S)
def genHTML(cityname, year):
    """Return the twelve monthly history-page URLs for a city and year.

    Args:
        cityname: City identifier inserted as the URL path segment.
        year: Year as an integer; it is formatted with ``%d``.

    Returns:
        A list of 12 URL strings ordered January to December, each of the
        form ``http://lishi.tianqi.com/<cityname>/<YYYYMM>.html``.
    """
    return [
        "http://lishi.tianqi.com/%s/%s.html" % (cityname, "%d%02d" % (year, month))
        for month in range(1, 13)
    ]
def askURL(url):
    """Download a page and return its body decoded as UTF-8 text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server does not respond within the timeout.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        "User-Agent": "Mozilla / 5.0(Windows NT 10.0; WOW64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome/78.0.3904.108 Safari / 537.36"
    }
    # requests waits indefinitely by default; bound the wait so one stalled
    # month page cannot hang the whole run.
    response = requests.get(url, headers=headers, timeout=30)
    return response.content.decode("utf-8")
def getdata(res):
    """Extract the daily weather rows from the HTML of one monthly page.

    Args:
        res: HTML text of the page.

    Returns:
        A list of ``[date, lowest temperature, highest temperature, weather,
        wind]`` lists of strings. The list is empty when the page contains
        no matching history table.
    """
    soup = BeautifulSoup(res, "html.parser")
    # Limit regex matching to the markup of the history-table <ul> elements.
    data = str(soup.find_all('ul', class_="lishitable_content clearfix"))
    dates = find_date.findall(data)
    dtemp = find_dtemp.findall(data)
    htemp = find_htemp.findall(data)
    tianqi = find_tianqi.findall(data)
    fengli = find_fengli.findall(data)
    # zip stops at the shortest list, so a page where one field regex matches
    # fewer times than the date regex yields fewer rows instead of raising
    # IndexError.
    return [list(row) for row in zip(dates, dtemp, htemp, tianqi, fengli)]
def saveData(datalist, cityname, year):
    """Write the collected weather rows to an .xlsx workbook.

    The file is saved in the current working directory as
    ``<year>年<cityname>历史天气.xlsx``, with a header row followed by one
    row per day.

    Args:
        datalist: Two-level list: one inner list per month, each holding
            five-item row lists.
        cityname: City name used in the output file name.
        year: Year (integer) used in the output file name.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(["日期", "最低温度", "最高温度", "天气", "风力"])
    # Iterate over whatever months were supplied rather than assuming exactly
    # twelve, so a shorter list no longer raises IndexError.
    for month_rows in datalist:
        for row in month_rows:
            ws.append(row)
    wb.save("%d年%s历史天气.xlsx" % (year, cityname))
def main():
    """Prompt for a city and year, fetch every monthly page, and save the rows.

    Raises:
        ValueError: If the year entered is not an integer.
    """
    print("-------历史天气查询系统-------")
    cityname = input("请输入城市名称(小写全拼):")
    year = int(input("请输入查询年份:"))
    # One entry per month URL, in the order genHTML returns them.
    result = [getdata(askURL(url)) for url in genHTML(cityname, year)]
    saveData(result, cityname, year)
# Run the interactive query only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|