|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
输入城市名称和年份,获取2011~2019年中任意一年的历史天气并保存在Excel中
感觉用正则表达式有点繁杂了
城市和年份太多,只测试了几个城市,不保证每个城市每个年份都能查到
- import requests
- from bs4 import BeautifulSoup
- import openpyxl
- import re
# Pre-compiled patterns applied to the stringified history table.
# re.DOTALL lets "." span line breaks inside the markup.

# Date: captured from the trailing path segment of each day's link.
find_date = re.compile(
    r'<div><a href="//lishi.tianqi.com/.*?/(.*?).html"', re.DOTALL
)
# Highest temperature: one- or two-digit integer, optionally negative.
find_htemp = re.compile(
    r'<div style="width: 100px">(\d{1,2}|-\d{1,2})</div>', re.DOTALL
)
# Lowest temperature: one- or two-digit integer, optionally negative.
find_dtemp = re.compile(r'<div>(\d{1,2}|-\d{1,2})</div>', re.DOTALL)
# Weather description: a bare <div> whose content contains no digits.
find_tianqi = re.compile(r'<div>([^0-9]*?)</div>', re.DOTALL)
# Wind force: content of the 200px-wide <div>.
find_fengli = re.compile(r'<div style="width:200px;">(.*?)</div>', re.DOTALL)
# Build the list of URLs to query.
def genHTML(cityname, year):
    """Return the twelve monthly history-page URLs for a city and year.

    Args:
        cityname: City identifier inserted into the URL path.
        year: Year, formatted with ``%d`` in front of the two-digit month.

    Returns:
        A list of 12 URL strings ordered from January to December.
    """
    todo_urls = []
    for month_number in range(1, 13):
        # Page names have the form YYYYMM, e.g. 201901.
        stamp = "%d%02d" % (year, month_number)
        todo_urls.append(f"http://lishi.tianqi.com/{cityname}/{stamp}.html")
    return todo_urls
# Fetch the page content.
def askURL(url, timeout=10):
    """Download ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely on a stalled
            connection, which would hang the whole run.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Send a browser-like User-Agent header with the request.
    headers = {
        "User-Agent": "Mozilla / 5.0(Windows NT 10.0; WOW64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome/78.0.3904.108 Safari / 537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.content.decode("utf-8")
# Parse the page content.
def getdata(res):
    """Extract the per-day weather rows from one page of HTML.

    Args:
        res: HTML text of a page.

    Returns:
        A list of ``[date, low temperature, high temperature, weather,
        wind]`` lists, one per date matched in the page.
    """
    soup = BeautifulSoup(res, "html.parser")
    tables = soup.find_all('ul', class_="lishitable_content clearfix")
    # The regexes run on the string form of the matched elements.
    markup = str(tables)

    dates = find_date.findall(markup)
    lows = find_dtemp.findall(markup)
    highs = find_htemp.findall(markup)
    weather = find_tianqi.findall(markup)
    wind = find_fengli.findall(markup)

    # NOTE(review): rows are assembled by position, so this assumes every
    # field list has at least as many matches as `dates`; otherwise the
    # indexing raises IndexError.
    return [
        [day, lows[idx], highs[idx], weather[idx], wind[idx]]
        for idx, day in enumerate(dates)
    ]
# Save the collected data to a file.
def saveData(datalist, cityname, year):
    """Write the collected rows to ``<year>年<cityname>历史天气.xlsx``.

    Args:
        datalist: Two-level list: one inner list per month, each holding
            that month's row lists. Any number of months is accepted; the
            previous hard-coded ``range(12)`` raised IndexError for fewer
            than 12 entries and ignored any beyond the twelfth.
        cityname: City name used in the output file name.
        year: Integer year used in the output file name.
    """
    wb = openpyxl.Workbook()
    ws = wb.active

    # Header row in cells A1..E1.
    headers = ("日期", "最低温度", "最高温度", "天气", "风力")
    for column, title in zip("ABCDE", headers):
        ws[f"{column}1"] = title

    # Append every row of every month below the header.
    for month_rows in datalist:
        for row in month_rows:
            ws.append(row)

    wb.save("%d年%s历史天气.xlsx"%(year,cityname))
def main():
    """Prompt for a city and year, scrape each month, and save the result."""
    print("-------历史天气查询系统-------")
    cityname = input("请输入城市名称(小写全拼):")
    year = int(input("请输入查询年份:"))

    # Fetch and parse the monthly pages one after another, in order.
    result = [getdata(askURL(url)) for url in genHTML(cityname, year)]
    saveData(result, cityname, year)


if __name__ == '__main__':
    main()
复制代码 |
|