|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- #-*- codeing=utf-8 -*-
- import re
- from bs4 import BeautifulSoup
- import urllib.request
- import region #此库为地区编码(有芒市,昆明,保山)
- def main():
- baseurl="http://www.weather.com.cn/"
- datalist=getData(baseurl)
- print(datalist)
-
- #设置获取规则(规则相同的同一归为Allrule1和Allrule2)
- Allrule1=re.compile(r'<span>(.*)</span>')
- Allrule2=re.compile(r'<em>(.*)</em>')
- findwindclassAct=Allrule2
- findtempAct=Allrule1
- findActtime=Allrule1
- findhumAct=Allrule2
- findwinddirAct=Allrule1
- findinqDay=re.compile(r'<p class="wea" title="(.*)">(.*)</p>')
- findtempDay=Allrule1
- findwinddirDay=re.compile(r'<span class title="(.*)">')
- findwindclassDay=re.compile(r'<(.*)</span>')
- findsunriTime=Allrule1
- findinqNight=re.compile(r'<p class="wea" title="(.*)">(.*)</p>')
- findtempNight=Allrule1
- findwinddirNight=re.compile(r'<span class title="(.*)">')
- findwindclassNight=re.compile(r'<(.*)</span>')
- sunsetTime=Allrule1
- #爬取网页
- def getData(baseurl):
- regionnum=region.selectRegion()
- url=baseurl+r"weather1d/"+regionnum+".shtml#around1"
- html=askUrl(url)
- #解析数据
- soup=BeautifulSoup(html,"html.parser")
- for item in soup.find_all('div',class_="t"):
- data=[]
- item=str(item)
-
- Acttime=re.findall(findActtime,item)[0] #获取当前时间
- data.append(Acttime)
- tempAct=re.findall(findtempAct,item)[3] #获取当前气温
- data.append(tempAct)
- humAct=re.findall(findhumAct,item)[0] #获取当前湿度
- data.append(humAct)
- winddirAct=re.findall(findwinddirAct,item)[2] #获取当前风向
- data.append(winddirAct)
- windclassAct=re.findall(findwindclassAct,item)[1] #获取当前风力级别
- data.append(windclassAct)
- inqDay=re.findall(findinqDay,item)[0] #获取白天天气种类
- data.append(inqDay)
- tempDay=re.findall(findtempDay,item)[5] #获取白天气温
- data.append(tempDay)
- winddirDay=re.findall(findwinddirDay,item)[0] #获取白天风向
- data.append(winddirDay)
- windclassDay=re.findall(findwindclassDay,item)[0] #获取白天风力级别
- data.append(windclassDay)
- sunriTime=re.findall(findsunriTime,item)[6] #获取日出时间
- data.append(sunriTime)
- inqNight=re.findall(findinqNight,item)[1] #获取夜间天气种类
- data.append(inqNight)
- tempNight=re.findall(findtempNight,item)[7] #获取夜间气温
- data.append(tempNight)
- winddirNight=re.findall(findwinddirNight,item)[1] #获取夜间风向
- data.append(winddirNight)
- windclassNight=re.findall(findwindclassNight,item)[1] #获取夜间风力级别
- data.append(windclassNight)
- sunsetTime=re.findall(findsunsetTime,item)[8] #获取日落时间
- data.append(sunsetTime)
-
- return data
- """
- def printData():
- data=getData(baseurl)
- print(data)
- """
- #获取网页
- def askUrl(url):
- head={
- "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36"
- }
- req=urllib.request.Request(url,headers=head)
- html=""
- try:
- resp=urllib.request.urlopen(req)
- html=resp.read().decode("utf-8")
- except urllib.error.URLError as e:
- if hasattr(e,"code"):
- print(e.code)
- if hasattr(e,"reason"):
- print(e.reason)
-
- return html
- if __name__=="__main__":
- main()
复制代码
------------------------------------
报错信息为:
Traceback (most recent call last):
File "F:\python_file\spider_wether.py", line 115, in <module>
main()
File "F:\python_file\spider_wether.py", line 10, in main
data=getData(baseurl)
File "F:\python_file\spider_wether.py", line 63, in getData
winddirDay=re.findall(findwinddirDay,item)[0]
IndexError: list index out of range
------------------------------------
求助求助求助求助 |
|