|
|

楼主 |
发表于 2019-3-27 17:16:09
|
显示全部楼层
只能爬取单年的代码如下:
- import pandas as pd
- import numpy as np
- from bs4 import BeautifulSoup as bs
- import requests as res
- import re
- import time
def _cell_text(cell):
    """Return the text of one table cell with spaces and line breaks removed."""
    return ''.join(
        fragment.replace(' ', '').replace('\r', '').replace('\n', '')
        for fragment in cell.strings
    )


def _weather_rows(soup):
    """Return one (date, weather, temperature, wind) tuple per table row.

    Cells are grouped by their enclosing ``<tr>`` instead of by counting
    every fourth non-empty string on the page, so a blank cell can no longer
    shift the following columns or cause an IndexError on a short last group.
    Rows that do not have exactly four ``<td>`` cells, or whose cells are all
    empty, are skipped.
    """
    rows = []
    for row in soup.find_all('tr'):
        cells = [_cell_text(cell) for cell in row.find_all('td')]
        if len(cells) == 4 and any(cells):
            rows.append(tuple(cells))
    return rows


def Wea(year, months=range(1, 13), city='fujianfuzhou', out_path=None):
    """Scrape monthly weather-history tables and append them to a CSV file.

    For every requested month the page
    ``http://www.tianqihoubao.com/lishi/<city>/month/<year><mm>.html`` is
    downloaded, its table rows are extracted and the rows are appended to
    ``out_path`` (GBK encoded, no column header, with the per-month row index
    that pandas writes by default).

    Args:
        year: Year to scrape; interpolated into the URL and the default
            file name.
        months: Iterable of month numbers (1-12). Defaults to the whole year.
        city: City slug used in the URL. Defaults to the original
            hard-coded ``'fujianfuzhou'``.
        out_path: Destination CSV. Defaults to the original hard-coded
            ``F:\\数据资源\\天气资料<year>.csv``.

    Raises:
        ValueError: If a month is outside 1..12 (checked before any request).
        requests.RequestException: On timeouts, connection errors or a
            non-success HTTP status.

    Note:
        The file is opened in append mode, so running the same year twice
        duplicates its rows.
    """
    months = list(months)
    # Validate up front so a bad month cannot leave a half-written file.
    for month in months:
        if not 1 <= int(month) <= 12:
            raise ValueError('month must be in 1..12, got %r' % (month,))

    if out_path is None:
        out_path = r"F:\数据资源\天气资料%s.csv" % year

    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}

    for month in months:
        month_tag = '%02d' % int(month)
        url = 'http://www.tianqihoubao.com/lishi/%s/month/%s%s.html' % (city, year, month_tag)

        # A timeout keeps one stalled connection from hanging the whole run;
        # raise_for_status stops error pages from being parsed as data.
        res1 = res.get(url, headers=header, timeout=30)
        res1.raise_for_status()
        # requests falls back to ISO-8859-1 when the server sends no charset;
        # in that case let it sniff the real encoding so the Chinese text
        # survives the later GBK write. No-op when a charset was declared.
        if res1.encoding is None or res1.encoding.lower() == 'iso-8859-1':
            res1.encoding = res1.apparent_encoding
        soup = bs(res1.text, 'lxml')

        print('爬到第%s月了!' % month_tag)
        # The original np.random.randint(1) always returned 0 (upper bound is
        # exclusive), so there was no pause at all. Wait 1-2 s between pages.
        time.sleep(1 + np.random.random())

        rows = _weather_rows(soup)
        if not rows:
            # Nothing parsed for this month; do not touch the output file.
            continue
        pd.DataFrame(rows).to_csv(out_path, encoding='gbk', header=False, mode='a')
- Wea(2015)
复制代码 |
|