import requests
from bs4 import BeautifulSoup

def parse_page(url):  # parse one regional page of the text forecast
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0'}
    response = requests.get(url, headers=headers)
    # If the decoded page is garbled, response.text guessed the wrong encoding,
    # so decode the raw bytes explicitly with response.content.decode('utf-8') instead.
    text = response.content.decode('utf-8')
    soup = BeautifulSoup(text, 'lxml')
    conMidtab = soup.find('div', class_='conMidtab')
    tables = conMidtab.find_all('table')  # one table per province
    for table in tables:
        trs = table.find_all('tr')[2:]  # skip the two header rows
        for index, tr in enumerate(trs):
            tds = tr.find_all('td')
            # In the first data row of each table the first <td> holds the
            # province name, so the city name sits in the second <td>.
            city_td = tds[1] if index == 0 else tds[0]
            city = list(city_td.stripped_strings)[0]
            temp_td = tds[-2]  # second-to-last column: the night-time minimum temperature
            min_temp = list(temp_td.stripped_strings)[0]
            print({'city': city, 'min_temp': min_temp})

def main():
    url = 'http://www.weather.com.cn/textFC/hb.shtml'  # North China page
    parse_page(url)

if __name__ == '__main__':
    main()
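
The script above only crawls the North China page. To cover the rest of the country you can loop over the other regional pages; the following is a minimal sketch, assuming the other pages follow the same textFC/<code>.shtml naming pattern (db = Northeast, hd = East China, hz = Central China, hn = South China, xb = Northwest, xn = Southwest, gat = Hong Kong/Macao/Taiwan) and the same table layout:

def main():
    base_url = 'http://www.weather.com.cn/textFC/{}.shtml'
    # The region codes below are an assumption based on the hb.shtml URL used above.
    for region in ['hb', 'db', 'hd', 'hz', 'hn', 'xb', 'xn', 'gat']:
        parse_page(base_url.format(region))

If the last (gat) page comes back with missing or empty tables, its HTML is reportedly not well formed; the usual workaround is to parse that page with the more tolerant 'html5lib' parser instead of 'lxml'.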