|
发表于 2023-7-8 19:14:17
|
显示全部楼层
"""Print the minimum temperature of every listed city, day by day.

Fetches the text-forecast page at ``url``, then walks its structure:
one container per forecast day, one ``div`` per province/municipality
inside each day, and one table row per city inside each province.
"""
import requests
from parsel import Selector

url = 'http://www.weather.com.cn/textFC/hb.shtml'

# Seconds to wait for the server; without a timeout requests.get() can
# block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10

_TAB_ROOT = 'body > div.lqcontentBoxH > div.contentboxTab > div > div'
DAY_LABEL_CSS = f'{_TAB_ROOT} > ul.day_tabs > li::text'
DAY_BLOCK_CSS = f'{_TAB_ROOT} > div.hanml > div'


def fetch_html(page_url):
    """Download ``page_url`` and return its body decoded as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if no response arrives within REQUEST_TIMEOUT.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of scraping an error page.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def print_province(province_info):
    """Print ``city, minimum temperature`` for each city row of one province.

    ``province_info`` is the selector for one province/municipality ``div``.
    """
    # The third table row holds the first city. Its city name is in the 2nd
    # cell and its minimum temperature in the 8th, i.e. shifted by one
    # compared with the later rows (presumably because of an extra leading
    # province-name cell -- confirm against the page markup).
    city = province_info.css(
        'table > tr:nth-child(3) > td:nth-child(2) > a::text').get()
    min_temperature = province_info.css(
        'table > tr:nth-child(3) > td:nth-child(8)::text').get()
    print(f'城市: {city}, 最低气温: {min_temperature}')

    # Remaining cities: skip the first three rows (handled above / headers).
    for city_info in province_info.css('table > tr')[3:]:
        city = city_info.css('td:nth-child(1) > a::text').get()
        min_temperature = city_info.css('td:nth-child(7)::text').get()
        print(f'城市: {city}, 最低气温: {min_temperature}')


def main():
    """Fetch the forecast page and print every day's minimum temperatures."""
    selector = Selector(fetch_html(url))
    day_labels = selector.css(DAY_LABEL_CSS).getall()

    # Walk the per-day containers on the page.
    for i, day_info in enumerate(selector.css(DAY_BLOCK_CSS)):
        # Guard against fewer date tabs than day containers, which would
        # otherwise raise IndexError.
        label = day_labels[i] if i < len(day_labels) else f'#{i + 1}'
        print(f'{label}:\n')

        # Walk each province / municipality of that day.
        for province_info in day_info.xpath('./div'):
            print_province(province_info)

        # NOTE(review): the original paste lost its indentation, so whether
        # this separator belongs after each day or after each province
        # cannot be recovered; it is emitted once per day here -- confirm.
        print('\n', '-' * 50, '\n', sep='')


if __name__ == '__main__':
    main()
复制代码 |
|