|
发表于 2023-5-31 15:20:24
|
显示全部楼层
错误:
1. Import应为import,Python关键字应该小写。
2. if _name__== '_main_':中的双下划线应为单下划线,应为if __name__ == '__main__':。
下面是已修正的代码:
- import requests
- import pandas as pd
- from bs4 import BeautifulSoup
- import matplotlib.pyplot as plt
# Fetch a Baidu search results page
def get_search_result(query):
    """Fetch the Baidu search results page for *query* and return its HTML text.

    The response encoding is set from ``apparent_encoding`` so Chinese text
    decodes correctly before ``.text`` is read.
    """
    search_url = f'https://www.baidu.com/s?wd={query}'
    request_headers = {
        'Accept-Encoding': 'gzip, deflate, br',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36',
    }
    resp = requests.get(search_url, headers=request_headers)
    resp.encoding = resp.apparent_encoding
    return resp.text
# Parse the search results page and extract the reported result count
def get_result_count(query):
    """Return the number of results Baidu reports for *query*.

    Returns 0 when the result-count element is missing from the page or
    contains no digits, instead of raising AttributeError/ValueError as the
    original code would.
    """
    html = get_search_result(query)
    soup = BeautifulSoup(html, 'html.parser')
    # NOTE(review): id='result-stats' looks like Google's markup; Baidu's
    # result-count element may use a different id/class — verify against a
    # live page.
    stats_node = soup.find(id='result-stats')
    if stats_node is None:
        # Element not present — report zero rather than crashing.
        return 0
    digits = ''.join(filter(str.isdigit, stats_node.text))
    # int('') raises ValueError, so guard against a digit-free string.
    return int(digits) if digits else 0
# Parse a Baidu Baike (encyclopedia) page and extract its basic-info table
def get_baike_info(query):
    """Fetch the Baidu Baike page for *query* and return its basic-info box.

    Returns a dict mapping field names (from ``<dt>`` cells) to cleaned field
    values (from ``<dd>`` cells) of the first ``table.basicInfo`` element, or
    ``None`` when the page has no such table or unexpected markup.
    """
    url = f'https://baike.baidu.com/item/{query}'
    headers = {
        'Accept-Encoding': 'gzip, deflate, br',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, 'html.parser')
    try:
        info_tables = soup.find_all('table', class_='basicInfo')
        # Renamed from `headers` to avoid shadowing the request-headers dict above.
        field_names = [dt.text.strip() for dt in info_tables[0].find_all('dt')]
        field_values = []
        for dd in info_tables[0].find_all('dd'):
            # Strip newlines and non-breaking spaces that Baike embeds in values.
            field_values.append(dd.text.strip().replace('\n', '').replace('\xa0', ''))
        return dict(zip(field_names, field_values))
    except (IndexError, AttributeError):
        # IndexError: no basicInfo table found (info_tables is empty).
        # AttributeError: unexpected markup during parsing.
        # Narrowed from a bare `except:`, which also swallowed KeyboardInterrupt
        # and masked real bugs.
        return None
def main():
    """Search Baidu for the query, enrich each result with Baike info, and
    plot disaster count vs. agricultural output as a scatter chart."""
    query = '农业 湿度温度自然灾害次数 农业产量'
    result_count = get_result_count(query)
    print(f'总共找到{result_count}个相关搜索结果\n')

    info_list = []
    # NOTE(review): range(0, 10, 10) yields only page 0 — widen the stop value
    # (e.g. range(0, 100, 10)) if more pages are actually wanted.
    for page in range(0, 10, 10):
        # Bug fix: the original built a full URL and passed it to
        # get_search_result(), which wrapped it in ANOTHER search URL
        # (wd=https://www.baidu.com/s?wd=...). Pass the query term (plus the
        # page offset) instead, so the same final URL is requested once.
        html = get_search_result(f'{query}&pn={page}')
        soup = BeautifulSoup(html, 'html.parser')
        results = soup.find_all('div', class_='result')
        for result in results:
            title = result.find('h3').text
            link = result.find('a')['href']
            baike_info = get_baike_info(title)
            if baike_info is not None:
                info = {
                    '标题': title,
                    '链接': link,
                    '温度': baike_info.get('适宜温度'),
                    '湿度': baike_info.get('湿度'),
                    '自然灾害次数': baike_info.get('自然灾害频率'),
                    '农业产量': baike_info.get('农业产量')
                }
                info_list.append(info)

    # Guard: pd.DataFrame([]) has no columns, so df['温度'] below would raise
    # KeyError. Bail out early when nothing was scraped.
    if not info_list:
        print('未获取到有效数据,跳过绘图')
        return

    # Convert the collected records to a DataFrame
    df = pd.DataFrame(info_list)
    # Coerce the numeric columns to float (None values become NaN)
    df['温度'] = df['温度'].astype('float')
    df['湿度'] = df['湿度'].astype('float')
    df['自然灾害次数'] = df['自然灾害次数'].astype('float')
    df['农业产量'] = df['农业产量'].astype('float')

    # Draw the scatter plot
    plt.scatter(df['自然灾害次数'], df['农业产量'])
    plt.xlabel('自然灾害次数')
    plt.ylabel('农业产量')

    # Show the chart
    plt.show()


if __name__ == '__main__':
    main()
复制代码
这是修正后的代码,现在应该可以运行并绘制散点图。请确保你的环境中已安装必要的库(requests、pandas、beautifulsoup4、matplotlib)。如果没有安装,请使用以下命令安装:
- pip install requests pandas beautifulsoup4 matplotlib
复制代码 |
|