|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
这个是原代码,运行后没有报错,可是没有文件出来,不知道哪里错了。
- import requests
- import bs4
- import random
- import openpyxl
- def open_url(url):
- '''
- ip_poor = ['61.19.145.66:8080','105.27.237.27:80','110.36.239.234:8080'\
- '110.39.187.50:49850','159.192.141.89:8080','162.243.244.206:80'\
- '184.149.34.86:51529','181.118.167.104:80','3.6.99.110:80']
- proxies = {'http':random.choice(ip_poor)}
- '''
- headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'}
-
- #html = request.get(url=url, headers=headers, proxies=proxies)
- html = request.get(url,headers=headers)
-
- return html
- def find_info(html):
- content = bs4.BeautifulSoup(html.text,'html.parser')
- #电影名
- movie_name = []
- target_name = content.find_all('div',class_='hd')
- for each_name in target_name:
- movie_name.append(each_name.a.span.text)
- #评分
- movie_score = []
- target_score = content.find_all('span',class_='rating5-t')
- for each_score in target_score:
- movie_score.append(each_score.text)
- #信息
- movie_info = []
- target_info = content.find_all('div',class_='bd')
- for each_info in target_score:
- try:
- movie_info.append(each_info.p.text.split('\n')[1].strip() + each_info.p.text.split('\n')[2].strip())
- except:
- continue
- content = []
- for i in range(len(movie_name)):
- result.append(movie_name[i] + movie_score[i] + movie_info[i])
- return contenr
- def next_page(html):
- target = bs4.BeautifulSoup(html.text,'html.parser')
- next_path = target.find_all('span',class_='next').previous_sibling.previous_sibling.text
- return int(next_path)
- def save_excel(content):
- wb = openpyxl.Workbook()
- ws = wb.active
- ws['A1'] = '电影名'
- ws['B1'] = '评分'
- ws['C1'] = '电影信息'
- for each in content:
- ws.append(each)
- wb.save(r'D:\Desktop\豆瓣TOP250电影.xlsx')
- def main():
- url = 'https://movie.douban.com/top250'
- html = open_url(url)
- next_path = next_path(html)
- content = []
- for i in range(next_path):
- url_next = url + '/?start=' + str(25 * i)
- html = open_url(url_next)
- content.extend(find_info(html))
- save_excel(content)
- if __name__ == 'main':
- main()
复制代码
本帖最后由 Twilight6 于 2020-7-2 23:32 编辑
第16行使用 requests 模块少了个 s
错误:html = request.get(url,headers=headers)
正确:html = requests.get(url,headers=headers)
第 44 行~ 46 行,result 不知从何而来,应该是你不小心写成了 content = []
错误:
- content = []
- for i in range(len(movie_name)):
- result.append(movie_name[i] + movie_score[i] + movie_info[i])
- return contenr
复制代码
正确:
- result = []
- for i in range(len(movie_name)):
- result.append(movie_name[i] + movie_score[i] + movie_info[i])
- return result
复制代码
第 68 行,next_page 写成了 next_path:
错误:next_path = next_path(html)
正确:next_path = next_page(html)
暂时就找到这么多错误,是否运行成功我还没看
|
|