|
发表于 2020-6-3 07:45:36
|
显示全部楼层
本楼为最佳答案

你看 3 楼啊，被无视的感觉有点不舒服。这样改就可以了：
豆瓣页面上爬到的电影名称有时不在 a.span 里，而是直接在 a 标签里，所以要加个判断，具体看代码吧：
- import requests
- from bs4 import BeautifulSoup
- import re
def open_url(url, timeout=10):
    """Fetch *url* with a desktop-browser User-Agent and return the response.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The response object produced by ``requests.get``. The HTTP status
        code is not checked here.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
    return requests.get(url, headers=headers, timeout=timeout)
def find_depth(res):
    """Placeholder that is not implemented yet; always returns ``None``."""
    return None
def biaoti(res):
    """Extract title, rating and description rows from a fetched page.

    Args:
        res: Response-like object whose ``text`` attribute holds the HTML.

    Returns:
        A list with one string per title found, shaped
        ``<title>-><rating>-><description>`` and terminated by a newline.
        A rating or description that is missing for a title is left empty
        instead of raising ``IndexError``.
    """
    req = BeautifulSoup(res.text, 'html.parser')

    # Movie titles
    movies = []
    for block in req.find_all('div', class_=['pl2']):
        anchor = block.a
        if anchor is None:
            # No link inside this block, so there is no title to read.
            continue
        if anchor.span:
            movies.append(anchor.span.text)
        else:
            # Some entries keep the name directly in the <a> tag.
            movies.append(anchor.text.strip())
    print(movies)

    # Ratings
    ranks = [tag.text for tag in req.find_all('span', class_="rating_nums")]
    print(ranks)

    # Descriptions
    messages = [tag.text for tag in req.find_all("p", class_=['pl'])]

    # Rows are paired purely by position.
    # NOTE(review): this assumes titles, ratings and descriptions appear in
    # the same order on the page; confirm against the live markup.
    # Pad the shorter lists so every title still yields a row; multiplying
    # by a non-positive count adds nothing.
    ranks.extend([''] * (len(movies) - len(ranks)))
    messages.extend([''] * (len(movies) - len(messages)))

    # zip stops at the shortest input, which is now ``movies``.
    result = [
        title + '->' + rank + '->' + message + '\n'
        for title, rank, message in zip(movies, ranks, messages)
    ]
    print(result)
    return result
def main():
    """Fetch the Douban movie chart page and save the extracted rows to a text file."""
    response = open_url('https://movie.douban.com/chart')
    rows = biaoti(response)
    with open('豆瓣本周排行名单.txt', 'w', encoding='utf-8') as out:
        # Each row is written followed by one extra newline.
        out.writelines(row + '\n' for row in rows)
# Run the scraper only when this file is executed as a script, so that
# importing it does not trigger a network request and a file write.
if __name__ == '__main__':
    main()
复制代码 |
|