[已解决]小甲鱼课程中的代码问题

ColaPlusIce · 发表于 2020-2-6 14:39:15

马上注册，结交更多好友，享用更多功能^_^

您需要登录才可以下载或查看，没有账号？立即注册

x

运行爬虫时，就会报错：
  File "D:\Python(zzt)\甲鱼文件任务\爬虫\GetTop250.py", line 45, in find_depth
soup = bs4.BeautifulSoup(res.text,'html.parser')
AttributeError: 'NoneType' object has no attribute 'text'

还请各位大神帮忙指点！！！
（下面是源代码）

import requests
import bs4
import re
def open_url(url):
headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
res = requests.get(url,headers=headers)

def find_movies(res):
soup=bs4.BeautifulSoup(res.text,'html.parser')

#获取电影名
movies=[]
target = soup.find_all('div',class_='hd')
for each in target:
      movies.append(each.a.span.text)

#获取电影评分
ranks=[]
target = soup.fina_all('span',class_='rating_num')
for each in target:
      ranks.append('%s' % each.text)

#获取电影资料
message=[]
target = soup.find_all('div',class_='bd')
for each in target:
      try:
         message.append(each.p.text.split('\n')[1].strip()+each.p.text.split('\n')[2].strip())
      except:
         continue

result=[]
length = len(movies)
for i in range(length):
   result.append(movies[i]+ranks[i]+message[i]+'\n')
return result

#找出一共有多少个页面
def find_depth(res):
soup = bs4.BeautifulSoup(res.text,'html.parser')
depth = soup.find('span',class_='next').previous_sibling.text
return int(depth)

def main():
host='https://movie.douban.com/top250'
res=open_url(host)
depth = find_depth(res)
result = []
for i in range(depth):
      url=host+ '?start='+str(25*i)+'&filter='
      res = open_url(url)
      result.extend(find_movies(res))


with open ('豆瓣250爬虫下载.txt','w',encode='utf-8')as f:
      for each in result:
         print('打印成功')
         f.write(each)

if __name__=='__main__':
main()

最佳答案

月排行榜 / 总排行榜

zltzlt

2020-2-6 14:43:43

这样试试：

import requests
import bs4
import re
def open_url(url):
headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
res = requests.get(url,headers=headers)
return res
def find_movies(res):
soup=bs4.BeautifulSoup(res.text,'html.parser')
#获取电影名
movies=[]
target = soup.find_all('div',class_='hd')
for each in target:
movies.append(each.a.span.text)
#获取电影评分
ranks=[]
target = soup.fina_all('span',class_='rating_num')
for each in target:
ranks.append('%s' % each.text)
#获取电影资料
message=[]
target = soup.find_all('div',class_='bd')
for each in target:
try:
message.append(each.p.text.split('\n')[1].strip()+each.p.text.split('\n')[2].strip())
except:
continue
result=[]
length = len(movies)
for i in range(length):
result.append(movies[i]+ranks[i]+message[i]+'\n')
return result
#找出一共有多少个页面
def find_depth(res):
soup = bs4.BeautifulSoup(res.text,'html.parser')
depth = soup.find('span',class_='next').previous_sibling.text
return int(depth)
def main():
host='https://movie.douban.com/top250'
res=open_url(host)
depth = find_depth(res)
result = []
for i in range(depth):
url=host+ '?start='+str(25*i)+'&filter='
res = open_url(url)
result.extend(find_movies(res))
with open ('豆瓣250爬虫下载.txt','w',encode='utf-8')as f:
for each in result:
print('打印成功')
f.write(each)
if __name__=='__main__':
main()

复制代码

跳转到最佳答案楼层

zltzlt · 发表于 2020-2-6 14:43:43

这个最佳答案由 zltzlt 给出，感谢 zltzlt 的回答。

单击隐藏图章

这样试试：

import requests
import bs4
import re
def open_url(url):
headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
res = requests.get(url,headers=headers)
return res
def find_movies(res):
soup=bs4.BeautifulSoup(res.text,'html.parser')
#获取电影名
movies=[]
target = soup.find_all('div',class_='hd')
for each in target:
movies.append(each.a.span.text)
#获取电影评分
ranks=[]
target = soup.fina_all('span',class_='rating_num')
for each in target:
ranks.append('%s' % each.text)
#获取电影资料
message=[]
target = soup.find_all('div',class_='bd')
for each in target:
try:
message.append(each.p.text.split('\n')[1].strip()+each.p.text.split('\n')[2].strip())
except:
continue
result=[]
length = len(movies)
for i in range(length):
result.append(movies[i]+ranks[i]+message[i]+'\n')
return result
#找出一共有多少个页面
def find_depth(res):
soup = bs4.BeautifulSoup(res.text,'html.parser')
depth = soup.find('span',class_='next').previous_sibling.text
return int(depth)
def main():
host='https://movie.douban.com/top250'
res=open_url(host)
depth = find_depth(res)
result = []
for i in range(depth):
url=host+ '?start='+str(25*i)+'&filter='
res = open_url(url)
result.extend(find_movies(res))
with open ('豆瓣250爬虫下载.txt','w',encode='utf-8')as f:
for each in result:
print('打印成功')
f.write(each)
if __name__=='__main__':
main()

复制代码

ColaPlusIce · 发表于 2020-2-6 14:48:56

zltzlt 发表于 2020-2-6 14:43
这样试试：

还是会报错：
File "D:\Program Files (x86)\python\lib\site-packages\bs4\element.py", line 871, in __getattr__
self.__class__.__name__, attr))
AttributeError: 'NavigableString' object has no attribute 'text'

ColaPlusIce · 发表于 2020-2-6 14:55:21

本帖最后由 ColaPlusIce 于 2020-2-6 14:57 编辑

可以了，感谢！
encoding 错打成了encode
import requests
import bs4
import re
def open_url(url):
headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
res = requests.get(url,headers=headers)
return res

def find_movies(res):
soup=bs4.BeautifulSoup(res.text,'html.parser')

#获取电影名
movies=[]
target = soup.find_all('div',class_='hd')
for each in target:
      movies.append(each.a.span.text)

#获取电影评分
ranks=[]
target = soup.find_all('span',class_='rating_num')
for each in target:
      ranks.append('%s' % each.text)

#获取电影资料
message=[]
target = soup.find_all('div',class_='bd')
for each in target:
      try:
         message.append(each.p.text.split('\n')[1].strip()+each.p.text.split('\n')[2].strip())
      except:
         continue

result=[]
length = len(movies)
for i in range(length):
   result.append(movies[i]+ranks[i]+message[i]+'\n')
return result

#找出一共有多少个页面
def find_depth(res):
soup = bs4.BeautifulSoup(res.text,'html.parser')
depth = soup.find('span',class_='next').previous_sibling.previous_sibling.text
return int(depth)

def main():
host='https://movie.douban.com/top250'
res=open_url(host)
depth = find_depth(res)
result = []
for i in range(depth):
      url=host+ '?start='+str(25*i)+'&filter='
      res = open_url(url)
      result.extend(find_movies(res))


with open ('豆瓣250爬虫下载.txt','w',encoding='utf-8')as f:
      for each in result:
         print('打印成功')
         f.write(each)

if __name__=='__main__':
main()

账号		自动登录	找回密码
密码			立即注册

[已解决]小甲鱼课程中的代码问题

马上注册，结交更多好友，享用更多功能^_^

浏览过的版块