第二版书上爬虫的问题

阿妃 · 发表于 2019-10-23 15:25:14

马上注册，结交更多好友，享用更多功能^_^

您需要登录才可以下载或查看，没有账号？立即注册

x

为什么运行时报错，提示变量未定义？    NameError: name 'depth' is not defined

# p14_2.py
import requests
import bs4
import re

def open_url(url):
# Intended to download the page at `url` and return the response object.
# Use a proxy
# proxies = {"http":"127.0.0.1:1080","https":"127.0.0.1:1080"}
# Browser-like User-Agent header; as posted it is only referenced by the
# commented-out request below.
headers = {'user-agent':'Mozilla/5.0 (windows NT 10.0; WOW64)AppleWebKit/537.36(KHTML,like Gecko)Chrome/57.0.298.98 Safari/537.36'}
# res = requests.get(url,headers=headers,proxies=proxies)
# NOTE(review): `res` is never assigned in this function because the request
# above is commented out, so this return raises NameError unless a
# module-level `res` exists.
return res

def find_movies(res):
# Parse the HTML in res.text and build one output line per movie:
# title + rating + details + newline.
soup = bs4.BeautifulSoup(res.text,'html.parser')
# Movie titles
movies = []
targets = soup.find_all("div",class_="hd")
# NOTE(review): `tergets` is not defined anywhere in this listing; it looks
# like a typo for `targets` and would raise NameError.
for each in tergets:
      movies.append(each.a.span.text)
# Ratings
ranks = []
targets = soup.find_all("span",class_="rating_num")
for each in targets:
      ranks.append('评分: %s ' % each.text)
# Details
messages = []
targets = soup.find_all("div",class_="bd")
for each in targets:
      # Joins the second and third lines of the <p> text; any element for
      # which that fails is skipped by the bare except below.
      try:
         messages.append(each.p.text.split('\n')[1].strip()+each.p.text.split('\n')[2].strip())
      except:
         continue

# NOTE(review): the three lists are combined by index up to len(movies); if
# `ranks` or `messages` ends up shorter, this raises IndexError.
result = []
length = len(movies)
for i in range(length):
      result.append(movies[i] + ranks[i] + messages[i] + '\n')
return result

# Find out how many pages there are in total
def find_depth(res):
soup = bs4.BeautifulSoup(res.text,'html.parser')
# Takes the text of the node two siblings before <span class="next"> and
# converts it to int. Presumably that node is the link to the last page;
# verify against the actual page markup.
depth = soup.find('span',class_='next').previous_sibling.previous_sibling.text
return int(depth)

def main():
# Entry point: fetch the first page, determine the page count, scrape each
# page and write the collected lines to a text file.
host = "https://movie.douban.com/top250"
res = open_url(host)
depth = find_depth(res)
result = []

# NOTE(review): per the NameError reported in the post, this loop sits outside
# main() in the poster's file, where `depth` is not defined.
for i in range(depth):
# NOTE(review): the value is assigned to `ulr` but the next line reads `url`,
# which is not defined here. Also, `25 + 1` is the constant 26 and does not
# depend on `i`, so every iteration would build the same address.
ulr = host + '/?start=' + str(25 + 1)
res = open_url(url)
result.extend(find_movies(res))

# Write every collected line to a UTF-8 text file.
with open("豆瓣 TOP250 电影.txt","w",encoding="utf-8") as f:
for each in result:
      f.write(each)

# Call main() only when this file is run as a script.
if __name__ == "__main__":
main()

Python3005 · 发表于 2019-10-23 18:40:53

你的 depth 是在 main 函数内部定义的局部变量，而最后的 for 循环没有缩进、位于 main 函数之外，所以引用不到它。

zltzlt · 发表于 2019-10-23 19:50:20

应该把 for i in range(depth): 以及后面的内容增加一个缩进，同时 ulr 要改成 url

# p14_2.py
import requests
import bs4
import re
def open_url(url):
    """Fetch *url* with a browser-like User-Agent and return the response.

    Args:
        url: Address of the page to download.

    Returns:
        The response object returned by ``requests.get``.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # To go through a local proxy instead, build a mapping such as
    # {"http": "127.0.0.1:1080", "https": "127.0.0.1:1080"} and pass it to
    # requests.get() via the ``proxies`` argument.
    headers = {'user-agent': 'Mozilla/5.0 (windows NT 10.0; WOW64)AppleWebKit/537.36(KHTML,like Gecko)Chrome/57.0.298.98 Safari/537.36'}
    # Send the request directly; the timeout keeps a stalled connection from
    # blocking the script forever.
    res = requests.get(url, headers=headers, timeout=10)
    return res
def find_movies(res):
    """Extract one formatted line per movie from a downloaded page.

    Args:
        res: Response object whose ``text`` attribute holds the page HTML.

    Returns:
        A list of strings. Each string is the movie title, followed by the
        rating prefixed with ``'评分: '``, followed by the details text,
        and ends with a newline.
    """
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # Movie titles
    movies = [each.a.span.text for each in soup.find_all("div", class_="hd")]

    # Ratings
    ranks = ['评分: %s ' % each.text
             for each in soup.find_all("span", class_="rating_num")]

    # Details: the second and third lines of the first <p> inside each
    # "bd" block, stripped and joined.
    messages = []
    for each in soup.find_all("div", class_="bd"):
        try:
            lines = each.p.text.split('\n')
            messages.append(lines[1].strip() + lines[2].strip())
        except (AttributeError, IndexError):
            # Skip "bd" blocks that have no <p> or too few lines of text;
            # presumably these are not movie entries.
            continue

    # zip() stops at the shortest list, so a missing rating or details entry
    # cannot raise IndexError here.
    return [title + rank + message + '\n'
            for title, rank, message in zip(movies, ranks, messages)]
# Find out how many pages there are in total
def find_depth(res):
    """Return the page count read from the pagination area of the page.

    Args:
        res: Response object whose ``text`` attribute holds the page HTML.

    Returns:
        The integer value of the text found two siblings before the
        ``<span class="next">`` element.

    Raises:
        AttributeError: If no ``<span class="next">`` element (or the
            expected sibling) is present in the page.
        ValueError: If the located text is not an integer.
    """
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    next_span = soup.find('span', class_='next')
    # The immediately preceding sibling is skipped; presumably it is a
    # whitespace text node between the tags — verify against the page markup.
    depth = next_span.previous_sibling.previous_sibling.text
    return int(depth)
def main():
    """Scrape every page of the Douban Top 250 list into a text file.

    Fetches the first page to determine the page count, downloads each page
    in turn, and writes the collected lines to ``豆瓣 TOP250 电影.txt``
    (UTF-8) in the current working directory.
    """
    host = "https://movie.douban.com/top250"
    res = open_url(host)
    depth = find_depth(res)

    result = []
    for i in range(depth):
        # Pages are selected through the ``start`` offset, so it must grow
        # with the page index; 25 entries per page is assumed.
        url = host + '/?start=' + str(25 * i)
        res = open_url(url)
        result.extend(find_movies(res))

    with open("豆瓣 TOP250 电影.txt", "w", encoding="utf-8") as f:
        # Each entry already ends with a newline.
        f.writelines(result)
# Call main() only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()

复制代码

账号		自动登录	找回密码
密码			立即注册