| 
 | 
 
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册  
 
x
 
 本帖最后由 是阿佳! 于 2022-1-18 14:08 编辑  
 
已经用代理、写headers,但是还是出现这样的问题 
 
- import pickle, random, requests, bs4
 
  
- def loadips():
 
  
-     with open('ips2.pkl', 'rb') as f:
 
 -         ips = pickle.load(f)
 
  
-     return ips
 
  
 
 
- def getSoup(ips):
 
 -     
 
 -     headers = {"User-Agent":"ozilla/5.0 (Windows NT 10.0; " \
 
 -                + "Win64; x64) AppleWebKit/537.36 (KHTML, like " \
 
 -                + "Gecko) Chrome/97.0.4692.71 Safari/537.36"}
 
 -     
 
 -     hosts, nums, soups = [], [i*25 for i in range(10)], []
 
  
 
-     for i in nums:
 
 -         hosts.append("https://movie.douban.com/top250?start=" + str(i) +"&filter=")
 
  
 
-     for i in hosts:
 
 -         
 
 -         proxy = {'http': random.choice(ips)}
 
 -         res = requests.get(i, \
 
 -                        headers=headers, proxies=proxy)
 
  
-         html = bs4.BeautifulSoup(res.text, "html.parser")
 
 -         soups.append(html)
 
 -         print(proxy)
 
 -         print(html)
 
 -         print('='*100)
 
  
-     return soups
 
  
 
 
- getSoup(loadips())
 
  
  复制代码 
 
打印结果: 
 
 
- {'http': '219.246.65.55:80'}
 
 - <html>
 
 - <head>
 
 - <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
 
 - <meta content="width=device-width, initial-scale=1" name="viewport"/>
 
 - <title>豆瓣 - 登录跳转页</title>
 
 - <style type="text/css">
 
 -         body{font-family:Arial,Helvetica,sans-serif;font-size:14px;}
 
 -         h1{font-size:25px;margin:25px 0 10px 0;}
 
 -     </style>
 
 - </head>
 
 - <body>
 
 - <div>
 
 - <div style="margin:20px auto;">
 
 - <div style="font-size:25px;color:#1b9336;border-bottom:5px solid #eef9eb">
 
 - <span style="font-size:20px;font-weight:bold">豆瓣</span> d<span style="color:#0092c8">o</span><span style="color:#ffad68">u</span><span>b</span><span style="color:#0092c8">a</span><span style="color:#ffad68">n</span>
 
 - </div>
 
 - <h1>登录跳转</h1>
 
 - <div><p>有异常请求从你的 IP 发出,请 <a href="https://accounts.douban.com/passport/login?redir=https%3A%2F%2Fmovie.douban.com%2Ftop250%3Fstart%3D0%26filter%3D">登录</a> 使用豆瓣</p></div>
 
 - </div>
 
 - </div>
 
 - </body>
 
 - </html>
 
  
- ====================================================================================================
 
 - {'http': '222.74.73.202:42055'}
 
 - <html>
 
 - <head>
 
 - <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
 
 - <meta content="width=device-width, initial-scale=1" name="viewport"/>
 
 - <title>豆瓣 - 登录跳转页</title>
 
 - <style type="text/css">
 
 -         body{font-family:Arial,Helvetica,sans-serif;font-size:14px;}
 
 -         h1{font-size:25px;margin:25px 0 10px 0;}
 
 -     </style>
 
 - </head>
 
 - <body>
 
 - <div>
 
 - <div style="margin:20px auto;">
 
 - <div style="font-size:25px;color:#1b9336;border-bottom:5px solid #eef9eb">
 
 - <span style="font-size:20px;font-weight:bold">豆瓣</span> d<span style="color:#0092c8">o</span><span style="color:#ffad68">u</span><span>b</span><span style="color:#0092c8">a</span><span style="color:#ffad68">n</span>
 
 - </div>
 
 - <h1>登录跳转</h1>
 
 - <div><p>有异常请求从你的 IP 发出,请 <a href="https://accounts.douban.com/passport/login?redir=https%3A%2F%2Fmovie.douban.com%2Ftop250%3Fstart%3D25%26filter%3D">登录</a> 使用豆瓣</p></div>
 
 - </div>
 
 - </div>
 
 - </body>
 
 - </html>
 
  
- ====================================================================================================
 
 - {'http': '59.63.74.63:8118'}
 
 - <html>
 
 - <head>
 
 - <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
 
 - <meta content="width=device-width, initial-scale=1" name="viewport"/>
 
 - <title>豆瓣 - 登录跳转页</title>
 
 - <style type="text/css">
 
 -         body{font-family:Arial,Helvetica,sans-serif;font-size:14px;}
 
 -         h1{font-size:25px;margin:25px 0 10px 0;}
 
 -     </style>
 
 - </head>
 
 - <body>
 
 - <div>
 
 - <div style="margin:20px auto;">
 
 - <div style="font-size:25px;color:#1b9336;border-bottom:5px solid #eef9eb">
 
 - <span style="font-size:20px;font-weight:bold">豆瓣</span> d<span style="color:#0092c8">o</span><span style="color:#ffad68">u</span><span>b</span><span style="color:#0092c8">a</span><span style="color:#ffad68">n</span>
 
 - </div>
 
 - <h1>登录跳转</h1>
 
 - <div><p>有异常请求从你的 IP 发出,请 <a href="https://accounts.douban.com/passport/login?redir=https%3A%2F%2Fmovie.douban.com%2Ftop250%3Fstart%3D50%26filter%3D">登录</a> 使用豆瓣</p></div>
 
 - </div>
 
 - </div>
 
 - </body>
 
 - </html>
 
 
  复制代码 
 
我不用代理也是这个样子 
- import requests, bs4
 
  
- def getSoup():
 
 -     
 
 -     headers = {"User-Agent":"ozilla/5.0 (Windows NT 10.0; " \
 
 -                + "Win64; x64) AppleWebKit/537.36 (KHTML, like " \
 
 -                + "Gecko) Chrome/97.0.4692.71 Safari/537.36"}
 
 -     
 
 -     hosts, nums, soups = [], [i*25 for i in range(10)], []
 
  
 
-     for i in nums:
 
 -         hosts.append("https://movie.douban.com/top250?start=" + str(i) +"&filter=")
 
  
 
-     for i in hosts:
 
  
-         res = requests.get(i, \
 
 -                        headers=headers) 
 
  
-         html = bs4.BeautifulSoup(res.text, "html.parser")
 
 -         soups.append(html)
 
 -         print(html)
 
 -         print('='*100)
 
  
-     return soups
 
  
 
 
- getSoup()
 
 
  复制代码- <html>
 
 - <head>
 
 - <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
 
 - <meta content="width=device-width, initial-scale=1" name="viewport"/>
 
 - <title>豆瓣 - 登录跳转页</title>
 
 - <style type="text/css">
 
 -         body{font-family:Arial,Helvetica,sans-serif;font-size:14px;}
 
 -         h1{font-size:25px;margin:25px 0 10px 0;}
 
 -     </style>
 
 - </head>
 
 - <body>
 
 - <div>
 
 - <div style="margin:20px auto;">
 
 - <div style="font-size:25px;color:#1b9336;border-bottom:5px solid #eef9eb">
 
 - <span style="font-size:20px;font-weight:bold">豆瓣</span> d<span style="color:#0092c8">o</span><span style="color:#ffad68">u</span><span>b</span><span style="color:#0092c8">a</span><span style="color:#ffad68">n</span>
 
 - </div>
 
 - <h1>登录跳转</h1>
 
 - <div><p>有异常请求从你的 IP 发出,请 <a href="https://accounts.douban.com/passport/login?redir=https%3A%2F%2Fmovie.douban.com%2Ftop250%3Fstart%3D0%26filter%3D">登录</a> 使用豆瓣</p></div>
 
 - </div>
 
 - </div>
 
 - </body>
 
 - </html>
 
  
- ====================================================================================================
 
 - <html>
 
 - <head>
 
 - <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
 
 - <meta content="width=device-width, initial-scale=1" name="viewport"/>
 
 - <title>豆瓣 - 登录跳转页</title>
 
 - <style type="text/css">
 
 -         body{font-family:Arial,Helvetica,sans-serif;font-size:14px;}
 
 -         h1{font-size:25px;margin:25px 0 10px 0;}
 
 -     </style>
 
 - </head>
 
 - <body>
 
 - <div>
 
 - <div style="margin:20px auto;">
 
 - <div style="font-size:25px;color:#1b9336;border-bottom:5px solid #eef9eb">
 
 - <span style="font-size:20px;font-weight:bold">豆瓣</span> d<span style="color:#0092c8">o</span><span style="color:#ffad68">u</span><span>b</span><span style="color:#0092c8">a</span><span style="color:#ffad68">n</span>
 
 - </div>
 
 - <h1>登录跳转</h1>
 
 - <div><p>有异常请求从你的 IP 发出...
 
  复制代码 
 
但是用浏览器却能正常访问,是不是cookie的问题????? 
 
这里改UA,加上cookie和time.sleep(1)即可
是你的UA的问题吧,“Mozilla”少了个“M” 
 
 
 |   
 
 
 
 |