豆瓣更新后,模拟登入豆瓣不成功
import requests

# JSON endpoint that receives the login form.
url_one = 'https://accounts.douban.com/j/mobile/login/basic'
url = 'https://www.douban.com/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}
# Form fields posted to the login endpoint; 'name' and 'password' hold
# placeholder text and must be replaced with real credentials.
data = {
    'ck': '',
    'name': '账号',
    'password': '密码',
    'remember': 'false',
    'ticket': ''
}
s = requests.session()
# Request the endpoint once before posting. The author reports that skipping
# this GET makes the POST answer "parameter_missing" instead.
r1 = s.get(url_one, headers=headers)
r2 = s.post(url_one, headers=headers, data=data)
# Print the raw JSON reply so the status/message fields can be inspected.
print(r2.text)
结果:{"status":"failed","message":"captcha_required","description":"需要图形验证码","payload":{"tc_app_id":"2044348370","captcha_signature_sample":"12:8,21:3","touch_cap_url":"https:\/\/ssl.captcha.qq.com\/TCap
若倒数第三行不执行:
结果:{"status":"failed","message":"parameter_missing","description":"参数缺失","payload":{}}
求各位大神,如何解决豆瓣模拟登入问题 验证码这东西不好搞啊 wp231957 发表于 2022-4-17 12:09
验证码这东西不好搞啊
不是验证码的问题:只要不被网站检测到,就不会出现验证码。我采取了IP代理的方法:准备200个IP地址,每次循环随机选择一个进行爬取;再用Fiddler抓包获取cookie,并在post与get请求中都添加cookie参数,这样就可以成功了。 本帖最后由 饮酒 于 2022-4-18 09:44 编辑
# Load the proxy list once at import time: one list element per line of the
# file, each still carrying its trailing newline.
with open("C:\\Users\\ASUS\\Desktop\\LunWen\\ip代理.txt") as f:
    iplist = list(f)
def getip(self):
    """Pick one proxy at random from ``iplist`` and build a proxy mapping.

    The chosen address is stored on ``self.proxy`` and the mapping on
    ``self.proxies``.

    Returns:
        dict: ``{'http': 'http://<address>'}`` for the chosen address.

    Raises:
        ValueError: if ``iplist`` contains no non-blank lines.
    """
    import random  # local import so the block does not rely on a file-level one

    # Strip line endings and skip blank lines so an empty entry is never chosen.
    # Assumes each line is a bare proxy address such as host:port -- TODO confirm.
    candidates = [line.strip() for line in iplist if line.strip()]
    if not candidates:
        raise ValueError("iplist contains no proxy addresses")

    # Previously the whole list was assigned here, so the following
    # str.replace call failed; select a single entry instead.
    self.proxy = random.choice(candidates)
    # NOTE(review): only the 'http' scheme is mapped, so requests to https://
    # URLs will not be routed through this proxy -- confirm that is intended.
    self.proxies = {
        'http': 'http://' + self.proxy,
    }
    return self.proxies
def login(self):
    """POST the login form through a proxy and print the JSON reply.

    Sends ``self.login_data`` to ``self.login_url`` with a hard-coded set of
    cookies, stores the cookies set by the response on ``self.cookies`` as a
    plain dict, and prints the decoded JSON body.
    """
    # Hard-coded Cookie header string; it embeds session-specific values.
    raw_cookie = 'll="118209"; bid=JzDcvUA1bCg; push_noty_num=0; push_doumail_num=0; __utmv=30149280.25606; apiKey=; __utmc=30149280; last_login_way=account; login_start_time=1650193218938; __utma=30149280.780716301.1650174263.1650191791.1650202452.5; __utmz=30149280.1650202452.5.4.utmcsr=so.com|utmccn=(referral)|utmcmd=referral|utmcct=/link; __utmt=1; __utmb=30149280.2.9.1650202452'
    # The ``cookies`` argument expects a name -> value mapping. Passing the
    # whole header under a single 'Cookie' key would send one cookie that is
    # literally named "Cookie", so split the header into individual cookies.
    cookie = {
        name: value
        for name, _, value in (
            pair.strip().partition('=') for pair in raw_cookie.split(';')
        )
        if name
    }
    response = self.session.post(
        self.login_url,
        data=self.login_data,
        cookies=cookie,
        proxies=self.getip(),
        headers=self.headers,
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept as in the original, but confirm it is really required.
        verify=False,
    )
    self.cookies = requests.utils.dict_from_cookiejar(response.cookies)
    print(response.json())
def get_html(self, url):
    """GET ``url`` through a proxy with a hard-coded set of cookies.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response object returned by ``self.session.get``.
    """
    # Hard-coded Cookie header string; it embeds session-specific values.
    raw_cookie = 'll="118209"; bid=JzDcvUA1bCg; push_noty_num=0; push_doumail_num=0; __utmv=30149280.25606; apiKey=; __utmc=30149280; last_login_way=account; login_start_time=1650193218938; __utma=30149280.780716301.1650174263.1650191791.1650202452.5; __utmz=30149280.1650202452.5.4.utmcsr=so.com|utmccn=(referral)|utmcmd=referral|utmcct=/link; __utmt=1; __utmb=30149280.2.9.1650202452'
    # The ``cookies`` argument expects a name -> value mapping. Passing the
    # whole header under a single 'Cookie' key would send one cookie that is
    # literally named "Cookie", so split the header into individual cookies.
    cookie = {
        name: value
        for name, _, value in (
            pair.strip().partition('=') for pair in raw_cookie.split(';')
        )
        if name
    }
    return self.session.get(
        url,
        proxies=self.getip(),
        cookies=cookie,
        headers=self.headers,
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept as in the original, but confirm it is really required.
        verify=False,
    )
#大神们,哪里需要改进,请指教。
页:
[1]