|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
网上看了很多别人写的程序
要么用不了
要么出错找不到原因
要么就是不用登录的,不登录爬取的图片又有限制
于是就想自己写,但是登录时遇到很大的问题,我把登录需要的参数传过去了,但是后面requests.text传回来的网页源码显示我没登录
这是我写的代码,卡在了登录这一步好久了,希望看到的大神能指导一下(网址是https://accounts.pixiv.net/login?lang=zh,需要翻墙才能登录)
- import requests
- from bs4 import BeautifulSoup
- from urllib.parse import urlencode
- from requests.packages.urllib3.exceptions import InsecureRequestWarning
- requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
- def login():
- headers = {
- 'User-Agent': 'Mozilla / 5.0(WindowsNT10.0;Win64;x64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/83.0.4103.97Safari/537.36',
- # 'referer': 'https://accounts.pixiv.net/login?return_to=https%3A%2F%2Fwww.pixiv.net%2F&lang=zh&source=pc&view_type=page'
- 'Referer': 'https://accounts.pixiv.net/login?lang=zh&source=pc&view_type=page&ref=wwwtop_accounts_index'
- }
- # session = requests.session()
- # response = session.get('https://accounts.pixiv.net/login?lang=zh',headers=headers,verify=False)
- response = requests.get('https://accounts.pixiv.net/login?lang=zh',headers=headers,verify=False)
- print(response.status_code)
- bs = BeautifulSoup(response.text,'html.parser')
- post_key = bs.find('div',id='old-login').find('input',type='hidden')['value']
- print(post_key)
- data = {
- 'post_key': post_key,
- 'captcha':'',
- 'g_recaptcha_response':'',
- 'ref':'',
- 'return_to': 'https://www.pixiv.net/',
- 'source': 'pc',
- 'pixiv_id': '@qq.com',
- 'password': ''
- }
- res = requests.post('https://accounts.pixiv.net/api/login?lang=zh',data=data,headers=headers,verify=False)
- print(res.cookies)
- print(type(res.cookies))
- return res.cookies
- def get_id():
- response_list = []
- cookies = login()
- headers2 = {
- 'User-Agent': 'Mozilla/5.0(WindowsNT10.0;Win64;x64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/83.0.4103.97Safari/537.36',
- 'Accept-Language': 'zh-CN,zh',
- 'Referer':'https://accounts.pixiv.net/login?return_to=https%3A%2F%2Fwww.pixiv.net%2F&lang=zh&source=pc&view_type=page'
- }
- # url = 'https://www.pixiv.net/ranking.php?mode=daily_r18&p=1&format=json'
- data = {
- 'mode':'daily_r18'
- }
- url = 'https://www.pixiv.net/ranking.php?'+urlencode(data)
- print(url)
- response = requests.get(url,headers=headers2,data=data,cookies=cookies,verify=False)
- print(response.text)
- # global false, null, true
- # false = 'False'
- # null = 'None'
- # true = 'True'
- res = response.json()
- print(res)
- list_id = res['contents']
- for i in list_id:
- pic_id = i['illust_id']
- pic_name = i[0]['title']
- response_list.append([pic_name,pic_id])
- return response_list
- def main():
- pic_id = get_id()
- main()
复制代码 |
|