爬虫入门,模拟登录github时遇到了timestamp_secret怎么解呢?
爬虫入门,模拟登录github时遇到了timestamp和timestamp_secret,其中 timestamp 靠着百度解决了,但是后面这个搜网页源码 ,连个毛不会找{:10_324:} 本帖最后由 suchocolate 于 2021-9-1 07:33 编辑贴一下你的代码。 suchocolate 发表于 2021-9-1 07:29
贴一下你的代码。
import requests
import re#正则模块
import time
# 输入毫秒级的时间,转出正常格式的时间
def timeStamp(timeNum):
    """Convert a millisecond Unix timestamp to a local-time string.

    The formatted value is also stored in the module-level name
    ``otherStyleTime`` so that code reading that global keeps working.

    Args:
        timeNum: Timestamp in milliseconds since the epoch.

    Returns:
        The local time formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    global otherStyleTime
    # time.localtime() expects seconds, the input is milliseconds.
    seconds = timeNum / 1000
    otherStyleTime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))
    return otherStyleTime
def _hidden_inputs(page):
    """Return a ``{name: value}`` dict of the ``<input>`` tags found in *page*.

    Only the form that posts to ``/session`` is scanned when such a form can
    be located; otherwise the whole page is scanned. When a name occurs more
    than once, the first occurrence wins.
    """
    from html import unescape

    # NOTE(review): assumes the sign-in form is marked with action="/session"
    # (the path this script posts to) -- confirm against the page source.
    form = re.search(r'<form\b[^>]*action="/session"[^>]*>(.*?)</form>',
                     page, re.S)
    scope = form.group(1) if form else page

    fields = {}
    for tag in re.findall(r'<input\b[^>]*>', scope):
        name = re.search(r'(?<![\w-])name="([^"]*)"', tag)
        if name is None:
            continue
        value = re.search(r'(?<![\w-])value="([^"]*)"', tag)
        fields.setdefault(name.group(1),
                          unescape(value.group(1)) if value else '')
    return fields


def login(username='', password=''):
    """Log in to GitHub with a ``requests`` session and save a page to disk.

    The per-request form fields (``authenticity_token``, ``timestamp`` and
    ``timestamp_secret``) are read from the login page fetched by the same
    session instead of being hard-coded, because values copied from an
    earlier page load do not belong to the current session.

    Args:
        username: Value for the ``login`` form field.
        password: Value for the ``password`` form field.

    Raises:
        ValueError: If the login page carries no ``authenticity_token``.
    """
    session = requests.session()
    session.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'}

    # Step 1: fetch the login page and pull the per-request values out of it.
    url1 = 'https://github.com/login'
    response1 = session.get(url1).content.decode()  # regexes need str, not bytes
    hidden = _hidden_inputs(response1)
    token = hidden.get('authenticity_token')
    print(token)
    if not token:
        raise ValueError('authenticity_token not found on the login page')

    # Step 2: build the form data.
    url2 = 'https://github.com/session'
    data = {'commit': 'Sign in',
            'authenticity_token': token,
            'login': username,
            'password': password,
            'trusted_device': '',
            'webauthn-support': 'supported',
            'webauthn-iuvpaa-support': 'unsupported',
            'return_to': 'https://github.com/login',
            'allow_signup': '',
            'client_id': '',
            'integration': '',
            # NOTE(review): the suffix of this field name may differ between
            # page loads -- compare with the names found in `hidden`.
            'required_field_6ab1': ''}
    # Both values are taken from the page when present; they are presumably
    # a matched pair, so neither is computed locally.
    for key in ('timestamp', 'timestamp_secret'):
        if key in hidden:
            data[key] = hidden[key]
    print(data)  # debug output; note that it includes the password

    # Step 3: submit the login form.
    session.post(url2, data=data)

    # Step 4: fetch a page with the same session and save it for inspection.
    url3 = 'https://github.com/1667334486@qq.com'
    response = session.get(url3)
    with open('github.html', 'wb') as f:
        f.write(response.content)


if __name__ == '__main__':
    login()
data不用考虑那么多参数:
import requests
import re
def login():
    """Log in to GitHub with a minimal form and save the profile settings page.

    Fetches the login page, extracts ``authenticity_token`` from it, posts
    the sign-in form with the same session, then writes the HTML of
    ``/settings/profile`` to ``github.html``.

    Raises:
        ValueError: If no ``authenticity_token`` is found on the login page.
    """
    session = requests.session()
    session.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'}

    login_url = 'https://github.com/login'
    r = session.get(login_url)
    # Do not require a literal '/>' right after the value, so the match does
    # not depend on how the tag happens to be closed.
    token = re.findall(r'name="authenticity_token" value="([^"]*)"', r.text)
    print(token)
    if not token:
        raise ValueError('authenticity_token not found on the login page')

    post_url = 'https://github.com/session'
    data = {
        'commit': 'Sign in',
        'utf8': '✓',
        # findall() returns a list; the form field needs a single string.
        'authenticity_token': token[0],
        'login': 'email',
        'password': 'password',
    }
    print(data)
    session.post(post_url, data=data)

    logined_url = 'https://github.com/settings/profile'
    r = session.get(logined_url)
    # Explicit UTF-8: the locale default encoding may be unable to represent
    # every character of the page.
    with open('github.html', 'w', encoding='utf-8') as f:
        f.write(r.text)


if __name__ == '__main__':
    login()
别人的案例
import requests
from lxml import etree
class Login(object):
    """Session-based GitHub login helper built on ``requests`` and ``lxml``."""

    def __init__(self):
        """Set the request headers, the three URLs used, and the session."""
        self.headers = {
            'Referer': 'https://github.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
            'Host': 'github.com'
        }
        self.login_url = 'https://github.com/login'
        self.post_url = 'https://github.com/session'
        self.logined_url = 'https://github.com/settings/profile'
        self.session = requests.Session()

    def token(self):
        """Fetch the login page and return its ``authenticity_token`` values.

        Returns:
            A list with the ``value`` attribute of every ``<input>`` named
            ``authenticity_token`` (empty when none is present).
        """
        response = self.session.get(self.login_url, headers=self.headers)
        selector = etree.HTML(response.text)
        # Select by name: a bare '//input/@value' returns the value of every
        # input on the page, not just the token.
        token = selector.xpath('//input[@name="authenticity_token"]/@value')
        return token

    def login(self, email, password):
        """Post the sign-in form, then print the feed and the profile data.

        Args:
            email: Value for the ``login`` form field.
            password: Value for the ``password`` form field.

        Raises:
            ValueError: If the login page carries no ``authenticity_token``.
        """
        tokens = self.token()
        if not tokens:
            raise ValueError('authenticity_token not found on the login page')
        post_data = {
            'commit': 'Sign in',
            'utf8': '✓',
            # The form field takes one string, not the whole list.
            'authenticity_token': tokens[0],
            'login': email,
            'password': password
        }
        response = self.session.post(self.post_url, data=post_data, headers=self.headers)
        if response.status_code == 200:
            self.dynamics(response.text)
        response = self.session.get(self.logined_url, headers=self.headers)
        if response.status_code == 200:
            self.profile(response.text)

    def dynamics(self, html):
        """Print the ``title`` text found under each nested ``<div>`` of *html*."""
        selector = etree.HTML(html)
        dynamics = selector.xpath('//div//div')
        for item in dynamics:
            dynamic = ' '.join(item.xpath('.//div[@class="title"]//text()')).strip()
            print(dynamic)

    def profile(self, html):
        """Print the profile name and e-mail options found in *html*."""
        selector = etree.HTML(html)
        name = selector.xpath('//input[@id="user_profile_name"]/@value')
        email = selector.xpath('//select[@id="user_profile_email"]/option/@value')
        # Seeing your own account details here means the login succeeded.
        print(name, email)


if __name__ == "__main__":
    login = Login()
    login.login(email='账号', password='密码')
suchocolate 发表于 2021-9-2 11:49
data不用考虑那么多参数:
刚开始只是说 要 写刷新后变化的项{:9_234:} suchocolate 发表于 2021-9-1 07:29
贴一下你的代码。
远程主机强迫关闭了一个现有的连接这种有没有好的避免方法呢? 我爱l两条柴 发表于 2021-9-3 00:56
远程主机强迫关闭了一个现有的连接这种有没有好的避免方法呢?
操作太快了,time.sleep 一会,或重启 py。
爬虫入门,模拟登录github时遇到了timestamp和timestamp_secret,哈哈哈哈我遇到了同样的问题,然后搜索到了你的问题。{:5_108:}握个爪!!
页:
[1]