python爬虫的一些问题
代码如下:
import urllib.request
import urllib.parse
import json
# Flat script: read a phrase from stdin, POST it as a form to the Youdao
# "translate" endpoint and print the translated text from the JSON reply.
content = input('请输入翻译词语')
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields sent with the request. 'typoResult' was previously assigned
# twice ('true', then 'false'); only the effective final value is kept.
data = {
    'i': content,
    'doctype': 'json',
    'keyfrom': 'fanyi.web',
    'typoResult': 'false',
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '1520416292076',
    'sign': '41fe7ea28425a0a4ceb88ab4c8609d13',
    'version': '2.1',
    'action': 'FY_BY_CLICKBUTTION',
}
data = urllib.parse.urlencode(data).encode('utf-8')

# Passing a body makes urlopen issue a POST; the context manager closes the
# connection once the payload has been read.
with urllib.request.urlopen(url, data) as response:
    html = response.read().decode('utf-8')

ta = json.loads(html)
# 'translateResult' is a list of lists of {"tgt": ..., "src": ...} dicts,
# so it cannot be indexed with 'tgt' directly; flatten it and join the parts.
print(''.join(
    segment['tgt']
    for group in ta['translateResult']
    for segment in group
))
经过测试可用,但这是在搜索别人的代码后实现的。问题出在:我自己按小甲鱼第一期 Python 教程找到的 Form Data 与上面代码中的不同。
自己找到的 Form Data 如下:
data['i']:content
data['from']: 'AUTO'
data['to']: 'AUTO'
data['smartresult']: 'dict'
data['client']: 'fanyideskweb'
data['salt']: '16012838780358'
data['sign']: 'dcc11dde189f70a1e028041fee113205'
data['lts']: '1601283878035'
data['bv']: '4fa486883137f3299a6cff61cf098e44'
data['doctype']: 'json'
data['version']: '2.1'
data['keyfrom']: 'fanyi.web'
data['action']: 'FY_BY_CLICKBUTTION'
希望各位鱼油帮帮忙,表单数据具体的原理以及其包含的内容也不太清楚,希望能帮忙解释下,谢谢。
data 参数的具体含义只有网站清楚,我们只能透过字面意思或对 JavaScript 进行反向分析,或者完全模仿 F12 看到的流程来模拟查询,这也是爬虫正在做的。
另外,从经验来看,提交表单时可以不用都提交,有时只需要提交几个参数就能得到正确响应,比如有道翻译实际只提交一个i参数即可。
当然,不同的网站、不同的 API 各有不同。
import json
import time
import random
import hashlib
import requests
def youdao_fanyi(key):
    """Translate ``key`` via the Youdao ``translate_o`` web endpoint.

    The form carries a millisecond timestamp (``lts``), that timestamp with
    one random digit appended (``salt``), an MD5 signature over
    client id + text + salt + a fixed secret (``sign``) and the MD5 of the
    browser appVersion string (``bv``).

    Args:
        key: Text to translate.

    Returns:
        "翻译结果:" followed by the concatenated translated segments when the
        service answers HTTP 200 with ``errorCode == 0``; otherwise ``key``
        is returned unchanged.
    """
    baseurl = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    # 13-digit millisecond timestamp.
    time_span = int(time.time() * 1000)
    lts = str(time_span)
    # salt is lts with one random digit appended (14 digits). randint is
    # inclusive on both ends, so the upper bound has to be 9, not 10.
    salt = str(time_span * 10 + random.randint(0, 9))
    # sign: MD5 of client id + text + salt + fixed secret.
    s = "fanyideskweb" + key + salt + "]BjuETDhU)zqSxf-=B#7m"
    sign = hashlib.md5(s.encode()).hexdigest()
    # bv: MD5 of navigator.appVersion; a fixed value can be used.
    agent = "5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36"
    bv = hashlib.md5(agent.encode()).hexdigest()

    data = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": salt,
        "sign": sign,
        "lts": lts,
        "bv": bv,
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTlME",
    }
    # No explicit Content-Length: len(key) is not the size of the encoded
    # form body, and requests sets the correct value itself.
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,en-GB;q=0.8,en;q=0.7",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie": "OUTFOX_SEARCH_USER_ID=1195241757@10.108.160.101; JSESSIONID=aaaewgbb34F9kdlA9wswx; OUTFOX_SEARCH_USER_ID_NCOO=1539706065.7723656; ___rl__test__cookies=1604559693998",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }
    # A timeout keeps the call from blocking forever on a stalled connection.
    response = requests.post(url=baseurl, data=data, headers=headers, timeout=10)

    # Approximate shape of the JSON reply:
    # {
    #     "translateResult": [
    #         [{"tgt": "Please use", "src": "请使用"}]
    #     ],
    #     "errorCode": 0,
    #     "type": "zh-CHS2en"
    # }
    if response.status_code != 200:
        return key
    try:
        result = response.json()
    except ValueError:
        # HTTP 200 with a non-JSON body: treat like any other failed lookup.
        return key
    if not isinstance(result, dict) or result.get("errorCode") != 0:
        return key

    # translateResult is a list of lists of dicts, so each outer item must be
    # iterated again before "tgt" can be read.
    groups = result.get("translateResult") or []
    translated = "".join(
        segment.get("tgt") or ""
        for group in groups
        for segment in group
    )
    return "翻译结果:" + translated
if __name__ == "__main__":
    # Script entry point: read the text to translate and print the result.
    key = input("翻译的内容:")
    print(youdao_fanyi(key))
import json
import time
import random
import hashlib
import requests
def youdao_fanyi(key):
    """Translate ``key`` via the Youdao ``translate_o`` web endpoint.

    The form carries a millisecond timestamp (``lts``), that timestamp with
    one random digit appended (``salt``), an MD5 signature over
    client id + text + salt + a fixed secret (``sign``) and the MD5 of the
    browser appVersion string (``bv``).

    Args:
        key: Text to translate.

    Returns:
        "翻译结果:" followed by the concatenated translated segments when the
        service answers HTTP 200 with ``errorCode == 0``; otherwise ``key``
        is returned unchanged.
    """
    baseurl = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    # 13-digit millisecond timestamp.
    time_span = int(time.time() * 1000)
    lts = str(time_span)
    # salt is lts with one random digit appended (14 digits). randint is
    # inclusive on both ends, so the upper bound has to be 9, not 10.
    salt = str(time_span * 10 + random.randint(0, 9))
    # sign: MD5 of client id + text + salt + fixed secret.
    s = "fanyideskweb" + key + salt + "]BjuETDhU)zqSxf-=B#7m"
    sign = hashlib.md5(s.encode()).hexdigest()
    # bv: MD5 of navigator.appVersion; a fixed value can be used.
    agent = "5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36"
    bv = hashlib.md5(agent.encode()).hexdigest()

    data = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": salt,
        "sign": sign,
        "lts": lts,
        "bv": bv,
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTlME",
    }
    # No explicit Content-Length: len(key) is not the size of the encoded
    # form body, and requests sets the correct value itself.
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,en-GB;q=0.8,en;q=0.7",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie": "OUTFOX_SEARCH_USER_ID=1195241757@10.108.160.101; JSESSIONID=aaaewgbb34F9kdlA9wswx; OUTFOX_SEARCH_USER_ID_NCOO=1539706065.7723656; ___rl__test__cookies=1604559693998",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }
    # A timeout keeps the call from blocking forever on a stalled connection.
    response = requests.post(url=baseurl, data=data, headers=headers, timeout=10)

    # Approximate shape of the JSON reply:
    # {
    #     "translateResult": [
    #         [{"tgt": "Please use", "src": "请使用"}]
    #     ],
    #     "errorCode": 0,
    #     "type": "zh-CHS2en"
    # }
    if response.status_code != 200:
        return key
    try:
        result = response.json()
    except ValueError:
        # HTTP 200 with a non-JSON body: treat like any other failed lookup.
        return key
    if not isinstance(result, dict) or result.get("errorCode") != 0:
        return key

    # translateResult is a list of lists of dicts, so each outer item must be
    # iterated again before "tgt" can be read.
    groups = result.get("translateResult") or []
    translated = "".join(
        segment.get("tgt") or ""
        for group in groups
        for segment in group
    )
    return "翻译结果:" + translated
if __name__ == "__main__":
    # Script entry point: read the text to translate and print the result.
    key = input("翻译的内容:")
    print(youdao_fanyi(key))
我也有同样的问题{:10_266:} 廖刘龙 发表于 2020-11-11 21:58
我也有同样的问题
有道翻译接口分析,可以看看https://blog.csdn.net/YungGuo/article/details/109559512 {:10_250:}{:10_250:}
页:
[1]