请问这段爬虫代码在哪里有问题
# Youdao web-translate crawler (original attempt).
# BUG FIX: the two imports below were fused onto one line
# ("import urllib.requestimport urllib.parse"), which is a SyntaxError.
import urllib.request
import urllib.parse
import json

content = input("请输入翻译的内容:")

# NOTE(review): the "translate_o" endpoint is guarded by Youdao's anti-crawl
# checks (salt/sign form fields); requests without a valid sign are rejected,
# which is why this script was "blocked" despite sending headers.
url = "https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

head = {}
head['Referer'] = 'http://fanyi.youdao.com'
# BUG FIX: "X-Requested-With: XMLHttpRequest" had been pasted onto the end of
# the User-Agent value; it must be sent as its own header.
head['User-Agent'] = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/103.0.0.0 Safari/537.36")
head['X-Requested-With'] = 'XMLHttpRequest'

# POST form fields expected by the endpoint.
data = {}
data['type'] = 'AUTO'
data['i'] = content
data['doctype'] = 'json'
data['xmlVersion'] = '1.6'
data['keyfrom'] = 'fanyi.web'
data['ue'] = 'UTF-8'
data['typoResult'] = 'true'
data = urllib.parse.urlencode(data).encode('utf-8')

req = urllib.request.Request(url, data, head)
response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')
target = json.loads(html)
print(html)
# BUG FIX (in the commented-out line): 'translateResult' is a list of lists of
# dicts, so it must be indexed as [0][0]['tgt'], not with the string 'tgt'.
#print("翻译结果:%s" % (target['translateResult'][0][0]['tgt']))
按照小甲鱼书上给出的代码敲的,设置了headers参数,但是还被拦截了,请问原因是什么 有道的代码坛子里实在是太多了,自己先搜搜 网站改版了,现在有道翻译有反爬措施,你的爬虫被拦截了,你可以把url中的translate_o改为translate,这样爬虫就可以正常运行了 临时号 发表于 2022-8-3 18:11
网站改版了,现在有道翻译有反爬措施,你的爬虫被拦截了,你可以把url中的translate_o改为translate,这样爬虫 ...
好像没啥用啊…… tommyyu 发表于 2022-8-3 18:50
好像没啥用啊……
网站改了,data要多点东西
# Youdao web-translate crawler, v2: uses the un-protected "translate" endpoint
# plus the extra form fields the updated site expects.
import urllib.request
import urllib.parse
import json

content = input("请输入翻译的内容:")
url = "https://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"

head = {}
head['Referer'] = 'http://fanyi.youdao.com'
# BUG FIX: "X-Requested-With: XMLHttpRequest" had been pasted onto the end of
# the User-Agent value; it must be sent as its own header.
head['User-Agent'] = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/103.0.0.0 Safari/537.36")
head['X-Requested-With'] = 'XMLHttpRequest'

# POST form fields. NOTE(review): salt/sign/lts/bv below are values captured
# from one browser session; they are tied to that timestamp and will go stale.
# (The plain "translate" endpoint usually ignores them, which is why this
# works at all.) The lowercase "l" in "FY_BY_REALTlME" is Youdao's own literal
# value — do not "fix" it.
data = {}
data["i"] = content
data["from"] = "AUTO"
data["to"] = "AUTO"
data["smartresult"] = "dict"
data["client"] = "fanyideskweb"
data["salt"] = "16595244179934"
data["sign"] = "762040024cd23ba625a1ab1ddae94a20"
data["lts"] = "1659524417993"
data["bv"] = "a16a6033635b516a9006542112cdda8f"
data["doctype"] = "json"
data["version"] = "2.1"
data["keyfrom"] = "fanyi.web"
data["action"] = "FY_BY_REALTlME"
data = urllib.parse.urlencode(data).encode('utf-8')

req = urllib.request.Request(url, data, head)
response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')
target = json.loads(html)
#print(html)
# BUG FIX: 'translateResult' is a list of lists of dicts; indexing it with the
# string 'tgt' raises TypeError. The translation text lives at [0][0]['tgt'].
print("翻译结果:%s" % (target['translateResult'][0][0]['tgt']))
网站改了,data要多点东西
是这里的都要写么 tommyyu 发表于 2022-8-3 19:13
是这里的都要写么
对 tommyyu 发表于 2022-8-3 19:13
是这里的都要写么
如果你对有道的反爬加密算法感兴趣的话可以看看我之前的有道爬虫代码
# Youdao web-translate crawler with the reverse-engineered sign algorithm,
# so the protected "translate_o" endpoint accepts the request.
import urllib.request
import urllib.parse
import hashlib
import random
import time
import json

# --- init ---
url = "https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36',
'Referer': 'https://fanyi.youdao.com/',
'Cookie': 'OUTFOX_SEARCH_USER_ID=1907954952@182.203.147.49; OUTFOX_SEARCH_USER_ID_NCOO=2032702153.1070416; fanyi-ad-id=306808; fanyi-ad-closed=1; ___rl__test__cookies=1654931146702'
}
content = input("请输入翻译的内容:")

# --- sign generation (mirrors the site's client-side JS) ---
lts = str(int(time.time()*1000))        # millisecond timestamp
salt = lts + str(random.randint(0,10))  # timestamp + one random digit
# md5("fanyideskweb" + text + salt + fixed secret) is the request signature.
sign_str = 'fanyideskweb' + content + salt + 'Ygy_4c=r#e#4EX^NUGUc5'
m = hashlib.md5(sign_str.encode())
sign = m.hexdigest()

# --- POST form fields ---
data = {
"i":content,
"from":"AUTO",
"to":"AUTO",
"smartresult":"dict",
"client":"fanyideskweb",
"salt":salt,
"sign":sign,
"lts":lts,
"bv":"a16a6033635b516a9006542112cdda8f",
"doctype":"json",
"version":"2.1",
"keyfrom":"fanyi.web",
"action":"FY_BY_CLICKBUTTION"
}

# --- request ---
data = urllib.parse.urlencode(data).encode('utf-8')
request = urllib.request.Request(url,data,headers)
response = urllib.request.urlopen(request)
html = response.read().decode('utf-8')
target = json.loads(html)
try:
    # BUG FIX: the original read target["translateResult"]["tgt"] — but
    # 'translateResult' is a list of lists of dicts, so that always raised
    # TypeError, which the old bare `except:` silently swallowed, making the
    # script print 翻译失败 even on successful responses.
    result = target["translateResult"][0][0]["tgt"]
    print("翻译结果:", result)
except (KeyError, IndexError, TypeError):
    # Narrowed from a bare except so real bugs (e.g. NameError) still surface.
    print("翻译失败")
如果你对有道的反爬加密算法感兴趣的话可以看看我之前的有道爬虫代码
好的,谢谢
页:
[1]