爬虫,百度翻译
import json

import requests

# Query Baidu Translate's "sug" (suggestion) endpoint for a keyword and print
# the decoded JSON reply twice: once as a Python dict, once as a JSON string.
# As the sample output in this thread shows, the reply is a list of suggested
# phrases ('k') with short glosses ('v'), not a full translation of the input.
url = 'https://fanyi.baidu.com/sug'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
}
work = input('请输入需要翻译的关键词')
data = {'kw': work}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.post(url=url, data=data, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of trying to decode an error page.
response.raise_for_status()
a = response.json()
print(a)
# ensure_ascii=False keeps the Chinese text readable instead of \uXXXX escapes.
print(json.dumps(a, ensure_ascii=False, sort_keys=True))
请输入需要翻译的关键词cat
{'errno': 0, 'data': [{'k': 'cat ear', 'v': '[医]猫耳'}, {'k': 'cat fur', 'v': '[医]猫皮毛'}, {'k': 'cat nap', 'v': 'n. 打瞌睡'}, {'k': 'cat eyes', 'v': '猫眼'}, {'k': 'cat fish', 'v': '[医]鲶(鱼),鲇鱼'}]}
{"data": [{"k": "cat ear", "v": "[医]猫耳"}, {"k": "cat fur", "v": "[医]猫皮毛"}, {"k": "cat nap", "v": "n. 打瞌睡"}, {"k": "cat eyes", "v": "猫眼"}, {"k": "cat fish", "v": "[医]鲶(鱼),鲇鱼"}], "errno": 0}
我想问为什么这里没有一个我想要的结果,
会不会是哪里出问题了?
如果你只需要英翻中,可以试试直接 GET,我的浏览器显示的是这样的 url:
https://fanyi.baidu.com/?aldtype=16047#en/zh/cat
看见没有,这个就很明显了
# Build the Baidu Translate page URL for an English -> Chinese lookup of the
# text typed by the user.
temp = input('请输入待翻译的英文:')
# NOTE(review): everything after '#' is a URL fragment, which HTTP clients do
# not send to the server, so a plain GET of this URL probably returns only the
# generic page rather than the translation - confirm before relying on it.
url = r'https://fanyi.baidu.com/?aldtype=16047#en/zh/' + temp
# Forum note: this post was last edited by suchocolate on 2020-11-21 13:01.
这个sug url只能翻译单个字,翻译有另外的url:https://fanyi.baidu.com/v2transapi,且还需要提交sign和token,你可以参考下这个:https://blog.csdn.net/qq_38534107/article/details/90440403,以下是参考他的方法写的代码:
import execjs
import requests
class BaiduTranslateJS:
    """Client for Baidu Translate's ``v2transapi`` endpoint.

    Each request needs a ``sign`` value, which is computed by calling the
    JavaScript function ``e`` from a local JS file through ``execjs``, plus a
    ``token`` and a matching ``Cookie`` header.

    Args:
        token: Value for the ``token`` form field. Defaults to the token that
            was hard-coded in the original snippet.
        cookie: Value for the ``Cookie`` request header. Defaults to the
            cookie that was hard-coded in the original snippet.
        js_path: Path of the JS file that defines ``e``. Defaults to
            ``'test.js'`` in the current working directory.
    """

    # SECURITY NOTE(review): this default embeds a captured browser session
    # (including a BDUSS login cookie). It is kept so existing callers behave
    # as before, but it should be supplied by the caller (e.g. from an
    # environment variable) rather than committed to source.
    DEFAULT_COOKIE = (
        "BAIDUID=F0E3DE0018AFB4C172AD6D2E7C917128:FG=1; BIDUPSID=F0E3DE0018AFB4C1DFC82BD817EAC44D; "
        "PSTM=1595323520; MCITY=-%3A; "
        "BDUSS=IxVFNzRlJ0b1VhcWVWS3VLV1dwLVpRdmNMY01ZMUtxM2RGblBPanVZV3pHV2RmSVFBQUFBJCQAAAAAAAAAAAEAAA"
        "AQTRwgQ3JpdGljMjAxMgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALOMP"
        "1-zjD9fa1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=16059"
        "21866,1605929186; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWI"
        "TCH=1; SOUND_PREFER_SWITCH=1; yjs_js_security_passport=01bd0488dc53c6663e9038c21110dd20eaa3d83"
        "e_1605929187_js; BA_HECTOR=a080alal0501a02h1e1frh03m0q; __yjsv5_shitong=1.0_7_3d669e2415c752e931"
        "566ddcdfe31f01a9af_300_1605929186447_112.1.222.242_a0e2999e; Hm_lpvt_64ecd82404c51e03dc91cb9e8c"
        "025574=1605929186 "
    )
    DEFAULT_TOKEN = "5a3db26d59330f426a3c74eca9c27a27"

    # Class-level fallbacks so instances created without __init__ still work.
    js_path = 'test.js'
    _ctx = None  # compiled JS context, created on first use

    def __init__(self, token=None, cookie=None, js_path='test.js'):
        self.url = "https://fanyi.baidu.com/v2transapi"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
            "Cookie": self.DEFAULT_COOKIE if cookie is None else cookie,
        }
        self.data = {
            "from": "zh",
            "to": "en",
            "query": "",
            "simple_means_flag": "3",
            "sign": "",
            # How to obtain a token is described later in this thread.
            "token": self.DEFAULT_TOKEN if token is None else token,
            "domain": "common",
        }
        self.js_path = js_path
        self._ctx = None

    def _sign(self, query):
        """Return the ``sign`` for *query* by calling ``e`` in the JS file."""
        # Compile the JS once and reuse it; the original re-read and
        # re-compiled the file on every call.
        if self._ctx is None:
            with open(self.js_path, 'r', encoding='utf-8') as f:
                self._ctx = execjs.compile(f.read())
        return self._ctx.call('e', query)

    @staticmethod
    def _extract_dst(rj):
        """Pull the translated text out of a decoded ``v2transapi`` reply.

        Raises:
            KeyError: If the reply has no ``trans_result`` (the message
                includes the whole reply so the server's error is visible) or
                no ``dst`` entry.
        """
        if 'trans_result' not in rj:
            raise KeyError("no 'trans_result' in Baidu response: %r" % (rj,))
        data = rj['trans_result']['data']
        # NOTE(review): 'data' is presumably a list of per-segment dicts and
        # the original [0] subscript looks to have been lost when the code was
        # pasted; accept both a list and a plain dict. Only the first segment
        # is returned - confirm against a live response.
        if isinstance(data, list):
            if not data:
                raise KeyError("empty 'trans_result.data' in Baidu response: %r" % (rj,))
            data = data[0]
        return data['dst']

    def t(self, query):
        """Translate *query* (``self.data['from']`` -> ``self.data['to']``).

        Updates ``self.data['query']`` and ``self.data['sign']`` as a side
        effect, posts the form to ``self.url`` and returns the translated text.

        Raises:
            requests.HTTPError: On a non-2xx HTTP status.
            KeyError: If the reply does not contain a translation.
        """
        self.data['query'] = query
        self.data['sign'] = self._sign(query)
        # The timeout prevents hanging forever on a stalled connection.
        r = requests.post(self.url, headers=self.headers, data=self.data, timeout=10)
        r.raise_for_status()
        return self._extract_dst(r.json())
if __name__ == '__main__':
    # Demo: translate one Chinese sentence to English and print the result.
    bt = BaiduTranslateJS()
    result = bt.t('我的家在东北')
    print(result)
token和js里的i值获取方法:
import requests
import re
def main():
    """Fetch the Baidu Translate home page and print the token and gtk values.

    Both values are searched for in the page text with regular expressions and
    printed as the lists returned by ``re.findall`` (empty if nothing matched).

    Raises:
        requests.HTTPError: On a non-2xx HTTP status.
    """
    url = 'https://fanyi.baidu.com'
    headers = {'user-agent': 'firefox'}
    # The timeout prevents hanging forever on a stalled connection.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    token = re.findall(r"token: '(.*?)',", r.text)
    print(token)  # token
    gtk = re.findall(r"gtk = '(.*?)'", r.text)
    print(gtk)  # the value used as `i` in the JS
if __name__ == '__main__':
    # Run the token/gtk lookup when executed as a script.
    main()
// Forum note: the official site's JS has changed compared with the referenced
// write-up; copy the code from the browser and save it locally as test.js.

/**
 * Mix the 32-bit value `r` according to the operation string `o`.
 *
 * `o` is read three characters at a time:
 *   - char 0: '+' adds the shifted value to `r` (wrapped to 32 bits),
 *             anything else XORs it into `r`;
 *   - char 1: '+' shifts right (unsigned, >>>), anything else shifts left;
 *   - char 2: the shift amount, a digit or a letter ('a' = 10, 'b' = 11, ...).
 *
 * @param {number} r  value to mix
 * @param {string} o  operation string, e.g. '+-a^+6'
 * @returns {number}  the mixed value (a signed 32-bit integer once any step ran)
 */
function n(r, o) {
    for (var t = 0; t < o.length - 2; t += 3) {
        var amountChar = o.charAt(t + 2);
        // Letters encode amounts >= 10: 'a'.charCodeAt(0) - 87 === 10.
        var amount = amountChar >= 'a' ? amountChar.charCodeAt(0) - 87 : Number(amountChar);
        var shifted = '+' === o.charAt(t + 1) ? r >>> amount : r << amount;
        if ('+' === o.charAt(t)) {
            // `& 4294967295` wraps the sum back into 32 bits.
            r = (r + shifted) & 4294967295;
        } else {
            r = r ^ shifted;
        }
    }
    return r;
}
/**
 * Compute the `sign` string that Baidu Translate's v2transapi expects for the
 * query text `r`. Relies on the sibling helper `n(r, o)` for the bit mixing.
 *
 * Steps: shorten long input to 30 characters (first 10, middle 10, last 10),
 * UTF-8 encode it, fold every byte into an accumulator seeded with the first
 * half of the gtk, apply a final mix, XOR with the second half of the gtk,
 * and format as "<p>.<p ^ m>".
 *
 * With the default gtk, e('hello') === '54706.276099'.
 *
 * @param {string} r      text to sign
 * @param {string} [gtk]  the page's gtk value ("<int>.<int>"); defaults to the
 *                        value that was hard-coded in the original snippet
 * @returns {string}      the sign
 */
function e(r, gtk) {
    // The original also had a `window[...]` fallback for this value; it was
    // unreachable because `i` was always a non-null string, so it is dropped.
    var i = gtk || '320305.131321201';

    // Shorten inputs longer than 30 characters. Text containing surrogate
    // pairs is split so each pair counts as a single character.
    var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === o) {
        var t = r.length;
        if (t > 30) {
            r = '' + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10);
        }
    } else {
        var pieces = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/);
        var f = [];
        for (var C = 0, h = pieces.length; h > C; C++) {
            if ('' !== pieces[C]) {
                // split('') already yields an array, so no copy helper is needed.
                f.push.apply(f, pieces[C].split(''));
            }
            if (C !== h - 1) {
                // Re-insert the surrogate pair that separated this piece from the next.
                f.push(o[C]);
            }
        }
        var g = f.length;
        if (g > 30) {
            r = f.slice(0, 10).join('') + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join('') + f.slice(-10).join('');
        }
    }

    // gtk is "<m>.<s>": m seeds the accumulator, s is XORed in at the end.
    var d = i.split('.');
    var m = Number(d[0]) || 0;
    var s = Number(d[1]) || 0;

    // UTF-8 encode r into the byte array S.
    var S = [];
    var c = 0;
    for (var v = 0; v < r.length; v++) {
        var A = r.charCodeAt(v);
        if (128 > A) {
            S[c++] = A;
        } else {
            if (2048 > A) {
                S[c++] = A >> 6 | 192;
            } else {
                if (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1))) {
                    // Combine a surrogate pair into one code point (4-byte sequence).
                    A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v));
                    S[c++] = A >> 18 | 240;
                    S[c++] = A >> 12 & 63 | 128;
                } else {
                    S[c++] = A >> 12 | 224;
                }
                S[c++] = A >> 6 & 63 | 128;
            }
            S[c++] = 63 & A | 128;
        }
    }

    // Operation strings for n(); the original built them from char codes.
    var F = '+-a^+6';
    var D = '+-3^+b+-f';

    var p = m;
    for (var b = 0; b < S.length; b++) {
        p += S[b];
        p = n(p, F);
    }
    p = n(p, D);
    p ^= s;
    if (0 > p) {
        // Reinterpret the signed 32-bit value as unsigned.
        p = (2147483647 & p) + 2147483648;
    }
    p %= 1000000;
    return p.toString() + '.' + (p ^ m);
}
另外百度翻译免费开放api,申请一下就能用:https://api.fanyi.baidu.com/
页:
[1]