鱼C论坛

 找回密码
 立即注册
查看: 1708|回复: 2

[已解决]爬虫,百度翻译

[复制链接]
发表于 2020-11-20 22:15:45 | 显示全部楼层 |阅读模式

马上注册,结交更多好友,享用更多功能^_^

您需要 登录 才可以下载或查看,没有账号?立即注册

x
import requests
import json
# Endpoint queried by this script; the sample output below the listing shows
# it returning a list of related entries for the keyword.
url = 'https://fanyi.baidu.com/sug'
# Browser-like User-Agent header sent with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
    }

# Read the keyword from the user and send it as the form field "kw".
work = input('请输入需要翻译的关键词')
data = {'kw': work}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.post(url=url, data=data, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()
a = response.json()
# First the raw Python dict, then the same data as JSON with readable
# (non-escaped) Chinese text and sorted keys.
print(a)
print(json.dumps(a, ensure_ascii=False, sort_keys=True))

请输入需要翻译的关键词cat
{'errno': 0, 'data': [{'k': 'cat ear', 'v': '[医]猫耳'}, {'k': 'cat fur', 'v': '[医]猫皮毛'}, {'k': 'cat nap', 'v': 'n. 打瞌睡'}, {'k': 'cat eyes', 'v': '猫眼'}, {'k': 'cat fish', 'v': '[医]鲶(鱼),鲇鱼'}]}
{"data": [{"k": "cat ear", "v": "[医]猫耳"}, {"k": "cat fur", "v": "[医]猫皮毛"}, {"k": "cat nap", "v": "n. 打瞌睡"}, {"k": "cat eyes", "v": "猫眼"}, {"k": "cat fish", "v": "[医]鲶(鱼),鲇鱼"}], "errno": 0}

我想问一下,为什么返回的数据里没有我想要的翻译结果?

最佳答案
2020-11-21 10:53:31
本帖最后由 suchocolate 于 2020-11-21 13:01 编辑

这个sug url只能翻译单个字,翻译有另外的url:https://fanyi.baidu.com/v2transapi,且还需要提交sign和token,你可以参考下这个:https://blog.csdn.net/qq_38534107/article/details/90440403,以下是参考他的方法写的代码:

  1. import execjs
  2. import requests


  3. class BaiduTranslateJS:
  4.     def __init__(self):
  5.         self.url = "https://fanyi.baidu.com/v2transapi"
  6.         self.headers = {
  7.             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
  8.             "Cookie": "BAIDUID=F0E3DE0018AFB4C172AD6D2E7C917128:FG=1; BIDUPSID=F0E3DE0018AFB4C1DFC82BD817EAC44D; "
  9.                       "PSTM=1595323520; MCITY=-%3A; "
  10.                       "BDUSS=IxVFNzRlJ0b1VhcWVWS3VLV1dwLVpRdmNMY01ZMUtxM2RGblBPanVZV3pHV2RmSVFBQUFBJCQAAAAAAAAAAAEAAA"
  11.                       "AQTRwgQ3JpdGljMjAxMgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALOMP"
  12.                       "1-zjD9fa1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=16059"
  13.                       "21866,1605929186; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWI"
  14.                       "TCH=1; SOUND_PREFER_SWITCH=1; yjs_js_security_passport=01bd0488dc53c6663e9038c21110dd20eaa3d83"
  15.                       "e_1605929187_js; BA_HECTOR=a080alal0501a02h1e1frh03m0q; __yjsv5_shitong=1.0_7_3d669e2415c752e931"
  16.                       "566ddcdfe31f01a9af_300_1605929186447_112.1.222.242_a0e2999e; Hm_lpvt_64ecd82404c51e03dc91cb9e8c"
  17.                       "025574=1605929186 "
  18.         }
  19.         self.data = {
  20.             "from": "zh",
  21.             "to": "en",
  22.             "query": "",
  23.             "simple_means_flag": "3",
  24.             "sign": "",
  25.             "token": "5a3db26d59330f426a3c74eca9c27a27",   #  token获取方法见下面
  26.             "domain": "common"
  27.         }

  28.     def t(self, query):
  29.         self.data['query'] = query
  30.         with open('test.js', 'r', encoding='utf-8') as f:
  31.             ctx = execjs.compile(f.read())
  32.         sign = ctx.call('e', query)
  33.         self.data['sign'] = sign
  34.         r = requests.post(self.url, headers=self.headers, data=self.data)
  35.         rj = r.json()
  36.         # print(rj)
  37.         r_x = rj['trans_result']['data'][0]['dst']
  38.         return r_x


  39. if __name__ == '__main__':
  40.     bt = BaiduTranslateJS()
  41.     result = bt.t('我的家在东北')
  42.     print(result)
复制代码


token和js里的i值获取方法:
  1. import requests
  2. import re


  3. def main():
  4.     url = 'https://fanyi.baidu.com'
  5.     headers = {'user-agent': 'firefox'}
  6.     r = requests.get(url, headers=headers)
  7.     token = re.findall(r"token: '(.*?)',", r.text)[0]
  8.     print(token)   # token
  9.     gtk = re.findall(r"gtk = '(.*?)'", r.text)[0]
  10.     print(gtk)    # js里用的i的值


  11. if __name__ == '__main__':
  12.     main()
复制代码



官网现在的js与那篇笔记相比已有变动,需要从浏览器里拷贝最新代码,在本地新建test.js:
  1. function n(r, o) {
  2.   for (var t = 0; t < o.length - 2; t += 3) {
  3.     var a = o.charAt(t + 2);
  4.     a = a >= 'a' ? a.charCodeAt(0) - 87 : Number(a),
  5.     a = '+' === o.charAt(t + 1) ? r >>> a : r << a,
  6.     r = '+' === o.charAt(t) ? r + a & 4294967295 : r ^ a
  7.   }
  8.   return r
  9. }
  10. function e(r) {
  11.   var i = '320305.131321201'
  12.   var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
  13.   if (null === o) {
  14.     var t = r.length;
  15.     t > 30 && (r = '' + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr( - 10, 10))
  16.   } else {
  17.     for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = [
  18.     ]; h > C; C++) '' !== e[C] && f.push.apply(f, a(e[C].split(''))),
  19.     C !== h - 1 && f.push(o[C]);
  20.     var g = f.length;
  21.     g > 30 && (r = f.slice(0, 10).join('') + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join('') + f.slice( - 10).join(''))
  22.   }
  23.   var u = void 0,
  24.   l = '' + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
  25.   u = null !== i ? i : (i = window[l] || '') || '';
  26.   for (var d = u.split('.'), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [
  27.   ], c = 0, v = 0; v < r.length; v++) {
  28.     var A = r.charCodeAt(v);
  29.     128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128)  : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
  30.   }
  31.   for (var p = m, F = '' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ('' + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = '' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ('' + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ('' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b],
  32.   p = n(p, F);
  33.   return p = n(p, D),
  34.   p ^= s,
  35.   0 > p && (p = (2147483647 & p) + 2147483648),
  36.   p %= 1000000,
  37.   p.toString() + '.' + (p ^ m)
  38. }
复制代码

6.png

另外百度翻译免费开放api,申请一下就能用:https://api.fanyi.baidu.com/

我想要这里的结果

我想要这里的结果
小甲鱼最新课程 -> https://ilovefishc.com
回复

使用道具 举报

发表于 2020-11-21 01:22:18 | 显示全部楼层
会不会是哪里出问题了?如果你只需要英翻中,可以试试直接get,我的浏览器显示的这样的url
https://fanyi.baidu.com/?aldtype=16047#en/zh/cat
看见没有,这个就很明显了
# Ask for the English text and append it to the URL shown in the browser's address bar.
# NOTE(review): everything after '#' is a URL fragment, which HTTP clients normally
# do not send to the server, so a plain GET of this URL may not return the
# translation -- confirm before relying on it.
temp = input('请输入待翻译的英文:')
url = r'https://fanyi.baidu.com/?aldtype=16047#en/zh/'+temp
小甲鱼最新课程 -> https://ilovefishc.com
回复 支持 反对

使用道具 举报

发表于 2020-11-21 10:53:31 | 显示全部楼层    本楼为最佳答案   
本帖最后由 suchocolate 于 2020-11-21 13:01 编辑

这个sug url只能翻译单个字,翻译有另外的url:https://fanyi.baidu.com/v2transapi,且还需要提交sign和token,你可以参考下这个:https://blog.csdn.net/qq_38534107/article/details/90440403,以下是参考他的方法写的代码:

  1. import execjs
  2. import requests


  3. class BaiduTranslateJS:
  4.     def __init__(self):
  5.         self.url = "https://fanyi.baidu.com/v2transapi"
  6.         self.headers = {
  7.             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
  8.             "Cookie": "BAIDUID=F0E3DE0018AFB4C172AD6D2E7C917128:FG=1; BIDUPSID=F0E3DE0018AFB4C1DFC82BD817EAC44D; "
  9.                       "PSTM=1595323520; MCITY=-%3A; "
  10.                       "BDUSS=IxVFNzRlJ0b1VhcWVWS3VLV1dwLVpRdmNMY01ZMUtxM2RGblBPanVZV3pHV2RmSVFBQUFBJCQAAAAAAAAAAAEAAA"
  11.                       "AQTRwgQ3JpdGljMjAxMgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALOMP"
  12.                       "1-zjD9fa1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=16059"
  13.                       "21866,1605929186; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWI"
  14.                       "TCH=1; SOUND_PREFER_SWITCH=1; yjs_js_security_passport=01bd0488dc53c6663e9038c21110dd20eaa3d83"
  15.                       "e_1605929187_js; BA_HECTOR=a080alal0501a02h1e1frh03m0q; __yjsv5_shitong=1.0_7_3d669e2415c752e931"
  16.                       "566ddcdfe31f01a9af_300_1605929186447_112.1.222.242_a0e2999e; Hm_lpvt_64ecd82404c51e03dc91cb9e8c"
  17.                       "025574=1605929186 "
  18.         }
  19.         self.data = {
  20.             "from": "zh",
  21.             "to": "en",
  22.             "query": "",
  23.             "simple_means_flag": "3",
  24.             "sign": "",
  25.             "token": "5a3db26d59330f426a3c74eca9c27a27",   #  token获取方法见下面
  26.             "domain": "common"
  27.         }

  28.     def t(self, query):
  29.         self.data['query'] = query
  30.         with open('test.js', 'r', encoding='utf-8') as f:
  31.             ctx = execjs.compile(f.read())
  32.         sign = ctx.call('e', query)
  33.         self.data['sign'] = sign
  34.         r = requests.post(self.url, headers=self.headers, data=self.data)
  35.         rj = r.json()
  36.         # print(rj)
  37.         r_x = rj['trans_result']['data'][0]['dst']
  38.         return r_x


  39. if __name__ == '__main__':
  40.     bt = BaiduTranslateJS()
  41.     result = bt.t('我的家在东北')
  42.     print(result)
复制代码


token和js里的i值获取方法:
  1. import requests
  2. import re


  3. def main():
  4.     url = 'https://fanyi.baidu.com'
  5.     headers = {'user-agent': 'firefox'}
  6.     r = requests.get(url, headers=headers)
  7.     token = re.findall(r"token: '(.*?)',", r.text)[0]
  8.     print(token)   # token
  9.     gtk = re.findall(r"gtk = '(.*?)'", r.text)[0]
  10.     print(gtk)    # js里用的i的值


  11. if __name__ == '__main__':
  12.     main()
复制代码



官网现在的js与那篇笔记相比已有变动,需要从浏览器里拷贝最新代码,在本地新建test.js:
  1. function n(r, o) {
  2.   for (var t = 0; t < o.length - 2; t += 3) {
  3.     var a = o.charAt(t + 2);
  4.     a = a >= 'a' ? a.charCodeAt(0) - 87 : Number(a),
  5.     a = '+' === o.charAt(t + 1) ? r >>> a : r << a,
  6.     r = '+' === o.charAt(t) ? r + a & 4294967295 : r ^ a
  7.   }
  8.   return r
  9. }
  10. function e(r) {
  11.   var i = '320305.131321201'
  12.   var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
  13.   if (null === o) {
  14.     var t = r.length;
  15.     t > 30 && (r = '' + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr( - 10, 10))
  16.   } else {
  17.     for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = [
  18.     ]; h > C; C++) '' !== e[C] && f.push.apply(f, a(e[C].split(''))),
  19.     C !== h - 1 && f.push(o[C]);
  20.     var g = f.length;
  21.     g > 30 && (r = f.slice(0, 10).join('') + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join('') + f.slice( - 10).join(''))
  22.   }
  23.   var u = void 0,
  24.   l = '' + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
  25.   u = null !== i ? i : (i = window[l] || '') || '';
  26.   for (var d = u.split('.'), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [
  27.   ], c = 0, v = 0; v < r.length; v++) {
  28.     var A = r.charCodeAt(v);
  29.     128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128)  : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
  30.   }
  31.   for (var p = m, F = '' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ('' + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = '' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ('' + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ('' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b],
  32.   p = n(p, F);
  33.   return p = n(p, D),
  34.   p ^= s,
  35.   0 > p && (p = (2147483647 & p) + 2147483648),
  36.   p %= 1000000,
  37.   p.toString() + '.' + (p ^ m)
  38. }
复制代码

6.png

另外百度翻译免费开放api,申请一下就能用:https://api.fanyi.baidu.com/
小甲鱼最新课程 -> https://ilovefishc.com
回复 支持 反对

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

小黑屋|手机版|Archiver|鱼C工作室 ( 粤ICP备18085999号-1 | 粤公网安备 44051102000585号)

GMT+8, 2025-6-30 01:45

Powered by Discuz! X3.4

© 2001-2023 Discuz! Team.

快速回复 返回顶部 返回列表