鱼C论坛

 找回密码
 立即注册
查看: 1360|回复: 2

[已解决]爬虫,百度翻译

[复制链接]
发表于 2020-11-20 22:15:45 | 显示全部楼层 |阅读模式

马上注册,结交更多好友,享用更多功能^_^

您需要 登录 才可以下载或查看,没有账号?立即注册

x
import requests
import json
# Ask the user for a keyword, POST it to Baidu Translate's "sug" endpoint and
# print the decoded JSON reply twice: once as a Python object and once as a
# key-sorted JSON string with non-ASCII characters left readable.
url = 'https://fanyi.baidu.com/sug'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
    }

work = input('请输入需要翻译的关键词')
data = {'kw': work}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.post(url=url, data=data, headers=headers, timeout=10)
# Stop on an HTTP error status instead of trying to decode an error page.
response.raise_for_status()
a = response.json()
print(a)
print(json.dumps(a, ensure_ascii=False, sort_keys=True))

请输入需要翻译的关键词cat
{'errno': 0, 'data': [{'k': 'cat ear', 'v': '[医]猫耳'}, {'k': 'cat fur', 'v': '[医]猫皮毛'}, {'k': 'cat nap', 'v': 'n. 打瞌睡'}, {'k': 'cat eyes', 'v': '猫眼'}, {'k': 'cat fish', 'v': '[医]鲶(鱼),鲇鱼'}]}
{"data": [{"k": "cat ear", "v": "[医]猫耳"}, {"k": "cat fur", "v": "[医]猫皮毛"}, {"k": "cat nap", "v": "n. 打瞌睡"}, {"k": "cat eyes", "v": "猫眼"}, {"k": "cat fish", "v": "[医]鲶(鱼),鲇鱼"}], "errno": 0}

我想问一下,为什么返回的结果里没有我想要的翻译结果?

最佳答案
2020-11-21 10:53:31
本帖最后由 suchocolate 于 2020-11-21 13:01 编辑

这个 sug url 只能翻译单个词(返回的是联想词条),完整的翻译要用另外的 url:https://fanyi.baidu.com/v2transapi ,并且还需要提交 sign 和 token。你可以参考这篇文章:https://blog.csdn.net/qq_38534107/article/details/90440403 ,以下是参考他的方法写的代码:
import execjs
import requests


class BaiduTranslateJS:
    """Small client for the Baidu Translate ``v2transapi`` web endpoint.

    Each request posts a form containing the query, a fixed language pair
    (``from`` = "zh", ``to`` = "en"), a hard-coded ``token`` and a ``sign``
    value.  The sign is produced by calling the JavaScript function ``e``
    from a local JS file through ``execjs``.

    Args:
        js_path: Path of the JavaScript file that defines ``e``.
        timeout: Timeout in seconds passed to ``requests.post``.
    """

    def __init__(self, *, js_path='test.js', timeout=10):
        self.url = "https://fanyi.baidu.com/v2transapi"
        self.js_path = js_path
        self.timeout = timeout
        # NOTE(review): the Cookie below is a hard-coded browser cookie; the
        # BDUSS entry looks like a login session value -- confirm it is safe
        # to publish and expect to replace it with your own.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
            "Cookie": "BAIDUID=F0E3DE0018AFB4C172AD6D2E7C917128:FG=1; BIDUPSID=F0E3DE0018AFB4C1DFC82BD817EAC44D; "
                      "PSTM=1595323520; MCITY=-%3A; "
                      "BDUSS=IxVFNzRlJ0b1VhcWVWS3VLV1dwLVpRdmNMY01ZMUtxM2RGblBPanVZV3pHV2RmSVFBQUFBJCQAAAAAAAAAAAEAAA"
                      "AQTRwgQ3JpdGljMjAxMgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALOMP"
                      "1-zjD9fa1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=16059"
                      "21866,1605929186; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWI"
                      "TCH=1; SOUND_PREFER_SWITCH=1; yjs_js_security_passport=01bd0488dc53c6663e9038c21110dd20eaa3d83"
                      "e_1605929187_js; BA_HECTOR=a080alal0501a02h1e1frh03m0q; __yjsv5_shitong=1.0_7_3d669e2415c752e931"
                      "566ddcdfe31f01a9af_300_1605929186447_112.1.222.242_a0e2999e; Hm_lpvt_64ecd82404c51e03dc91cb9e8c"
                      "025574=1605929186 "
        }
        # Form fields sent with every request; ``query`` and ``sign`` are
        # filled in by t() before each POST.
        self.data = {
            "from": "zh",
            "to": "en",
            "query": "",
            "simple_means_flag": "3",
            "sign": "",
            "token": "5a3db26d59330f426a3c74eca9c27a27",   # hard-coded token value
            "domain": "common"
        }

    def t(self, query):
        """Translate *query* and return the ``dst`` text of the first result.

        Stores the query and its computed sign in ``self.data`` and posts
        that form to ``self.url``.

        Raises:
            KeyError: If the JSON reply lacks the expected
                ``trans_result``/``data``/``dst`` keys; the message includes
                the decoded reply so the server's answer is visible.
            requests.RequestException: On timeout, connection failure or an
                HTTP error status.
        """
        self.data['query'] = query
        with open(self.js_path, 'r', encoding='utf-8') as f:
            ctx = execjs.compile(f.read())
        self.data['sign'] = ctx.call('e', query)
        r = requests.post(self.url, headers=self.headers, data=self.data,
                          timeout=self.timeout)
        r.raise_for_status()
        rj = r.json()
        try:
            return rj['trans_result']['data'][0]['dst']
        except KeyError as err:
            # Same exception type as the bare lookup, but with the reply
            # attached so a rejected request can be diagnosed.
            raise KeyError(f"no translation in response from {self.url}: {rj!r}") from err


if __name__ == '__main__':
    # Demo run: pass a fixed sample sentence to t() and print what it returns.
    translator = BaiduTranslateJS()
    print(translator.t('我的家在东北'))

token和js里的i值获取方法:
import requests
import re


def _first_group(pattern, text, label):
    """Return capture group 1 of the first match of *pattern* in *text*.

    Raises:
        IndexError: If *pattern* does not match.  This is the exception type
            a bare ``re.findall(...)[0]`` would raise, but the message names
            *label* so the missing value is identifiable.
    """
    match = re.search(pattern, text)
    if match is None:
        raise IndexError(f"{label} not found in page source")
    return match.group(1)


def main():
    """Download the Baidu Translate home page and print its token and gtk.

    Both values are extracted from the page text with regular expressions
    and printed, token first.
    """
    url = 'https://fanyi.baidu.com'
    headers = {'user-agent': 'firefox'}
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, headers=headers, timeout=10)
    token = _first_group(r"token: '(.*?)',", r.text, 'token')
    print(token)   # token
    gtk = _first_group(r"gtk = '(.*?)'", r.text, 'gtk')
    print(gtk)    # value used for i in the JS


# Run main() only when this file is executed directly as a script.
if __name__ == "__main__":
    main()


官网现在的 js 和那篇笔记里的相比已经有变动,需要从浏览器里拷贝最新的代码,在本地新建 test.js:
// Apply the mixing program `o` to the number `r` and return the result.
// `o` is read three characters at a time: [combine, direction, amount].
//   amount    - a character >= 'a' maps to charCode - 87, otherwise Number(char)
//   direction - '+' means unsigned right shift (>>>), anything else left shift (<<)
//   combine   - '+' means 32-bit add with the shifted value, anything else XOR
function n(r, o) {
  var limit = o.length - 2;
  for (var pos = 0; pos < limit; pos += 3) {
    var combineChar = o.charAt(pos);
    var directionChar = o.charAt(pos + 1);
    var amountChar = o.charAt(pos + 2);

    var amount;
    if (amountChar >= 'a') {
      amount = amountChar.charCodeAt(0) - 87;
    } else {
      amount = Number(amountChar);
    }

    var shifted;
    if ('+' === directionChar) {
      shifted = r >>> amount;
    } else {
      shifted = r << amount;
    }

    if ('+' === combineChar) {
      r = (r + shifted) & 4294967295;
    } else {
      r = r ^ shifted;
    }
  }
  return r;
}
// Compute the sign string for the query text `r`.
// Returns "<p>.<p ^ m>", where p is a hash of the (possibly shortened) query
// reduced modulo 1000000 and m is the integer part of the seed `i`.
function e(r) {
  // Seed in the form "<m>.<s>"; both halves are parsed as numbers below.
  var i = '320305.131321201'
  // Surrogate pairs (characters outside the BMP) present in the query, if any.
  var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
  if (null === o) {
    // No surrogate pairs: queries longer than 30 UTF-16 units are shortened to
    // the first 10, the middle 10 and the last 10 units.
    var t = r.length;
    t > 30 && (r = '' + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr( - 10, 10))
  } else {
    // Surrogate pairs present: build a list `f` of whole characters (each
    // pair kept as one element) so that shortening never splits a pair.
    // The split pieces are pushed directly; the previous code wrapped them in
    // a helper `a(...)` that is not defined in this file, which threw a
    // ReferenceError for any query mixing such characters with other text.
    for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = [
    ]; h > C; C++) '' !== e[C] && f.push.apply(f, e[C].split('')),
    C !== h - 1 && f.push(o[C]);
    var g = f.length;
    g > 30 && (r = f.slice(0, 10).join('') + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join('') + f.slice( - 10).join(''))
  }
  var u = void 0,
  // l is the string "gtk", spelled out from character codes.
  l = '' + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
  // i is always a non-null string here, so the window[l] fallback is never evaluated.
  u = null !== i ? i : (i = window[l] || '') || '';
  // m / s: the two numeric halves of the seed.  S collects the UTF-8 bytes of r.
  for (var d = u.split('.'), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [
  ], c = 0, v = 0; v < r.length; v++) {
    var A = r.charCodeAt(v);
    // 1 byte below 128, 2 bytes below 2048, 4 bytes for a surrogate pair, otherwise 3 bytes.
    128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128)  : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
  }
  // F is "+-a^+6" and D is "+-3^+b+-f" (built from character codes); both are
  // mixing programs for n().  Each byte is added to p and mixed with F.
  for (var p = m, F = '' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ('' + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = '' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ('' + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ('' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b],
  p = n(p, F);
  // Final mix with D, XOR with s, make non-negative, keep the last six digits.
  return p = n(p, D),
  p ^= s,
  0 > p && (p = (2147483647 & p) + 2147483648),
  p %= 1000000,
  p.toString() + '.' + (p ^ m)
}
6.png

另外百度翻译免费开放api,申请一下就能用:https://api.fanyi.baidu.com/

我想要这里的结果

我想要这里的结果
想知道小甲鱼最近在做啥?请访问 -> ilovefishc.com
回复

使用道具 举报

发表于 2020-11-21 01:22:18 | 显示全部楼层
会不会是哪里出问题了?如果你只需要英翻中,可以试试直接用 get 请求,我的浏览器地址栏里显示的是这样的 url:
https://fanyi.baidu.com/?aldtype=16047#en/zh/cat
看见没有,这个就很明显了
from urllib.parse import quote

# Build the Baidu Translate page URL for an English word typed by the user.
temp = input('请输入待翻译的英文:')
# Percent-encode the input so spaces and non-ASCII characters still give a
# valid URL; plain ASCII words are left unchanged.
# NOTE(review): everything after '#' is a URL fragment, which HTTP clients do
# not send to the server, so a plain GET of this URL probably will not return
# the translation itself -- confirm before relying on it.
url = r'https://fanyi.baidu.com/?aldtype=16047#en/zh/' + quote(temp)
想知道小甲鱼最近在做啥?请访问 -> ilovefishc.com
回复 支持 反对

使用道具 举报

发表于 2020-11-21 10:53:31 | 显示全部楼层    本楼为最佳答案   
本帖最后由 suchocolate 于 2020-11-21 13:01 编辑

这个 sug url 只能翻译单个词(返回的是联想词条),完整的翻译要用另外的 url:https://fanyi.baidu.com/v2transapi ,并且还需要提交 sign 和 token。你可以参考这篇文章:https://blog.csdn.net/qq_38534107/article/details/90440403 ,以下是参考他的方法写的代码:
import execjs
import requests


class BaiduTranslateJS:
    """Small client for the Baidu Translate ``v2transapi`` web endpoint.

    Each request posts a form containing the query, a fixed language pair
    (``from`` = "zh", ``to`` = "en"), a hard-coded ``token`` and a ``sign``
    value.  The sign is produced by calling the JavaScript function ``e``
    from a local JS file through ``execjs``.

    Args:
        js_path: Path of the JavaScript file that defines ``e``.
        timeout: Timeout in seconds passed to ``requests.post``.
    """

    def __init__(self, *, js_path='test.js', timeout=10):
        self.url = "https://fanyi.baidu.com/v2transapi"
        self.js_path = js_path
        self.timeout = timeout
        # NOTE(review): the Cookie below is a hard-coded browser cookie; the
        # BDUSS entry looks like a login session value -- confirm it is safe
        # to publish and expect to replace it with your own.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
            "Cookie": "BAIDUID=F0E3DE0018AFB4C172AD6D2E7C917128:FG=1; BIDUPSID=F0E3DE0018AFB4C1DFC82BD817EAC44D; "
                      "PSTM=1595323520; MCITY=-%3A; "
                      "BDUSS=IxVFNzRlJ0b1VhcWVWS3VLV1dwLVpRdmNMY01ZMUtxM2RGblBPanVZV3pHV2RmSVFBQUFBJCQAAAAAAAAAAAEAAA"
                      "AQTRwgQ3JpdGljMjAxMgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALOMP"
                      "1-zjD9fa1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=16059"
                      "21866,1605929186; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWI"
                      "TCH=1; SOUND_PREFER_SWITCH=1; yjs_js_security_passport=01bd0488dc53c6663e9038c21110dd20eaa3d83"
                      "e_1605929187_js; BA_HECTOR=a080alal0501a02h1e1frh03m0q; __yjsv5_shitong=1.0_7_3d669e2415c752e931"
                      "566ddcdfe31f01a9af_300_1605929186447_112.1.222.242_a0e2999e; Hm_lpvt_64ecd82404c51e03dc91cb9e8c"
                      "025574=1605929186 "
        }
        # Form fields sent with every request; ``query`` and ``sign`` are
        # filled in by t() before each POST.
        self.data = {
            "from": "zh",
            "to": "en",
            "query": "",
            "simple_means_flag": "3",
            "sign": "",
            "token": "5a3db26d59330f426a3c74eca9c27a27",   # hard-coded token value
            "domain": "common"
        }

    def t(self, query):
        """Translate *query* and return the ``dst`` text of the first result.

        Stores the query and its computed sign in ``self.data`` and posts
        that form to ``self.url``.

        Raises:
            KeyError: If the JSON reply lacks the expected
                ``trans_result``/``data``/``dst`` keys; the message includes
                the decoded reply so the server's answer is visible.
            requests.RequestException: On timeout, connection failure or an
                HTTP error status.
        """
        self.data['query'] = query
        with open(self.js_path, 'r', encoding='utf-8') as f:
            ctx = execjs.compile(f.read())
        self.data['sign'] = ctx.call('e', query)
        r = requests.post(self.url, headers=self.headers, data=self.data,
                          timeout=self.timeout)
        r.raise_for_status()
        rj = r.json()
        try:
            return rj['trans_result']['data'][0]['dst']
        except KeyError as err:
            # Same exception type as the bare lookup, but with the reply
            # attached so a rejected request can be diagnosed.
            raise KeyError(f"no translation in response from {self.url}: {rj!r}") from err


if __name__ == '__main__':
    # Demo run: pass a fixed sample sentence to t() and print what it returns.
    translator = BaiduTranslateJS()
    print(translator.t('我的家在东北'))

token和js里的i值获取方法:
import requests
import re


def _first_group(pattern, text, label):
    """Return capture group 1 of the first match of *pattern* in *text*.

    Raises:
        IndexError: If *pattern* does not match.  This is the exception type
            a bare ``re.findall(...)[0]`` would raise, but the message names
            *label* so the missing value is identifiable.
    """
    match = re.search(pattern, text)
    if match is None:
        raise IndexError(f"{label} not found in page source")
    return match.group(1)


def main():
    """Download the Baidu Translate home page and print its token and gtk.

    Both values are extracted from the page text with regular expressions
    and printed, token first.
    """
    url = 'https://fanyi.baidu.com'
    headers = {'user-agent': 'firefox'}
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, headers=headers, timeout=10)
    token = _first_group(r"token: '(.*?)',", r.text, 'token')
    print(token)   # token
    gtk = _first_group(r"gtk = '(.*?)'", r.text, 'gtk')
    print(gtk)    # value used for i in the JS


# Run main() only when this file is executed directly as a script.
if __name__ == "__main__":
    main()


官网现在的 js 和那篇笔记里的相比已经有变动,需要从浏览器里拷贝最新的代码,在本地新建 test.js:
// Apply the mixing program `o` to the number `r` and return the result.
// `o` is read three characters at a time: [combine, direction, amount].
//   amount    - a character >= 'a' maps to charCode - 87, otherwise Number(char)
//   direction - '+' means unsigned right shift (>>>), anything else left shift (<<)
//   combine   - '+' means 32-bit add with the shifted value, anything else XOR
function n(r, o) {
  var limit = o.length - 2;
  for (var pos = 0; pos < limit; pos += 3) {
    var combineChar = o.charAt(pos);
    var directionChar = o.charAt(pos + 1);
    var amountChar = o.charAt(pos + 2);

    var amount;
    if (amountChar >= 'a') {
      amount = amountChar.charCodeAt(0) - 87;
    } else {
      amount = Number(amountChar);
    }

    var shifted;
    if ('+' === directionChar) {
      shifted = r >>> amount;
    } else {
      shifted = r << amount;
    }

    if ('+' === combineChar) {
      r = (r + shifted) & 4294967295;
    } else {
      r = r ^ shifted;
    }
  }
  return r;
}
// Compute the sign string for the query text `r`.
// Returns "<p>.<p ^ m>", where p is a hash of the (possibly shortened) query
// reduced modulo 1000000 and m is the integer part of the seed `i`.
function e(r) {
  // Seed in the form "<m>.<s>"; both halves are parsed as numbers below.
  var i = '320305.131321201'
  // Surrogate pairs (characters outside the BMP) present in the query, if any.
  var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
  if (null === o) {
    // No surrogate pairs: queries longer than 30 UTF-16 units are shortened to
    // the first 10, the middle 10 and the last 10 units.
    var t = r.length;
    t > 30 && (r = '' + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr( - 10, 10))
  } else {
    // Surrogate pairs present: build a list `f` of whole characters (each
    // pair kept as one element) so that shortening never splits a pair.
    // The split pieces are pushed directly; the previous code wrapped them in
    // a helper `a(...)` that is not defined in this file, which threw a
    // ReferenceError for any query mixing such characters with other text.
    for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = [
    ]; h > C; C++) '' !== e[C] && f.push.apply(f, e[C].split('')),
    C !== h - 1 && f.push(o[C]);
    var g = f.length;
    g > 30 && (r = f.slice(0, 10).join('') + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join('') + f.slice( - 10).join(''))
  }
  var u = void 0,
  // l is the string "gtk", spelled out from character codes.
  l = '' + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
  // i is always a non-null string here, so the window[l] fallback is never evaluated.
  u = null !== i ? i : (i = window[l] || '') || '';
  // m / s: the two numeric halves of the seed.  S collects the UTF-8 bytes of r.
  for (var d = u.split('.'), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [
  ], c = 0, v = 0; v < r.length; v++) {
    var A = r.charCodeAt(v);
    // 1 byte below 128, 2 bytes below 2048, 4 bytes for a surrogate pair, otherwise 3 bytes.
    128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128)  : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
  }
  // F is "+-a^+6" and D is "+-3^+b+-f" (built from character codes); both are
  // mixing programs for n().  Each byte is added to p and mixed with F.
  for (var p = m, F = '' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ('' + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = '' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ('' + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ('' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b],
  p = n(p, F);
  // Final mix with D, XOR with s, make non-negative, keep the last six digits.
  return p = n(p, D),
  p ^= s,
  0 > p && (p = (2147483647 & p) + 2147483648),
  p %= 1000000,
  p.toString() + '.' + (p ^ m)
}
6.png

另外百度翻译免费开放api,申请一下就能用:https://api.fanyi.baidu.com/
想知道小甲鱼最近在做啥?请访问 -> ilovefishc.com
回复 支持 反对

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

小黑屋|手机版|Archiver|鱼C工作室 ( 粤ICP备18085999号-1 | 粤公网安备 44051102000585号)

GMT+8, 2025-1-17 14:04

Powered by Discuz! X3.4

© 2001-2023 Discuz! Team.

快速回复 返回顶部 返回列表