|
楼主 |
发表于 2021-11-25 14:39:43
|
显示全部楼层
大神,受累帮忙瞟一眼,看看是啥问题
- import requests
- from lxml import etree
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- from fake_useragent import UserAgent
- from urllib import parse
- import re
- class tencent_movie (object):
- def __init__(self):
- ua = UserAgent(verify_ssl=False)
- for i in range(1,2):
- self.headers = {'User-Agent':ua.random}
- def get_html(self,url):
- res = requests.get(url,headers=self.headers)
- html = res.content.decode('utf-8')
- return html
- def parse_html(self,html):
- target = etree.HTML(html)
- links = target.xpath('//h2[@class="result_title"]/a/@href')
- host = links[0]
- res = requests.get(host,headers=self.headers)
- con = res.content.decode('utf-8')
- new_html = etree.HTML(con)
- # first_select = int(input('1.电视剧\n2.电影\n'))
- first_select = 1
- pattern = re.compile('https://v.qq.com/x/cover/(.*?).html')
- cid = pattern.findall(host)[0]
- if first_select == 1:
- #获取电视剧集链接
- url_link ='https://pbaccess.video.qq.com/trpc.universal_backend_service.page_server_rpc.PageServer/GetPageData?video_appid=3000010&vplatform=2'
- data = {
- 'cid': cid,
- 'id_type': "1",
- 'lid': "",
- 'page_context': "",
- 'page_id': "vsite_episode_list",
- 'page_num': "",
- 'page_size': "30",
- 'page_type': "detail_operation",
- 'req_from': "web",
- 'vid': "d0027j9renh"
- }
- res_series = requests.post(url=url_link,headers=self.headers,data=data).json()
- print(res_series)
- #运行不下去了~~~~~~~
- select = int(input('需要哪一集:'))
- # last_host = 'https://api.akmov.net/?url='+ new_link
- else:
- # 电影是直接可以得到链接地址
- last_host = host
- self.driver = webdriver.Edge()
- self.driver.get(last_host)
- def main(self):
- # name = input('请输入电视局名或电影名:')
- name = '沙海'
- query = {'wd':name}
- name_link = parse.urlencode(query)
- url = 'https://v.qq.com/x/search/?q={}&stag=0&smartbox_ab='.format(name_link)
- html = self.get_html(url)
- self.parse_html(html)
- if __name__ == '__main__':
- spider=tencent_movie()
- spider.main()
复制代码
代码到了这里时就运行不下去了
-
- url_link = 'https://pbaccess.video.qq.com/trpc.universal_backend_service.page_server_rpc.PageServer/GetPageData?video_appid=3000010&vplatform=2'
- data = {
- 'cid': cid,
- 'id_type': "1",
- 'lid': "",
- 'page_context': "",
- 'page_id': "vsite_episode_list",
- 'page_num': "",
- 'page_size': "30",
- 'page_type': "detail_operation",
- 'req_from': "web",
- 'vid': "d0027j9renh"
- }
- res_series = requests.post(url=url_link,headers=self.headers,data=data).json()
- print(res_series)
- #运行不下去了~~~~~~~
复制代码
因为得到的 res_series = {'ret': 35013, 'msg': 'unknow error.'}
不是正常的数据
求大神帮忙看看,问题时在哪里?
是data错了,还是腾讯视频有什么反爬机制? |
|