|
发表于 2021-10-7 20:37:27
|
显示全部楼层
本帖最后由 suchocolate 于 2021-10-7 20:57 编辑
import json
import random
import sys

import requests
from lxml import etree

# Site root; used to build both the video-status URL and the Referer header.
BASE_URL = 'https://www.pearvideo.com/'
# Listing page whose links point at the individual video pages.
CATEGORY_URL = 'https://www.pearvideo.com/category_4'
USER_AGENT = 'firefox'
# Seconds to wait per request; requests waits indefinitely when no timeout is given.
REQUEST_TIMEOUT = 10


def _video_id(v_page):
    """Return the video number embedded in a video-page href, or None.

    The number is the text between the first and second underscore,
    e.g. 'video_1743082' -> '1743082'. None is returned when the href has
    no underscore or the number is empty.
    """
    parts = v_page.split('_')
    if len(parts) < 2 or not parts[1]:
        return None
    return parts[1]


def _fetch_src_url(v_page):
    """Return the 'srcUrl' reported by the video-status endpoint for one video page.

    Raises:
        ValueError: the href carries no video number, or the response is not JSON.
        KeyError, TypeError: the JSON lacks the videoInfo/videos/srcUrl path.
        requests.RequestException: the request failed, timed out, or returned
            an HTTP error status.
    """
    num = _video_id(v_page)
    if num is None:
        raise ValueError(f'no video number in href {v_page!r}')
    # The status URL is the one visible in the browser dev tools (F12 -> Network), e.g.
    # https://www.pearvideo.com/videoStatus.jsp?contId=1743082&mrd=0.554376234182324
    status_url = f'{BASE_URL}videoStatus.jsp?contId={num}&mrd={random.random()}'
    # A Referer pointing at the video page is needed to obtain the video info, e.g.
    # Referer: https://www.pearvideo.com/video_1743082
    # A fresh dict per request avoids mutating headers shared between calls.
    headers = {'user-agent': USER_AGENT, 'Referer': f'{BASE_URL}{v_page}'}
    resp = requests.get(status_url, headers=headers, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    data = json.loads(resp.text)  # the video info is returned as JSON
    return data['videoInfo']['videos']['srcUrl']


def _collect_video_urls():
    """Return the source URLs of all videos linked from the category page.

    A failure on the listing page itself propagates to the caller. A failure
    on a single video is reported on stderr and that video is skipped, so
    one bad entry does not discard the links already collected.
    """
    resp = requests.get(
        CATEGORY_URL,
        headers={'user-agent': USER_AGENT},
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    html = etree.HTML(resp.text)
    # hrefs of the individual video pages, e.g. 'video_1743082'
    v_pages = html.xpath('//a[contains(@class,"vervideo-lilink")]/@href')

    result = []  # collected video links
    for v_page in v_pages:
        try:
            src_url = _fetch_src_url(v_page)
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            print(f'skipping {v_page}: {err!r}', file=sys.stderr)
        else:
            result.append(src_url)
    return result


def main():
    """Print the list of video source URLs found on the category page."""
    print(_collect_video_urls())


if __name__ == '__main__':
    main()
复制代码 |
|