|
发表于 2023-4-1 02:25:40
|
显示全部楼层
本楼为最佳答案
本帖最后由 昭昭天命amg 于 2023-4-1 02:27 编辑
这是我去年跟着教程敲的代码,看对你有没有帮助吧
import os
import re

import requests
from lxml import etree
# Check whether a link is a video link rather than an ad.
def check_video(string):
    """Return True when *string* starts with ``video``.

    The download loop uses this to keep only homepage links that begin with
    ``video`` and skip everything else (the original author describes the
    rejected links as ads).

    Args:
        string: The ``href`` value to test.

    Returns:
        ``True`` if the text begins with ``video``, otherwise ``False``.
    """
    # A plain prefix test; equivalent to matching the regex ``^video``.
    return string.startswith('video')
BASE_URL = 'https://www.pearvideo.com/'  # Pear Video home page
OUTPUT_DIR = 'video'  # files are saved as video/<cont_id>.mp4
REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely without one


def _real_video_url(src_url, system_time, cont_id):
    """Return *src_url* with the timestamp replaced by ``cont-<cont_id>``.

    The address reported by the status API embeds the server timestamp;
    swapping it for ``cont-<cont_id>`` yields the real video address.
    """
    return src_url.replace(system_time, f'cont-{cont_id}')


def _download_video(cont_id, child_url):
    """Resolve the real address of one video and save it to ``OUTPUT_DIR``.

    Args:
        cont_id: Content id taken from the video link (``video_<cont_id>``).
        child_url: Absolute URL of the video page, sent as the ``Referer``.

    Raises:
        requests.RequestException: On network errors or non-2xx responses.
        KeyError: If the status API response lacks the expected fields.
    """
    headers = {
        'Referer': child_url,  # anti-hotlinking header
    }
    # NOTE(review): the ``mrd`` value is kept exactly as in the original
    # script; its meaning is not visible here -- confirm before changing it.
    video_api = (
        f'{BASE_URL}videoStatus.jsp'
        f'?contId={cont_id}&mrd=0.13729588533471238'
    )
    with requests.get(
        url=video_api, headers=headers, timeout=REQUEST_TIMEOUT
    ) as api_resp:
        api_resp.raise_for_status()
        dic = api_resp.json()

    system_time = dic['systemTime']  # timestamp reported by the server
    src_url = dic['videoInfo']['videos']['srcUrl']  # templated video address
    src_url = _real_video_url(src_url, system_time, cont_id)

    with requests.get(src_url, timeout=REQUEST_TIMEOUT) as video_resp:
        video_resp.raise_for_status()
        # The file is only created after the whole body has been received.
        with open(f'{OUTPUT_DIR}/{cont_id}.mp4', 'wb') as f:
            f.write(video_resp.content)
    print(f'视频{cont_id}下载成功')


def main():
    """Download every video linked from the home page's video list."""
    # open() does not create missing directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    with requests.get(BASE_URL, timeout=REQUEST_TIMEOUT) as resp:
        resp.raise_for_status()
        html = etree.HTML(resp.text)

    # Relative links of the entries in the home page video list.
    hrefs = html.xpath('//*[@id="vervideoTlist"]//a/@href')
    for href in hrefs:
        if not check_video(href):  # skip links that are not videos (ads)
            continue
        parts = href.split('_')
        if len(parts) < 2 or not parts[1]:
            # A link without a content id cannot be looked up.
            continue
        cont_id = parts[1]
        child_url = BASE_URL + href  # absolute URL of the video page
        try:
            _download_video(cont_id, child_url)
        except (requests.RequestException, KeyError, TypeError, ValueError) as exc:
            # One broken entry should not abort the remaining downloads.
            print(f'视频{cont_id}下载失败: {exc}')


if __name__ == '__main__':
    main()
复制代码 |
|