| 
 | 
 
 
发表于 2023-4-1 02:25:40
|
显示全部楼层
   本楼为最佳答案    
 
 
 本帖最后由 昭昭天命amg 于 2023-4-1 02:27 编辑  
 
这是我去年跟着教程敲的代码,看对你有没有帮助吧 
- import requests
 
 - import re
 
 - from lxml import etree
 
  
def check_video(string):
    """Return True if *string* is a video link rather than an ad link.

    The home-page list mixes video links with ad links; a link is treated
    as a video link when it starts with the literal prefix ``video``.

    Args:
        string: The ``href`` value to classify.

    Returns:
        ``True`` when *string* starts with ``video``, ``False`` otherwise.
    """
    # A plain prefix test needs no regex (and no per-call re.compile).
    return string.startswith('video')
 
  
 
import os

# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the script indefinitely.
REQUEST_TIMEOUT = 10

url = 'https://www.pearvideo.com/'  # Pear Video home page

# The downloads are written to ./video; create it up front so open() below
# does not fail on a fresh checkout.
os.makedirs('video', exist_ok=True)

# Fetch the home page; the context manager releases the connection even if
# parsing raises.
with requests.get(url, timeout=REQUEST_TIMEOUT) as resp:
    resp.raise_for_status()
    html = etree.HTML(resp.text)

# Relative links of the entries in the home-page video list.
hrefs = html.xpath('//*[@id="vervideoTlist"]//a/@href')

for href in hrefs:
    # Skip ad links; only links accepted by check_video are downloaded.
    if not check_video(href):
        continue

    # The content id is the part after the first underscore of the link.
    parts = href.split('_')
    if len(parts) < 2:
        continue
    cont_id = parts[1]

    child_url = url + href  # absolute URL of the video's detail page
    headers = {
        # The status API is called with the detail page as Referer
        # (anti-hotlinking check).
        'Referer': child_url
    }
    videoAPI = f'https://www.pearvideo.com/videoStatus.jsp?contId={cont_id}&mrd=0.13729588533471238'

    with requests.get(url=videoAPI, headers=headers, timeout=REQUEST_TIMEOUT) as api_resp:
        api_resp.raise_for_status()
        dic = api_resp.json()

    try:
        systemTime = dic['systemTime']  # timestamp embedded in the returned URL
        srcUrl = dic['videoInfo']['videos']['srcUrl']  # placeholder video address
    except KeyError:
        # The API answered without the expected fields; skip this entry
        # instead of aborting the whole run.
        print(f'视频{cont_id}信息获取失败，已跳过')
        continue

    # Replacing the timestamp with "cont-<id>" yields the real video address.
    srcUrl = srcUrl.replace(systemTime, f'cont-{cont_id}')

    # Stream the file to disk in chunks rather than holding the whole video
    # in memory.
    with requests.get(srcUrl, stream=True, timeout=REQUEST_TIMEOUT) as video_resp:
        video_resp.raise_for_status()
        with open(f'video/{cont_id}.mp4', 'wb') as f:
            for chunk in video_resp.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
    print(f'视频{cont_id}下载成功')
 
 
  复制代码 |   
 
 
 
 |