|
发表于 2021-10-12 01:57:11
|
显示全部楼层
来迟了
服务器可能使用了某种反爬虫机制。这个地址我这边一开始也是会报错的,和你的报错信息一样;之后我使用 curl 和 wget 一个参数一个参数地排除,结果现在不添加 headers 也可以用了
我没有弄明白服务器检查了哪个参数:因为我这边现在已经正常了,弄不回那个不能用的状态,所以没办法再一个参数一个参数地排除了
还有,你这个视频 13 个小时,一共有 15245 个 ts 视频,8 线程下载这些 ts 我这边花了 1 个多小时,合并视频花了 5 个多小时,合并后的 mp4 文件 11.9GB,^_^
下面这个代码是我前不久写的,思路和你的完全一样,不过就是没有第 1 步和第 2 步,毕竟这个代码不是我用的,是帮别人写的,所以这些简单的部分我就没有写
- #!/usr/bin/env python
- #coding=utf-8
- import re
- import requests
- import threadpool
- import shutil
- import os
- import sys
- import subprocess
def get_file(url, max_retries=None):
    """Download ``url`` through the module-level ``session`` and return the body.

    A failed request is reported on stderr and then retried.

    Args:
        url: Address to fetch.
        max_retries: Number of retries allowed after the first failed attempt.
            ``None`` (the default) keeps retrying forever, which is what this
            function has always done.

    Returns:
        The raw response body as ``bytes``.

    Raises:
        Exception: The last download error, once ``max_retries`` is used up.
    """
    # NOTE(review): the HTTP status code is not inspected here, so the body of
    # an error response is presumably returned like any other -- confirm
    # whether callers need a status check.
    failures = 0
    while True:
        try:
            # ``session`` is only read, so no ``global`` declaration is needed.
            return session.get(url).content
        except Exception:
            # ``Exception`` instead of a bare ``except`` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit can still break the loop.
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            sys.stderr.write('"' + url + '": download failed, retry!\n')
def get_base_url(url):
    """Return the leading ``scheme://host`` part of ``url``.

    The result is everything up to, but not including, the first ``/`` that
    follows the ``://`` separator. A URL that has no path at all (for example
    ``https://host``) is returned unchanged instead of failing.

    Args:
        url: Absolute URL containing a ``://`` separator.

    Returns:
        The prefix of ``url`` without a trailing slash.

    Raises:
        ValueError: If ``url`` contains no ``scheme://`` part.
    """
    # ``(?:/|$)`` accepts either the first path slash or the end of the string,
    # so URLs without a path no longer produce a failed match.
    match = re.match(r'(.+?://.+?)(?:/|$)', url)
    if match is None:
        raise ValueError('cannot determine base URL of ' + repr(url))
    return match.group(1)
def get_list_full_path(url_list, base_url):
    """Turn every relative entry of ``url_list`` into an absolute URL.

    Entries that already contain a ``scheme://`` prefix are kept as they are.
    Every other entry is appended to ``base_url`` with exactly one ``/``
    between the two parts, whether or not the entry starts with a slash or
    ``base_url`` ends with one.

    Args:
        url_list: List of URL strings. It is updated in place.
        base_url: Prefix (for example ``https://host``) for relative entries.

    Returns:
        The same ``url_list`` object, now holding absolute URLs.
    """
    root = base_url.rstrip('/')
    # Slice assignment keeps the in-place update that callers may rely on.
    url_list[:] = [
        entry if re.match(r'.+?://', entry) else root + '/' + entry.lstrip('/')
        for entry in url_list
    ]
    return url_list
def get_media_play_list(url):
    """Fetch the playlist at ``url`` and return the text of the media playlist.

    If the downloaded text contains lines ending in ``.m3u8`` it is treated as
    a master playlist: the first such line is resolved against ``url`` and
    that playlist is downloaded and returned instead. Otherwise the downloaded
    text itself is returned.

    Args:
        url: Address of a master or media playlist.

    Returns:
        The decoded playlist text (``str``).
    """
    # Local import: the file's import block does not provide ``urljoin``.
    from urllib.parse import urljoin

    play_list = get_file(url).decode()
    # Lines are stripped before matching so CRLF-terminated playlists are
    # recognised as well.
    variants = [
        line.strip()
        for line in play_list.splitlines()
        if re.fullmatch(r'.+\.m3u8', line.strip())
    ]
    if not variants:
        return play_list
    # ``urljoin`` leaves absolute entries untouched and resolves relative ones
    # against the playlist URL rather than against the bare host.
    return get_file(urljoin(url, variants[0])).decode()
def get_ts_list(media_play_list):
    """Return the absolute ``.ts`` segment URLs listed in a media playlist.

    Only lines that contain a ``scheme://`` part and end in ``.ts`` are
    returned; relative segment paths are ignored.

    Args:
        media_play_list: Playlist text, with LF or CRLF line endings.

    Returns:
        List of segment URLs in playlist order, without surrounding whitespace.
    """
    # Each line is stripped first: with CRLF line endings the trailing ``\r``
    # would otherwise keep ``.ts`` from matching at the end of the line.
    stripped_lines = (line.strip() for line in media_play_list.splitlines())
    return [
        line for line in stripped_lines
        if re.fullmatch(r'.+?://.+?\.ts', line)
    ]
def download_file(url, path):
    """Download ``url`` into the directory ``path`` and log the result.

    The file name is the part of ``url`` after its last ``/``. After the file
    has been written, a ``<path>: <url>`` line is printed to stdout.

    Args:
        url: Address of the file to download.
        path: Existing directory that receives the file.
    """
    name_match = re.search(r'^.+/(.+)$', url)
    target = ''.join((path, '/', name_match.group(1)))
    with open(target, 'wb') as out:
        out.write(get_file(url))
    sys.stdout.write(''.join((path, ': ', url, '\n')))
def get_video_info(path):
    """Run ``ffprobe`` on ``path`` and return its stream report.

    Args:
        path: File or URL handed to ``ffprobe``.

    Returns:
        The object decoded from the JSON that ``ffprobe -show_streams
        -print_format json`` writes to stdout.
    """
    # Local import: the file's import block does not include ``json``.
    import json

    ffprobe_cmd = ['ffprobe', '-protocol_whitelist', 'https,file,crypto,tls,tcp', '-show_streams', '-print_format', 'json', path]
    startupinfo = None
    if hasattr(subprocess, 'STARTUPINFO'):
        # ``STARTUPINFO`` exists only on Windows, where it is used here to
        # keep a console window from showing; elsewhere ``None`` is passed.
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        startupinfo.wShowWindow = subprocess.SW_HIDE
    ffprobe = subprocess.Popen(ffprobe_cmd, stdout=subprocess.PIPE, startupinfo=startupinfo)
    # ``communicate()`` drains the pipe while waiting. Calling ``wait()``
    # first can deadlock once the output exceeds the pipe buffer.
    output, _ = ffprobe.communicate()
    return json.loads(output)
def convert_video_format(v1, v2):
    """Re-encode ``v1`` into ``v2`` with ``ffmpeg`` using the ``h264`` video codec.

    An existing ``v2`` is overwritten (``-y``).

    Args:
        v1: Input file or playlist passed to ``ffmpeg -i``.
        v2: Output file name.

    Returns:
        The exit code of the ``ffmpeg`` process.
    """
    command = ['ffmpeg', '-y']
    command += ['-protocol_whitelist', 'https,file,crypto,tls,tcp']
    command += ['-i', v1]
    command += ['-c:v', 'h264', v2]
    # ``Popen.wait()`` blocks until ffmpeg exits and returns its return code.
    return subprocess.Popen(command).wait()
# Script body: download every .ts segment of the playlist with 8 worker
# threads, then let ffmpeg merge the local copies into one MP4 file.
url = 'https://pull-sh1.weizan.cn/1885047439/468616706126501938/replay.1632042576.30049749.m3u8'
# Shared HTTP session; get_file() reads this module-level name.
session = requests.Session()
path = 'download'
# Start from an empty download directory (errors from rmtree are ignored,
# e.g. when the directory does not exist yet).
shutil.rmtree(path, True)
os.mkdir(path)
media_play_list = get_media_play_list(url)
ts_list = get_ts_list(media_play_list)
if not ts_list:
    # Without segments there is nothing to merge; stop with a clear message
    # instead of handing ffmpeg an unusable playlist.
    sys.exit('no .ts segments found in playlist: ' + url)
pool = threadpool.ThreadPool(8)
# Named ``work_requests`` so the imported ``requests`` module is not shadowed.
work_requests = threadpool.makeRequests(download_file, [((ts_url, path), None) for ts_url in ts_list])
for req in work_requests:
    pool.putRequest(req)
pool.wait()
# Point the playlist at the downloaded files: keep only the file name of each
# segment line. ``\r?`` also covers playlists with CRLF line endings.
media_play_list = re.sub(r'^.+/(.+?\.ts)\r?$', r'\1', media_play_list, flags=re.MULTILINE)
with open(path + '/index.m3u8', 'w') as f:
    f.write(media_play_list)
# Prints ffmpeg's exit code.
print(convert_video_format(path + '/index.m3u8', '31.mp4'))
复制代码 |
|