|
60鱼币
这个是最后一次尝试,还是不行。要怎么弄呀?
# 目标:在哔哩哔哩播放页面的网址栏输入url,即可获取到视频数据
import re
import json
import requests
import subprocess
# Replace characters that Windows does not allow in file names.
def setFileTitle(title):
    """Return *title* with Windows-illegal file-name characters replaced by "_".

    The replaced characters are the slash, backslash, colon, asterisk,
    question mark, double quote, angle brackets and vertical bar.

    Args:
        title: The raw title string.

    Returns:
        The sanitized string; characters outside the illegal set are kept.
    """
    # Single-quoted raw string so the double quote can sit inside the
    # character class without terminating the literal.
    rstr = r'[/\\:*?"<>|]'
    name = re.sub(rstr, "_", title)
    return name
# The stream metadata is embedded in the video page as `window.__playinfo__`.
url = 'https://www.bilibili.com/video/BV1684y1G73z/?spm_id_from=333.1007.tianma.1-1-1.click'
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
    "cookie": "_uuid=6F74D356-D356-D741-D10D3-D7B6F5F8B29967778infoc; buvid_fp=da80279a888d378c2f4b8690887d5c58; CURRENT_FNVAL=4048; buvid3=4E7146FA-43A5-3251-AF99-30B56383F3A768920infoc; b_nut=1675329869; buvid4=D7668A3A-1793-39E0-25EE-938D236E2B7768920-023020217-WiXBYh03a5sqpgE/VvEB3g%3D%3D; rpdid=|(JYYu~Y))J|0J'uY~lJkm)~k; i-wanna-go-back=-1; b_ut=7; b_lsid=D2C8F6F6_18649B276B8; nostalgia_conf=-1; sid=7qns657c; innersign=1; theme_style=light; PVID=1",
}
# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops us from regex-scanning an HTTP error page.
res = requests.get(url, headers=headers, timeout=30)
res.raise_for_status()
# The later media downloads reuse `headers`; the referer is added for
# hotlink protection (per the original author's note).
headers['referer'] = url
# Video title, taken from the JSON embedded in the page.
title_matches = re.findall('"title":"(.*?)","pubdate"', res.text)
if not title_matches:
    raise RuntimeError('video title not found in the page; the page layout may have changed')
title = title_matches[0]
name = setFileTitle(title)
# Raw JSON text assigned to window.__playinfo__ in the page.
html_data = re.findall('<script>window.__playinfo__=(.*?)</script>', res.text)
if not html_data:
    raise RuntimeError('window.__playinfo__ not found in the page; the page layout may have changed')
# Parse the JSON text into a dict.
json_data = json.loads(html_data[0])
# Use the first listed audio stream and the first listed video stream.
audio_url = json_data['data']['dash']['audio'][0]['baseUrl']
video_url = json_data['data']['dash']['video'][0]['baseUrl']
# Merge the downloaded audio and video into a single file.
def merge(video_address, audio_address, merge_address, ffmpeg=r'F:\FFmpeg\bin\ffmpeg'):
    """Combine a video file and an audio file with ffmpeg, copying both streams.

    Args:
        video_address: Path of the input video file.
        audio_address: Path of the input audio file.
        merge_address: Path of the output file.
        ffmpeg: ffmpeg executable to run; defaults to the location this
            script previously hard-coded.

    Returns:
        The exit code of the ffmpeg process (0 on success).

    Raises:
        FileNotFoundError: If the ffmpeg executable cannot be found.
    """
    # An argument list run without a shell hands each path to ffmpeg as
    # exactly one argument, so spaces in a video title can no longer split
    # a path into several arguments and nothing is interpreted by a shell.
    cmd = [
        ffmpeg,
        '-i', video_address,
        '-i', audio_address,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        merge_address,
    ]
    return subprocess.call(cmd)
# Download both streams, save them to disk, then merge them.
import os  # not imported at the top of this script, so bring it in here

audio_address = f"C:/Users/misaka/Desktop/bilibili/{name}.mp3"
video_address = f"C:/Users/misaka/Desktop/bilibili/{name}.mp4"
merge_address = f"C:/Users/misaka/Desktop/bilibili/合并/{name}.mp4"
# open() fails when its target directory is missing, and the merge output
# needs its directory too. Creating the nested merge directory also creates
# the parent directory that the two downloads are written to.
os.makedirs(os.path.dirname(merge_address), exist_ok=True)
audio_info = requests.get(audio_url, headers=headers, timeout=30)
video_info = requests.get(video_url, headers=headers, timeout=30)
# Do not save an HTTP error body as if it were media.
audio_info.raise_for_status()
video_info.raise_for_status()
# Save the downloaded bytes.
with open(audio_address, mode='wb') as audio:
    audio.write(audio_info.content)
with open(video_address, mode='wb') as video:
    video.write(video_info.content)
merge(video_address, audio_address, merge_address)
C:\Users\misaka\AppData\Local\Programs\Python\Python311\python.exe C:\Users\misaka\PycharmProjects\pythonProject\爬虫\爬取哔哩哔哩\爬取视频测试.py
系统找不到指定的驱动器。
进程已结束,退出代码0
本帖最后由 isdkz 于 2023-2-13 18:04 编辑
我在调试的时候路径调整了一下,你可以改回来
我做的调整:判断目录存在不存在,不存在就创建
关键点:windows命令执行的时候如果路径带空格就会截断成多个参数,解决方法就是在路径两边加双引号,所以我在 cmd 那里给大括号两边都加了双引号
# 目标:在哔哩哔哩播放页面的网址栏输入url,即可获取到视频数据
import re
import json
import requests
import subprocess
# Replace characters that Windows does not allow in file names.
def setFileTitle(title):
    """Return *title* with Windows-illegal file-name characters replaced by "_".

    The replaced characters are the slash, backslash, colon, asterisk,
    question mark, double quote, angle brackets and vertical bar.

    Args:
        title: The raw title string.

    Returns:
        The sanitized string; characters outside the illegal set are kept.
    """
    # Inside a character class only the backslash needs escaping here.
    rstr = r'[/\\:*?"<>|]'
    name = re.sub(rstr, "_", title)
    return name
# The stream metadata is embedded in the video page as `window.__playinfo__`.
url = 'https://www.bilibili.com/video/BV1684y1G73z/?spm_id_from=333.1007.tianma.1-1-1.click'
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
    "cookie": "_uuid=6F74D356-D356-D741-D10D3-D7B6F5F8B29967778infoc; buvid_fp=da80279a888d378c2f4b8690887d5c58; CURRENT_FNVAL=4048; buvid3=4E7146FA-43A5-3251-AF99-30B56383F3A768920infoc; b_nut=1675329869; buvid4=D7668A3A-1793-39E0-25EE-938D236E2B7768920-023020217-WiXBYh03a5sqpgE/VvEB3g%3D%3D; rpdid=|(JYYu~Y))J|0J'uY~lJkm)~k; i-wanna-go-back=-1; b_ut=7; b_lsid=D2C8F6F6_18649B276B8; nostalgia_conf=-1; sid=7qns657c; innersign=1; theme_style=light; PVID=1",
}
# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops us from regex-scanning an HTTP error page.
res = requests.get(url, headers=headers, timeout=30)
res.raise_for_status()
# The later media downloads reuse `headers`; the referer is added for
# hotlink protection (per the original author's note).
headers['referer'] = url
# Video title, taken from the JSON embedded in the page.
title_matches = re.findall('"title":"(.*?)","pubdate"', res.text)
if not title_matches:
    raise RuntimeError('video title not found in the page; the page layout may have changed')
title = title_matches[0]
name = setFileTitle(title)
# Raw JSON text assigned to window.__playinfo__ in the page.
html_data = re.findall('<script>window.__playinfo__=(.*?)</script>', res.text)
if not html_data:
    raise RuntimeError('window.__playinfo__ not found in the page; the page layout may have changed')
# Parse the JSON text into a dict.
json_data = json.loads(html_data[0])
# Use the first listed audio stream and the first listed video stream.
audio_url = json_data['data']['dash']['audio'][0]['baseUrl']
video_url = json_data['data']['dash']['video'][0]['baseUrl']
# Merge the downloaded audio and video into a single file.
def merge(video_address, audio_address, merge_address, ffmpeg=r'F:\FFmpeg\bin\ffmpeg'):
    """Combine a video file and an audio file with ffmpeg, copying both streams.

    Args:
        video_address: Path of the input video file.
        audio_address: Path of the input audio file.
        merge_address: Path of the output file.
        ffmpeg: ffmpeg executable to run; defaults to the location this
            script previously hard-coded.

    Returns:
        The exit code of the ffmpeg process (0 on success).

    Raises:
        FileNotFoundError: If the ffmpeg executable cannot be found.
    """
    # An argument list run without a shell needs no manual quoting: each
    # path reaches ffmpeg as exactly one argument even when it contains
    # spaces, and nothing in a video title is interpreted by a shell.
    cmd = [
        ffmpeg,
        '-i', video_address,
        '-i', audio_address,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        merge_address,
    ]
    return subprocess.call(cmd)
# Download both streams, save them to disk, then merge them.
import os

audio_address = f"bilibili/{name}.mp3"
video_address = f"bilibili/{name}.mp4"
merge_address = f"bilibili/合并/{name}.mp4"
# The directories must exist before the first open(): creating the nested
# merge directory also creates the parent directory that the two downloads
# are written to. exist_ok=True makes a separate existence check unnecessary.
os.makedirs('bilibili/合并', exist_ok=True)
audio_info = requests.get(audio_url, headers=headers, timeout=30)
video_info = requests.get(video_url, headers=headers, timeout=30)
# Do not save an HTTP error body as if it were media.
audio_info.raise_for_status()
video_info.raise_for_status()
# Save the downloaded bytes.
with open(audio_address, mode='wb') as audio:
    audio.write(audio_info.content)
with open(video_address, mode='wb') as video:
    video.write(video_info.content)
merge(video_address, audio_address, merge_address)
|
最佳答案
查看完整内容
我在调试的时候路径调整了一下,你可以改回来
我做的调整:判断目录存在不存在,不存在就创建
关键点:windows命令执行的时候如果路径带空格就会截断成多个参数,解决方法就是在路径两边加双引号,所以我在 cmd 那里给大括号两边都加了双引号
|