鱼C论坛

 找回密码
 立即注册
查看: 2229|回复: 2

[已解决]网络上搜索了好久好久,一直不行,各种错误

[复制链接]
发表于 2023-2-13 17:32:23 | 显示全部楼层 |阅读模式
60鱼币
这个是最后一次尝试,还是不行。要怎么弄呀
  1. # 目标:哔哩哔哩播放页面输入网址栏的url,即可获取到视频数据
  2. import re
  3. import json
  4. import requests
  5. import subprocess


  6. # 过滤Windows文件名中的非法字符
  7. def setFileTitle(title):
  8.     rstr = r"[\/\\\:\*\?"\<\>\|]"  # '/ \ : * ? " < > |'
  9.     name = re.sub(rstr, "_", title)  # 替换为下划线
  10.     return name



  11. # 相应的数据内容在__playinfo__里

  12. url = 'https://www.bilibili.com/video/BV1684y1G73z/?spm_id_from=333.1007.tianma.1-1-1.click'

  13. headers = {
  14.     "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
  15.     "cookie": "_uuid=6F74D356-D356-D741-D10D3-D7B6F5F8B29967778infoc; buvid_fp=da80279a888d378c2f4b8690887d5c58; CURRENT_FNVAL=4048; buvid3=4E7146FA-43A5-3251-AF99-30B56383F3A768920infoc; b_nut=1675329869; buvid4=D7668A3A-1793-39E0-25EE-938D236E2B7768920-023020217-WiXBYh03a5sqpgE/VvEB3g%3D%3D; rpdid=|(JYYu~Y))J|0J'uY~lJkm)~k; i-wanna-go-back=-1; b_ut=7; b_lsid=D2C8F6F6_18649B276B8; nostalgia_conf=-1; sid=7qns657c; innersign=1; theme_style=light; PVID=1",
  16. }

  17. res = requests.get(url, headers=headers)
  18. # print(res.text)

  19. # 获取防盗链
  20. headers['referer'] = url

  21. # 获取视频标题
  22. title = re.findall('"title":"(.*?)","pubdate"', res.text)[0]
  23. name = setFileTitle(title)
  24. # print(name)

  25. # 获取视频数据信息
  26. html_data = re.findall('<script>window.__playinfo__=(.*?)</script>', res.text)

  27. # 转换数据类型字符串数据转成json字典数据类型
  28. json_data = json.loads(html_data[0])
  29. # pprint(json_data)

  30. # 提取音频和视频url
  31. audio_url = json_data['data']['dash']['audio'][0]['baseUrl']
  32. video_url = json_data['data']['dash']['video'][0]['baseUrl']
  33. # print(audio_url)
  34. # print(video_url)

  35. # 合并音频和视频
  36. def merge(video_address, audio_address, merge_address):
  37.     cmd = f'F:\\FFmpeg\\bin\\ffmpeg -i {video_address} -i {audio_address} -acodec copy -vcodec copy {merge_address}'
  38.     subprocess.call(cmd,shell=True)

  39. audio_info = requests.get(audio_url, headers=headers)
  40. video_info = requests.get(video_url, headers=headers)
  41. # 保存内容
  42. audio_address = f"C:/Users/misaka/Desktop/bilibili/{name}.mp3"
  43. video_address = f"C:/Users/misaka/Desktop/bilibili/{name}.mp4"
  44. with open(audio_address, mode='wb') as audio:
  45.     audio.write(audio_info.content)
  46. with open(video_address, mode='wb') as video:
  47.     video.write(video_info.content)

  48. merge_address = f"C:/Users/misaka/Desktop/bilibili/合并/{name}.mp4"
  49. merge(video_address, audio_address, merge_address)
复制代码
$X0YT]Q]HVVJ%3XP$Z(G477.png
  1. C:\Users\misaka\AppData\Local\Programs\Python\Python311\python.exe C:\Users\misaka\PycharmProjects\pythonProject\爬虫\爬取哔哩哔哩\爬取视频测试.py
  2. 系统找不到指定的驱动器。

  3. 进程已结束,退出代码0
复制代码
最佳答案
2023-2-13 17:32:24
本帖最后由 isdkz 于 2023-2-13 18:04 编辑

我在调试的时候路径调整了一下,你可以改回来

我做的调整:判断目录存在不存在,不存在就创建

关键点:windows命令执行的时候如果路径带空格就会截断成多个参数,解决方法就是在路径两边加双引号,所以我在 cmd 那里给大括号两边都加了双引号

  1. # 目标:哔哩哔哩播放页面输入网址栏的url,即可获取到视频数据
  2. import re
  3. import json
  4. import requests
  5. import subprocess


  6. # 过滤Windows文件名中的非法字符
  7. def setFileTitle(title):
  8.     rstr = r"[\/\\\:\*\?\"\<\>\|]"  # '/ \ : * ? " < > |'
  9.     name = re.sub(rstr, "_", title)  # 替换为下划线
  10.     return name



  11. # 相应的数据内容在__playinfo__里

  12. url = 'https://www.bilibili.com/video/BV1684y1G73z/?spm_id_from=333.1007.tianma.1-1-1.click'

  13. headers = {
  14.     "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
  15.     "cookie": "_uuid=6F74D356-D356-D741-D10D3-D7B6F5F8B29967778infoc; buvid_fp=da80279a888d378c2f4b8690887d5c58; CURRENT_FNVAL=4048; buvid3=4E7146FA-43A5-3251-AF99-30B56383F3A768920infoc; b_nut=1675329869; buvid4=D7668A3A-1793-39E0-25EE-938D236E2B7768920-023020217-WiXBYh03a5sqpgE/VvEB3g%3D%3D; rpdid=|(JYYu~Y))J|0J'uY~lJkm)~k; i-wanna-go-back=-1; b_ut=7; b_lsid=D2C8F6F6_18649B276B8; nostalgia_conf=-1; sid=7qns657c; innersign=1; theme_style=light; PVID=1",
  16. }

  17. res = requests.get(url, headers=headers)
  18. # print(res.text)

  19. # 获取防盗链
  20. headers['referer'] = url

  21. # 获取视频标题
  22. title = re.findall('"title":"(.*?)","pubdate"', res.text)[0]
  23. name = setFileTitle(title)
  24. # print(name)

  25. # 获取视频数据信息
  26. html_data = re.findall('<script>window.__playinfo__=(.*?)</script>', res.text)

  27. # 转换数据类型字符串数据转成json字典数据类型
  28. json_data = json.loads(html_data[0])
  29. # pprint(json_data)

  30. # 提取音频和视频url
  31. audio_url = json_data['data']['dash']['audio'][0]['baseUrl']
  32. video_url = json_data['data']['dash']['video'][0]['baseUrl']
  33. # print(audio_url)
  34. # print(video_url)

  35. # 合并音频和视频
  36. def merge(video_address, audio_address, merge_address):
  37.     cmd = fr'F:\FFmpeg\bin\ffmpeg -i "{video_address}" -i "{audio_address}" -acodec copy -vcodec copy "{merge_address}"'
  38.     subprocess.call(cmd,shell=True)


  39. audio_info = requests.get(audio_url, headers=headers)
  40. video_info = requests.get(video_url, headers=headers)
  41. # 保存内容
  42. audio_address = f"bilibili/{name}.mp3"
  43. video_address = f"bilibili/{name}.mp4"
  44. with open(audio_address, mode='wb') as audio:
  45.     audio.write(audio_info.content)
  46. with open(video_address, mode='wb') as video:
  47.     video.write(video_info.content)

  48. merge_address = f"bilibili/合并/{name}.mp4"

  49. import os
  50. if not os.path.exists('bilibili/合并'):
  51.     os.makedirs('bilibili/合并')
  52. merge(video_address, audio_address, merge_address)
复制代码

最佳答案

查看完整内容

我在调试的时候路径调整了一下,你可以改回来 我做的调整:判断目录存在不存在,不存在就创建 关键点:windows命令执行的时候如果路径带空格就会截断成多个参数,解决方法就是在路径两边加双引号,所以我在 cmd 那里给大括号两边都加了双引号
小甲鱼最新课程 -> https://ilovefishc.com
回复

使用道具 举报

发表于 2023-2-13 17:32:24 | 显示全部楼层    本楼为最佳答案   
本帖最后由 isdkz 于 2023-2-13 18:04 编辑

我在调试的时候路径调整了一下,你可以改回来

我做的调整:判断目录存在不存在,不存在就创建

关键点:windows命令执行的时候如果路径带空格就会截断成多个参数,解决方法就是在路径两边加双引号,所以我在 cmd 那里给大括号两边都加了双引号

  1. # 目标:哔哩哔哩播放页面输入网址栏的url,即可获取到视频数据
  2. import re
  3. import json
  4. import requests
  5. import subprocess


  6. # 过滤Windows文件名中的非法字符
  7. def setFileTitle(title):
  8.     rstr = r"[\/\\\:\*\?\"\<\>\|]"  # '/ \ : * ? " < > |'
  9.     name = re.sub(rstr, "_", title)  # 替换为下划线
  10.     return name



  11. # 相应的数据内容在__playinfo__里

  12. url = 'https://www.bilibili.com/video/BV1684y1G73z/?spm_id_from=333.1007.tianma.1-1-1.click'

  13. headers = {
  14.     "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
  15.     "cookie": "_uuid=6F74D356-D356-D741-D10D3-D7B6F5F8B29967778infoc; buvid_fp=da80279a888d378c2f4b8690887d5c58; CURRENT_FNVAL=4048; buvid3=4E7146FA-43A5-3251-AF99-30B56383F3A768920infoc; b_nut=1675329869; buvid4=D7668A3A-1793-39E0-25EE-938D236E2B7768920-023020217-WiXBYh03a5sqpgE/VvEB3g%3D%3D; rpdid=|(JYYu~Y))J|0J'uY~lJkm)~k; i-wanna-go-back=-1; b_ut=7; b_lsid=D2C8F6F6_18649B276B8; nostalgia_conf=-1; sid=7qns657c; innersign=1; theme_style=light; PVID=1",
  16. }

  17. res = requests.get(url, headers=headers)
  18. # print(res.text)

  19. # 获取防盗链
  20. headers['referer'] = url

  21. # 获取视频标题
  22. title = re.findall('"title":"(.*?)","pubdate"', res.text)[0]
  23. name = setFileTitle(title)
  24. # print(name)

  25. # 获取视频数据信息
  26. html_data = re.findall('<script>window.__playinfo__=(.*?)</script>', res.text)

  27. # 转换数据类型字符串数据转成json字典数据类型
  28. json_data = json.loads(html_data[0])
  29. # pprint(json_data)

  30. # 提取音频和视频url
  31. audio_url = json_data['data']['dash']['audio'][0]['baseUrl']
  32. video_url = json_data['data']['dash']['video'][0]['baseUrl']
  33. # print(audio_url)
  34. # print(video_url)

  35. # 合并音频和视频
  36. def merge(video_address, audio_address, merge_address):
  37.     cmd = fr'F:\FFmpeg\bin\ffmpeg -i "{video_address}" -i "{audio_address}" -acodec copy -vcodec copy "{merge_address}"'
  38.     subprocess.call(cmd,shell=True)


  39. audio_info = requests.get(audio_url, headers=headers)
  40. video_info = requests.get(video_url, headers=headers)
  41. # 保存内容
  42. audio_address = f"bilibili/{name}.mp3"
  43. video_address = f"bilibili/{name}.mp4"
  44. with open(audio_address, mode='wb') as audio:
  45.     audio.write(audio_info.content)
  46. with open(video_address, mode='wb') as video:
  47.     video.write(video_info.content)

  48. merge_address = f"bilibili/合并/{name}.mp4"

  49. import os
  50. if not os.path.exists('bilibili/合并'):
  51.     os.makedirs('bilibili/合并')
  52. merge(video_address, audio_address, merge_address)
复制代码
小甲鱼最新课程 -> https://ilovefishc.com
回复

使用道具 举报

 楼主| 发表于 2023-2-13 17:33:30 | 显示全部楼层
视频和音频是成功的
小甲鱼最新课程 -> https://ilovefishc.com
回复

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

小黑屋|手机版|Archiver|鱼C工作室 ( 粤ICP备18085999号-1 | 粤公网安备 44051102000585号)

GMT+8, 2025-4-24 22:58

Powered by Discuz! X3.4

© 2001-2023 Discuz! Team.

快速回复 返回顶部 返回列表