马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 591821661 于 2021-10-5 22:50 编辑
Bilibili视频下载过程为:
1.登录会员账号,获取SESSDATA。(这样才能下载高清视频)
2.解析视频下载链接,并下载视频音频文件(m4s)。
3.合并视频音频文件(ffmpeg)为MP4格式。
核心是之后会用到bilibili自带的播放地址API(playurl):
`https://api.bilibili.com/x/player/playurl?cid=&bvid=视频bv号&qn=视频清晰度&type=&otype=json&fourk=&fnver=0&fnval=&session=`
运行截图:
运行截图
# -*- coding: utf-8 -*-
"""
I love FishC.com
-Sharpstar
"""
import qrcode
import urllib.request
import json
import time
import http.cookiejar
import re
import gzip
from io import BytesIO
# URL of the video page to inspect.
video_url = 'https://www.bilibili.com/video/BV1rf4y1n78w'

# API endpoints used by this script. The passport endpoints are addressed over
# HTTPS so the QR oauthKey and the cookies set on login are not sent in clear text.
qrurl = 'https://passport.bilibili.com/qrcode/getLoginUrl'  # QR-code login URL
checkurl = 'https://passport.bilibili.com/qrcode/getLoginInfo'  # login status
mydataurl = 'https://api.bilibili.com/x/web-interface/nav'  # current user status

# Optional proxy URL used for both http and https; an empty string means "no proxy".
proxy = ''
# User-Agent header sent with every request.
UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36'
tabvideourl = 'https://t.bilibili.com/?tab=8'
# Route both schemes through `proxy` when one is configured; otherwise hand
# ProxyHandler an empty mapping.
proxy_map = {'http': proxy, 'https': proxy} if proxy else {}
proxy_support = urllib.request.ProxyHandler(proxy_map)

# Cookie jar that automatically keeps the cookies set by responses.
cookiejar = http.cookiejar.CookieJar()
cookie_support = urllib.request.HTTPCookieProcessor(cookiejar)

# Assemble the opener from the handlers and attach the User-Agent header.
opener = urllib.request.build_opener(proxy_support, cookie_support)
opener.addheaders = [('User-Agent', UA)]
# Install it globally so every later urllib.request.urlopen() call uses it.
urllib.request.install_opener(opener)
# Ask the passport API for a QR-code login session. The response is read inside
# a `with` block so the connection is closed as soon as the body is consumed.
with urllib.request.urlopen(qrurl) as qrurl_res:
    qrurl_json = json.loads(qrurl_res.read())
qroauthKey = qrurl_json['data']['oauthKey']  # key posted when polling the login status
qrimg_url = qrurl_json['data']['url']  # URL that gets encoded into the QR image
# Show the QR code so the user can scan it to log in.
qrcode.make(qrimg_url).show()
# URL-encode the form field that is POSTed to the login-status endpoint.
oauthKey_coded = urllib.parse.urlencode({'oauthKey': qroauthKey}).encode('utf-8')
startCheckTime = time.time()
LastStatusData = None
# Stays None when the QR code times out before the login succeeds.
loginSESSDATA_raw = None
# Poll the login status until it succeeds or 180 seconds have elapsed.
while True:
    endCheckTime = time.time()
    if endCheckTime - startCheckTime >= 180.0:
        print('验证码失效超时,请重新登录')
        break
    # Close each poll response right after reading it.
    with urllib.request.urlopen(checkurl, data=oauthKey_coded) as loginInfoPostRes:
        loginStatus_json = json.loads(loginInfoPostRes.read())
    if loginStatus_json['status']:
        # Login succeeded: `data` is an object whose `url` is kept for later use.
        loginSESSDATA_raw = loginStatus_json['data']['url']
        break
    # Not logged in yet: `data` is a status code (-5 scanned, -4 not scanned).
    # Output is flushed explicitly because none of these prints end in a newline.
    if LastStatusData == loginStatus_json['data']:
        # Same state as the previous poll: just show progress.
        print('.', end='', flush=True)
    elif loginStatus_json['data'] == -5:
        print('\n已扫码,等待登录中', end='', flush=True)
    elif loginStatus_json['data'] == -4:
        print('\n未扫码,等待扫码中', end='', flush=True)
    LastStatusData = loginStatus_json['data']
    # Wait 3 seconds between polls to avoid querying too frequently.
    time.sleep(3)
# Query the nav endpoint for the state of the current session's user.
my_html = urllib.request.urlopen(mydataurl)
my_html_json = json.loads(my_html.read())
# Greet the user only when logged in; the wording depends on the VIP flag.
if my_html_json['data']['isLogin']:
    print('尊敬的大会员你好!' if my_html_json['data']['vipStatus'] else '尊敬的会员你好!')
# Disabled alternative: pull SESSDATA out of the login URL with a regex.
# SESSDATA_Clue = re.compile(r'SESSDATA=(.*?)&bili_jct')
# SESSDATA_Res = re.findall(SESSDATA_Clue,loginSESSDATA_raw)[0]
# print('\n登录成功,SESSDATA =',SESSDATA_Res)
# Dump the cookie jar instead (per the original author's note it already
# includes SESSDATA after a successful login).
print('Cookies:')
for cookie in cookiejar:
    print(cookie.name, '=', cookie.value)
# Download the video page; the response is closed once the body has been read.
with urllib.request.urlopen(video_url) as video_html:
    video_html_read = video_html.read()
# The original author notes the body needs gzip decoding. Decompress only when
# the gzip magic number is present, so an uncompressed reply does not raise.
if video_html_read[:2] == b'\x1f\x8b':
    buff = BytesIO(video_html_read)
    with gzip.GzipFile(fileobj=buff) as f:
        video_html_read_extracted = f.read()
else:
    video_html_read_extracted = video_html_read
video_html_res = video_html_read_extracted.decode('utf-8')

# Play info (stream/format data) embedded in the page as JSON.
downloadInfo_Clue = re.compile(r'__playinfo__=(.*?)</script><script>window')
downloadInfo_Res = re.findall(downloadInfo_Clue, video_html_res)
if not downloadInfo_Res:
    # Fail with a clear message instead of an IndexError on [0].
    raise RuntimeError('未能在页面中找到 __playinfo__ 数据,页面结构可能已变化')
downloadInfo_json = json.loads(downloadInfo_Res[0])

# Initial page state (video metadata) embedded in the page as JSON.
videoInfo_Clue = re.compile(r'<script>window.__INITIAL_STATE__=(.*?);\(function\(\)')
videoInfo_Res = re.findall(videoInfo_Clue, video_html_res)
if not videoInfo_Res:
    raise RuntimeError('未能在页面中找到 __INITIAL_STATE__ 数据,页面结构可能已变化')
videoInfo_json = json.loads(videoInfo_Res[0])
# Status code of the embedded play-info payload, read before `data` is touched.
# Values listed in the original notes:
#   0: success
#   -400: bad request
#   -403: permission denied
#   -404: no such video
#   62002: video not visible
video_code = downloadInfo_json['code']
videp_code = video_code  # original misspelled name, kept for backward compatibility
if video_code != 0:
    print('警告:播放信息返回异常代码 %s' % video_code)

# Basic video information
video_bvid = videoInfo_json['bvid']  # BV id
video_title = videoInfo_json['videoData']['title']  # title
video_pubdate = videoInfo_json['videoData']['pubdate']  # publish date (Unix timestamp)
video_cover = videoInfo_json['videoData']['pic']  # cover image (URL)
video_pages = videoInfo_json['videoData']['pages']  # per-part info (list of dicts)
# video_quality = downloadInfo_json['data']['accept_quality'] # quality range (list of numbers)
# video_describe = downloadInfo_json['data']['accept_description'] # description of each quality
# Supported video formats; fall back to an empty list when `data` or
# `support_formats` is absent so the summary can still be printed.
video_formats = (downloadInfo_json.get('data') or {}).get('support_formats') or []

# Uploader information
video_up = videoInfo_json['videoData']['owner']['name']  # uploader name
video_up_uid = videoInfo_json['videoData']['owner']['mid']  # uploader UID
video_up_face = videoInfo_json['videoData']['owner']['face']  # uploader avatar

# Popularity statistics
video_view = videoInfo_json['videoData']['stat']['view']  # view count
video_like = videoInfo_json['videoData']['stat']['like']  # like count
video_coin = videoInfo_json['videoData']['stat']['coin']  # coin count
video_favorite = videoInfo_json['videoData']['stat']['favorite']  # favorite count
# Print the single-value header fields: title, cover URL and BV id.
for template, value in (
    ('标题 : %s', video_title),
    ('视频封面地址 : %s', video_cover),
    ('BV号 : %s', video_bvid),
):
    print(template % value)

# Publish time: Unix timestamp rendered in local time.
pubdate_text = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(video_pubdate))
print('发布时间 :', pubdate_text)

# Supported formats on one line, each followed by a space.
print('支持视频格式 : ', end='')
for fmt in video_formats:
    print(fmt['new_description'], end=' ')

# The leading newline ends the format line before the part count.
page_count = len(video_pages)
print('\n共有%d个分P视频' % page_count)

uploader_fields = (video_up, video_up_uid)
print('UP主 :%s 【UID:%d】' % uploader_fields)

stat_fields = (video_view, video_like, video_coin, video_favorite)
print('播放:%d,点赞:%d,硬币:%d,收藏:%d' % stat_fields)
"""
重要参考内容,致以无比感谢
【Github】
SocialSisterYi/bilibili-API-collect
blogwy / BilibiliVideoDownload
"""
|