|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 591821661 于 2021-10-5 22:50 编辑
Bilibili视频下载过程为:
1.登录会员账号,获取SESSDATA。(这样才能下载高清视频)
2.解析视频下载链接,并下载视频音频文件(m4s)。
3.合并视频音频文件(ffmpeg)为MP4格式。
核心是利用 Bilibili 自带的 playurl API 获取视频的下载地址:
`https://api.bilibili.com/x/player/playurl?cid=&bvid=视频bv号&qn=视频清晰度&type=&otype=json&fourk=&fnver=0&fnval=&session=`
运行截图:
运行截图
# -*- coding: utf-8 -*-
"""
I love FishC.com
-Sharpstar
"""
import gzip
import http.cookiejar
import json
import re
import time
import urllib.parse
import urllib.request
from io import BytesIO

import qrcode
# URL of the video page to inspect.
video_url = 'https://www.bilibili.com/video/BV1rf4y1n78w'

# Endpoints used by this script.
# NOTE(review): the two login endpoints use plain http; consider https after
# confirming the API accepts it.
qrurl = 'http://passport.bilibili.com/qrcode/getLoginUrl'  # login QR code
checkurl = 'http://passport.bilibili.com/qrcode/getLoginInfo'  # login status
mydataurl = 'https://api.bilibili.com/x/web-interface/nav'  # user status
tabvideourl = 'https://t.bilibili.com/?tab=8'

# Optional proxy URL, applied to both http and https when non-empty.
proxy = ''
UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36'

# An explicit empty mapping (rather than None) tells ProxyHandler to use no
# proxy at all instead of auto-detecting one from the environment.
proxy_support = urllib.request.ProxyHandler(
    {'http': proxy, 'https': proxy} if proxy else {}
)

# Cookies set by any response are stored here automatically.
cookiejar = http.cookiejar.CookieJar()
cookie_support = urllib.request.HTTPCookieProcessor(cookiejar)

# Build one opener carrying the proxy settings, the cookie jar and the
# User-Agent header, then install it so every later urllib.request.urlopen()
# call goes through it.
opener = urllib.request.build_opener(proxy_support, cookie_support)
opener.addheaders = [('User-Agent', UA)]
urllib.request.install_opener(opener)

# Request a login QR code. The response carries the key used to poll the login
# status and the URL that is encoded into the QR image.
with urllib.request.urlopen(qrurl) as qrurl_res:
    qrurl_json = json.loads(qrurl_res.read())
qroauthKey = qrurl_json['data']['oauthKey']
qrimg_url = qrurl_json['data']['url']

# Display the QR code so the user can scan it to log in.
qrcode.make(qrimg_url).show()

# URL-encode the form body once; the same bytes are POSTed on every poll.
oauthKey_coded = urllib.parse.urlencode({'oauthKey': qroauthKey}).encode('utf-8')

startCheckTime = time.time()
LastStatusData = None
loginSESSDATA_raw = None  # stays None unless the login succeeds

while True:
    endCheckTime = time.time()
    # Give up once 180 seconds have passed since polling started.
    if endCheckTime - startCheckTime >= 180.0:
        print('验证码失效超时,请重新登录')
        break

    with urllib.request.urlopen(checkurl, data=oauthKey_coded) as loginInfoPostRes:
        loginStatus_json = json.loads(loginInfoPostRes.read())

    if loginStatus_json['status']:
        # Logged in: the payload is now a dict holding the redirect URL.
        loginSESSDATA_raw = loginStatus_json['data']['url']
        break

    # Not logged in yet: 'data' is a status code. Announce it when it changes,
    # otherwise print a progress dot. flush=True makes the output appear
    # immediately even though no newline is written.
    if LastStatusData == loginStatus_json['data']:
        print('.', end='', flush=True)
    else:
        if loginStatus_json['data'] == -5:
            print('\n已扫码,等待登录中', end='', flush=True)
        if loginStatus_json['data'] == -4:
            print('\n未扫码,等待扫码中', end='', flush=True)
    LastStatusData = loginStatus_json['data']
    # Wait 3 seconds between polls to avoid querying too often.
    time.sleep(3)
# Query the user-status endpoint to see whether the current session is logged in.
with urllib.request.urlopen(mydataurl) as my_html:
    my_html_json = json.loads(my_html.read())

# NOTE(review): the original indentation was lost; the cookie dump is assumed
# to belong to the logged-in branch. Confirm against the author's intent.
if my_html_json['data']['isLogin']:
    if my_html_json['data']['vipStatus']:
        print('尊敬的大会员你好!')
    else:
        print('尊敬的会员你好!')
    # SESSDATA could be parsed out of loginSESSDATA_raw with the pattern
    # r'SESSDATA=(.*?)&bili_jct', but that is unnecessary: per the original
    # author's note the cookie jar already contains it.
    print('Cookies:')
    for item in cookiejar:
        print(item.name, '=', item.value)
else:
    print('当前未登录')

# Download the raw bytes of the video page.
with urllib.request.urlopen(video_url) as video_html:
    video_html_read = video_html.read()

# The original author found the page body to be gzip-compressed. Decompress
# only when the gzip magic number is present, so an uncompressed response is
# handled too instead of raising.
if video_html_read[:2] == b'\x1f\x8b':
    video_html_read_extracted = gzip.decompress(video_html_read)
else:
    video_html_read_extracted = video_html_read
video_html_res = video_html_read_extracted.decode('utf-8')

# The page embeds two JSON blobs inside <script> tags: the stream/playback
# info (__playinfo__) and the page state (__INITIAL_STATE__).
downloadInfo_Clue = re.compile(r'__playinfo__=(.*?)</script><script>window')
downloadInfo_Res = re.findall(downloadInfo_Clue, video_html_res)
if not downloadInfo_Res:
    # Fail with a clear message instead of a bare IndexError.
    raise RuntimeError('未能在页面中找到 __playinfo__ 数据')
downloadInfo_json = json.loads(downloadInfo_Res[0])

videoInfo_Clue = re.compile(r'<script>window.__INITIAL_STATE__=(.*?);\(function\(\)')
videoInfo_Res = re.findall(videoInfo_Clue, video_html_res)
if not videoInfo_Res:
    raise RuntimeError('未能在页面中找到 __INITIAL_STATE__ 数据')
videoInfo_json = json.loads(videoInfo_Res[0])
# Response code of the embedded playback info, as listed by the original author:
#   0      success
#   -400   bad request
#   -403   permission denied
#   -404   no such video
#   62002  video not visible
video_code = downloadInfo_json['code']
videp_code = video_code  # original (misspelled) name, kept as an alias
if video_code != 0:
    # Stop here with the code in the message rather than failing later on a
    # missing 'data' entry.
    raise RuntimeError('获取视频播放信息失败,code = %s' % video_code)

# Basic video information.
video_data = videoInfo_json['videoData']
video_bvid = videoInfo_json['bvid']  # BV id
video_title = video_data['title']  # title
video_pubdate = video_data['pubdate']  # publish date (Unix timestamp)
video_cover = video_data['pic']  # cover image (URL)
video_pages = video_data['pages']  # per-part info (list of dicts)
# downloadInfo_json['data'] also offers 'accept_quality' (list of quality
# numbers) and 'accept_description' (their Chinese labels); both are unused.
video_formats = downloadInfo_json['data']['support_formats']  # supported formats

# Uploader information.
video_owner = video_data['owner']
video_up = video_owner['name']  # uploader name
video_up_uid = video_owner['mid']  # uploader UID
video_up_face = video_owner['face']  # uploader avatar

# Popularity statistics.
video_stat = video_data['stat']
video_view = video_stat['view']  # play count
video_like = video_stat['like']  # likes
video_coin = video_stat['coin']  # coins
video_favorite = video_stat['favorite']  # favorites

print('标题 : %s'%video_title)
print('视频封面地址 : %s'%video_cover)
print('BV号 : %s'%video_bvid)
print('发布时间 :', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(video_pubdate)))
print('支持视频格式 : ', end='')
for i in video_formats:
    print(i['new_description'], end=' ')
print('\n共有%d个分P视频'%len(video_pages))
print('UP主 :%s 【UID:%d】'%(video_up, video_up_uid))
print('播放:%d,点赞:%d,硬币:%d,收藏:%d'%(video_view, video_like, video_coin, video_favorite))

# Acknowledgements, kept verbatim from the original author.
"""
重要参考内容,致以无比感谢
【Github】
SocialSisterYi/bilibili-API-collect
blogwy / BilibiliVideoDownload
"""
复制代码 |
|