|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import csv
import re
import time

import requests
# Request headers sent with every HTTP call made by this script.
header = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
}
# Comment API URL template: the first placeholder fills the `next` query
# parameter (the page value), the second fills `oid` (the video's aid).
original_url = 'https://api.bilibili.com/x/v2/reply/main?jsonp=jsonp&next={}&type=1&oid={}&mode=3'
# Convert a Unix timestamp into a date string.
def get_time(ctime: float) -> str:
    """Format a Unix timestamp as a ``YYYY.MM.DD`` date.

    The conversion goes through ``time.localtime``, so the resulting date
    depends on the timezone of the machine running the script.

    Args:
        ctime: Seconds since the epoch.

    Returns:
        The calendar date as a string, e.g. ``"2023.11.14"``.
    """
    # strftime already returns a str, so no extra conversion is needed.
    return time.strftime("%Y.%m.%d", time.localtime(ctime))
# Look up the numeric aid of a video from its BV id.
def get_oid(bvid):
    """Return the ``aid`` of the video identified by *bvid*.

    The video page is downloaded and the first ``"aid":<digits>`` occurrence
    in its text is used.

    Args:
        bvid: The video's BV identifier; it is appended to the video page URL.

    Returns:
        The aid as a string of digits.

    Raises:
        requests.RequestException: If the page cannot be downloaded
            (including when the timeout expires).
        ValueError: If the page text contains no ``"aid":<digits>`` field.
    """
    video_url = 'https://www.bilibili.com/video/' + bvid
    # A timeout keeps a stalled connection from hanging the script forever.
    page = requests.get(video_url, headers=header, timeout=10).text
    # Capture the digits directly instead of slicing the matched text.
    match = re.search(r'"aid":([0-9]+)', page)
    if match is None:
        raise ValueError('no "aid" field found on page {}'.format(video_url))
    return match.group(1)
# Crawl comments page by page, writing each page to the CSV as it arrives.
def _clean_message(text):
    """Return *text* with every run of whitespace removed."""
    return re.sub(r'\s+', '', text)


def _write_comment(csv_writer, comment):
    """Write one top-level comment row, then one "REPLY" row per nested reply."""
    csv_writer.writerow([
        get_time(comment['ctime']),
        str(comment['like']),
        comment['member']['uname'],
        _clean_message(comment['content']['message']),
    ])
    # The nested list may be missing or null; both mean "no replies".
    for reply in comment.get('replies') or []:
        csv_writer.writerow([
            "REPLY",
            str(reply['like']),
            reply['member']['uname'],
            _clean_message(reply['content']['message']),
        ])


def _fetch_page(oid, page):
    """Request one page of the comment API and return its ``data`` object."""
    url = original_url.format(page, oid)
    # A timeout keeps a stalled connection from hanging the crawl forever.
    payload = requests.get(url, headers=header, timeout=10).json()
    data = payload.get('data') if isinstance(payload, dict) else None
    if not isinstance(data, dict):
        raise ValueError('comment API returned no data for page {}'.format(page))
    return data


def online_save(bvid):
    """Download the comments of a video and save them to ``<bvid>_评论.csv``.

    Pages are requested one at a time starting from page 1, with a 5 second
    pause between requests. Each top-level comment becomes one row (date,
    likes, user name, message with whitespace removed); each nested reply
    becomes a row whose first column is the literal ``"REPLY"``. A progress
    line is printed after every page.

    Crawling stops when the number of top-level comments written reaches the
    total reported by the first page, when a page contains no comments, or
    when a later page cannot be fetched. In that last case a message is
    printed and the rows written so far are kept.

    Args:
        bvid: The video's BV identifier; also used as the CSV file name prefix.

    Raises:
        requests.RequestException: If the video page or the first comment
            page cannot be downloaded.
        ValueError: If the aid cannot be found or the first comment page
            carries no usable ``data`` object.
        KeyError: If a page or comment lacks one of the expected fields.
    """
    oid = get_oid(bvid)
    page = 1
    # A failure on the very first page propagates: nothing useful can be saved.
    data = _fetch_page(oid, page)
    count = int(data['cursor']['all_count'])
    all_count = 0
    fname = bvid + '_评论.csv'
    with open(fname, 'w', newline='', encoding='utf_8_sig') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(["时间", "点赞", "用户名", "评论"])
        while True:
            # A null or missing list is handled the same way as an empty one.
            comments = data.get('replies') or []
            for comment in comments:
                _write_comment(csv_writer, comment)
            all_count += len(comments)
            print('总评论数:{},当前评论数:{},爬取Page{}完毕。'.format(count, all_count, page))
            # An empty page is the explicit end-of-data signal, so the loop
            # no longer relies on an exception being raised to terminate.
            if not comments or all_count >= count:
                break
            time.sleep(5)
            page += 1
            try:
                data = _fetch_page(oid, page)
            except (requests.RequestException, ValueError) as exc:
                # Best effort: keep what has been saved, but say why we stopped.
                print('爬取Page{}失败,停止爬取:{}'.format(page, exc))
                break
# Script entry point: ask for a BV id, crawl its comments, report completion.
if __name__ == '__main__':
    # Strip stray whitespace so a pasted id does not end up in the URL or
    # in the output file name.
    bvid = input('输入视频Bvid:').strip()
    online_save(bvid)
    print('完成!')
复制代码 |
|