爬取B站评论，下面代码只能爬取一级评论，有没有大佬指导一下如何爬取交互回复信息.,Python交流,编程语言专区,鱼C论坛

不会改名的吧 发表于 2023-11-10 20:05:16

爬取B站评论，下面代码只能爬取一级评论，有没有大佬指导一下如何爬取交互回复信息。

import requests
import re
import time
import csv

# HTTP request headers sent with every request made by this script.
header = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
}

# Comment API URL template; the two placeholders are filled, in order,
# with the page cursor ("next") and the video id ("oid").
original_url = 'https://api.bilibili.com/x/v2/reply/main?jsonp=jsonp&next={}&type=1&oid={}&mode=3'

# Convert a Unix timestamp into a date string.
def get_time(ctime):
    """Return *ctime* formatted as ``YYYY.MM.DD`` in the local timezone.

    Args:
        ctime: Seconds since the epoch, as accepted by ``time.localtime``.

    Returns:
        The local calendar date of the timestamp, e.g. ``"2023.11.10"``.
    """
    # strftime already returns a str, so no extra conversion is needed.
    return time.strftime("%Y.%m.%d", time.localtime(ctime))

# Look up the numeric aid of a video from its BV id.
def get_oid(bvid):
    """Return the aid embedded in the video page for *bvid*.

    The video page is downloaded and searched for the first ``"aid":<digits>``
    occurrence; the digits are returned so they can be substituted into the
    comment API URL.

    Args:
        bvid: The video's BV id, appended to the video page URL.

    Returns:
        The aid as a string of decimal digits.

    Raises:
        ValueError: If the page contains no ``"aid":<digits>`` fragment.
    """
    video_url = 'https://www.bilibili.com/video/' + bvid
    # A timeout keeps the script from hanging forever on a stalled connection.
    page = requests.get(video_url, headers=header, timeout=10).text
    # Capture the digits themselves; the previous pattern matched only the
    # literal '"aid":' prefix and therefore never returned the id.
    found = re.search(r'"aid":\s*(\d+)', page)
    if found is None:
        raise ValueError('no "aid" found in the video page for ' + repr(bvid))
    return found.group(1)

# Scrape comments page by page and write each page to the CSV as it arrives.
def _clean_message(text):
    """Return *text* with every run of whitespace removed."""
    return re.sub(r'\s+', '', text)


def _write_comment_rows(csv_writer, comments):
    """Write one row per top-level comment plus one row per embedded reply.

    Args:
        csv_writer: A ``csv.writer`` positioned after the header row.
        comments: Iterable of comment dicts taken from the API response.

    Returns:
        The number of top-level comments written (replies are not counted).
    """
    written = 0
    for comment in comments:
        csv_writer.writerow([
            get_time(comment['ctime']),
            str(comment['like']),
            comment['member']['uname'],
            _clean_message(comment['content']['message']),
        ])
        written += 1

        # The 'replies' key may be missing, and presumably may also be null
        # when a comment has no replies; treat both as "no replies".
        # NOTE(review): only the replies embedded in each top-level comment
        # are written; whether that list is the complete thread is not
        # visible from this code -- confirm against the API.
        for reply in comment.get('replies') or []:
            csv_writer.writerow([
                "REPLY",
                str(reply['like']),
                reply['member']['uname'],
                _clean_message(reply['content']['message']),
            ])
    return written


def online_save(bvid):
    """Download the comments of video *bvid* into ``<bvid>_评论.csv``.

    The first page is fetched to learn the total comment count, then further
    pages are requested until that many top-level comments have been written,
    a page comes back empty, or a request/parse error occurs. Each page is
    written to the file immediately, with a 5-second pause between pages.

    Args:
        bvid: The video's BV id; also used as the CSV file name prefix.

    Returns:
        None. Progress is printed and the rows are written to the CSV file.
    """
    oid = get_oid(bvid)
    page = 1
    response = requests.get(original_url.format(page, oid), headers=header, timeout=10)
    data = response.json()
    count = int(data['data']['cursor']['all_count'])
    all_count = 0
    fname = bvid + '_评论.csv'

    # utf_8_sig writes a BOM so spreadsheet tools detect the encoding.
    with open(fname, 'w', newline='', encoding='utf_8_sig') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(["时间", "点赞", "用户名", "评论"])

        all_count += _write_comment_rows(csv_writer, data['data']['replies'] or [])
        print('总评论数：{}，当前评论数:{},爬取Page{}完毕。'.format(count, all_count, page))
        time.sleep(5)

        while all_count < count:
            page += 1
            url = original_url.format(page, oid)
            try:
                data = requests.get(url, headers=header, timeout=10).json()
                comments = data['data']['replies']
                if not comments:
                    # An empty page means there is nothing left to fetch;
                    # without this check the loop could spin forever.
                    break
                all_count += _write_comment_rows(csv_writer, comments)
            except (requests.RequestException, ValueError, KeyError, TypeError) as err:
                # Best effort: keep what was written so far, but say why we
                # stopped instead of silently swallowing every exception.
                print('爬取Page{}失败，停止爬取：{!r}'.format(page, err))
                break

            print('总评论数：{}，当前评论数:{},爬取Page{}完毕。'.format(count, all_count, page))
            time.sleep(5)
    # The with-block closes the file; no explicit close() is needed.

# Script entry point: ask for a BV id and scrape that video's comments.
if __name__ == '__main__':
    # Strip stray whitespace so a pasted id with a trailing space or newline
    # still forms a valid video URL and file name.
    bvid = input('输入视频Bvid:').strip()
    online_save(bvid)
    print('完成！')
@不二如是

页: [1]

鱼C论坛's Archiver

爬取B站评论，下面代码只能爬取一级评论，有没有大佬指导一下如何爬取交互回复信息.