|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests
import csv
import hashlib
import time
from datetime import datetime
import json
from urllib.parse import quote
# Request signing (w_rid) and retrieval of one page of comments.
def GetW(wts, NextPage, oid="113814703972019"):
    """Return the ``w_rid`` signature for one comment-list request.

    The signature is the MD5 hex digest of the query string (keys in
    alphabetical order, ``pagination_str`` percent-encoded) with a fixed
    salt appended.

    Args:
        wts: Unix timestamp that is sent as the ``wts`` query parameter.
        NextPage: JSON-encoded offset token (``'""'`` for the first page).
            It is embedded as the value of ``{"offset": ...}``, so pass the
            bare token, not an already wrapped ``{"offset": ...}`` string.
        oid: Id of the commented resource. Pass the same ``oid`` that is
            sent with the request so the signed string describes the real
            query. Defaults to the value that used to be hard-coded here.

    Returns:
        The 32-character lowercase hexadecimal MD5 digest.
    """
    pagination_str = '{"offset":%s}' % NextPage
    query = "&".join(
        [
            "mode=2",
            f"oid={oid}",
            f"pagination_str={quote(pagination_str)}",
            "plat=1",
            "type=1",
            "web_location=1315875",
            f"wts={wts}",
        ]
    )
    # NOTE(review): presumably a signing key captured from a browser
    # session; confirm it is still valid before relying on the result.
    salt = "ea1db124af3c7062474693fa704f4ff8"
    return hashlib.md5((query + salt).encode("utf-8")).hexdigest()


def GetContent(NextPage):
    """Fetch one page of comments, save them, and return the next offset.

    Every reply of the page is printed and written through the module-level
    ``csv_writer``, which must exist before this function is called.

    Args:
        NextPage: JSON-encoded offset token of the page to fetch
            (``'""'`` for the first page).

    Returns:
        The response's ``next_offset`` re-encoded with ``json.dumps`` so it
        can be passed straight back in as ``NextPage``.

    Raises:
        KeyError: If the response lacks the expected ``data`` / ``cursor``
            fields (for example an error payload).
    """
    # NOTE(review): hard-coded session cookie (contains login credentials);
    # move it to configuration or an environment variable and rotate it.
    headers = {
        "cookie": "bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NDE1Nzk0MzAsImlhdCI6MTc0MTMyMDE3MCwicGx0IjotMX0.FsFshnZPFdht_Fj7Pxqp5XgzHlgnYPnpoGWX53F1imw; bili_ticket_expires=1741579370; buvid3=1D55509D-6660-9458-1C1A-6EC43569BE4130240infoc; b_nut=1741320227; buvid4=6A35DDCB-75FC-4C6D-03B6-B828DEA70BA430240-025030704-QwNGI+jNopUTI1GhLVomaw%3D%3D; _uuid=1D11D2210-2BC9-2529-8C44-C1010A76E8D381028745infoc; CURRENT_FNVAL=4048; buvid_fp=2f0eefefa84c6a4ab0d42472a63a6dd4; b_lsid=D414ECEB_1956F97B63D; csrf_state=130ae0096803953f25dfdd2b87144e35; SESSDATA=cdefeabd%2C1756887034%2C4ca20%2A32CjD7yyeY9-VFI_RWy8IsjId3cPUs-xTs-3CYvAyVNuAehtfvU-_ii9uXRESeWr7O9I4SVlVNQUFBUzR6NkxLcDh6SGhvVWtPbWVqZUJ6U2t6OFgtYVdqVEFJb3ZtSjJmTUQ3VHBlRFh5LXFNSDN0bUlOTlgwMF93RVV4aHh5TXB5N3o0SVRDOUdRIIEC; bili_jct=1018cac8e70a199a4feca14132a9055c; DedeUserID=3546377353169818; DedeUserID__ckMd5=b43dfea08718ba45; sid=mcmifm2h",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0",
    }
    url = 'https://api.bilibili.com/x/v2/reply/wbi/main'
    oid = '114040961501285'
    wts = int(time.time())
    # GetW wraps the token in {"offset": ...} itself, so hand it the bare
    # token together with the oid that is actually requested.
    w_rid = GetW(wts=wts, NextPage=NextPage, oid=oid)
    # Send exactly the parameters that were signed (plus the signature):
    # 'mode' carries no stray space, and 'seek_rpid', which the signed
    # string does not contain, is not sent.
    params = {
        'oid': oid,
        'type': '1',
        'mode': '2',
        'pagination_str': '{"offset":%s}' % NextPage,
        'plat': '1',
        'web_location': '1315875',
        'w_rid': w_rid,
        'wts': wts,
    }
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url=url, params=params, headers=headers, timeout=10)
    json_data = response.json()

    # 'replies' may be null when a page has no comments.
    for reply in json_data['data']['replies'] or []:
        row = {
            '昵称': reply['member']['uname'],
            '性别': reply['member']['sex'],
            # Tolerate replies whose reply_control has no location entry.
            '地区': reply['reply_control'].get('location', '').replace('IP属地', ''),
            '评论': reply['content']['message'],
            '点赞': reply['like'],
        }
        print(row)
        csv_writer.writerow(row)

    next_offset = json_data['data']['cursor']['pagination_reply']['next_offset']
    # Re-encode as JSON so the token can be dropped into {"offset": ...}.
    return json.dumps(next_offset)
# Notes on the request headers built in GetContent:
# cookie: user information, commonly used by the site to check whether an account is logged in
# user-agent: describes the browser identity presented to the server
# Open the output file; newline='' lets the csv module control line endings.
# NOTE(review): the file is not closed in the visible part of the script — confirm it is closed after the loop.
f = open(file = 'data.csv',mode = 'w',encoding = 'utf-8',newline = '')
# Dictionary-based writer; the fieldnames must match the keys of the rows GetContent writes.
csv_writer = csv.DictWriter(f,fieldnames = ['昵称','性别','地区','评论','点赞'])
# Write the header row once, before any data rows.
csv_writer.writeheader()
# Offset token for the first page: a JSON-encoded empty string.
NextPage = '""'
for page in range(1,21):
print(f'正在采集第 |
|