|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 Pythonnewers 于 2021-1-17 13:14 编辑
RT
可爬取的字段:mid、名字、性别、头像地址、等级、大会员
- import requests
- import re
- import json
- from bs4 import BeautifulSoup
- import xlsxwriter
- import time
- import random
# Output workbook; the path is hard-coded to the original author's desktop.
book = xlsxwriter.Workbook(r'C:\Users\Sun\Desktop\Bilili User.xlsx')
sheet = book.add_worksheet()

# Spreadsheet column letters, indexed by field position. The last entry was
# a duplicated 'E', which would have made an eighth field overwrite column E.
word = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']

# Header captions, in the same order in which mes() hands fields to excel().
oppotion = ["mid", '名字', '性别', '头像地址', '等级', '大会员']

# Row 1 holds the captions; excel() writes data rows from row 2 downwards.
for column, caption in zip(word, oppotion):
    sheet.write(f"{column}1", caption)

# Browser-like User-Agent sent with every HTTP request.
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
def excel(*x):
    """Write one user's fields into the worksheet row for the current user.

    Args:
        *x: Field values in column order; the first value is used in the
            progress message.

    The target row is derived from the module-level ``ueser`` counter
    (``ueser + 2``, because row 1 holds the header captions).
    """
    for position, value in enumerate(x):
        column = word[position]
        sheet.write(f"{column}{ueser+2}", value)
    # Progress message: "<first field>完成".
    print(f"{x[0]!s}完成")
def mes(x):
    """Pull the profile fields out of one API response and store them.

    Args:
        x: Decoded JSON response (a dict). A usable response carries a
            ``data`` dict with the keys ``mid``, ``name``, ``sex``, ``face``,
            ``level`` and the nested ``vip -> label -> text``.

    Returns:
        None. The fields are forwarded to ``excel``; responses without a
        usable payload are skipped.
    """
    print(x)
    info = x.get('data')
    # -404 marks a user that does not exist. Any other error response also
    # arrives without a ``data`` payload, and used to crash the whole run
    # with KeyError; skip it instead so the remaining users are still scraped.
    if x.get('code') == -404 or not info:
        return
    excel(
        info["mid"],
        info["name"],
        info["sex"],
        info["face"],
        info["level"],
        info['vip']['label']['text'],
    )
def req(url):
    """Fetch ``url`` and pass the decoded JSON response on to ``mes``.

    Args:
        url: Full API URL to request.

    Raises:
        requests.RequestException: On network failure or timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # A timeout keeps a single stalled connection from hanging the whole run.
    response = requests.get(url=url, headers=headers, timeout=10)
    # The endpoint returns JSON, so decode it directly. Routing the body
    # through an HTML parser (as before) decodes entities and interprets any
    # "<...>" inside string values, which can corrupt the payload.
    dic = json.loads(response.content.decode("utf-8"))
    mes(dic)
def uid(a):
    """Extract the numeric user id from ``a`` and query the account-info API.

    Args:
        a: Text containing the user id, e.g. a space URL such as
            ``https://space.bilibili.com/123/``. The first run of digits
            is taken as the id.

    Raises:
        IndexError: If ``a`` contains no digits.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    mid = re.findall(r"\d+", a)[0]
    # Kept as a module-level global exactly as before; nothing in the visible
    # code reads it. NOTE(review): confirm whether it can be dropped.
    global data
    data = {'mid': mid, 'jsonp': 'jsonp'}
    url = f"https://api.bilibili.com/x/space/acc/info?mid={mid}&jsonp=jsonp"
    req(url)
def user(uesers):
    """Scrape user ids 1..``uesers`` in order and save the workbook.

    Args:
        uesers: Number of consecutive user ids to scrape, starting at 1.
    """
    # excel() reads the module-level ``ueser`` to pick the output row, so the
    # loop counter has to stay a global.
    global ueser
    try:
        for ueser in range(uesers):
            url = f"https://space.bilibili.com/{ueser+1}/"
            uid(url)
            # Random pause of up to one second between requests.
            time.sleep(random.random())
    finally:
        # The file is only written when the workbook is closed; closing in
        # ``finally`` keeps the rows collected so far if a request fails.
        book.close()


# Scrape the first 100 user ids.
user(100)
复制代码
↑↑↑
在 user() 的括号里填写要爬取的用户数量(按 mid 从 1 开始依次爬取)
↑ 有些英文单词是我打字时手滑拼错的(例如 ueser、oppotion),运行之后才发现拼写错误
mid应该就是uid |
|