|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 lengyue869 于 2024-4-15 08:49 编辑
我想根据ID爬取LOL账号对应的皮肤数量和最后战绩,但是爬不到数据,路过的大神帮忙看一下。
网址:https://fa8.pw/
https://chat18.aichatos.xyz/
import requests
import json
import pandas as pd
from datetime import datetime
import winreg,sys
from fake_useragent import UserAgent
# Scrape account listings for several regions from the shop API, keep the
# accounts whose skin count is above 400, and write them to an Excel workbook
# on the current user's Desktop (Windows only: uses winreg and winsound).
#
# Related page: https://luck.92.edri.mobi/links/5DE54B72

# Read the Desktop folder location from the registry; the `with` block closes
# the registry handle as soon as the value has been read.
with winreg.OpenKey(
    winreg.HKEY_CURRENT_USER,
    r'Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders',
) as key:
    deskPath = winreg.QueryValueEx(key, "Desktop")[0]

url_id = "https://luck.92.edri.mobi/shop/shop/getAccount"
myheaders = {
    "User-Agent": UserAgent().random
}
# Without a timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 10  # seconds

data_list = []
lastpage = 0  # number of fetched pages that contained at least one record
allowed_regions = ["卡拉曼达", "暗影岛", "征服之海", "诺克萨斯", "战争学院", "雷瑟守备", "艾欧尼亚", "黑色玫瑰"]
# Maps agent_goodsid -> goodsid; both are sent with every request.
dic_goods = {"67310": "1807", "67311": "1808", "67320": "1817", "67328": "1825", "67321": "1818", "67327": "1824", "67313": "1810", "67316": "1813", "67318": "1815"}
# dic_goods = {"67305": "1802"}  # skins

for agent_goodsid, goodsid in dic_goods.items():
    # Fetch page 1 first to learn how many pages this listing has.
    mydata = {"agent_goodsid": agent_goodsid, "goodsid": goodsid, "page": "1", "userid": "959", "type": "new"}
    try:
        html = requests.post(url_id, headers=myheaders, data=mydata, timeout=REQUEST_TIMEOUT).text
        first_page = json.loads(html)
        # Ceiling division: the page count is derived assuming 10 records per page.
        page = -(-first_page['count'] // 10)
        if page == 0:
            continue
        daqu = first_page['data'][0]['number']['2']
    except Exception as e:
        # Same best-effort policy as the per-page loop below: report and move on.
        print(f"Error occurred: {e}")
        continue
    print(f"{daqu},page:{page}")

    for page_num in range(page):
        try:
            mydata = {"agent_goodsid": agent_goodsid, "goodsid": goodsid, "page": page_num + 1, "userid": "959", "type": "new"}
            html = requests.post(url_id, headers=myheaders, data=mydata, timeout=REQUEST_TIMEOUT).text
            text = json.loads(html)['data']
        except Exception as e:
            print(f"Error occurred: {e}")
            continue  # Continue to the next page if the request or decoding fails

        if len(text) > 0:
            lastpage += 1

        for item in text:
            # Each record is parsed on its own so that one malformed record
            # does not discard the remaining records of the page.
            try:
                area = item['number']['2']
                if area not in allowed_regions:
                    continue  # Skip if region not in allowed list
                name = item['number']['3']
                detail = item['number']['4'].replace('|', '----')
                lst1 = detail.split('----')
                # The third segment is the hero count; add its label when the
                # listing omitted it so it is picked up as a "key:value" pair.
                if len(lst1) > 2 and "英雄:" not in lst1[2]:
                    lst1[2] = "英雄:" + lst1[2]
                pairs = [part.split(':') for part in lst1 if ":" in part]
                data_dict = {"大区": area, "ID": name, **{pair[0]: pair[1] for pair in pairs}}
                data_dict['页码'] = page_num + 1  # remember which page the record came from
                data_list.append(data_dict)
            except Exception as e:
                print(f"Error occurred: {e}")
                continue

df = pd.DataFrame(data_list)
if df.empty:
    print("没有数据,程序退出。")
    sys.exit()


def _to_numeric_if_possible(column):
    """Return *column* converted to a numeric dtype, or unchanged if any value cannot be parsed."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Explicit replacement for the deprecated pd.to_numeric(..., errors='ignore').
df = df.apply(_to_numeric_if_possible)

if '皮肤' not in df.columns:
    # No record carried a skin count, so there is nothing to filter on.
    print("没有数据,程序退出。")
    sys.exit()
# Force the skin count to be numeric; unparsable values become NaN and are
# dropped by the comparison instead of raising a TypeError.
df['皮肤'] = pd.to_numeric(df['皮肤'], errors='coerce')
df = df[df['皮肤'] > 400]

# Sort by skin count (descending), then last-game time and level (ascending),
# using only the sort keys that were actually parsed from the listings.
sort_spec = [("皮肤", False), ("最后游戏", True), ("等级", True)]
present = [(col, asc) for col, asc in sort_spec if col in df.columns]
df = df.sort_values(by=[col for col, _ in present], ascending=[asc for _, asc in present])

columns = ["页码", "大区", "ID", "等级", "英雄", "皮肤", "单", "组"]
# reindex creates any missing output column as empty instead of raising KeyError.
df = df.reindex(columns=columns)

# NOTE: `goodsid` here is the value left over from the last loop iteration.
exl_name = "EZ__PF" if goodsid == "1802" else "EZ"
current_time = datetime.now().strftime("%m%d__%H.%M")
with pd.ExcelWriter(f"{deskPath}\\{exl_name}__{current_time}.xlsx", engine='xlsxwriter') as writer:
    df.to_excel(writer, index=False, sheet_name='Sheet1')
print(f"共{lastpage}页,数据已保存到桌面。")

# Audible signal that the run has finished.
import winsound
winsound.Beep(440, 1000)
import os
import sys
import tkinter as tk
from tkinter import filedialog
from datetime import datetime,timedelta
import requests
import json
import pandas as pd
import hashlib
from fake_useragent import UserAgent
import re
def md5_hash(text: str) -> str:
    """Return the lowercase hexadecimal MD5 digest of *text* encoded as UTF-8."""
    return hashlib.md5(text.encode('utf-8')).hexdigest()
# Let the user pick an Excel workbook of accounts, query the lookup API for
# every account with at least 450 skins, and rewrite the workbook with two
# sheets: the untouched input and the enriched results.
url_cx = "https://fa8.pw/api/api.php?act=cx"
head_cx = {
    "User-Agent": UserAgent().random,
    "Cookie": "__51vcke__KEAGaASdi4vVsbMk=1efc3001-fb11-50f9-9147-77f4f6eb3599; __51vuft__KEAGaASdi4vVsbMk=1712214416217; sign=8b8db337e75948fe5bb408040a5d9618; __51uvsct__KEAGaASdi4vVsbMk=15; swl=true; __vtins__KEAGaASdi4vVsbMk=%7B%22sid%22%3A%20%227009469d-4a54-519c-bd22-7eb46204f9e8%22%2C%20%22vd%22%3A%203%2C%20%22stt%22%3A%20199586%2C%20%22dr%22%3A%203335%2C%20%22expires%22%3A%201712976322096%2C%20%22ct%22%3A%201712974522096%7D",
    "Referer": "https://fa8.pw/"
}
# Region name -> region number sent to the API as "dq".
dic_dq = {
    '艾欧尼亚': '1',
    '比尔吉沃特': '2',
    '祖安': '3',
    '诺克萨斯': '4',
    '德玛西亚': '6',
    '班德尔城': '5',
    '皮尔特沃夫': '7',
    '战争学院': '8',
    '弗雷尔卓德': '9',
    '巨神峰': '10',
    '雷瑟守备': '11',
    '无畏先锋': '12',
    '裁决之地': '13',
    '黑色玫瑰': '14',
    '暗影岛': '15',
    '钢铁烈阳': '17',
    '恕瑞玛': '16',
    '水晶之痕': '18',
    '影流': '22',
    '守望之海': '23',
    '扭曲丛林': '20',
    '征服之海': '24',
    '卡拉曼达': '25',
    '皮城警备': '27',
    '巨龙之巢': '26',
    '均衡教派': '19',
    '男爵领域': '30',
    '峡谷之巅': '31'
}

# Create a tkinter application only to host the file dialog.
root = tk.Tk()
root.withdraw()  # hide the empty main window
# Desktop folder of the current user, used as the dialog's starting directory.
desktop_path = os.path.join(os.environ['USERPROFILE'], 'Desktop')
file_path = filedialog.askopenfilename(initialdir=desktop_path)
root.destroy()
# A cancelled dialog yields an empty value ('' or, on some Tk builds, ()).
if not file_path:
    sys.exit()

df = pd.DataFrame(columns=["页码", "大区", "ID", "等级", "英雄", "皮肤", "单", "组", "最后游戏", "备注"])
df1 = pd.read_excel(file_path)
# Matches timestamps such as "2024-04-15 08:49"; compiled once for the loop.
pattern = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}')

with pd.ExcelWriter(file_path) as writer:
    # Keep the original rows in their own sheet of the rewritten workbook.
    df1.to_excel(writer, sheet_name='原始数据', index=False)

    for index, row in df1.iterrows():
        if row["皮肤"] < 450:
            continue
        # Region number for this row's region name; empty string if unknown.
        mydq_num = dic_dq.get(row["大区"], "")
        myname = row["ID"]
        # NOTE(review): the signature format ("name=...dq-...") is kept exactly
        # as written; confirm it against what the site itself sends.
        mysc = md5_hash(f"name={myname}dq-{mydq_num}")
        mydata = {
            "name": myname,
            "dq": mydq_num,
            "start": "0",
            "end": "10",
            "type": "lol",
            "tag": "1",
            "sc": mysc
        }
        try:
            html = requests.post(url_cx, data=mydata, headers=head_cx, timeout=10)
            data = json.loads(html.text)
        except (requests.RequestException, ValueError) as e:
            # A failed or non-JSON response only skips this account.
            print(f"Error occurred: {e}")
            continue

        zhanji = data.get('zhanji') if isinstance(data, dict) else None
        if zhanji is None:
            continue
        if '已过免费查询时间段' in zhanji:
            print("已过免费查询时间段,程序退出。")
            sys.exit()
        if '没有这个召唤师' in zhanji:
            continue

        # The first timestamp in the match history is taken as the last game time.
        found = pattern.search(zhanji)
        if found is None:
            continue  # no timestamp in the history: nothing to record
        last_match = found[0]

        try:
            skin_count = int(data['skin'].split("|")[1].split("</font>")[0].split("皮 肤:")[1])  # skin count
        except (KeyError, IndexError, ValueError, AttributeError) as e:
            print(f"Error occurred: {e}")
            continue
        ds = data.get('dsdj')  # value stored in the "单" column
        zp = data.get('lhdj')  # value stored in the "组" column

        # Reuse df1's row label so the shared columns can be aligned afterwards.
        df.at[index, '皮肤'] = skin_count
        df.at[index, '单'] = ds
        df.at[index, '组'] = zp
        df.at[index, '最后游戏'] = last_match

        # Flag accounts whose last game is at most 30 days old.
        last_game_date = datetime.strptime(last_match, '%Y-%m-%d %H:%M')
        if datetime.now() - last_game_date <= timedelta(days=30):
            df.at[index, '备注'] = 'NG'

    # This check must run before the column copy below: assigning a Series to
    # a frame with no rows makes the frame adopt the Series' index, which
    # would fill it with every input row and hide the "no results" case.
    if df.empty:
        print("没有数据,程序退出。")
        sys.exit()

    # Copy the descriptive columns once, aligned on the row labels set above.
    for shared_column in ("页码", "大区", "ID", "等级", "英雄"):
        df[shared_column] = df1[shared_column]

    # df = df[df['皮肤'] > 400]
    # Keep only the date part and switch the separator from "-" to "/".
    df['最后游戏'] = df['最后游戏'].fillna('').str.slice(0, 10).str.replace('-', '/')
    df.to_excel(writer, sheet_name='处理后数据', index=False)

# Audible signal that the run has finished.
import winsound
winsound.Beep(440, 1000)
/**
 * Click the element whose id is "getE" repeatedly.
 *
 * The first click happens immediately; every following click is scheduled
 * `delay` milliseconds after the previous one. If the element does not
 * exist, a message is logged and nothing is clicked.
 *
 * @param {number} numClicks - how many times to click the element
 * @param {number} delay - pause between clicks, in milliseconds
 */
function clickLinkMultipleTimesWithDelay(numClicks, delay) {
  const target = document.getElementById('getE');

  // Guard clause: nothing to do without the element.
  if (!target) {
    console.log('找不到链接元素。');
    return;
  }

  let clicksDone = 0;
  const step = () => {
    if (clicksDone < numClicks) {
      target.click(); // simulate a user click on the element
      clicksDone += 1;
      setTimeout(step, delay); // schedule the next click
    }
  };
  step();
}

// Click the element 10 times with 100 ms (0.1 s) between clicks.
clickLinkMultipleTimesWithDelay(10, 100);
|
|