from random import choice
from time import sleep

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
def parse_data(wd):
    """Collect basic info for every room entry on the currently loaded page.

    Args:
        wd: Selenium WebDriver that has already navigated to a room list page.

    Returns:
        A list of dicts with the keys ``"url"``, ``"owner"`` and ``"type"``,
        one per ``<li>`` entry whose three sub-elements could all be read.
        Entries that fail a lookup are skipped.
    """
    sleep(1)  # short fixed pause before querying the list
    room_list = wd.find_elements(
        By.XPATH, "/html/body/div[1]/div/div[2]/div/div/div[3]/ul/li"
    )
    print(len(room_list))

    temp_list = []
    for room in room_list:
        # The "title" (./a[2]/li[1]/a[2]) and "num" (./span[1]/span[3]/i[2])
        # lookups from earlier revisions are intentionally not performed.
        try:
            temp = {
                "url": room.find_element(By.XPATH, "./a[1]").get_attribute("href"),
                "owner": room.find_element(By.XPATH, "./span[1]/span[1]/i").text,
                "type": room.find_element(By.XPATH, "./span[1]/span[2]/i[2]").text,
            }
        except WebDriverException:
            # Best effort: skip entries whose markup differs, but let
            # KeyboardInterrupt / SystemExit and programming errors propagate
            # (the previous bare ``except`` hid those as well).
            # NOTE(review): with a long implicit wait configured on the driver,
            # every failed lookup here presumably blocks for that full timeout.
            continue
        temp_list.append(temp)
    return temp_list
def save_data(temp_list):
    """Print every record in ``temp_list`` prefixed with its 1-based position.

    Args:
        temp_list: Iterable of records (the dicts produced by ``parse_data``).

    Returns:
        None. Output goes to standard output, one ``"<index> <record>"`` line
        per record.
    """
    for count, temp in enumerate(temp_list, start=1):
        print(count, temp)
def HuyaDanmu(address, ua):
    """Open Huya in Chrome, print the rooms of two list pages and greet each room.

    Starts Chrome through the chromedriver at ``address`` with ``ua`` as the
    user agent, opens the Huya home page, hovers over the login header, waits
    10 seconds, prints the rooms found on ``/g/3203#cate-1-4507-0`` and on
    ``/g/3203``, then visits every room of the second list and sends one
    randomly chosen line from the greetings file as a chat message.

    Args:
        address: Path to the chromedriver executable.
        ua: User-agent string passed to Chrome.

    Raises:
        OSError: If the credentials file or the greetings file cannot be read.
        IndexError: If the credentials file has fewer than two lines, or the
            greetings file has no non-blank line while rooms were found.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')  # ignore HTTPS certificate errors
    # Custom user agent so the automated browser is less likely to be detected.
    options.add_argument('user-agent=' + ua)
    # Headless mode and disabling image loading were tried before and are
    # deliberately left off.
    wd = webdriver.Chrome(service=Service(address), options=options)
    try:
        # NOTE(review): the previous comment said 60 s but the value is 160 s;
        # the value is kept unchanged — confirm which one is intended.
        wd.implicitly_wait(160)
        wd.get('https://www.huya.com')

        # Credentials file: first line is the account, second line the password.
        with open(r"G:\TDDOWNLOAD\qq自动化登录虎牙\LoginHuyaByQ\HY账号密码.txt", "r", encoding="utf-8") as g:
            hypassword = g.readlines()
        # NOTE(review): this echoes the password in clear text to the console.
        print(hypassword[0])
        print(hypassword[1])

        # Hover over the login header element.
        action = ActionChains(wd)
        denglu = wd.find_element(By.CLASS_NAME, "LoginHd--1Jf6S0CCU3DUkJdjVqVn3")
        action.move_to_element(denglu).perform()
        # TODO: the account/password login is not implemented. Earlier attempts
        # switched into the "UDBSdkLgn_iframe" frame, filled the two inputs of
        # #account-login-form with the credentials and clicked #login-btn.
        sleep(10)

        # Print the number of rooms and their details for both list pages.
        wd.get("https://www.huya.com/g/3203#cate-1-4507-0")
        temp_list_monq = parse_data(wd)
        save_data(temp_list_monq)
        wd.get("https://www.huya.com/g/3203")
        temp_list = parse_data(wd)
        save_data(temp_list)

        # Read the greetings once instead of re-opening the file for every
        # room; blank lines are dropped so an empty message is never sent.
        with open(r"G:\TDDOWNLOAD\qq自动化登录虎牙\LoginHuyaByQ\主播打招呼文本.txt", "r", encoding="UTF-8") as f:
            lines = [line.strip('\n') for line in f if line.strip()]

        # NOTE(review): the original wrapped this loop in ``while True: ...
        # break``, apparently left over from a disabled "next page" step; this
        # assumes the ``break`` belonged to the ``while``, i.e. one full pass.
        for roomherf in temp_list:
            wd.get(roomherf['url'])
            messageBox = wd.find_element(By.XPATH, '//*[@id="pub_msg_input"]')  # chat input box
            messageBut = wd.find_element(By.XPATH, '//*[@id="msg_send_bt"]')  # send button
            danmu = choice(lines)  # pick a random greeting
            messageBox.send_keys(danmu)
            sleep(1)
            messageBut.click()
            sleep(20)  # pause before moving on to the next room
    finally:
        # Always shut down Chrome and chromedriver, even when a step fails.
        wd.quit()
if __name__ == '__main__':
    # Raw string: the Windows path contains backslashes that would otherwise be
    # parsed as (invalid) escape sequences such as "\T" and "\c".
    address = r"G:\TDDOWNLOAD\qq自动化登录虎牙\LoginHuyaByQ\chromedriver.exe"
    ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
    HuyaDanmu(address, ua)
    # Keep the console window open until the user confirms.
    input("输入任意键结束")
现在这个网页怎么获取登录框信息?切换不到那个 iframe 框里,捣鼓了半天也没弄好。另外想爬取每个网页的信息,现在最多只能获取 7 个。还请各位大佬帮忙,谢谢了,代码已经被我改成烂摊子了。