|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- from selenium import webdriver
class Douyu(object):
    """Drive Chrome through Selenium and print the rooms on a Douyu listing.

    ``url`` is the directory page that gets loaded and ``driver`` is the
    Chrome WebDriver used for every page interaction.
    """

    def __init__(self):
        self.url = 'https://www.douyu.com/directory/all'
        # Raw string so the backslashes of the Windows path are never read as
        # escape sequences ('\c' is an invalid escape that newer Python
        # versions warn about). The resulting path is unchanged.
        self.driver = webdriver.Chrome(executable_path=r'D:\chromedriver_win32\chromedriver.exe')

    def run(self):
        """Load the directory page, print its rooms, then close the browser.

        The driver is quit even when loading or parsing raises, so a failed
        run does not leave a browser process behind. The instance cannot be
        reused for another ``run()`` afterwards.
        """
        try:
            self.driver.get(self.url)
            self.parse_data()
            # Not implemented yet: saving the rooms and moving to the next page.
        finally:
            self.driver.quit()

    def parse_data(self, max_attempts=3):
        """Print and return one dict per room card on the current page.

        Each dict has the keys ``title``, ``type``, ``owner`` and ``number``,
        read from the card's child nodes.

        The page may re-render the listing after the cards were located, which
        invalidates the located elements (presumably surfacing as
        ``StaleElementReferenceException`` -- confirm against the traceback).
        When that happens the cards are located again and the whole pass is
        retried after a short pause.

        Args:
            max_attempts: How many passes to try before giving up; values
                below 1 are treated as 1.

        Returns:
            The list of room dicts that was printed.

        Raises:
            StaleElementReferenceException: If every attempt hit a stale
                element.
        """
        # Imported here so the block carries its own dependencies.
        import time

        from selenium.common.exceptions import StaleElementReferenceException

        cards_xpath = '//*[@id="listAll"]/section[2]/div[2]/ul/li/div'
        # (key, XPath relative to one room card) for every extracted field.
        field_xpaths = (
            ('title', './a[1]/div[2]/div[1]/h3'),
            ('type', './a[1]/div[2]/div[1]/span'),
            ('owner', './a[1]/div[2]/div[2]/h2/div'),
            ('number', './a[1]/div[2]/div[2]/span'),
        )

        attempts_left = max(1, max_attempts)
        while True:
            try:
                # Collect everything first so a stale element part-way through
                # never leaves a half-printed listing behind.
                rooms = [
                    {
                        key: card.find_element_by_xpath(xpath).text
                        for key, xpath in field_xpaths
                    }
                    for card in self.driver.find_elements_by_xpath(cards_xpath)
                ]
                break
            except StaleElementReferenceException:
                attempts_left -= 1
                if attempts_left == 0:
                    raise
                # Give the page a moment to finish re-rendering.
                time.sleep(0.5)

        for room in rooms:
            print(room)
        return rooms
if __name__ == '__main__':
    # Start the crawl only when this file is executed as a script.
    Douyu().run()
复制代码
这段代码在打印 temp 时会报错,请问应该怎么解决?
def parse_data(self):
    """Print the text of each listing container as a list of lines.

    Each matching container's ``text`` is read once and split on newlines,
    so no per-field child lookups are made on the located elements.
    """
    container_xpath = "//div[@class='layout-Module-container layout-Cover ListContent']"
    for container in self.driver.find_elements_by_xpath(container_xpath):
        print(container.text.split('\n'))
复制代码
报错是因为调用 find_elements_by_xpath 之后页面元素发生了变化,再用之前获取到的 WebElement 的 ID 去查找,就会找不到对应的元素。你可以像上面这样一次性取出整个列表的文本,然后按每 4 项一组拆分,就能得到每个房间的数据了。
|
|