|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
StaleElementReferenceException: stale element reference: element is not attached to the page document
(Session info: headless chrome=81.0.4044.138)
(Driver info: chromedriver=2.32.498550 (9dec58e66c31bcc53a9ce3c7226f0c1c5810906a),platform=Windows NT 10.0.18362 x86_64)
我觉得应该是页面被自动刷新了才会报这个错。但是我把东西都爬下来存列表里了,为什么还会报这个错,想不明白
- global liebiao
- chrome_options = Options()
- #后面的两个是固定写法 必须这么写
- chrome_options.add_argument('--headless')
- chrome_options.add_argument('--disable-gpu')
- #驱动路径 谷歌的驱动存放路径
- path = r'C:\Users\Administrator\Desktop\chromedriver.exe'
- #创建浏览器对象
- browser = webdriver.Chrome(executable_path=path,chrome_options=chrome_options)
- url ='http://roll.caijing.com.cn/'
- browser.get(url)
- time.sleep(1)
-
-
- news_url = browser.find_elements_by_xpath("//ul[contains(@class,'ntb')]/li/a")#此处已经把所有链接存放在news_url这个列表里了,为什么页面刷新后还会报错?
-
- for i in news_url:
- real = i.get_attribute('href')#这里可以执行,执行到一半就会爆出上面那个错误
-
- print(real)
-
- #_________________________________
- headers = {
- 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
- 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
- 'Accept-Language':'en-US,en;q=0.5',
- 'Accept-Encoding':'gzip',
- 'DNT':'1',
- 'Connection':'close'
- }
-
- try:
-
- r = requests.get(real, headers=headers)
- r.encoding = 'utf-8'
- html = etree.HTML(r.text)#etree.HTML():构造了一个XPath解析对象并对HTML文本进行自动修正。
- result = html.xpath('//div[contains(@id,"the_content")]/p/text()')#('//div[@id="mainNewsContent"]/p/text()')
-
- if len(result):
- print("有内容")
- else:
- result = html.xpath('//div[contains(@class,"article-content")]/p/text()')
-
- #处理文本
- result = str(result)
- result2 = result.replace('\\u3000','')
- print(result2)
- except:
- print("访问失败")
-
- browser.quit()
复制代码
|
|