火狐浏览器爬取评论
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
driver = webdriver.Firefox()
driver.implicitly_wait(20)  # implicit wait applied to every element lookup below
driver.get("http://www.santostang.com/2018/07/04/hello-world/")
time.sleep(5)  # give the page time to finish loading
for i in range(0,3):
    # Scroll to the bottom, then enter the iframe to work with the comment widget.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR,"iframe"))
    load_more = driver.find_element(By.XPATH,"/html/body/div/div/div/div/div")
    load_more.click()
    driver.switch_to.default_content()
    time.sleep(5)
    driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR,"iframe"))
    comments = driver.find_element(By.CSS_SELECTOR,'div.reply-content')  # NOTE: find_element returns ONE WebElement, not a list
    for eachcomment in comments:
        # The loop header above is the statement that raises
        # "TypeError: 'WebElement' object is not iterable" (see the traceback
        # quoted after this script); find_elements would return a list.
        content = eachcomment.find_element(By.TAG_NAME,'p')
        print(content.text)
    # Pick the pager button for the next page: button[2] after the first pass,
    # button[3] after the second.
    if i == 0:
        temp_xpath = "/html/body/div/div/div/div/button[" + str(i + 2) + "]"
    if i == 1:
        temp_xpath = "/html/body/div/div/div/div/button[" + str(i + 2) + "]"
    if i == 2:
        print("The above are the comments in the {}th page.".format(i + 1))
        print("Finish!")
    if i < 2:
        print("Thevalue of i is:",i)
        print("The above are the comments in the {}th page.".format(i+1))
        print("The xpath of the next page:",temp_xpath)
        next_page = driver.find_element(By.XPATH,temp_xpath)
        next_page.click()
    # Leave the iframe so the next pass starts from the top-level document.
    driver.switch_to.default_content()
    time.sleep(5)
报错:Traceback (most recent call last):
File "d:\Desktop\0328.py", line 17, in <module>
for eachcomment in comments:
TypeError: 'WebElement' object is not iterable
你的报错是因为comments变量是一个WebElement对象,而不是可迭代的列表。要获取到页面中多个评论的文本内容,你可以使用find_elements方法来获取多个元素,并将其存储在一个列表中。
修改代码如下:
# Open the post in Firefox and make three passes over the comment iframe,
# printing the text of every reply found on each pass.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

driver = webdriver.Firefox()
driver.implicitly_wait(20)
driver.get("http://www.santostang.com/2018/07/04/hello-world/")
time.sleep(5)

for page_idx in range(0, 3):
    # Scroll to the bottom of the page before touching the comment frame.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Step into the iframe, click the element at this XPath, step back out.
    comment_frame = driver.find_element(By.CSS_SELECTOR, "iframe")
    driver.switch_to.frame(comment_frame)
    driver.find_element(By.XPATH, "/html/body/div/div/div/div/div").click()
    driver.switch_to.default_content()
    time.sleep(5)

    # Re-enter the iframe and print every reply.
    comment_frame = driver.find_element(By.CSS_SELECTOR, "iframe")
    driver.switch_to.frame(comment_frame)
    # Note: find_elements (plural) is used here, so the result is iterable.
    for reply in driver.find_elements(By.CSS_SELECTOR, 'div.reply-content'):
        paragraph = reply.find_element(By.TAG_NAME, 'p')
        print(paragraph.text)

    if page_idx < 2:
        # NOTE(review): this XPath has no positional index, so find_element
        # resolves to the first matching button on every pass. The follow-up
        # post below reports that this version only reaches page one.
        next_xpath = "/html/body/div/div/div/div/button"
        print("The value of i is:", page_idx)
        print("The above are the comments in the {}th page.".format(page_idx + 1))
        print("The xpath of the next page:", next_xpath)
        driver.find_element(By.XPATH, next_xpath).click()
    else:
        print("The above are the comments in the {}th page.".format(page_idx + 1))
        print("Finish!")

    # Return to the top-level document before the next pass.
    driver.switch_to.default_content()
    time.sleep(5)
希望能帮助到你!如果还有其他问题,请随时提问。
以上回复来自 FishC AI 小助理,如未能正确解答您的问题,请继续追问。
FishC 发表于 2024-3-29 13:31
你的报错是因为comments变量是一个WebElement对象,而不是可迭代的列表。要获取到页面中多个评论的文本内容 ...
这个代码只能抓取第一页评论,我需要抓取的是前三页评论
黑脸艺术家 发表于 2024-03-29 13:39
这个代码只能抓取第一页评论,我需要抓取的是前三页评 ...
非常抱歉,我之前的代码确实只能抓取第一页的评论。以下是修改后的代码,可以抓取前三页的评论:
# Scrape the first three pages of comments from the post with Firefox.
#
# The comments are read from inside an iframe. On each pass the script enters
# the frame, clicks the element at the "load more" XPath, prints every reply,
# and then clicks the pager button that leads to the next page.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

PAGES_TO_SCRAPE = 3

driver = webdriver.Firefox()
try:
    driver.implicitly_wait(20)
    driver.get("http://www.santostang.com/2018/07/04/hello-world/")
    time.sleep(5)

    for i in range(PAGES_TO_SCRAPE):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Enter the iframe, click the "load more" element, then leave again.
        driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "iframe"))
        load_more = driver.find_element(By.XPATH, "/html/body/div/div/div/div/div")
        load_more.click()
        driver.switch_to.default_content()
        time.sleep(5)

        # Re-enter the iframe and print the text of every reply on this page.
        driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "iframe"))
        comments = driver.find_elements(By.CSS_SELECTOR, 'div.reply-content')
        for comment in comments:
            content = comment.find_element(By.TAG_NAME, 'p')
            print(content.text)

        if i < PAGES_TO_SCRAPE - 1:
            # The pager button is addressed by position: button[2] after the
            # first page, button[3] after the second. Without the index the
            # XPath resolves to the first button every time and the script
            # never leaves page one. The i + 2 offset comes from the original
            # question's XPath; verify it against the live page.
            temp_xpath = "/html/body/div/div/div/div/button[{}]".format(i + 2)
            print("The value of i is:", i)
            print("The above are the comments in the {}th page.".format(i + 1))
            print("The xpath of the next page:", temp_xpath)
            next_page = driver.find_element(By.XPATH, temp_xpath)
            next_page.click()
            # Leave the iframe and let the next page load before the next pass.
            driver.switch_to.default_content()
            time.sleep(5)
        else:
            # Last page: report completion only once, after the final pass.
            print("The above are the comments in the {}th page.".format(i + 1))
            print("Finish!")
finally:
    # Always close the browser, even if an element lookup or click fails.
    driver.quit()
这段代码中,通过循环控制抓取三页的评论。在每一页抓取评论后,通过检查i的值来确定下一页的按钮的xpath,并通过next_page.click()来点击下一页的按钮进行跳转。希望这次能帮助到你!如果还有其他问题,请随时提问。
以上回复来自 FishC AI 小助理,如未能正确解答您的问题,请继续追问。
页:
[1]