马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
请问各位大佬们,我这个代码错在哪了,怎么爬了些这东西import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import TimeoutException
import logging
logging.basicConfig(level=logging.INFO,format='%(asctime)s - %(levelname)s: %(message)s')
index_page = 3
time_out = 10
index_url = 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput='
#初始化浏览器
path = 'D:/技能/chromedriver.exe'
brower = webdriver.Chrome(executable_path=path)
#显示等待
wait = WebDriverWait(brower,time_out)
#发起请求
def scrape_url(url,condition,locator):
logging.info(f'正在爬取{url}')
try:
brower.get(url)
wait.until(condition(locator))
except TimeoutException:
logging.error('error occurred while scraping {}'.format(url),exc_info=True)
#加载列表页
def scrape_page():
while True:
scrape_url(index_url,condition=EC.presence_of_element_located,locator=(By.XPATH, "//div[@class='pager_container']/span[last()]"))
html = brower.page_source
wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='pager_container']/span[last()]")))
print(html)
next_button = brower.find_element(By.XPATH,"//div[@class='pager_container']/span[last()]")
if 'pager_next_disabled' in next_button.get_attribute('class'):
brower.close()
break
else:
next_button.click()
time.sleep(1)
def main():
scrape_page()
if __name__ == '__main__':
main()
而且爬到后面海报错了 |