|
发表于 2024-12-1 13:06:34
|
显示全部楼层
# Scrape Liepin (liepin.com) search results for the keyword '会计' with Selenium:
# collect the company name and detail-page link of every job card on the first
# results page, then open each link in a new tab and print the job description.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)
wait = WebDriverWait(driver, 10)

try:
    # Open the Liepin home page.
    url = "https://www.liepin.com/"
    driver.get(url)

    # Type the keyword into the search box and submit the search.
    search_input = wait.until(
        EC.presence_of_element_located(
            (By.XPATH, '//input[@class="jsx-1374046090"]')
        )
    )
    search_input.send_keys('会计')
    search_button = driver.find_element(
        By.XPATH, '//span[@class="jsx-1374046090 search-btn"]'
    )
    search_button.click()

    # Wait for the search results list to be present.
    wait.until(
        EC.presence_of_element_located((By.XPATH, '//div[@class="job-list-box"]'))
    )
    print(driver.current_url)

    # Remember the results window explicitly so cleanup never has to rely on
    # the ordering of driver.window_handles.
    main_window = driver.current_window_handle

    # Collect every job link together with its company name.
    job_elements = driver.find_elements(
        By.XPATH, '//div[@class="job-list-box"]/div'
    )
    job_links = []
    company_names = []
    for number, element in enumerate(job_elements, start=1):
        try:
            company_elem = element.find_element(
                By.XPATH, './/span[@class="company-name ellipsis-1"]'
            )
            company_name = company_elem.text
            job_link_elem = element.find_element(
                By.XPATH, './/a[contains(@class,"job-card-left")]'
            )
            job_link = job_link_elem.get_attribute('href')
            if not job_link:
                # A card without an href cannot be visited; report it here
                # instead of opening a blank tab and timing out later.
                raise ValueError("job card has no href")
            job_links.append(job_link)
            company_names.append(company_name)
            print(f"已收集职位 #{number},公司:{company_name}")
        except Exception as e:
            # Best effort: a malformed card must not abort the whole run.
            print(f"收集职位 #{number} 出错:{e}")

    # Visit each job link in its own tab and print the description.
    for number, (job_link, company_name) in enumerate(
        zip(job_links, company_names), start=1
    ):
        try:
            driver.execute_script("window.open(arguments[0]);", job_link)
            driver.switch_to.window(driver.window_handles[-1])
            # Wait for the job description element on the detail page.
            content = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//dd[@data-selector="job-intro-content"]')
                )
            )
            print(f"正在处理职位 #{number},公司:{company_name}")
            print("职位内容:", content.text)
        except Exception as e:
            print(f"处理职位 #{number} 出错:{e}")
        finally:
            # Close only the tabs opened for this job. Closing "the current
            # window" unconditionally would close the results page whenever
            # the failure happened before the switch to the new tab.
            for handle in driver.window_handles:
                if handle != main_window:
                    driver.switch_to.window(handle)
                    driver.close()
            driver.switch_to.window(main_window)
            time.sleep(2)  # pause briefly before processing the next job

    # Keep the browser open for manual inspection.
    time.sleep(1000)
finally:
    # Always shut down the browser and the driver process, including on
    # errors or Ctrl-C, so no orphaned chromedriver is left behind.
    driver.quit()
复制代码 |
|