提示'chromedriver.exe路径出错
import scrapyfrom selenium import webdriver
from wangyiPro.items import WangyiproItem
class WangyiSpider(scrapy.Spider):
name = 'wangyi'
# allowed_domains = ['www.xxx.com']
start_urls = ['https://news.163.com/']
models_urls = []
#解析五大板块对应详情页的URL
#实例化一个浏览器对象
def __init__(self):
self.bro = webdriver.Chrome(executable_path='chromedriver.exe')
def parse(self, response):
li_list = response.xpath('//*[@id="index2016_wrap"]/div/div/div/div/div/div/ul')
alist =
for index in alist:
model_url = li_list.xpath('./a/@href').extract.first()
self.models_urls.append(model_url)
#依次对每一个板块对应的页面进行请求
for url in self.model_url:#对每一个板块的url进行请求发送
yield scrapy.Request(url,callback=self.models_urls)
#每一个板块对应的新闻标题相关的内容是动态加载出来的。
def parse_model(self,response):#解析每一个板块页面对应新闻的标题和新闻详情页的url
div_list = response.xpath('/html/body/div/div/div/div/div/div/ul/li/div/div')
for div in div_list:
title = div.xpath('./div/div/h3/a/text()').extract_first()
new_detail_url=div.xpath('./div/div/h3/a/@href').extract_first()
item = WangyiproItem()
item['title']=title
#对新闻详情页发起请求
yield scrapy.Request(url=new_detail_url,callback=self.parse_detail,meta={'item':item})
def parse_detail(self,response):
content = response.xpath('//*[@id="content"]/div//text()').extract()
content=''.join(content)
item = response.meta['item']
item['content']=content
yield item
def close(self,spider):
self.bro.quit()
报错的内容为:
selenium.common.exceptions.WebDriverException: Message: 'chromedriver.exe' executable needs to be in PATH. Please see https://sites.google.com/a/chromium.org/chromedriver/home
self.bro = webdriver.Chrome(executable_path='chromedriver.exe')
path换成完整的绝对路径试试 直接设置系统环境变量不就好了?每次都要在代码中加路径不麻烦么。
页:
[1]