I'm trying to merge several of the articles from the list into one, but the code below doesn't do it. Could someone take a look? I've been racking my brain over this...
import scrapy
from lxml import etree

class FwSpider(scrapy.Spider):
    name = 'fw'
    allowed_domains = ['xiegw.cn']
    start_urls = ['http://www.xiegw.cn/shenqingshu/dxs/']

    def parse(self, response):
        item = {}
        ret = etree.HTML(response.text)
        # Site root, e.g. 'http://www.xiegw.cn', derived from the start URL
        domain = ''.join(self.start_urls).split('.cn/')[0] + '.cn'
        lst = ret.xpath("//div[@class='ls_list']//ul[@class='ls_list_ul bj_mt30']")
        for i in lst:
            urls = i.xpath("./li[@class='ls_list_li list_li ts']/div[@class='li_r li_nr']/h3/a/@href")
            urlss = urls[3:8:2]  # keep a few of the article links
            for j in urlss:
                url = domain + j
                yield scrapy.Request(
                    url=url,
                    callback=self.body_html,
                    meta={'item': item}
                )

    def body_html(self, response):
        item = response.meta['item']
        title = []
        res = etree.HTML(response.text)
        title1 = ''.join(res.xpath('//h1/text()'))
        title.append(title1)
        item['title'] = title
        print(item)
        print('\n' + '==============' + '\n')
        # yield item
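
Why nothing merges: `parse` does pass the same `item` dict to every request, but each `body_html` call rebuilds `title = []` from scratch and overwrites `item['title']`, and Scrapy fetches the pages concurrently, so no single callback can know when all the articles have been seen. One common pattern is to chain the requests: fetch one article at a time, carry the growing item plus the list of remaining URLs along in `meta`, and yield the item only after the last article. Below is a minimal sketch of that idea. It keeps the original spider name and XPaths but switches from lxml to Scrapy's built-in `response.xpath`, flattens the two XPath steps into one (which assumes the page has a single matching list), and the `pending` key in `meta` is my own name, not a Scrapy built-in.

import scrapy

class FwSpider(scrapy.Spider):
    name = 'fw'
    allowed_domains = ['xiegw.cn']
    start_urls = ['http://www.xiegw.cn/shenqingshu/dxs/']

    def parse(self, response):
        domain = ''.join(self.start_urls).split('.cn/')[0] + '.cn'
        hrefs = response.xpath(
            "//div[@class='ls_list']//ul[@class='ls_list_ul bj_mt30']"
            "/li[@class='ls_list_li list_li ts']/div[@class='li_r li_nr']/h3/a/@href"
        ).getall()
        pending = [domain + h for h in hrefs[3:8:2]]
        if pending:
            item = {'title': []}
            # Start the chain with the first article; the rest wait in 'pending'.
            yield scrapy.Request(
                pending[0],
                callback=self.body_html,
                meta={'item': item, 'pending': pending[1:]},
            )

    def body_html(self, response):
        item = response.meta['item']
        pending = response.meta['pending']
        # Append this page's title instead of overwriting item['title'].
        item['title'].append(''.join(response.xpath('//h1/text()').getall()))
        if pending:
            # More articles to merge: request the next one, carrying the same item.
            yield scrapy.Request(
                pending[0],
                callback=self.body_html,
                meta={'item': item, 'pending': pending[1:]},
            )
        else:
            # Last article fetched: yield the merged item exactly once.
            yield item

If you also want the article bodies merged, not just the titles, append them in `body_html` the same way (e.g. into an `item['content']` list) before the final yield.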