|

楼主 |
发表于 2020-6-7 14:16:10
|
显示全部楼层
#########这是dmoz_spider.py 在64讲里面,总共需要写的模块就是左边的和items,小哥应该很熟,唔该赛
import scrapy
from tutor.items import DemozItem
class DmozSpider(scrapy.Spider):
    """Spider that scrapes link text and hrefs from two tupianzj.com pages."""

    name = 'dmoz'
    allowed_domains = ['tupianzj.com']
    start_urls = [
        'https://www.tupianzj.com/chuangyi/',
        'https://www.tupianzj.com/mingxingku/',
    ]

    def parse(self, response):
        """Build one ``DemozItem`` per ``<li>`` in the listing block.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Returns:
            A list of ``DemozItem`` objects. ``title`` holds the extracted
            anchor text and ``links`` the extracted ``href`` values; both
            are lists because ``extract()`` returns every match.
        """
        # ``response.xpath`` is the shortcut for wrapping the response in a
        # Selector and querying it.
        sites = response.xpath('//div[@class="warpbox_con_bottom"]/ul/li')
        items = []
        for site in sites:
            item = DemozItem()
            # Relative XPaths: only anchors that are direct children of
            # the current <li> are considered.
            item['title'] = site.xpath('a/text()').extract()
            item['links'] = site.xpath('a/@href').extract()
            items.append(item)
        return items
##############################
#######这是items.py
import scrapy
class DemozItem(scrapy.Item):
    """Scrapy item declaring the two fields ``title`` and ``links``."""

    # Text of a scraped link.
    title = scrapy.Field()
    # Target URL(s) of a scraped link.
    links = scrapy.Field()
###########################
|
|