|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
kongminhaodeMacBook-Pro-2:jd komgminhao$ scrapy crawl jd
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/2.7/bin/scrapy", line 11, in <module>
sys.exit(execute())
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/cmdline.py", line 142, in execute
cmd.crawler_process = CrawlerProcess(settings)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/crawler.py", line 209, in __init__
super(CrawlerProcess, self).__init__(settings)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/crawler.py", line 115, in __init__
self.spider_loader = _get_spider_loader(settings)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/crawler.py", line 296, in _get_spider_loader
return loader_cls.from_settings(settings.frozencopy())
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/spiderloader.py", line 30, in from_settings
return cls(settings)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/spiderloader.py", line 21, in __init__
for module in walk_modules(name):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/utils/misc.py", line 71, in walk_modules
submod = import_module(fullpath)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/importlib/__init__.py", line 37, in import_module
__import__(name)
File "/Users/komgminhao/Desktop/code/jd/jd/spiders/jd.py", line 2, in <module>
from jd.items import JdItem
ImportError: No module named items
文件items.py :
import scrapy


class JdItem(scrapy.Item):
    """Container for one batch of image URLs scraped from jandan.net/ooxx."""

    # URLs gathered by the spider; read by the images pipeline.
    ooxx_image_url = scrapy.Field()
    # Filled in by ImagesPipeline with per-download result metadata.
    images = scrapy.Field()
文件 pipeline.py
# BUG FIX: the module is scrapy.pipelines.images (plural) — there is no
# scrapy.pipeline.images, so the original import raised ImportError.
from scrapy.pipelines.images import ImagesPipeline
from scrapy.exceptions import DropItem
from scrapy import Request


class JdPipeline(ImagesPipeline):
    """Downloads every image listed in item['ooxx_image_url']."""

    def get_media_requests(self, item, info):
        # Schedule one download request per collected image URL.
        for image_url in item['ooxx_image_url']:
            yield Request(image_url)

    def item_completed(self, result, item, info):
        # result is a list of (success, info_or_failure) tuples; keep the
        # stored paths of the downloads that succeeded.
        image_paths = [x['path'] for ok, x in result if ok]
        if not image_paths:
            raise DropItem('图片未下载好 %s' % image_paths)
        # BUG FIX: the item must be returned, otherwise every later pipeline
        # stage receives None even when all images downloaded fine.
        return item
文件jd.py:
# BUG FIX for the posted traceback: under Python 2, jd/spiders/jd.py shadows
# the top-level `jd` package during implicit relative imports, so
# `from jd.items import JdItem` resolves against this very module and fails
# with "ImportError: No module named items".  Forcing absolute imports
# (PEP 328) makes `jd.items` resolve to the project package again.
from __future__ import absolute_import

import scrapy
from jd.items import JdItem


class jd(scrapy.Spider):
    """Spider collecting image URLs from jandan.net/ooxx."""

    name = 'jd'
    start_urls = ['http://jandan.net/ooxx']

    def parse(self, response):
        """Yield one JdItem holding all image src attributes on the page."""
        # response.xpath is the built-in shortcut for Selector(response).xpath;
        # selecting .../img/@src in one step replaces the two-step lookup.
        img_urls = response.xpath(
            "//*[@id]/div[1]/div/div[2]/p/img/@src").extract()
        item = JdItem()
        item['ooxx_image_url'] = img_urls
        yield item
文件settings.py:
# Scrapy project settings for the jandan.net image crawler.
BOT_NAME = 'jd'

SPIDER_MODULES = ['jd.spiders']
NEWSPIDER_MODULE = 'jd.spiders'

# NOTE(review): this dotted path expects a module jd/pipelines.py, but the
# post labels the file "pipeline.py" (singular) — one of the two names must
# change or the pipeline will never be loaded.  Verify against the project tree.
ITEM_PIPELINES = {
    'jd.pipelines.JdPipeline': 1,
}

# Downloaded images land inside the spiders package directory — unusual, but
# kept as-is; a dedicated images/ directory would be more conventional.
IMAGES_STORE = '/Users/komgminhao/Desktop/code/jd/jd/spiders'
# Skip re-downloading images fetched within the last 90 days.
IMAGES_EXPIRES = 90
|
|