|
发表于 2020-9-28 21:39:34
|
显示全部楼层
本帖最后由 Stubborn 于 2020-9-28 21:41 编辑
可以用scrapy自带的图片管道哦
import re
from urllib.parse import urlparse

import scrapy
from scrapy.exceptions import DropItem
from scrapy.pipelines.images import ImagesPipeline


class MzituScrapyPipeline(ImagesPipeline):
    """Image pipeline that stores each download under ``full/<name>/``.

    The item is accessed by key and is expected to provide ``name`` (used as
    the folder name), ``url`` (sent as the Referer of the image request) and
    ``image_urls`` (a single image URL or a list/tuple of them).
    """

    # Characters removed from path components (the set Windows rejects in
    # file names). Compiled once instead of importing ``re`` on every call.
    _ILLEGAL_CHARS = re.compile(r'[?\\*|"<>:/]')

    def strip(self, s):
        """Return ``str(s)`` with file-name-unsafe characters removed."""
        return self._ILLEGAL_CHARS.sub("", str(s))

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the relative storage path ``full/<name>/<image file>``.

        Args:
            request: The image request; its ``meta["item"]`` is used when
                present, as set by ``get_media_requests``.
            response: Unused; kept for interface compatibility.
            info: Unused; kept for interface compatibility.
            item: The item being processed, as passed by newer Scrapy
                releases. Used only when the request carries no item in meta.
        """
        source = request.meta.get("item")
        if source is None:
            source = item
        folder = self.strip(source["name"])
        # Use only the URL path so a query string or fragment does not leak
        # into the file name, then sanitise it like the folder name.
        image = self.strip(urlparse(request.url).path.split("/")[-1])
        return "full/{0}/{1}".format(folder, image)

    def get_media_requests(self, item, info):
        """Yield one download request per image URL of ``item``.

        The page URL is sent as the ``Referer`` header. It is also kept in
        ``meta["referer"]`` so that any code reading it from meta still works.
        """
        referer = item["url"]
        urls = item["image_urls"]
        if isinstance(urls, str):
            # A single URL string is accepted as well as a list of URLs.
            urls = [urls]
        for url in urls:
            yield scrapy.Request(
                url=url,
                meta={"item": item, "referer": referer},
                headers={"Referer": referer},
            )

    def item_completed(self, results, item, info):
        """Return ``item`` if at least one image downloaded successfully.

        Args:
            results: Iterable of ``(success, detail)`` pairs, one per request.

        Raises:
            DropItem: If no entry in ``results`` is marked successful.
        """
        if not any(ok for ok, _ in results):
            raise DropItem("Item contains no images")
        return item
复制代码 |
|