Such cute girls. Tested 10 images with no problems, but the speed is far too slow to be usable; this really needs a proxy pool and a rewrite in Scrapy.
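On the proxy-pool idea, here is a minimal sketch of what rotating proxies could look like while still using plain requests. The addresses in PROXIES and the helper get_with_proxy are made-up placeholders, not part of the script below; a Scrapy version would normally do the same thing in a downloader middleware instead.

import random
import requests

# Hypothetical proxy pool; replace with real proxy addresses or an API that returns them.
PROXIES = [
    "http://127.0.0.1:8001",
    "http://127.0.0.1:8002",
]

session = requests.session()

def get_with_proxy(url, **kwargs):
    """Fetch a URL through a randomly chosen proxy from the pool."""
    proxy = random.choice(PROXIES)
    return session.get(url, proxies={"http": proxy, "https": proxy}, **kwargs)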
One extra note. For setting the Referer, I am not sure whether updating the session headers like this:

for m in D:
    session.headers["Referer"] = data[0]

or building a fresh headers dict for each new request is the better approach; that still needs testing. Here I only crawled a single link, so I did not test it.
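For reference, the two options look like this side by side. This is only a sketch; article_url and img_url are placeholders, not values taken from the script.

import requests

session = requests.session()
article_url = "http://zhainanba.net/12345.html"  # placeholder article link
img_url = "https://ac.meijiecao.net/ac/img/znb/meizitu/example.jpg"  # placeholder image

# Option 1: mutate the session-wide headers; the Referer then sticks to every
# later request made through this session until it is changed again.
session.headers["Referer"] = article_url
resp1 = session.get(img_url)

# Option 2: pass a per-request headers dict; requests merges it with
# session.headers for this call only, so the session itself stays unchanged.
resp2 = session.get(img_url, headers={"Referer": article_url})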
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
@ version: ??
@ author: Alex
@ file: test
@datetime: 2020/4/23 - 23:30
@explain:
"""
import os
import re

import requests

session = requests.session()
domain = "http://zhainanba.net"
# These are HTTP request headers, not query parameters, so they must be sent
# via headers= (the original passed them as params=, i.e. as a query string).
headers = {
    'Accept': 'text/html, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Host': 'zhainanba.net',
    'Pragma': 'no-cache',
    'Referer': 'http://zhainanba.net/category/zhainanfuli/jinrimeizi',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}

# Collect the article links from the first 4 list pages.
data = []
for page in range(1, 5):
    url = f"http://zhainanba.net/category/zhainanfuli/jinrimeizi/page/{page}"
    resp = session.get(url, headers=headers)
    data += re.findall(r"http://zhainanba\.net/\d+\.html", resp.text)

# Alternative: a fresh headers dict for the image requests
# (left unused here; see the Referer note above).
param = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36',
}

# Only the first article is crawled for testing; loop over data to fetch them all.
# for urls in data:
resp = session.get(data[0])
# Extract the image URLs from the article page.
D = re.findall(r"https://ac\.meijiecao\.net/ac/img/znb/meizitu/.*?\.jpg", resp.text)
for m in D:
    # The image host checks the Referer, so point it at the article page.
    session.headers["Referer"] = data[0]
    resp = session.get(m)
    # e.g. .../meizitu/xxx.jpg -> directory "meizitu", file "xxx.jpg"
    *_, path_, name = m.split("/")
    file_path = os.path.abspath(path_)
    img_path = os.path.join(file_path, name)
    if not os.path.exists(file_path):
        os.mkdir(file_path)
    with open(img_path, "wb") as f:
        f.write(resp.content)