https://www.vmgirls.com/
匹配 10 个项目，下载时自动创建目录保存图片，并过滤已下载过的项目。
还有很多地方可以改进，比如用 selenium 点击“加载更多”来下载更多项目，有兴趣的坛友可以自行添加。
已有 1 人购买 本主题需向作者支付 2 鱼币 才能浏览 购买主题
匹配 10 个项目，下载时自动创建目录保存图片，并过滤已下载过的项目。
还有很多地方可以改进，比如用 selenium 点击“加载更多”来下载更多项目，有兴趣的坛友可以自行添加。
# -*- coding: utf-8 -*-
# @Time : 2021/6/24 16:16
# @Author :
# @File : vmgirls.py
# @Software : PyCharm
import requests
import time
import re
import os
from tkinter import *
from tqdm import trange
def requestURL(url, timeout=10):
    """Fetch ``url`` with a GET request using fixed browser-like headers.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up
            and raises. Without a timeout ``requests`` waits indefinitely,
            so one stalled connection would hang the whole script.

    Returns:
        The ``requests.Response`` object; callers read ``.text`` for pages
        and ``.content`` for image bytes.
    """
    headers = {
        'referer': 'https://www.vmgirls.com/',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
    }
    return requests.get(url, headers=headers, timeout=timeout)
def getTitleImgUrl(html):
    """Extract the album title and the image URLs from an album page.

    Args:
        html: HTML text of an album page.

    Returns:
        A two-item list ``[title, img_url_list]``. ``title`` is the text of
        the ``<title>`` tag without its " 丨 唯美女生" suffix;
        ``img_url_list`` holds every matched image link with ``http://``
        prepended.

    Raises:
        IndexError: If the page contains no matching ``<title>`` tag.
    """
    title_pattern = re.compile(r'<title>(.*?) 丨 唯美女生</title>', re.S)
    url_pattern = re.compile(r'<a href="//(.*?)" alt=".*?" title=".*?">', re.S)

    title_match = title_pattern.search(html)
    if title_match is None:
        # Same exception type that indexing an empty findall() result raised.
        raise IndexError("no '<title>... 丨 唯美女生</title>' tag found in page")
    title = title_match.group(1)

    # The pattern captures the part after the leading "//" of each
    # protocol-relative link, so a scheme has to be put back in front.
    img_url_list = ["http://" + link for link in url_pattern.findall(html)]
    return [title, img_url_list]
def mkdir(directory_name):
    """Ensure the directory ``./vmgirls/<directory_name>`` exists.

    ``os.makedirs`` creates the intermediate ``./vmgirls`` directory as well,
    and ``exist_ok=True`` makes the call a no-op when the directory is
    already there, so no separate existence checks are needed.

    Args:
        directory_name: Name of the album directory below ``./vmgirls``.

    Returns:
        None. Creation errors are printed instead of raised, so the caller
        continues on a best-effort basis.
    """
    try:
        os.makedirs(f'./vmgirls/{directory_name}', exist_ok=True)
    except OSError as e:
        print(e)
def save_img(url):
    """Download every image of one album page into ``./vmgirls/<title>/``.

    The page is fetched and parsed for its title and image links; the album
    directory is created and each image is saved as ``<title><index>.jpg``.
    A failure on a single image (request error, HTTP error status, or file
    error) is printed and the loop moves on to the next image.

    Args:
        url: Address of the album page.
    """
    page_html = requestURL(url).text
    title, img_urls = getTitleImgUrl(page_html)

    # mkdir() prints its own errors, so no extra try/except is needed here.
    mkdir(title)
    album_dir = os.getcwd() + '/vmgirls/' + title + '/'

    for index, img_url in enumerate(img_urls):
        file_name = title + str(index) + ".jpg"
        try:
            res = requestURL(img_url)
            # Do not write an HTTP error page to disk under a .jpg name.
            res.raise_for_status()
            with open(album_dir + file_name, 'wb') as f:
                f.write(res.content)
            print("保存成功:" + file_name)
        except (OSError, requests.RequestException) as e:
            print(e)
def getFlist():
    """Return the names of the directories already present under ``vmgirls``.

    ``os.walk`` visits the ``vmgirls`` directory itself and every directory
    below it, so the result contains ``'vmgirls'`` plus the base name of each
    nested directory. If ``vmgirls`` does not exist the walk yields nothing
    and an empty list is returned.

    Returns:
        A list of directory base names.
    """
    # The original local was called ``list`` and shadowed the builtin.
    return [os.path.basename(root) for root, _dirs, _files in os.walk('vmgirls')]
def main():
    """Download every album listed on the home page that is not yet on disk.

    The home page is fetched and scanned for album links, each giving an
    ``(href, title)`` pair. An album whose title already appears among the
    directory names returned by ``getFlist()`` is skipped; every other album
    is handed to ``save_img``. Progress is shown with ``trange``.
    """
    from urllib.parse import urljoin

    url = "https://www.vmgirls.com/"
    res = requestURL(url).text
    url_pattern = re.compile(r'<a href=(.*?) title="(.*?)" class="list-title text-md h-2x">.*?</a>', re.S)
    url_list = url_pattern.findall(res)

    # A set gives constant-time membership tests in the loop below.
    downloaded = set(getFlist())
    for i in trange(len(url_list)):
        href, dir_name = url_list[i]
        if dir_name in downloaded:
            continue
        # The pattern captures the raw attribute value, so drop any
        # surrounding quotes; urljoin also copes with hrefs that start with
        # "/" or are already absolute.
        save_img(urljoin(url, href.strip('"\'')))
# Run the scraper only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
页:
[1]