import os
import random
import string
from urllib.parse import urljoin

import requests
import urllib3
from bs4 import BeautifulSoup
# The bare triple-quoted string below is the author's informal module note.
# Translation: "Batch-downloads images from konachan.net. Not multithreaded."
"""
主要是批量获取konachan.net的图(斯哈斯哈)
没有做多线程
"""
# Browser-like User-Agent sent with every request — presumably so the site
# serves pages as it would to a normal browser (TODO confirm it is required).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
}
# Suppress urllib3's InsecureRequestWarning (the warning emitted when TLS
# certificate verification is disabled).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def getContent(url, timeout=30):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Page URL to download.
        timeout: Seconds before the request gives up. New parameter with a
            default, so existing callers are unaffected — previously the
            request had no timeout and could hang forever.

    Returns:
        The page HTML as a str.
    """
    req = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 decoding instead of trusting the server's charset header.
    req.encoding = 'utf-8'
    return req.text
def getAllImageContentUrls(html):
    """Extract the per-post detail-page URLs from a listing page.

    Args:
        html: HTML of a konachan.net listing page (e.g. ``/post?page=N``).

    Returns:
        list[str]: Absolute URL of every ``<a class="thumb">`` link found.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # urljoin avoids the doubled slash that the old
    # "https" + "://konachan.net/" + href concatenation produced whenever
    # the href already starts with "/".
    return [urljoin("https://konachan.net/", link['href'])
            for link in soup.find_all('a', class_='thumb')]
def getImageUrl(html):
    """Return the full-size image URL a post page declares in its
    ``<link rel="image_src">`` element."""
    parsed = BeautifulSoup(html, 'html.parser')
    image_src_tag = parsed.find('link', rel='image_src')
    return image_src_tag.get('href')
def downloadImage(url, path):
    """Download *url* and save it under *path* with a random 5-char filename.

    Args:
        url: Direct image URL; its extension is reused for the saved file.
        path: Destination directory. Thanks to ``os.path.join`` a trailing
            separator is now optional (the old ``path + fileName`` concat
            silently produced a wrong path without one).

    Side effects:
        Creates the directory if needed, writes the file, prints the saved
        path. Non-200 responses are skipped silently, matching the original
        best-effort behaviour.
    """
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return
    characters = string.digits + string.ascii_letters
    random_code = ''.join(random.choice(characters) for _ in range(5))
    # splitext on the URL keeps the original image extension (.jpg/.png/...).
    file_extension = os.path.splitext(url)[1]
    save_path = os.path.join(path, random_code + file_extension)
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    # The with-block closes the file; the original's explicit file.close()
    # inside the block was redundant and has been dropped.
    with open(save_path, 'wb') as file:
        file.write(response.content)
    print("保存地址:{}".format(save_path))
def getImageUrls(ImageContentUrls):
    """Resolve each post detail-page URL to its full-size image URL.

    Fetches every page in *ImageContentUrls* and collects the image URL
    extracted from each one, in the same order.
    """
    return [getImageUrl(getContent(page_url)) for page_url in ImageContentUrls]
if __name__ == '__main__':
    """important"""
    print("开始")
    count = 0
    # First listing page to scrape (inclusive).
    startPage = 10
    # Last listing page to scrape (inclusive).
    endPage = 11
    # Destination directory. The original literal "D:\\seseimage\" was a
    # SyntaxError: the trailing \" escaped the closing quote and left the
    # string unterminated. A stray trailing "|" after the final print has
    # also been removed.
    path = "D:\\seseimage\\"
    for i in range(startPage, endPage + 1):
        url = "https:" + "//konachan.net/post?page=" + str(i)
        imageContentUrls = getAllImageContentUrls(getContent(url))
        imageUrls = getImageUrls(imageContentUrls)
        for j in imageUrls:
            downloadImage(j, path)
            count = count + 1
    print("下载完成,共{}张".format(count))