|
发表于 2023-9-27 23:32:23
|
显示全部楼层
import os
import random
import string
from urllib.parse import urljoin, urlsplit

import requests
import urllib3
from bs4 import BeautifulSoup
- """
- 主要是批量获取konachan.net的图(斯哈斯哈)
- 没有做多线程
- """
- headers = {
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
- }
- # 忽略 InsecureRequestWarning 警告
- urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def getContent(url, timeout=30):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Page URL to download.
        timeout: Seconds before the request is aborted. New,
            backward-compatible parameter — the original call had no
            timeout and could block forever on a stalled connection.

    Returns:
        The response body as a str.
    """
    req = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 so pages without a charset header decode consistently.
    req.encoding = 'utf-8'
    return req.text
def getAllImageContentUrls(html):
    """Extract the detail-page URL of every thumbnail on a listing page.

    Args:
        html: HTML text of a konachan.net listing page.

    Returns:
        List of absolute post-detail URLs, one per ``<a class="thumb">``.

    Fixes vs. original: the hrefs start with "/", so naive concatenation
    with "https://konachan.net/" produced a double slash; anchors without
    an href raised KeyError instead of being skipped.
    """
    soup = BeautifulSoup(html, 'html.parser')
    ImageContentUrls = []
    for link in soup.find_all('a', class_='thumb'):
        href_value = link.get('href')
        if href_value:  # skip malformed anchors rather than crashing
            # urljoin collapses the leading "/" correctly.
            ImageContentUrls.append(urljoin("https://konachan.net/", href_value))
    return ImageContentUrls
def getImageUrl(html):
    """Return the full-size image URL from a post detail page.

    Args:
        html: HTML text of a post detail page.

    Returns:
        The href of the ``<link rel="image_src">`` tag, or None when the
        page has no such tag (the original raised AttributeError there).
    """
    soup = BeautifulSoup(html, 'html.parser')
    tag = soup.find('link', rel='image_src')
    return tag.get('href') if tag is not None else None
def downloadImage(url, path):
    """Download *url* and save it under *path* with a random 5-char name.

    Args:
        url: Direct image URL.
        path: Destination directory (created if missing).

    Nothing is saved unless the server answers 200.

    Fixes vs. original: extension is now taken from the URL *path* only
    (splitext on the full URL would absorb any query string); the save
    path is built with os.path.join instead of string concatenation; the
    redundant file.close() inside the with-block is gone; the request
    now has a timeout.
    """
    response = requests.get(url, headers=headers, timeout=60)
    if response.status_code == 200:
        characters = string.digits + string.ascii_letters
        random_code = ''.join(random.choice(characters) for _ in range(5))
        # urlsplit(url).path strips "?query" so it can't leak into the extension.
        file_extension = os.path.splitext(urlsplit(url).path)[1]
        save_path = os.path.join(path, random_code + file_extension)
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        with open(save_path, 'wb') as file:
            file.write(response.content)
        print("保存地址:{}".format(save_path))
def getImageUrls(ImageContentUrls):
    """Resolve each post-detail URL to its full-size image URL.

    Args:
        ImageContentUrls: Iterable of post-detail page URLs.

    Returns:
        List of image URLs. Pages with no discoverable image are skipped
        so a None/empty URL never propagates into the download step.
    """
    imageUrls = []
    for contentUrl in ImageContentUrls:
        imageUrl = getImageUrl(getContent(contentUrl))
        if imageUrl:  # guard against pages missing <link rel="image_src">
            imageUrls.append(imageUrl)
    return imageUrls
if __name__ == '__main__':
    # Crawl listing pages startPage..endPage, downloading every image found.
    print("开始")
    count = 0
    # first page to crawl
    startPage = 10
    # last page to crawl (inclusive)
    endPage = 11
    # Save directory. The original literal "D:\\seseimage\" was a
    # SyntaxError: the trailing single backslash escaped the closing quote.
    path = "D:\\seseimage\\"
    for i in range(startPage, endPage + 1):
        url = "https:" + "//konachan.net/post?page=" + str(i)
        imageContentUrls = getAllImageContentUrls(getContent(url))
        imageUrls = getImageUrls(imageContentUrls)
        for j in imageUrls:
            downloadImage(j, path)
            count = count + 1
    print("下载完成,共{}张".format(count))
复制代码 |
|