# Forum post residue: posted 2020-12-3 01:30:55.
import requests
import os
import re
from bs4 import BeautifulSoup
import time
def url_open(url):
    """Fetch *url* and return the links found inside its ``postlist`` element.

    A link is any ``<a href="...">`` value inside that element whose last
    character is a digit. Repeated links are returned once, in first-seen
    order.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list of href strings; empty when the page has no ``postlist``
        element or no matching anchors. The HTTP status is not checked.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    # NOTE(review): 'Mozilla /5.0' contains a space that looks like a typo;
    # left unchanged because the server may depend on the exact value.
    headers = {
        'Referer': 'https://www.mzitu.com/',
        'User-Agent': 'Mozilla /5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    post_list = BeautifulSoup(response.text, 'lxml').find(class_='postlist')
    if post_list is None:
        # find() returns None when nothing matches; say so explicitly rather
        # than relying on the regex not matching the string 'None'.
        return []
    links = re.findall(r'<a href="([^"]+\d)"', str(post_list))
    # The same href may occur more than once in the markup; keep the first
    # occurrence of each so callers do not process a target twice.
    return list(dict.fromkeys(links))
def url_open2(url):
    """Fetch *url* and return the raw ``requests`` response object.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The ``requests.Response``. The HTTP status is not checked here, so
        callers that care must inspect it themselves.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    headers = {
        'Referer': 'https://www.mzitu.com/',
        'User-Agent': 'Mozilla /5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    # The body is deliberately not decoded via ``response.text`` here: the
    # caller in this file reads ``.content``, and decoding binary data as
    # text is wasted work.
    # Without a timeout requests waits indefinitely on a stalled connection.
    return requests.get(url, headers=headers, timeout=30)
def save_jpg(jpg, x):
    """Download the image at *jpg* into the current working directory.

    The file is named ``<x>_<last path segment of jpg>``.

    Args:
        jpg: URL of the image to download.
        x: Value prefixed to the file name (the caller passes a running
            counter).
    """
    filename = str(x) + '_' + jpg.split('/')[-1]
    print('正在保存文件' + filename)
    print('=' * 50)
    # Download before opening the file: opening in 'wb' truncates/creates it
    # immediately, so a failed request would otherwise leave an empty file.
    img = url_open2(jpg).content
    with open(filename, 'wb') as f:
        f.write(img)
def find_imgs(url):
    """Walk a chain of pages starting at *url*, saving one image per page.

    On each page the ``main-image`` element is inspected: the first
    ``src="...jpg"`` value in it is saved via ``save_jpg`` and the first
    ``href`` value ending in a digit is followed as the next page.

    The walk stops when a page has no ``main-image`` element, no image, no
    next link, or when the next link points at a page already visited.

    Args:
        url: Address of the first page.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    # 'cls' only exists on Windows; use 'clear' elsewhere.
    os.system('cls' if os.name == 'nt' else 'clear')
    # Loop-invariant, so built once rather than on every iteration.
    headers = {
        'Referer': 'https://www.mzitu.com/',
        'User-Agent': 'Mozilla /5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    visited = set()
    x = 0
    # Stop on a revisit so a page linking back to an earlier one cannot
    # trap the walk in an endless cycle.
    while url not in visited:
        visited.add(url)
        # Without a timeout requests waits indefinitely on a stalled
        # connection.
        response = requests.get(url, headers=headers, timeout=30)
        container = BeautifulSoup(response.text, 'lxml').find(class_='main-image')
        if container is None:
            return
        fragment = str(container)
        images = re.findall(r'src="([^"]+\.jpg)"', fragment)
        if not images:
            # Nothing to save on this page: end the walk instead of failing
            # with IndexError on an empty match list.
            return
        next_links = re.findall(r'href="([^"]+\d)"', fragment)
        image_url = images[0]
        next_url = next_links[0] if next_links else ''
        print()
        print('处理图片:%s \n下一网址:%s' % (image_url, next_url),)
        save_jpg(image_url, x)
        x += 1
        if not next_url:
            return
        url = next_url
def url_open1(htmls):
    """Process every URL in *htmls* with ``find_imgs``, then idle.

    The collection is echoed to the console first. After the last URL has
    been handled a hint is printed and the function sleeps for 555 seconds
    before returning ``None``.

    Args:
        htmls: Iterable of page URLs.
    """
    idle_seconds = 555
    print(htmls)
    for page_url in htmls:
        find_imgs(page_url)
    print('Ctrl+C退出吧。')
    time.sleep(idle_seconds)
def download_new(folder='OOXX'):
    """Start a fresh download run inside *folder*.

    Clears the console, makes sure *folder* exists, deletes the regular
    files directly inside it, switches the working directory to it, and
    then processes the links found at the module-level ``url``.

    Args:
        folder: Directory that receives the downloads. Files directly
            inside it are removed first; subdirectories are left alone.

    Raises:
        NameError: if the module-level name ``url`` has not been defined
            (in this file it is only assigned under the ``__main__`` guard).
    """
    # 'cls' only exists on Windows; use 'clear' elsewhere.
    os.system('cls' if os.name == 'nt' else 'clear')
    # Clean up *folder* itself (the shell commands used before always
    # targeted the literal 'OOXX') and tolerate a directory that already
    # exists.
    os.makedirs(folder, exist_ok=True)
    with os.scandir(folder) as entries:
        # Collect first so nothing is deleted while the directory is still
        # being iterated.
        stale_files = [entry.path for entry in entries if entry.is_file()]
    for path in stale_files:
        os.remove(path)
    os.chdir(folder)
    print(url)
    htmls = url_open(url)
    url_open1(htmls)
if __name__ == '__main__':
    # download_new() reads this module-level name as its start address.
    url = 'http://www.mzitu.com/'
    download_new()