# -- forum-paste residue, commented out so this file is valid Python --
# 马上注册,结交更多好友,享用更多功能^_^
# 您需要 登录 才可以下载或查看,没有账号?立即注册
# 本帖最后由 孟大大 于 2022-2-13 15:42 编辑
import requests,bs4,os,time,random
from bs4 import BeautifulSoup
from retry import retry
from fake_useragent import UserAgent
# One random User-Agent string, chosen once at import time and reused for
# every request in this script.
ua = UserAgent().random
#coding:utf-8
#coding=gbk
#coding=GB2312
# NOTE(review): the three "coding" comments above are no-ops — a source
# encoding declaration only takes effect on line 1 or 2 of a file, and these
# three conflict with each other anyway. Safe to delete.
# Gallery URLs that failed to download; appended to by downimage().
errorurl = []
def get_urls():
    """Fetch the site's archive page and return a list of one-entry dicts,
    each mapping a gallery title to its gallery URL."""
    headers = {'user-agent': ua}
    html = requests.get('https://www.mzitu.com/all/', headers=headers).text
    page = BeautifulSoup(html, "html.parser")
    # Every gallery link on the archive page opens in a new tab.
    return [{link.text: link.get('href')} for link in page.find_all(target="_blank")]
def get_page(url):
    """Scrape one gallery page.

    Returns a (page_count_text, first_image_url, gallery_title) tuple. The
    span/p indices are tied to this specific site's markup — TODO confirm
    they still hold if the site layout changes.
    """
    resp = requests.get(url, headers={'user-agent': ua})
    doc = BeautifulSoup(resp.text, "html.parser")
    last_page = doc.find_all('span')[9].text
    first_img = doc.find_all('p')[0].find('a').find('img')
    return last_page, first_img.get('src'), first_img.get('alt')
def save_image(imgurl, alt, i):
    """Download one image and write it into the gallery's folder.

    imgurl: direct image URL; also sent as the Referer, apparently to satisfy
            the site's hotlink protection.
    alt:    gallery title, used as the folder name (folder must already exist
            — downimage() creates it).
    i:      1-based image index, used as the file name ("<i>.jpg").
    """
    headers = {
        'Referer': imgurl,
        'Sec-Fetch-Mode': 'no-cors',
        # BUG FIX: was 'user - agent' — spaces make it an invalid HTTP header
        # name, so no User-Agent was actually being sent for image requests.
        'user-agent': ua,
    }
    res = requests.get(imgurl, headers=headers)
    name = str(i) + '.jpg'
    with open(r'C:\Users\m\Desktop\meizitu\{}\{}'.format(alt, name), 'wb') as f:
        f.write(res.content)
# NOTE(review): this retry decorator is effectively inert — nothing in the
# body raises ZeroDivisionError, and both handlers below swallow every
# Exception anyway, so retry() never sees one.
@retry(ZeroDivisionError, tries=3, delay=2)
def downimage(urls):
    """Download every gallery in *urls* (list of {title: url} dicts from
    get_urls()).

    Creates one folder per gallery under C:\\Users\\m\\Desktop\\meizitu and
    downloads page-count images into it. Failed galleries are recorded in the
    module-level ``errorurl`` list; failed single images are skipped.
    """
    for each in urls:
        # Random delay between galleries to avoid hammering the server.
        time.sleep(random.randint(1, 5))
        url = list(each.values())[0]
        try:
            page_text, first_imgurl, alt = get_page(url)
            page = int(page_text)
            os.makedirs(r'C:\Users\m\Desktop\meizitu\{}'.format(alt))
            print(alt, page)
            for i in range(1, page + 1):
                try:
                    # Image URLs differ only in a zero-padded 2-digit suffix.
                    imgurl = first_imgurl[:-6] + '{}'.format('%02d' % i) + '.jpg'
                    save_image(imgurl, alt, i)
                # FIX: was a bare "except:", which also swallowed
                # KeyboardInterrupt/SystemExit — narrowed to Exception while
                # keeping the deliberate best-effort skip.
                except Exception:
                    print(i)
                    continue
        # FIX: same bare-except narrowing as above.
        except Exception:
            print(url)
            errorurl.append(url)
            print(len(errorurl))
            continue
if __name__=='__main__':
    # Entry point: scrape the full gallery index, then download every
    # gallery it lists.
    urls = get_urls()
    downimage(urls)