|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 java2python 于 2020-6-15 17:45 编辑
灌水回帖太多,找不到原帖,也不知道他的标题是18+,还是15+,选择顶级(18+)。。。
界面如下(操作:双击页码,右边listbox表示该页的作品,双击作品,右边listbox表示有多少图片,点击开始就下载选中的图片):
代码(注意:请先在列表中选中作品和图片范围,再点击“开始”;未选中任何条目就点击“开始”会报错):
- import requests
- import time
- import re
- import os
- import tkinter as tk
- from tkinter import *
- import threading
class App:
    """Tkinter front end for browsing and downloading albums from mzitu.com.

    The window holds three list boxes (page numbers, albums on the chosen
    page, picture ranges of the chosen album), a text area for progress
    messages and a start button. Shared state lives in class attributes so
    the module-level helper functions can read and update it through ``App``.
    """

    base_url = 'https://www.mzitu.com/'
    # Request headers sent with every HTTP request made through this class.
    headers = {
        'Referer': 'https://www.mzitu.com/',
        'Sec-Fetch-Dest': 'image',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    # [title, url] pairs of the albums on the currently listed page.
    list_meizi_url = None
    # Number of pictures in the currently selected album.
    photo_sum = 0
    # Index into list_meizi_url of the currently selected album.
    sel_meizi_no = 0
    # Working directory at class-definition time; album folders go below it.
    os_path = os.getcwd()

    def __init__(self, top):
        """Build all widgets on *top* and keep references to the ones used later.

        Args:
            top: The Tk root window that hosts the interface.
        """
        top.title('Heavy task')
        top.geometry('1030x800')
        top.resizable(False, False)
        top.config(bg='#535353')

        # Page-number list with its scrollbar.
        sb = Scrollbar(top)
        sb.grid(row=0, column=3, sticky=N + S)
        page_list = Listbox(top,
                            yscrollcommand=sb.set,
                            height=30, bg='black', fg='white',
                            highlightcolor='black', font=('微软雅黑'))
        page_list.grid(row=0, column=0, columnspan=2)
        # Double-clicking a page number loads that page's albums.
        page_list.bind("<Double-Button-1>", self.sel_page)
        sb.config(command=page_list.yview)

        # Album list for the selected page.
        meizi_list = Listbox(top,
                             height=25, bg='black', fg='white',
                             highlightcolor='black', font=('微软雅黑'))
        meizi_list.place(x=200, y=0, width=500)
        # Double-clicking an album loads its picture ranges.
        meizi_list.bind("<Double-Button-1>", self.sel_meizi)

        # Picture-range list for the selected album.
        photo_list = Listbox(top,
                             height=25, bg='black', fg='white',
                             highlightcolor='black', font=('微软雅黑'))
        photo_list.place(x=700, y=0, width=200)
        photo_list.bind("<Double-Button-1>", self.sel_photo)

        # Text area that receives the progress messages.
        text_process = Text(top, width=120, height=18, bg='black', fg='white')
        text_process.place(x=200, y=555)

        # Start button: runs the download in a background thread.
        button_start = Button(top, text='开始', fg='#F5F5F5', bg='#7A7A7A',
                              command=self.thread_process_show_start,
                              height=1, width=15, relief=GROOVE, bd=2,
                              activebackground='#F5F5F5',
                              activeforeground='#535353')
        button_start.place(x=900, y=100)

        self.page_list = page_list
        self.meizi_list = meizi_list
        self.photo_list = photo_list
        self.text_process = text_process

    def heavy(self):
        """Download the selected picture range of the selected album.

        Clears the message area, then creates (if needed) and enters a folder
        named after the album below ``App.os_path`` and calls
        ``save_original``. If no album or picture range has been selected, a
        message is shown and nothing is downloaded.
        """
        # "1.0" is the very first character of a Text widget; "1.1" would
        # leave the first character of the old output behind.
        self.text_process.delete("1.0", 'end')

        albums = App.list_meizi_url
        selection = self.photo_list.curselection()
        if not albums or not selection or App.sel_meizi_no >= len(albums):
            App.print_msg('| --- 请先选择作品和图片范围,再点击开始')
            return

        title, album_url = albums[App.sel_meizi_no]
        try:
            # Always start from the original directory so album folders end
            # up side by side no matter where a previous run left the process.
            os.chdir(App.os_path)
            os.makedirs(title, exist_ok=True)
            os.chdir(title)
        except OSError as exc:
            App.print_msg(f'| --- 无法创建目录 {title}: {exc}')
            return
        save_original(album_url, selection[0])

    def thread_process_show_start(self):
        """Run ``heavy`` in a daemon thread so the button callback returns at once."""
        worker = threading.Thread(target=self.heavy, daemon=True)
        worker.start()

    def sel_page(self, no):
        """Load the albums of the double-clicked page number.

        Args:
            no: The Tk event object passed by the binding (unused).
        """
        selection = self.page_list.curselection()
        if selection:
            # List indices are 0-based while page numbers start at 1.
            access_page(App.headers, None, selection[0] + 1)

    def sel_meizi(self, no):
        """Load the picture ranges of the double-clicked album.

        Args:
            no: The Tk event object passed by the binding (unused).
        """
        selection = self.meizi_list.curselection()
        if selection and App.list_meizi_url:
            access_meizi(App.list_meizi_url[selection[0]][1], None)

    def sel_photo(self, no):
        """Handle a double-click on a picture range; intentionally does nothing."""
        pass

    @staticmethod
    def print_msg(msg, end=None):
        """Print *msg* and, for complete lines, append it to the message area.

        Args:
            msg: Text to output.
            end: When ``None`` the message is printed as a full line and also
                appended to the text widget of the module-level ``app``.
                Any other value prints *msg* to stdout only, without a
                trailing newline; the value itself is not used.
        """
        if end is not None:
            print(msg, end='')
            return
        print(msg)
        # The module-level ``app`` may not exist yet (e.g. during start-up).
        instance = globals().get('app')
        if instance is not None:
            instance.text_process.insert('end', msg + '\n')
def __ip(url, headers):
    """Scrape *url* and return the listed HTTPS high-anonymity proxies.

    Args:
        url: Address of the proxy listing page.
        headers: Request headers to send.

    Returns:
        A list of ``"ip:port"`` strings, one for each table row whose
        anonymity cell is '高匿' and whose protocol cell is 'HTTPS'.
    """
    page = requests.get(url, headers=headers).text
    addresses = re.findall(r'<td>(\d+\.\d+\.\d+\.\d+)</td>', page)
    ports = re.findall(r'<td>(\d+)</td>', page)
    kinds = re.findall(r'<td class="country">(.+)</td>\s*<td>(\w+)</td>', page)
    # The three result lists are read in parallel by row index.
    return [
        address + ':' + ports[row]
        for row, address in enumerate(addresses)
        if kinds[row][0] == '高匿' and kinds[row][1] == 'HTTPS'
    ]
def _https():
    """Yield a ``proxies`` mapping for every scraped proxy that answers a test request.

    Each candidate returned by ``__ip`` is probed with a request to
    httpbin.org; candidates that fail are reported through
    ``App.print_msg`` and skipped.

    Yields:
        ``{'https': 'http://<ip>:<port>'}`` for each working proxy.
    """
    # There is no module-level ``headers``; the shared headers live on App.
    ip_list = __ip('https://www.xicidaili.com/nn/', headers=App.headers)
    for count, address in enumerate(ip_list, start=1):
        proxies = {'https': f'http://{address}'}
        try:
            # A timeout keeps a dead proxy from blocking the probe forever.
            requests.get('https://httpbin.org/get', headers=App.headers,
                         proxies=proxies, timeout=10)
        except requests.RequestException:
            App.print_msg(f'| --- 获取第{count}个IP({address})失效...... \n| --- 继续尝试获取有效IP......')
            continue
        yield proxies
def access(url, headers, ip=None):
    """Fetch the index page and fill the page list box with all page numbers.

    Args:
        url: Address of the index page.
        headers: Request headers to send.
        ip: Optional ``proxies`` mapping handed to ``requests.get``.
    """
    html = requests.get(url, headers=headers, proxies=ip).text
    # The last pagination link on the page carries the highest page number.
    last_page = int(re.findall(r'href=\'.+/page/(.+)/\'', html)[-1])
    for number in range(1, last_page + 1):
        app.page_list.insert(END, number)
-
def access_page(headers, ip, ask):
    """Load page number *ask* and show its albums in the album list box.

    Replaces ``App.list_meizi_url`` with the ``[title, url]`` pairs found on
    the page and refills ``app.meizi_list`` with the titles.

    Args:
        headers: Request headers to send.
        ip: Optional ``proxies`` mapping handed to ``requests.get``.
        ask: 1-based page number to load.
    """
    url = f'https://www.mzitu.com/page/{ask}/'
    html = requests.get(url, headers=headers, proxies=ip).text
    # The current-page marker was previously parsed with ``[0]`` but never
    # used; dropping it removes a crash when the marker is absent.
    albums = [[title, link] for title, link in photo_url(html)]
    app.meizi_list.delete(0, END)
    App.list_meizi_url = albums
    for title, _link in albums:
        app.meizi_list.insert(END, title)
def photo_url(html):
    """Extract album titles and album links from a listing page.

    Args:
        html: HTML text of a listing page.

    Returns:
        An iterator of ``(title, url)`` tuples, pairing the n-th title found
        with the n-th link found.
    """
    # Bounded/non-greedy captures keep each match inside a single item even
    # when several <li> entries (or extra quoted attributes) share one line.
    photo_urls = re.findall(r'<li><a href="([^"]+)" .+?<img', html)
    titles = re.findall(r'data-original=\'.+? alt=\'([^\']+)\' w', html)
    return zip(titles, photo_urls)
def access_meizi(meizi_url, ip):
    """Load an album page and list its pictures in ranges of nine.

    Stores the selected album index in ``App.sel_meizi_no`` and the picture
    count in ``App.photo_sum``, then refills ``app.photo_list`` with one
    ``"first-last"`` entry per range. Entry *k* covers pictures ``k*9+1`` to
    ``k*9+9`` (capped at the picture count), which is the range
    ``save_original`` downloads for index *k*.

    Args:
        meizi_url: Address of the album's first page.
        ip: Optional ``proxies`` mapping handed to ``requests.get``.
    """
    selection = app.meizi_list.curselection()
    if not selection:
        return
    html = requests.get(meizi_url, headers=App.headers, proxies=ip).text
    # Escape the URL so its dots are matched literally; the highest numbered
    # sub-page link is taken as the number of pictures.
    numbers = re.findall(re.escape(meizi_url) + r'/(\d+)', html)
    page = max(map(int, numbers), default=0)
    App.sel_meizi_no = selection[0]
    App.photo_sum = page
    app.photo_list.delete(0, END)
    for first in range(1, page + 1, 9):
        last = min(first + 8, page)
        app.photo_list.insert(END, f'{first}-{last}')
-
def save_original(photo_url, photo_no):
    """Download one range of up to nine pictures from an album.

    Args:
        photo_url: Address of the album's first page; picture *i* is looked
            up at ``photo_url + '/' + str(i)``.
        photo_no: 0-based index of the range; range *k* covers pictures
            ``k*9+1`` to ``k*9+9``, capped at ``App.photo_sum``.
    """
    first = photo_no * 9 + 1
    last = min(photo_no * 9 + 9, App.photo_sum)
    for i in range(first, last + 1):
        rep = requests.get(f'{photo_url}/{i}', headers=App.headers, proxies=None).text
        counters = re.findall(r'><span>(\d+)</span></a>', rep)
        photo_urls = re.findall(r'<img class=.+ src="(.+\.jpg)" alt=', rep)
        # A page without the pager or without the image means the album is
        # gone; report it instead of failing on an empty match list.
        if not counters or not photo_urls:
            App.print_msg('| ---此图片专辑消失了,跳过此专辑下载...')
            break
        save_photo(photo_urls[0], App.list_meizi_url[App.sel_meizi_no][0],
                   App.headers, i, int(counters[-1]), None)
def save_photo(url, title, headers, count, page, ip=None):
    """Download one picture to ``<title><count>.jpg`` and report progress.

    Args:
        url: Address of the image file.
        title: Album title, used as the file-name prefix.
        headers: Request headers to send.
        count: Number of this picture within the album.
        page: Total number of pictures reported by the album page.
        ip: Optional ``proxies`` mapping handed to ``requests.get``.
    """
    chunk_size = 8192
    # Request first so a failed request does not leave an empty file behind.
    response = requests.get(url, headers=headers, stream=True, proxies=ip)
    try:
        # The header may be absent; 0 then means "size unknown".
        content_size = int(response.headers.get('content-length', 0))
        remaining = max(page - count, 0)
        App.print_msg('[文件大小]{:.2f}KB 正在下载第 {} 张 本专辑还剩 {} 张'.format(content_size / 1024, count, remaining))
        start = time.time()
        size = 0
        with open(title + f'{count}.jpg', 'wb') as f:
            for data in response.iter_content(chunk_size=chunk_size):
                f.write(data)
                size += len(data)
                if content_size > 0:
                    # Progress can only be shown when the total size is known.
                    App.print_msg('\r' + '| --- 下载进度 %s%.2f%%' % ('>' * int(size * 50 / content_size), float(size / content_size * 100)),
                                  end='')
    finally:
        response.close()
    App.print_msg(' 下载完成!用时%.2f秒' % (time.time() - start))
# Create the top-level root window and pass it to App as its constructor argument.
top = tk.Tk()
app = App(top)
# Fetch the index page once so the page list is filled before the event loop starts.
access(url=App.base_url, headers=App.headers, ip=None)
# Enter the Tk main event loop.
top.mainloop()
复制代码 |
评分
-
查看全部评分
|