|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 ~风介~ 于 2015-11-23 19:50 编辑
- import http.cookiejar
- import urllib.request
- import urllib.parse
- import random
- from PIL import Image
- import gzip,re,os
# (category-slug, listing-page-count) pairs to crawl on www.mzitu.com.
data_ = [('xinggan',57),('japan',24),('taiwan',7),('mm',24)]
# Base URL of the target site; category/page paths are appended to it.
url_host = 'http://www.mzitu.com/'
# Local root directory where per-category image folders are created.
cur_file_dir = r'E:\Python34\PyTestFishc\mzitu'
# Browser-like request headers so the site serves regular pages
# (note Accept-Encoding allows gzip — responses may need ungzip()).
header_data = {
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding':'gzip, deflate, sdch',
    'Accept-Language':'zh-CN,zh;q=0.8',
    'Cache-Control':'no-cache',
    'Connection':'keep-alive',
    'Host':'www.mzitu.com',
    'Pragma':'no-cache',
    'Upgrade-Insecure-Requests':'1',
    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'
}
def ungzip(data):
    """Decompress *data* if it is gzip-compressed; return it unchanged otherwise.

    Args:
        data: raw HTTP response bytes, possibly gzip-compressed.

    Returns:
        bytes: the decompressed payload, or the original bytes when the
        input was not gzip data.
    """
    try:
        # Fails fast on the gzip magic-number check when data is plain.
        data = gzip.decompress(data)
        print('解压完毕!')
    except (OSError, EOFError):
        # Narrowed from a bare `except:` — gzip.decompress raises OSError
        # (incl. BadGzipFile) on non-gzip input and EOFError on truncated
        # input; anything else (e.g. TypeError) should propagate.
        print('未经压缩, 无需解压')
    return data
def getOpener(_head):
    """Build a urllib opener with cookie support and the given headers.

    Args:
        _head: mapping of header name -> header value to send on every request.

    Returns:
        urllib.request.OpenerDirector: opener whose addheaders are exactly
        the supplied headers (the urllib defaults are replaced).
    """
    cookie_jar = http.cookiejar.CookieJar()
    cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
    opener = urllib.request.build_opener(cookie_handler)
    # Replace the default header list with the caller-supplied pairs.
    opener.addheaders = list(_head.items())
    return opener
def Get_list(oop, listaddr):
    """Extract all .jpg image URLs from the page HTML *oop*.

    Args:
        oop: page HTML as a string.
        listaddr: legacy output parameter, kept for interface compatibility
            but ignored — the original code rebound it to ``findall``'s
            result immediately, so it never received the matches.

    Returns:
        list[str]: every URL captured from data-original='....jpg' attributes,
        in document order.
    """
    pattern = re.compile(r"data-original='(.+?\.jpg)'")
    return pattern.findall(oop)
def Get_mzitu_url_list(listaddr, request_header = header_data, url = url_host):
    """Fetch *url* and return the list of .jpg URLs found on that page.

    Args:
        listaddr: passed through to Get_list (which ignores it).
        request_header: headers for the request; defaults to the module's
            browser-like header_data.
        url: listing page to fetch; defaults to the site root.

    Returns:
        list[str]: image URLs extracted from the fetched page.
    """
    response = getOpener(request_header).open(url)
    # Response may be gzip-compressed (Accept-Encoding allows it).
    html = ungzip(response.read()).decode('utf-8')
    return Get_list(html, listaddr)
-
def Save_img(url_list,request_header = header_data,Current_file_path = r'E:\Python34\PyTestFishc\mzitu'):
    """Download every image URL in *url_list* into the current directory.

    Files that already exist are skipped. The image is fully downloaded
    *before* the output file is created, so a failed request no longer
    leaves a truncated/empty file behind (the original opened the file
    first and downloaded inside the `with` block).

    Args:
        url_list: iterable of image URLs; the file name is the last path
            segment of each URL.
        request_header: headers used for the download requests.
        Current_file_path: unused legacy parameter, kept for compatibility.
    """
    # One opener reused for every image instead of rebuilding per URL.
    opener = getOpener(request_header)
    for each in url_list:
        file_name = each.split('/')[-1]
        # O(1) existence check instead of rescanning os.listdir() each time.
        if os.path.exists(file_name):
            continue
        img = opener.open(each).read()  # download first, then create the file
        with open(file_name,'wb') as f:
            f.write(img)
        print('成功保存MZi图片:%s ...' % file_name)
def Download_Map():
    """Crawl every category in data_ and download all its listing pages.

    For each (category, page_count) pair: ensure a per-category folder
    exists under cur_file_dir, chdir into it, then fetch each listing
    page and save the images it references.
    """
    print('程序运行')
    for category, page_count in data_:
        # Use the module-level constant instead of re-hardcoding the path
        # (the original repeated the literal r'E:\Python34\PyTestFishc\mzitu').
        os.chdir(cur_file_dir)
        if not os.path.isdir(category):
            os.mkdir(category)
            print('成功创建文件夹 %s:' % category)
        os.chdir(category)
        for page in range(1, page_count + 1):
            page_url = url_host + category + '/page/' + str(page)
            print(page_url)
            # A fresh list per page replaces the reused-and-cleared
            # module-scope list_data of the original (same behavior,
            # since Get_list ignores/replaces it anyway).
            Save_img(Get_mzitu_url_list([], url=page_url))
-
# Run the full crawl only when executed as a script, not on import.
if __name__ == '__main__':
    Download_Map()
-
复制代码 |
|