| 
 | 
 
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册  
 
x
 
import urllib.request 
import urllib.error 
from http import server, client 
import os, re, math 
 
# HTTP headers attached to every request built by urlOpen; the User-Agent
# value is a desktop Chrome identification string.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/44.0.2403.157 Safari/537.36'
    ),
}
 
def urlOpen(url):
    """Fetch *url* and return the raw response body as bytes.

    The request is sent with the module-level ``header`` dict as its HTTP
    headers. Any exception raised while opening or reading the URL
    propagates to the caller.
    """
    req = urllib.request.Request(url, None, header)
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, instead of being leaked
    # until garbage collection.
    with urllib.request.urlopen(req) as response:
        return response.read()
 
def tryToGet(url, retries=5):
    """Fetch *url* via ``urlOpen``, retrying when an attempt fails.

    Args:
        url: Address to download.
        retries: Maximum number of attempts before giving up (default 5).

    Returns:
        The response body as bytes, or ``None`` if every attempt raised.
    """
    for _ in range(retries):
        try:
            return urlOpen(url)
        except Exception:
            # Deliberately best-effort: any ordinary failure just triggers
            # another attempt. Catching Exception rather than using a bare
            # ``except`` lets KeyboardInterrupt / SystemExit propagate so the
            # crawler can still be stopped by the user.
            continue

    return None
 
def getSubPage(url):
    """Download *url* and extract the sub-page paths it links to.

    The page is decoded as UTF-8 and searched for ``<li class="sN">`` items
    whose link starts with ``/tupian/``; the part of each link after that
    prefix is collected.

    Returns:
        A list of the captured path fragments, or an empty list (after
        printing a failure message) when the page could not be fetched.
    """
    raw = tryToGet(url)
    if raw is None:
        print('**********当前页面获取失败')
        return []

    page = raw.decode('utf-8')
    link_pattern = re.compile(r'<li class="s\d{1,2}"><a href="/tupian/(.{1,20})">')
    return link_pattern.findall(page)
 
# Running counter: number used as the file name of the next image saved.
imgCount = 1
def saveImgInPage(url):
    """Download every image found on the page at *url* into the current directory.

    The page is decoded as UTF-8 and scanned for ``<div class="il_img">``
    blocks; the ``src`` of the image in each match is fetched and written to
    ``<imgCount>.jpg``. The module-level ``imgCount`` is incremented after
    each successful save. Images that cannot be fetched are skipped, and a
    failure message is printed if the page itself cannot be fetched.
    """
    global imgCount
    print('********正在获取页面' + url)

    page_bytes = tryToGet(url)
    if page_bytes is None:
        print('**********当前页面获取失败!')
        return

    page = page_bytes.decode('utf-8')
    img_pattern = re.compile(r'<div class="il_img".*<img src="(.*)" alt.*</div>')

    for img_url in img_pattern.findall(page):
        img_bytes = tryToGet(img_url)
        if img_bytes is None:
            # Skip images that could not be downloaded; the counter stays put.
            continue
        file_name = str(imgCount) + '.jpg'
        with open(file_name, 'wb') as f:
            f.write(img_bytes)
        print('**********目前已成功获取%d张图片!' % imgCount)
        imgCount += 1
 
def work():
    """Entry point: crawl the index page and save the images of each sub-page.

    Creates the output directory if it does not exist, makes it the current
    working directory, then collects the sub-page paths from the index URL
    and calls ``saveImgInPage`` on each one.
    """
    target_dir = '图片素材'
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)
    os.chdir(target_dir)

    base_url = 'http://www.ivsky.com/tupian/'
    for sub_path in getSubPage(base_url):
        saveImgInPage(base_url + sub_path)
 
# Start the crawl only when this file is run as a script, not when imported.
if __name__ == '__main__':
    work()
 
 
 
 
 
 |   
 
 
 
 |