| 
 | 
 
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册  
 
x
 
import urllib.request 
import time as t 
import base64 
import re 
 
def open_url(url_in):
    """Fetch *url_in* and return the raw response body.

    The request is sent with a fixed desktop-browser ``User-Agent`` header.

    Args:
        url_in: URL to request (anything ``urllib.request.Request`` accepts).

    Returns:
        bytes: the undecoded response body.

    Raises:
        urllib.error.URLError: propagated unchanged from ``urlopen`` when the
            request fails (``HTTPError`` is a subclass).
    """
    req = urllib.request.Request(url_in)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66')
    # The context manager closes the response (and its socket) even when
    # read() raises; previously the response object was never closed.
    with urllib.request.urlopen(req) as response:
        return response.read()
 
 
def page_url(page):
    """Build the jandan.net/ooxx URL for comment page *page*.

    The path segment is the Base64 encoding of ``'<YYYYMMDD>-<page>'``,
    where the date is today's local date.

    NOTE(review): the site is assumed to expect the zero-padded ``YYYYMMDD``
    form -- confirm against a live page URL.

    Args:
        page: page number (any value ``str()`` can render).

    Returns:
        str: ``'http://jandan.net/ooxx/<base64>#comments'``.
    """
    # A single strftime call reads the clock once and zero-pads month/day.
    # Concatenating the unpadded tm_mon/tm_mday fields was ambiguous
    # (2021-01-11 and 2021-11-01 both produced "2021111") and read the clock
    # three times, which could straddle midnight.
    page_str = t.strftime('%Y%m%d') + '-' + str(page)
    # b64encode returns bytes; decode them instead of slicing the "b'...'" repr.
    temp = base64.b64encode(page_str.encode()).decode('ascii')
    page_url_out = 'http://jandan.net/ooxx/' + temp + '#comments'
    return page_url_out
 
def mm_page_url(page_url_in):
    """Return the ``.jpg`` link targets found in the page at *page_url_in*.

    The page is downloaded with ``open_url`` and decoded as UTF-8.

    Args:
        page_url_in: URL of the page to scan.

    Returns:
        list[str]: the ``href`` value of every ``<a href="...jpg`` occurrence,
        in document order (empty when nothing matches).
    """
    page_text = open_url(page_url_in).decode('utf-8')
    # [^"]+ cannot cross the closing quote of the attribute, so a match never
    # runs on to a ".jpg" that appears later in the document (a greedy ".+"
    # would).
    return re.findall(r'<a href="([^"]+\.jpg)', page_text)
 
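# Disabled debug snippet: print every URL returned by mm_page_url()
# to check what the regular expression matched.
#     for each in mm_page_url(some_page_url):
#         print(each)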
 
def mm_download(page, filename_pattern='C:\\Users\\imcaimimi\\Desktop\\python learning\\mm\\mm_%s.jpg'):
    """Download every ``.jpg`` linked from comment page *page*.

    Files are numbered with the module-level counter ``num``, which must be
    initialised before the first call. It is incremented after each
    successful download, so numbering continues across pages.

    Args:
        page: page number, passed to ``page_url``.
        filename_pattern: ``%``-format string with one ``%s`` placeholder
            that receives the running image number. Defaults to the
            original hard-coded location.

    Raises:
        Whatever ``open_url`` or ``urllib.request.urlretrieve`` raise; the
        counter is not advanced for the image that failed.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    global num
    page_urls = page_url(page)
    for each in mm_page_url(page_urls):
        filename = filename_pattern % (num,)
        # urljoin resolves protocol-relative ("//host/a.jpg"), absolute and
        # site-relative hrefs against the page URL. Prepending 'http:' only
        # worked for the protocol-relative form.
        urllib.request.urlretrieve(urljoin(page_urls, each), filename)
        num += 1
         
# Number of pages to download, counted backwards from the newest page.
n = int(input('Confirm the pages of mm_pics_jiandansuishoupai:'))

# Fetch the landing page to discover the newest page number.
html = open_url('http://jandan.net/ooxx').decode('utf-8')

# The number is read from 23 characters after the start of the
# 'current-comment-page' marker up to the next ']'. The marker itself is
# 20 characters; the 3 extra skipped characters are presumably '">[' --
# verify against the live markup.
marker_pos = html.find('current-comment-page')
close_pos = html.find(']', marker_pos)
newest_page = int(html[marker_pos + 23:close_pos])

# Running image counter; mm_download() reads and updates it via `global num`.
num = 0
for page in range(newest_page, newest_page - n, -1):
    mm_download(page)
 
 |   
 
 
 
 |