import urllib.request
import os
import random
import time
 
 def url_open(url):
 ip_list = ['101.254.136.130:443','114.234.80.188:9000','103.99.10.49:83','190.144.127.234:3128','125.26.99.186:34577']
 '''req = urllib.request.Request(url)
 req.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36 Edg/83.0.478.45')'''
 proxy_support = urllib.request.ProxyHandler({'http':random.choice(ip_list)})
 opener = urllib.request.build_opener(proxy_support)
 opener.addheaders = [('User-Agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36 Edg/83.0.478.45')]
 urllib.request.install_opener(opener)
 response = opener.open(url)
 
 #这里为什么不解码?
 html = response.read()
 return html
 
 def save_imgs(floder,img_addrs):
 print(img_addrs)
 for each in img_addrs:
 filename = each.split('/')[-1]
 with open(filename,'wb') as f:
 img = url_open(each)
 f.write(img)
 
 def find_imgs(url):
 html = url_open(url).decode('utf-8')
 #print(html)
 img_addrs = []
 a = html.find('blur" src="')
 while a != -1:
 b = html.find('.jpg',a,a+255)
 if b != -1:
 img_addrs.append(html[a+11:b+4])
 break
 else:
 b = a + 11
 
 a = html.find('blur" src="',b)
 return img_addrs
 
 def download_mm(floder = 'E:\\桌面\XXOO',pages = 30):
 os.mkdir(floder)
 os.chdir(floder)
 
 url = 'https://www.mzitu.com/214947'
 
 for i in range(1,30):
 print(i)
 page_url = url + '/' + str(i)
 img_addrs = find_imgs(page_url)
 save_imgs(floder,img_addrs)
 
 if __name__ == '__main__':
 download_mm()
 
 请问一下大佬们,为什么我这里爬取可以成功,但是爬到的图片全是这样的:
 
 
 
那为什么我加上 Referer 请求头之后就可以正常下载了?
 
 
import urllib.request
import os
import random
import time
def url_open(url):
    """Fetch *url* through a randomly chosen HTTP proxy and return the raw bytes.

    Answers the inline question "why no decode here?": this helper also
    downloads binary image data, so the response must stay as bytes;
    callers that need text (find_imgs) decode it themselves.
    """
    ip_list = ['101.254.136.130:443', '114.234.80.188:9000', '103.99.10.49:83', '190.144.127.234:3128',
               '125.26.99.186:34577']
    proxy_support = urllib.request.ProxyHandler({'http': random.choice(ip_list)})
    opener = urllib.request.build_opener(proxy_support)
    # The Referer header is what makes this version work: mzitu.com blocks
    # hot-linked image requests and serves a placeholder picture without it.
    opener.addheaders = [('User-Agent',
                          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36 Edg/83.0.478.45'),
                         ('Referer', 'https://www.mzitu.com/')
                         ]
    # NOTE(review): install_opener is redundant since opener.open() is used
    # below; kept because it is a module-level side effect callers may rely on.
    urllib.request.install_opener(opener)
    response = opener.open(url)
    return response.read()
def save_imgs(floder, img_addrs):
    """Download each image URL in *img_addrs* into the current directory."""
    print(img_addrs)
    for addr in img_addrs:
        # The URL's basename becomes the local file name.
        name = addr.split('/')[-1]
        with open(name, 'wb') as out:
            out.write(url_open(addr))
def find_imgs(url):
    """Scan the page at *url* and return a list holding the first .jpg address found.

    The scan stops at the first successful match, so at most one URL is
    returned per page.
    """
    html = url_open(url).decode('utf-8')
    img_addrs = []
    marker = 'blur" src="'
    a = html.find(marker)
    while a != -1:
        # Look for the '.jpg' suffix within 255 characters of the marker.
        b = html.find('.jpg', a, a + 255)
        if b != -1:
            img_addrs.append(html[a + 11:b + 4])
            break
        # No suffix nearby — skip past this marker and keep scanning.
        b = a + 11
        a = html.find(marker, b)
    return img_addrs
def download_mm(floder='./XXOO', pages=30):
    """Download pages 1..pages-1 of one mzitu.com album into *floder*.

    Args:
        floder: Target directory; created if missing.
        pages: Exclusive upper bound of the page counter. Bug fix: this
            parameter was previously ignored in favour of a hard-coded 30;
            the default keeps the original behaviour (pages 1–29).
    """
    # makedirs + exist_ok avoids the FileExistsError os.mkdir raised on rerun.
    os.makedirs(floder, exist_ok=True)
    os.chdir(floder)
    url = 'https://www.mzitu.com/214947'
    for i in range(1, pages):
        print(i)
        page_url = url + '/' + str(i)
        img_addrs = find_imgs(page_url)
        save_imgs(floder, img_addrs)
# Script entry point: run the downloader only when executed directly.
if __name__ == '__main__':
    download_mm()
 | 
 
![5@H7[K5WQHF_%Q][M_CII18.png 5@H7[K5WQHF_%Q][M_CII18.png](https://xxx.ilovefishc.com/forum/202006/14/132725wjataq3nst7q2qkt.png)  |