|  | 
 
| 
import urllib.request
 import os
 import re
 
 #打开一个URL
 def url_open(url):
     """Fetch ``url`` and return the raw response body.

     The request is sent with a ``Referer`` header equal to ``url`` itself and
     a fixed desktop-browser ``User-Agent`` string.

     Args:
         url: Address to fetch; passed unchanged to ``urllib.request.Request``.

     Returns:
         The complete response body as ``bytes`` (no decoding is applied).

     Raises:
         Any exception raised by ``urllib.request.urlopen`` (for example
         ``urllib.error.URLError``) propagates to the caller.
     """
     headers = {
         'Referer': url,
         'User-Agent': 'Mozilla /5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0',
     }
     req = urllib.request.Request(url, headers=headers)
     # The context manager closes the response (and its underlying handle) as
     # soon as the body has been read, even if read() raises.
     with urllib.request.urlopen(req) as response:
         return response.read()
 
 #获取页面中的所有图集地址
 def get_album_addrs(url):
     """Return the album addresses linked from the listing page at ``url``.

     The page is downloaded with ``url_open``, decoded as UTF-8 and scanned
     for addresses of the form ``https://www.mzitu.com/`` followed by six
     digits.

     Args:
         url: Address of the listing page to scan.

     Returns:
         A list of the matching address strings in order of first appearance,
         each address appearing once.

     Raises:
         UnicodeDecodeError: If the downloaded page is not valid UTF-8.
         Exceptions raised by ``url_open`` propagate unchanged.
     """
     html = url_open(url).decode("utf-8")
     # Raw string so ``\d`` reaches the regex engine untouched; the dots are
     # escaped so they match only a literal "." in the host name.
     pattern = re.compile(r"https://www\.mzitu\.com/\d{6}")
     # findall reports every occurrence; dict.fromkeys drops repeats while
     # keeping first-seen order, so each album is visited only once.
     return list(dict.fromkeys(pattern.findall(html)))
 
 #获取某一图集中所包含图片地址
 def get_img_addrs(url, max_pages=10):
     """Collect image addresses from the numbered pages of one album.

     Pages ``url + "/1"`` through ``url + "/<max_pages>"`` are each downloaded
     with ``url_open``, decoded as UTF-8 and scanned for ``.jpg`` addresses
     hosted on ``i5.meizitu.net``.

     Args:
         url: Album address; ``"/<n>"`` is appended to it for each page.
         max_pages: How many numbered pages to fetch. Defaults to 10, the
             count that was previously hard-coded.

     Returns:
         A list of every matching address string, in page order. Repeated
         addresses are kept.

     Raises:
         UnicodeDecodeError: If a downloaded page is not valid UTF-8.
         Exceptions raised by ``url_open`` propagate unchanged, so a failure
         on any single page aborts the whole call.
     """
     # Compiled once, outside the loop. Raw string so ``\d`` is passed to the
     # regex engine as written; literal dots in the host name and before
     # "jpg" are escaped so they no longer match arbitrary characters.
     pattern = re.compile(
         r"https://i5\.meizitu\.net/\d{4}/\d{2}/\d{2}.{3}\.jpg"
     )
     img_addrs = []
     for n in range(1, max_pages + 1):
         page_url = url + "/" + str(n)
         html = url_open(page_url).decode("utf-8")
         img_addrs.extend(pattern.findall(html))
     return img_addrs
 
 #下载保存图片
 def save_img(url):
     """Download the image at ``url`` into the current working directory.

     The file is named after the text that follows the last "/" in ``url``.
     It is opened in binary write mode, so an existing file of the same name
     is overwritten.

     Args:
         url: Address of the image to download.
     """
     payload = url_open(url)
     # Everything after the final slash; the whole string if there is none.
     target_name = url.rsplit('/', 1)[-1]
     with open(target_name, "wb") as out_file:
         out_file.write(payload)
 
 #主函数
 def main():
     """Download every image of every album listed on one category page.

     Changes into a fixed base directory, creates (if necessary) and enters a
     sub-folder named after the second-to-last "/"-separated segment of the
     category URL, then saves each address returned by ``get_img_addrs`` for
     each album returned by ``get_album_addrs``.

     Raises:
         OSError: If the base directory does not exist or the sub-folder
             cannot be created or entered.
         Exceptions raised by the download helpers propagate unchanged.
     """
     # Other category pages from the original candidate list that can be
     # substituted for ``url``:
     #   https://www.mzitu.com/japan/
     #   https://www.mzitu.com/taiwan/
     #   https://www.mzitu.com/mm/
     url = 'https://www.mzitu.com/xinggan/'
     # Raw string: the same path as before, without relying on unrecognised
     # backslash escapes being passed through.
     os.chdir(r"D:\Python素材\爬虫案例\mm")
     folder = url.split("/")[-2]
     # exist_ok=True so a second run does not fail because the folder is
     # already there.
     os.makedirs(folder, exist_ok=True)
     os.chdir(folder)
     for album_addr in get_album_addrs(url):
         for img_addr in get_img_addrs(album_addr):
             save_img(img_addr)
 
 #执行主函数
 if __name__ == '__main__':
     # Start the crawl only when this file is executed as a script, not when
     # it is imported as a module.
     main()
 
 
 | 
 
  |