|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import urllib.request
import os
import re
# Open a URL and return the raw response body
def url_open(url):
    """Fetch ``url`` and return the raw response body.

    The request is sent with a ``Referer`` header equal to ``url`` itself and
    a desktop-browser ``User-Agent`` header.

    Args:
        url: Address to request.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urllib.request.urlopen``
            when the request fails.
    """
    headers = {
        'Referer': url,
        'User-Agent': 'Mozilla /5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0',
    }
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the connection as soon as the body has been
    # read, instead of leaving it open until garbage collection.
    with urllib.request.urlopen(req) as response:
        return response.read()
# Collect the addresses of all albums linked from a page
def get_album_addrs(url):
    """Return the album URLs linked from the listing page at ``url``.

    The page is fetched with ``url_open``, decoded as UTF-8 and scanned for
    links of the form ``https://www.mzitu.com/<six digits>``.

    Args:
        url: Address of the listing page to scan.

    Returns:
        A list of the distinct album URLs, in order of first appearance.
    """
    html = url_open(url).decode("utf-8")
    # Raw string so ``\d`` reaches the regex engine untouched; the dots are
    # escaped so they match only a literal '.' in the host name.
    pattern = re.compile(r"https://www\.mzitu\.com/\d{6}")
    # The same link can occur more than once in the page; dict.fromkeys drops
    # the repeats while keeping first-seen order, so no album is crawled twice.
    return list(dict.fromkeys(pattern.findall(html)))
# Collect the addresses of the images contained in one album
def get_img_addrs(url, max_pages=10):
    """Return the image URLs found on the numbered pages of one album.

    Pages ``url/1`` through ``url/<max_pages>`` are fetched with ``url_open``,
    decoded as UTF-8 and scanned for ``.jpg`` links on the
    ``i5.meizitu.net`` host.

    Args:
        url: Album address, without a trailing slash.
        max_pages: Number of album pages to request, starting at page 1.
            Defaults to 10.

    Returns:
        A list of every matching image URL, in page order. Repeats are kept.
    """
    # Compiled once, outside the loop. Raw string keeps ``\d`` intact; the
    # host and extension dots are escaped, while ``.{3}`` deliberately stays
    # as "any three characters" after the two leading digits of the file name.
    pattern = re.compile(r"https://i5\.meizitu\.net/\d{4}/\d{2}/\d{2}.{3}\.jpg")
    img_addrs = []
    for n in range(1, max_pages + 1):
        page_url = url + "/" + str(n)
        html = url_open(page_url).decode("utf-8")
        img_addrs.extend(pattern.findall(html))
    return img_addrs
# Download an image and save it to disk
def save_img(url):
    """Download ``url`` and write it into the current working directory.

    The file is named after the text following the last ``/`` in ``url``.

    Args:
        url: Address of the image to download.
    """
    payload = url_open(url)
    target_name = url.rsplit('/', 1)[-1]
    with open(target_name, "wb") as out_file:
        out_file.write(payload)
# Main function
def main():
    """Download every image of every album listed on one category page.

    A folder named after the category (the second-to-last ``/``-separated
    part of the URL) is created under the hard-coded base directory and made
    the working directory. Each album returned by ``get_album_addrs`` is then
    expanded with ``get_img_addrs`` and every image is stored with
    ``save_img``.
    """
    # Other category pages that can be substituted for ``url``:
    #   https://www.mzitu.com/xinggan/
    #   https://www.mzitu.com/japan/
    #   https://www.mzitu.com/taiwan/
    #   https://www.mzitu.com/mm/
    url = 'https://www.mzitu.com/xinggan/'
    # Raw string: the backslashes are Windows path separators, not escapes.
    os.chdir(r"D:\Python素材\爬虫案例\mm")
    folder = url.split("/")[-2]
    # exist_ok lets the script be re-run without failing on an existing folder.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    for album_addr in get_album_addrs(url):
        for img_addr in get_img_addrs(album_addr):
            save_img(img_addr)
# Run the main function only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
-
|