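# Forum page residue captured with this paste (originally Chinese, mis-encoded):
#   "Register now to make more friends and enjoy more features ^_^"
#   "You need to log in to download or view this. No account? Register now"
#   x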
import urllib.request
import os
def get_url(url):
    """Fetch *url* with a browser-like User-Agent and return the raw body.

    Args:
        url: Absolute URL to request.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request cannot be completed
            (``HTTPError``, a subclass, for HTTP error statuses).
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
        ),
    }
    req = urllib.request.Request(url, headers=headers)
    # Use a context manager so the connection is closed as soon as the body
    # has been read, instead of waiting for garbage collection.
    with urllib.request.urlopen(req) as response:
        return response.read()
def url_img(url):
    """Collect the ``img src`` fragments that point at ``.jpg`` files on a page.

    The page at *url* is downloaded with ``get_url``, decoded as UTF-8 and
    scanned with plain string searches (no HTML parsing).

    Args:
        url: Address of the page to scan.

    Returns:
        A list of raw fragments, each running from the ``img src`` marker
        through the ``.jpg`` suffix, e.g. ``img src="http://host/a.jpg``.
        The marker and opening quote are kept on purpose: ``save_imgs``
        splits each entry on ``"`` to recover the URL.
    """
    marker = 'img src'
    html = get_url(url).decode('utf-8')
    img_addrs = []

    # The first search requires the '=' sign; later searches (as in the
    # original code) only look for the bare marker.
    a = html.find(marker + '=')
    while a != -1:
        # Only accept a '.jpg' that appears within 255 characters of the marker.
        b = html.find('.jpg', a, a + 255)
        if b != -1:
            img_addrs.append(html[a:b] + '.jpg')
            resume = b
        else:
            # NOTE(review): this literal looks like a mis-encoded Chinese
            # message (roughly "image address not found"); it is runtime
            # output, so it is left untouched.
            print('ÕÒ²»µ½Í¼Æ¬µØÖ·')
            # Skip only this marker. Searching from b (-1) would start at the
            # last character of the page and end the scan prematurely.
            resume = a + len(marker)
        a = html.find(marker, resume)
    return img_addrs
#print(url_img(url)) # get every image address on the first page of the listing
#print(len(url_img(url))) # how many images are in the list
def save_imgs(folder, url_img):
    """Download every image listed in *url_img* into the current directory.

    Args:
        folder: Not used by this function; files are written relative to the
            current working directory (``download_mm`` changes into the
            target folder before calling). Kept so existing callers work.
        url_img: Iterable of entries as returned by ``url_img()``, i.e. raw
            fragments such as ``img src="http://host/a.jpg``. A bare URL
            without any double quote is accepted as well.

    Raises:
        urllib.error.URLError: Propagated from ``get_url`` when a download
            fails.
        OSError: If an image file cannot be written.
    """
    for each in url_img:
        # The URL is whatever follows the first double quote of the fragment.
        img_url = each.split('"')[1] if '"' in each else each
        # A URL contains '/' and is not a usable file name; keep only the
        # final path component (e.g. 'a.jpg').
        filename = img_url.split('/')[-1]
        # Download first so a failed request does not leave an empty file.
        img = get_url(img_url)
        with open(filename, 'wb') as f:
            f.write(img)
def download_mm(folder='katong', pages=10):
    """Download the images from listing pages 2 through *pages*.

    Creates *folder* if needed, changes the process working directory into
    it (the directory is NOT restored afterwards), then for each listing
    page collects the image fragments with ``url_img`` and stores them with
    ``save_imgs``.

    Args:
        folder: Directory that receives the images; created when missing.
        pages: Number of the last listing page to fetch. Fetching starts at
            page 2, so values below 2 download nothing.
    """
    # exist_ok lets the script be re-run without crashing on an existing
    # folder (plain os.mkdir raises FileExistsError).
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    # Same page numbers the original loop produced: 2, 3, ..., pages.
    for page in range(2, pages + 1):
        page_url = 'http://sc.chinaz.com/tupian/katongtupian_{}.html'.format(page)
        img_addrs = url_img(page_url)
        save_imgs(folder, img_addrs)
# Run the downloader with its defaults only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    download_mm()