|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import urllib.request
- import os
- def get_url(url):
- headers = {
- 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
- }
- req = urllib.request.Request(url,headers=headers)
- response = urllib.request.urlopen(req)
- html = response.read()
- return html
- def url_img(url):
- img_addrs = []
- html = get_url(url).decode('utf-8')
- a = html.find('img src2=')
- while a != -1:
- b = html.find('.jpg', a, a + 255)
- if b != -1:
- img_addrs.append(html[a:b] + '.jpg')
- else:
- print('找不到图片地址')
- a = html.find('img src2=',b)
- return img_addrs
- #print(url_img(url)) #获取列表内页面第一页所有图片地址
- #print(len(url_img(url))) #列表内有多少图片
- def save_imgs(folder,img_addrs):
- for each in img_addrs:
- img_url = each.split('"')[1]
- with open(img_url,'wb') as f:
- img = get_url(each)
- f.write(img)
- def download_mm(folder='katong',pages=10):
- os.mkdir(folder)
- os.chdir(folder)
- url = 'http://sc.chinaz.com/tupian/katongtupian_2.html'
- for i in range(1,pages):
- i += 1
- get_img = 'http://sc.chinaz.com/tupian/katongtupian' + '_' + str(i) + '.html'
- img_addrs = url_img(get_img)
- save_imgs(folder,img_addrs)
- if __name__ == '__main__':
- download_mm()
- #错误
- Traceback (most recent call last):
- File "C:/Users/Administrator/Desktop/测试.py", line 52, in <module>
- download_mm()
- File "C:/Users/Administrator/Desktop/测试.py", line 48, in download_mm
- save_imgs(folder,img_addrs)
- File "C:/Users/Administrator/Desktop/测试.py", line 34, in save_imgs
- with open(img_url,'wb') as f:
- OSError: [Errno 22] Invalid argument: 'http://pic.sc.chinaz.com/Files/pic/pic9/201806/zzpic12352_s.jpg'
复制代码 |
|