|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import re
- import urllib.request
- import os
def open_url(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    A desktop-browser ``User-Agent`` header is sent with the request.

    Args:
        url: Address of the page to request.

    Returns:
        The response body as ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8 (for example
            image bytes); this helper is meant for text pages only.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0')
    # The context manager closes the connection even if read()/decode() fails.
    with urllib.request.urlopen(req) as response:
        return response.read().decode('utf-8')
def _fetch_bytes(url):
    """Return the raw, undecoded response body for *url*."""
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0')
    with urllib.request.urlopen(req) as response:
        return response.read()


def get_img(html):
    """Save every ``.jpg`` referenced by an ``<img src="...">`` tag in *html*.

    Each image is written to the current working directory under the last
    path component of its source URL, and that file name is printed.

    Args:
        html: Page markup to scan, as ``str``.

    Raises:
        urllib.error.URLError: If an image cannot be downloaded.
        OSError: If an image file cannot be written.
    """
    from urllib.parse import urlsplit

    # The closing quote must stay outside the capture group: capturing it
    # yields names such as 'x.jpg"', which open() rejects with
    # OSError [Errno 22] Invalid argument.
    pattern = r'<img src="([^"]+\.jpg)"'
    for src in re.findall(pattern, html):
        filename = src.split('/')[-1]
        print(filename)
        # Protocol-relative sources ('//host/a.jpg') need a scheme; sources
        # that already carry one are used unchanged.
        pic = src if urlsplit(src).scheme else 'https:' + src
        # Images are binary, so fetch raw bytes (no text decoding), and do it
        # before opening the file so a failed request leaves no empty file.
        data = _fetch_bytes(pic)
        with open(filename, 'wb') as f:
            f.write(data)
if __name__ == '__main__':
    # Fetch the listing page once, then save every .jpg it references.
    url = 'http://jandan.net/ooxx/MjAyMDAxMDMtNjk=#comments'
    page_html = open_url(url)
    get_img(page_html)
复制代码
报错如下:
- Traceback (most recent call last):
- File "C:\Users\Administrator\Desktop\hello.py", line 32, in <module>
- get_img(open_url(url))
- File "C:\Users\Administrator\Desktop\hello.py", line 25, in get_img
- with open(filename, 'wb') as f:
- OSError: [Errno 22] Invalid argument: '6e2a5d4dly1g10d7j3l6oj20j30rvq5q.jpg"'
复制代码
如果不用图片的原名称,该怎么定义图片的名称?比如从 1 开始依次编号。
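嗯,有两个问题:一是报错里的文件名末尾多了一个引号('...jpg"'),这是正则把结尾的引号也捕获进去了,所以 open() 报 Invalid argument,去掉捕获组里的引号即可;二是编码问题,图片是二进制数据,不能 decode(),再重新定义一个不用 decode() 的函数来下载图片即可。文件名用一个计数器从 1 开始编号。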
- import re
- import urllib.request
- import os
def open_url(url):
    """Fetch *url* and return the response body as text.

    A desktop-browser ``User-Agent`` header is sent with the request. The
    body is decoded as UTF-8; bytes that are not valid UTF-8 are replaced
    with U+FFFD instead of raising.

    Args:
        url: Address of the page to request.

    Returns:
        The response body as ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent',
                   'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0')
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(req) as response:
        return response.read().decode('utf-8', "replace")
def open_url2(url):
    """Fetch *url* and return the raw response body without decoding.

    Intended for binary resources such as images. A desktop-browser
    ``User-Agent`` header is sent with the request.

    Args:
        url: Address of the resource to request.

    Returns:
        The response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent',
                   'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0')
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(req) as response:
        return response.read()
def get_img(html):
    """Download every ``.jpg`` found in ``<img src="...">`` tags of *html*.

    Images are saved in the current working directory as ``1.jpg``,
    ``2.jpg``, ... in the order they appear in the markup.

    Args:
        html: Page markup to scan, as ``str``.

    Raises:
        urllib.error.URLError: If an image cannot be downloaded.
        OSError: If an image file cannot be written.
    """
    pattern = r'<img src="([^"]+\.jpg)'
    filename = '%d.jpg'
    # enumerate(start=1) numbers the files from 1 without a manual counter.
    for count, src in enumerate(re.findall(pattern, html), start=1):
        # The matched sources are prefixed with a scheme to form the URL.
        pic = 'http:' + src
        # Download before opening the file so a failed request does not
        # leave an empty file behind.
        img = open_url2(pic)
        with open(filename % count, 'wb') as f:
            f.write(img)
if __name__ == '__main__':
    # Fetch the listing page once, then save every .jpg it references.
    url = 'http://jandan.net/ooxx/MjAyMDAxMDMtNjk=#comments'
    page_html = open_url(url)
    get_img(page_html)
复制代码
|
|