|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
这是一个爬取煎蛋网的代码,返回的图片地址都是没问题的,可是在下载的时候出了问题,求助!!!
- import urllib.request
- import re
- import os
def open_url(url):
    """Fetch *url* with a desktop-browser User-Agent and return the body as text.

    The response bytes are decoded as UTF-8, so the return value is a ``str``.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    request = urllib.request.Request(url)
    request.add_header('User-Agent', user_agent)
    raw = urllib.request.urlopen(request).read()
    return raw.decode('utf-8')
def get_img(html):
    """Collect the image ``src`` values found in *html* and pass them to ``save_img``.

    Each match is whatever text sits between ``src=`` and the whitespace that
    precedes ``referrerPolicy``; the matches are forwarded exactly as captured.
    """
    pattern = re.compile(r'<br /><img\ssrc=(.*?)\sreferrerPolicy')
    sources = pattern.findall(html)
    save_img(sources)
def save_img(img_list):
    """Write one file per entry of *img_list* into the current directory.

    The file name is the text after the last ``/`` of the entry, and the
    content is whatever ``open_url`` returns for that entry.
    """
    for src in img_list:
        name = src.rsplit("/", 1)[-1]
        # The output file is opened before the download is attempted.
        with open(name, 'wb') as out:
            out.write(open_url(src))
if __name__ == '__main__':
    # Create ./ooxx, switch into it, then fetch the listing page and hand it
    # to get_img so the images land in that directory.
    url = 'http://jandan.net/ooxx'
    target_dir = 'ooxx'
    os.mkdir(target_dir)
    os.chdir(target_dir)
    page = open_url(url)
    get_img(page)
复制代码
这个是报错
Traceback (most recent call last):
File "C:\Users\巴鲁斯\Desktop\新建文件夹\mmtu.py", line 34, in <module>
get_img(open_url(url))
File "C:\Users\巴鲁斯\Desktop\新建文件夹\mmtu.py", line 19, in get_img
save_img(img_list)
File "C:\Users\巴鲁斯\Desktop\新建文件夹\mmtu.py", line 25, in save_img
with open(filename, 'wb') as f:
OSError: [Errno 22] Invalid argument: '0076BSS5ly1gh25iz10zcj30u0190gra.jpg"'
>>>
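帮你改完了:原代码的正则把图片地址两端的引号也一起匹配了进来,所以文件名里带有非法字符 ",open() 才会报 Errno 22;另外图片是二进制数据,不能先按 utf-8 解码再写入以 'wb' 打开的文件。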
- import urllib.request
- import re
- import os
def open_url(url, timeout=10):
    """Download *url* and return the raw response body as ``bytes``.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The undecoded response body, or ``b''`` when the request fails.
        The failure value is empty bytes rather than an empty ``str`` so the
        result has the same type on both paths and callers can safely call
        ``.decode()`` or write it to a binary file.
    """
    # Local import keeps this function self-contained; urllib.request already
    # loads http.client internally, so this costs nothing at runtime.
    import http.client

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
    try:
        req = urllib.request.Request(url, headers=headers)
        # The context manager closes the response even if read() fails.
        with urllib.request.urlopen(req, timeout=timeout) as page:
            return page.read()
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers urllib.error.URLError/HTTPError and socket timeouts;
        # ValueError is raised for malformed URLs; HTTPException covers
        # truncated or invalid responses. Anything else is a real bug and
        # is allowed to propagate instead of being silently swallowed.
        return b''
def get_img(html):
    """Extract image ``src`` attributes from *html* and hand them to ``save_img``.

    Args:
        html: Page content as ``bytes`` (decoded here as UTF-8) or ``str``.
            An empty value, such as the result of a failed download, is
            treated as "no images" instead of raising.

    The captured values are passed on exactly as they appear in the markup
    (including any surrounding quotes); ``save_img`` normalises them.
    """
    if isinstance(html, (bytes, bytearray)):
        # Undecodable bytes are replaced rather than aborting the whole run.
        html = html.decode('utf-8', errors='replace')
    if not html:
        return
    # Raw string so that \s reaches the regex engine without an
    # invalid-escape warning from the Python compiler.
    img_list = re.findall(r"<br /><img\ssrc=(.*?)\sreferrerPolicy", html)
    if img_list:
        save_img(img_list)
def save_img(img_list):
    """Download every entry of *img_list* and save it as ``<n>.jpg``.

    Args:
        img_list: Image addresses as captured from the page. Surrounding
            quote characters are stripped, and protocol-relative addresses
            (``//host/path``) are given an ``http:`` scheme.

    Files are numbered by position in *img_list* starting at 1. An entry
    whose download fails is skipped and leaves no file behind, so numbers
    may have gaps.
    """
    for count, each in enumerate(img_list, start=1):
        url = each.strip('"\'')
        if url.startswith('//'):
            url = 'http:' + url
        # Download first: opening the file before the download would leave
        # an empty file on disk whenever the request fails.
        img = open_url(url)
        if not img:
            continue
        with open(str(count) + ".jpg", 'wb') as f:
            f.write(img)
if __name__ == '__main__':
    url = r"http://jandan.net/ooxx"
    # exist_ok lets the script be re-run without failing on the directory
    # created by a previous run.
    os.makedirs('ooxx', exist_ok=True)
    os.chdir('ooxx')
    get_img(open_url(url))
复制代码
|
|