关于爬取到的图片不能打开
求助!我第一次运行时程序运行正常,图片也能下载下来,但打开时却提示不支持此格式。第二次再运行时,下载下来的图片都变成 0 KB 了,搞不清楚为什么。
import requests
import urllib.request
import re
import os
def open_url(url):
    """Fetch *url* with a desktop-browser User-Agent and return the body as text.

    The HTTP status code of the response is printed as a side effect. The
    return value is ``Response.text`` (a decoded ``str``), not the raw bytes.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    response = requests.get(url, headers={'User-Agent': user_agent})
    print(response.status_code)
    return response.text
def get_img(html):
    """Collect single-quoted ``<img src='...jpg'>`` URLs from *html* and save them.

    *html* is the page source as a string. The matched URLs are printed and
    then handed to ``save``.
    """
    pattern = re.compile(r"<img src='(.*?.jpg)'", re.S)
    img_list = pattern.findall(html)
    print(img_list)
    save(img_list)
def save(img_list):
    """Create one file per URL in *img_list*, named after the URL's last segment.

    NOTE: as posted, the write call is commented out, so every file is opened
    for writing (and therefore truncated) but nothing is ever written to it.
    The downloaded body is text from ``open_url`` re-encoded as UTF-8.
    """
    for img_url in img_list:
        filename = img_url.rsplit("/", 1)[-1]
        with open(filename, 'wb') as fp:
            body_text = open_url(img_url)
            print(type(body_text))
            encoded = body_text.encode(encoding='utf-8')
            # fp.write(encoded)  -- disabled in the original post
            # print(encoded)     -- disabled in the original post
def mk_file():
    """Switch the working directory to the hard-coded desktop folder."""
    target_dir = 'C:/Users/巴鲁斯/Desktop/小美'
    os.chdir(target_dir)
if __name__ == '__main__':
    # Move into the output folder first, then fetch the forum thread and
    # process every image found on the page.
    mk_file()
    url = 'https://www.xcar.com.cn/bbs/viewthread.php?tid=90672604&zoneclick=104603'
    page_source = open_url(url)
    get_img(page_source)
改成这样:爬图片要用 html.content 以二进制方式读取,而不是用 .text 读取文本。
import requests
import urllib.request
import re
import os
def open_url(url, timeout=10):
    """Send a GET request to *url* and return the ``requests.Response``.

    The response object itself is returned (not its text) so that callers can
    choose ``.text`` for HTML pages or ``.content`` for binary data such as
    images.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can wait indefinitely on a stalled server.

    Returns:
        The ``requests.Response`` for the request. Its status code is printed
        as a side effect.

    Raises:
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    headers = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    print(response.status_code)
    return response
def get_img(html):
    """Find the ``.jpg`` image URLs in a fetched page and download them.

    Args:
        html: Response object of the page; its ``.text`` is searched.

    Returns:
        The list of matched image URLs. The list is also printed and passed
        to ``save``.
    """
    # The dot before "jpg" is escaped so it matches only a literal ".", and
    # ``[^']`` keeps each match inside a single quoted attribute value. The
    # previous ``.*?`` with re.S could run past the closing quote of a non-jpg
    # image and swallow markup up to the next ".jpg'".
    pattern = re.compile(r"<img src='([^']*?\.jpg)'")
    img_list = pattern.findall(html.text)
    print(img_list)
    save(img_list)
    return img_list
def save(img_list):
    """Download every URL in *img_list* into the current working directory.

    Each image is stored under the last path segment of its URL. The download
    happens before the file is opened, so a failed request no longer leaves an
    empty file behind, and error responses are not written to disk as images.

    Args:
        img_list: Iterable of absolute image URLs.
    """
    for img_url in img_list:
        filename = img_url.split("/")[-1]
        try:
            response = open_url(img_url)
        except requests.exceptions.RequestException as err:
            # One unreachable image should not abort the remaining downloads.
            print('skipped', img_url, err)
            continue
        # ``.content`` is the raw bytes of the body, which is what an image
        # file needs; skip error statuses and empty bodies.
        if not response.ok or not response.content:
            print('skipped', img_url, response.status_code)
            continue
        with open(filename, 'wb') as fp:
            fp.write(response.content)
def mk_file(folder='.'):
    """Make *folder* the working directory, creating it first if necessary.

    Args:
        folder: Directory that downloaded files should go into. The default
            ``'.'`` keeps the current directory, matching the previous
            behaviour of this function.
    """
    # exist_ok=True makes this a no-op for directories that already exist,
    # including the default '.'.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
if __name__ == '__main__':
    # Select the output directory, then fetch the forum thread and download
    # the images it references.
    mk_file()
    url = 'https://www.xcar.com.cn/bbs/viewthread.php?tid=90672604&zoneclick=104603'
    thread_page = open_url(url)
    get_img(thread_page)
Twilight6 发表于 2020-7-27 15:53
改成这样,你 爬图片要用 html.content 二进制读取,而不是 .text 读取文本
谢谢,明白了,以后会注意{:10_254:}
页:
[1]