|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
求助!第一次运行时程序运行正常,图片也可以下载下来,但查看图片时却提示不支持此格式。第二次再运行时,下载下来的图片全都是 0kb 了,搞不清楚是为什么。
- import requests
- import urllib.request
- import re
- import os
def open_url(url):
    """Request *url* with a browser-like User-Agent and return the body as text.

    The HTTP status code is printed before returning. The return value is
    the response's ``.text`` (a ``str``), not the raw bytes.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    }
    response = requests.get(url, headers=request_headers)
    print(response.status_code)
    return response.text
def get_img(html):
    """Collect the single-quoted ``<img src='...jpg'>`` URLs in *html* and save them.

    *html* is the page markup as a string. The list of captured URLs is
    printed and then passed on to ``save``.
    """
    pattern = re.compile(r"<img src='(.*?.jpg)'", re.S)
    found_urls = pattern.findall(html)
    print(found_urls)
    save(found_urls)
def save(img_list):
    """Create one file per URL in *img_list*, named after the URL's last segment.

    Every URL is fetched through ``open_url`` and the returned text is
    encoded as UTF-8, but the write call is commented out, so each file
    that gets created stays empty.
    """
    for img_url in img_list:
        target_name = img_url.rsplit("/", 1)[-1]
        with open(target_name, 'wb') as out_file:
            body = open_url(img_url)
            print(type(body))
            body = body.encode(encoding='utf-8')
            # out_file.write(body)  # disabled: nothing is written to the file
            # print(body)
def mk_file():
    """Change the working directory to the hard-coded download folder."""
    download_dir = 'C:/Users/巴鲁斯/Desktop/小美'
    os.chdir(download_dir)
if __name__ == '__main__':
    # Switch to the download folder, fetch the forum thread, then hand the
    # page text to the image extractor.
    url = 'https://www.xcar.com.cn/bbs/viewthread.php?tid=90672604&zoneclick=104603'
    mk_file()
    page_text = open_url(url)
    get_img(page_text)
复制代码
改成下面这样:爬取图片时要用 html.content 以二进制方式读取,而不是用 .text 以文本方式读取。另外,原代码里写入文件的那一行被注释掉了,所以第二次下载下来的文件都是 0kb。
- import requests
- import urllib.request
- import re
- import os
def open_url(url, timeout=10):
    """Request *url* with a browser-like User-Agent and return the response.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The ``requests.Response`` object. Callers read ``.text`` for page
        markup or ``.content`` for binary data such as images.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    }
    # Without an explicit timeout a stalled server blocks the script forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    print(response.status_code)
    return response
def get_img(html):
    """Find the ``.jpg`` image URLs in a fetched page and download them.

    Args:
        html: Response object whose ``.text`` attribute holds the page
            markup.

    The matched URLs are printed and then passed to ``save``.
    """
    # [^']* keeps each match inside a single src='...' attribute. The
    # previous ".*?" combined with re.S could run across several tags
    # until it met "jpg'", and the unescaped "." before "jpg" matched any
    # character rather than a literal dot.
    pattern = re.compile(r"<img src='([^']*\.jpg)'")
    img_list = pattern.findall(html.text)
    print(img_list)
    save(img_list)
def save(img_list):
    """Download every image URL in *img_list* into the current directory.

    Each file is named after the last path segment of its URL and is
    written from the response's raw bytes (``.content``).

    Args:
        img_list: Iterable of image URLs.
    """
    for each in img_list:
        filename = each.split("/")[-1]
        # Fetch before opening the file so a failed request cannot leave
        # an empty file behind.
        img = open_url(each)
        print(type(img))
        if img.status_code != 200:
            # An error page saved under a .jpg name is not a valid image.
            print('skipped', each)
            continue
        with open(filename, 'wb') as fp:
            fp.write(img.content)
def mk_file(folder='.'):
    """Make *folder* the working directory, creating it first if needed.

    Args:
        folder: Directory the downloaded images should land in. The
            default ``'.'`` keeps the current working directory.
    """
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
if __name__ == '__main__':
    # Set the working directory, fetch the forum thread, then extract and
    # download the images it references.
    url = 'https://www.xcar.com.cn/bbs/viewthread.php?tid=90672604&zoneclick=104603'
    mk_file()
    page = open_url(url)
    get_img(page)
复制代码
|
|