python 创建文件保存问题
import requests
import re
import os

# Snippet exactly as posted by the asker, with only its layout restored
# (the two fused import statements are split and the lost indentation is
# put back). The two logic defects discussed in the reply below are kept
# on purpose and are marked with NOTE comments.

url = "https://www.qiushibaike.com/imgrank/"
if not os.path.exists("./糗图"):
    os.mkdir("./糗图")
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
}
response = requests.get(url, headers=headers).text
# NOTE: the capture group has no closing double quote after it, so each
# match keeps the attribute's trailing '"', which then ends up in the URL
# and in the derived file name.
img_src = '<div class="thumb">.*?<img src="(.*?) alt='
img_src_list = re.findall(img_src,response,re.S)
for src in img_src_list:
    src = "http:" + src
    img_data = requests.get(url=src,headers=headers).content
    img_name = src.split("/")[-1]
    img_path = "./糗图/" + img_name
    # NOTE: r"img_path" is a string literal, not the img_path variable, so
    # every image is written over one file named "img_path" in the current
    # working directory and the 糗图 folder stays empty.
    with open(r"img_path", "wb") as f:
        f.write(img_data)
    print(img_name, "爬取结束!!")
此段代码可以运行成功,但是文件夹里没有保存到任何数据,这是为什么呢?
open 中你把文件路径写成了字符串字面量 "img_path",而不是变量 img_path,所以每张图片都被写进当前目录下同一个名为 img_path 的文件,糗图文件夹里自然没有数据。另外建议把 src 的 http 改为 https,爬取速度可能会更快。
而且你正则表达式中 "(.*?) 少了右引号,会影响写入文件时的文件名,参考代码:
import requests
import re
import os

# Download every thumbnail image found on the listing page into ./糗图.

url = "https://www.qiushibaike.com/imgrank/"
save_dir = "./糗图"
# exist_ok=True creates the folder only when it is missing, without a
# separate existence check that could race with another process.
os.makedirs(save_dir, exist_ok=True)

headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
}

# A timeout keeps the script from hanging on a dead connection, and
# raise_for_status() stops early instead of parsing an HTTP error page.
page = requests.get(url, headers=headers, timeout=10)
page.raise_for_status()
response = page.text

# Capture the src attribute of the <img> inside each "thumb" div.
# re.S lets ".*?" span the line breaks between the two tags.
img_src = r'<div class="thumb">.*?<img src="(.*?)" alt='
img_src_list = re.findall(img_src, response, re.S)

for src in img_src_list:
    # Assumes the page emits protocol-relative URLs ("//host/path");
    # TODO confirm against the live markup.
    src = "https:" + src
    img_name = src.split("/")[-1]
    try:
        img_response = requests.get(url=src, headers=headers, timeout=10)
        img_response.raise_for_status()
    except requests.RequestException as err:
        # One broken image should not abort the remaining downloads.
        print(img_name, "下载失败:", err)
        continue
    img_path = os.path.join(save_dir, img_name)
    with open(img_path, "wb") as f:
        f.write(img_response.content)
    print(img_name, "爬取结束!!")
页:
[1]