|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
源代码为:
import csv
import os

import requests
from bs4 import BeautifulSoup
from lxml import etree
def download_one_page(url):
    """Download every product thumbnail (<img width="205">) found on *url*.

    Each image is saved into D:/Picture/ (created if missing), named after
    the last path component of its image URL.
    """
    # Fetch the page source; declare UTF-8 so the Chinese text decodes cleanly.
    resp = requests.get(url)
    resp.encoding = 'utf-8'
    soup = BeautifulSoup(resp.text, "html.parser")
    # find_all (not find) returns every matching <img>, not just the first one.
    img_tags = soup.find_all("img", width="205")
    print(len(img_tags))
    path = "D://Picture//"
    os.makedirs(path, exist_ok=True)  # create the target folder once, up front
    for tag in img_tags:
        # BUG FIX: an <img> tag stores its address in 'src', not 'href'.
        # The original read into img_href but then used the undefined name
        # img_src, raising NameError on the first iteration.
        img_src = tag.get('src')
        if not img_src:
            continue  # skip tags without a usable address
        img_resp = requests.get(img_src)
        img_name = img_src.split("/")[-1]
        with open(path + img_name, mode='wb') as f:
            f.write(img_resp.content)
    print("over")
if __name__ == '__main__':
    download_one_page('http://www.c-denkei.cn/index.php?d=home&c=goods&m=search&s=%E7%94%B5%E6%BA%90&c1=0&c2=0&c3=0&page=')
用find的话,只能提取到第一张图片,直接用find_all的话又会报错,好难受啊
报错的原因是:你把属性读到了 img_href 里,后面却使用了未定义的 img_src,第一次循环就会抛 NameError;另外 <img> 标签的地址存放在 src 属性而不是 href 里。对你的代码修改如下:
import requests
from bs4 import BeautifulSoup
from lxml import etree
import csv
import os


def download_one_page(url):
    """Fetch *url* and save every <img width="205"> thumbnail it contains.

    Files are written to D:/Picture/ (created on first need), each named
    after the final component of its image URL.
    """
    response = requests.get(url)
    response.encoding = 'utf-8'  # force UTF-8 so the page text is not garbled
    soup = BeautifulSoup(response.text, "html.parser")
    # Narrow the search: only the product thumbnails carry width="205".
    thumbnails = soup.find_all("img", width="205")
    print(len(thumbnails))
    for tag in thumbnails:
        address = tag.get('src')  # an <img> keeps its URL in the src attribute
        picture = requests.get(address)
        save_dir = "D:/Picture/"
        if not os.path.exists(save_dir):  # make the folder if it is missing
            os.mkdir(save_dir)
        filename = address.split("/")[-1]
        with open(save_dir + filename, mode='wb') as out:
            out.write(picture.content)
    print("over")


if __name__ == '__main__':
    download_one_page('http://www.c-denkei.cn/index.php?d=home&c=goods&m=search&s=%E7%94%B5%E6%BA%90&c1=0&c2=0&c3=0&page=')
复制代码
|
|