|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 我龙哟 于 2020-3-21 12:56 编辑
- import requests
- import os
- from bs4 import BeautifulSoup
def openurl(url):
    """Fetch *url* with a 30-second timeout.

    Returns the ``requests`` response object on success.  When the request
    fails, or the server answers with an HTTP error status (checked through
    ``raise_for_status``), the string "访问网页产生异常" is returned instead,
    so callers must check the result before using response attributes.
    """
    try:
        res = requests.get(url, timeout=30)
        res.raise_for_status()
    except requests.RequestException:
        # Only request-level failures are turned into the error string;
        # KeyboardInterrupt and programming errors are left to propagate.
        return "访问网页产生异常"
    return res
def findimgurl(furl):
    """Collect the non-PNG images referenced on the page at *furl*.

    The page is fetched with ``openurl`` and parsed with BeautifulSoup's
    ``html.parser``.  Only ``<img>`` tags inside the first
    ``<div class="right-side">`` are considered.

    Returns a list of dicts with the keys ``'imgname'`` (the tag's ``alt``
    attribute, which may be ``None``) and ``'imgurl'`` (the tag's ``src``
    with its last five characters removed).  An empty list is returned when
    the page could not be fetched or contains no such ``<div>``.
    """
    res = openurl(furl)
    # openurl reports a failure by returning a message string rather than a
    # response, so anything without a ``text`` attribute means "no page".
    html = getattr(res, "text", None)
    if html is None:
        return []

    soup = BeautifulSoup(html, "html.parser")
    data = soup.find(name='div', attrs={'class': "right-side"})
    if data is None:
        return []

    imgurl = []
    for each in data.find_all("img"):
        src = each.get("src")
        # Skip tags without a usable src, and PNG images.
        if not src or src.endswith('.png'):
            continue
        # NOTE(review): the last five characters of src are cut off --
        # presumably a fixed-length size suffix that follows the file
        # extension (save() takes the extension from the last four
        # characters of the result); confirm against the site's markup.
        imgurl.append({'imgname': each.get("alt"), 'imgurl': src[:-5]})
    return imgurl
-
def save(imgurl):
    """Download every image described in *imgurl* into the working directory.

    *imgurl* is a list of dicts with the keys ``'imgname'`` and ``'imgurl'``.
    Each file is named ``imgname`` plus the last four characters of the URL
    (the extension).  Files that already exist are not downloaded again.

    A failure on one entry no longer stops the remaining downloads.  Returns
    ``None`` when every entry was saved or already present, and the string
    "异常" when at least one entry could not be saved.
    """
    failed = False
    for each in imgurl:
        name = each.get('imgname')
        url = each.get('imgurl')
        if not isinstance(name, str) or not isinstance(url, str):
            # Without a name or URL no target path can be built.
            print("保存失败\n")
            failed = True
            continue

        path = name + url[-4:]
        print(path)
        if os.path.exists(path):
            print("文件已存在\n")
            continue

        img = openurl(url)
        # openurl returns an error string on failure; check for the payload
        # before opening the file so a failed download leaves no empty file
        # behind that later runs would treat as already saved.
        content = getattr(img, "content", None)
        if content is None:
            print("保存失败\n")
            failed = True
            continue

        try:
            with open(path, "wb") as f:
                f.write(content)
        except OSError:
            # e.g. the name contains characters the file system rejects
            print("保存失败\n")
            failed = True
            continue
        print("保存成功\n")
    return "异常" if failed else None
def main(pages=10):
    """Download the images of gallery category pages 1 through *pages*.

    *pages* is the number of category pages to walk, starting at page 1.
    It defaults to 10, the count that used to be hard-coded.  For each page
    the page number is printed, the image list is collected with
    ``findimgurl`` and then handed to ``save``.
    """
    for page in range(1, pages + 1):
        print(page)
        furl = "http://www.dili360.com/gallery/cate/" + str(page) + ".htm"
        imgurl = findimgurl(furl)
        save(imgurl)


# Run the scraper only when the file is executed as a script.
if __name__ == "__main__":
    main()
复制代码
|
|