|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 aa1448176630 于 2020-6-8 22:54 编辑
- import re
- import requests as r
- import time
- import os
def open_url(url, timeout=30):
    """Fetch *url* with a desktop-browser User-Agent and return the response.

    Args:
        url: absolute URL to request.
        timeout: seconds to wait before aborting (new, backward-compatible
            parameter — requests.get without a timeout can block forever).

    Returns:
        The requests.Response object (body not checked for HTTP errors,
        matching the original behavior).
    """
    # Spoof a real browser so image hosts don't reject the scraper.
    head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36 Edg/83.0.478.37'}
    p = r.get(url, headers=head, timeout=timeout)
    return p
def find(p):
    """Extract scheme-less image URLs from a fetched page.

    First tries the woyaogexing-style anchors ('//img2...' hrefs); when that
    yields nothing, falls back to generic https ``<img src=...>`` tags.

    Args:
        p: a response-like object exposing the page HTML via ``.text``.

    Returns:
        A list of URL strings with the leading ``https://`` stripped
        (may be empty when neither pattern matches).
    """
    content = p.text
    matches = re.findall('<a href="//(img2.[^"]+)', content)
    if not matches:
        matches = re.findall(r'<img src="https://([^"]+)', content)
    return matches
def save(link, folder=r'C:\Users\Administrator\Desktop\image'):
    """Download every image in *link* into *folder*.

    Args:
        link: list of URL strings without the ``https://`` prefix,
            as returned by :func:`find`.
        folder: destination directory (new, backward-compatible parameter;
            defaults to the original hard-coded desktop path). Created,
            including parents, if it does not exist.

    Side effects:
        Writes one file per URL (named after the URL's last path segment)
        and prints a progress line per image. Unlike the original, it no
        longer changes the process-wide working directory via os.chdir.
    """
    # makedirs with exist_ok replaces the exists()/mkdir dance and also
    # creates missing parent directories.
    os.makedirs(folder, exist_ok=True)
    for i, each in enumerate(link, start=1):
        each = 'https://' + each
        print(each)
        p = open_url(each)
        # File name = last path segment of the URL.
        path = os.path.join(folder, each.split('/')[-1])
        # 'wb' (was 'wb+'): we only write the binary payload, never read back.
        with open(path, 'wb') as f:
            f.write(p.content)
        print("写入第" + str(i) + "张图片成功")
- if __name__=='__main__':
- url='https://www.woyaogexing.com/touxiang/nv/2020/1005194.html'
- #url='https://www.qqtn.com/article/article_296117_1.html'#链接自行更换 这两个链接仅用于测试
- p=open_url(url)
- link=find(p)
- save(link)
-
复制代码
爬虫新手,仅为交作业;大佬请绕道 |
|