|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import requests
- import parsel
- import re
# Send a browser-like User-Agent instead of the default python-requests one.
headers = {
    "User-Agent": "Mozilla / 5.0(Windows NT 10.0; Win64; x64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 80.0.3987.122 Safari / 537.40"
}
url = 'https://www.leshe.us/?ref=214'  # leshe.us landing page
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url=url, headers=headers, timeout=10)
selector = parsel.Selector(response.text)
# Presumably one <article> per album card on the landing page.
lis = selector.css('.row.posts-wrapper div article')
for li in lis:
    urls2 = li.css('a::attr(href)').get()
    if not urls2:
        # .get() returns None when the article has no link; requests would
        # raise on a None URL, so skip this card.
        continue
    response2 = requests.get(url=urls2, headers=headers, timeout=10)
    text = response2.text
    print(text)
    print('--------------------------')
    # Capture the data-srcset value on its own. Requiring ` title` to follow
    # immediately yielded an empty list, so the pattern no longer depends on
    # which attribute comes next.
    photo = re.findall(r'data-srcset="(.*?)"', text)
    print(photo)
复制代码
获取不到网址,photo返回的是空列表
本帖最后由 suchocolate 于 2021-8-21 15:20 编辑
- # _*_ coding: utf-8 _*_
- # Developer: suchocolate
- # Date: 2021-08-21 14:26
- # File name: test.py
- # Development tool: PyCharm
- import requests
- import re
- import os
- import time
# Link to an album page inside a listing page.
_ALBUM_LINK_RE = re.compile(r'<a target="_blank" href="(.*?\.html)"')
# Lazy-loaded image URL inside an album page.
_IMAGE_SRC_RE = re.compile(r'data-srcset="(.*?)"')


def _collect_matches(urls, pattern, headers, timeout=5, delay=0.3):
    """Fetch every URL and return all regex captures, de-duplicated in order.

    A page that fails (connection error, timeout, HTTP error status) is
    reported with ``print`` and skipped so the crawl continues.
    """
    found = []
    for url in urls:
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
            resp.raise_for_status()
        except requests.RequestException as e:
            print(e)
            continue
        found.extend(pattern.findall(resp.text))
        time.sleep(delay)  # small pause between successful requests
    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(found))


def _file_name_from_url(url, fallback):
    """Return the last path segment of *url* without query/fragment."""
    path = re.split(r'[?#]', url, maxsplit=1)[0]
    return path.rstrip('/').split('/')[-1] or fallback


def _download_images(urls, out_dir, headers, timeout=10):
    """Download each image into *out_dir* and return the number saved."""
    saved = 0
    for index, url in enumerate(urls, start=1):
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
            # Without this an error page would be written to disk as an image.
            resp.raise_for_status()
        except requests.RequestException as e:
            # One bad image must not abort the remaining downloads.
            print(e)
            continue
        jn = _file_name_from_url(url, fallback=f'image_{index}')
        with open(os.path.join(out_dir, jn), 'wb') as f:
            f.write(resp.content)
        saved += 1
        print(f'已下载{jn}, 共下载{saved}张图片。')
    return saved


def main(pages=5, out_dir='pics'):
    """Crawl album listings on leshe.us and download the images they reference.

    Steps:
      1) Collect album URLs from the listing pages.
      2) Visit each album and collect its image URLs.
      3) Download every image into *out_dir*.

    :param pages: number of listing pages to crawl (default 5).
    :param out_dir: directory the images are written to (default ``'pics'``);
        created if missing. The process working directory is left unchanged,
        so calling ``main`` repeatedly does not nest directories.
    :return: None
    """
    os.makedirs(out_dir, exist_ok=True)
    headers = {'user-agent': 'firefox'}

    print('获取相册url...')
    # NOTE(review): numbering starts at 0 as in the original script. If the
    # site's pagination is 1-based, page 0 may repeat page 1 (duplicates are
    # dropped anyway) -- confirm and switch to range(1, pages + 1) if so.
    listing_urls = [f'https://www.leshe.us/page/{pn}' for pn in range(pages)]
    pl = _collect_matches(listing_urls, _ALBUM_LINK_RE, headers)

    print('获取图片url...')
    jl = _collect_matches(pl, _IMAGE_SRC_RE, headers)

    print(f'共{len(jl)}张图片,开始下载!')
    _download_images(jl, out_dir, headers)


if __name__ == '__main__':
    main()
复制代码
|
|