|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
遇到没有尾页链接的网站时,除了在for循环里写死页数,还有没有其它方法?有尾页时我可以通过尾页链接定位总页数;但是没有尾页时,我只能靠肉眼数出总页数。请问有没有更好的方法?
import os
from urllib.parse import urljoin

import requests
from lxml import etree

# Download every video found on the first LAST_PAGE listing pages of
# qiushibaike.com/video/ into SAVE_DIR, naming the files 1.<ext>, 2.<ext>, ...
# in the order they are encountered.

SAVE_DIR = 'c:/视频'
BASE_URL = 'https://www.qiushibaike.com/video/'
UA = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36'}
# Number of listing pages to walk; fixed by hand in this version of the script.
LAST_PAGE = 13
# Seconds to wait on the network; requests blocks indefinitely without one.
TIMEOUT = 30

# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(SAVE_DIR, exist_ok=True)

j = 0  # running count of videos, doubles as the output file name
for i in range(1, LAST_PAGE + 1):
    # Page 1 is the bare listing URL; later pages live under page/<n>/.
    url = BASE_URL if i == 1 else f'{BASE_URL}page/{i}/'
    page = requests.get(url=url, headers=UA, timeout=TIMEOUT)
    # Stop on an HTTP error instead of silently parsing an error page.
    page.raise_for_status()
    e1 = etree.HTML(page.text)
    sources = e1.xpath('//div[@class = "col1 old-style-col1"]//source/@src')
    for src in sources:
        # urljoin resolves protocol-relative ("//host/x.mp4") links against
        # the page URL and leaves already-absolute links untouched.
        mp4 = urljoin(url, src)
        j += 1
        suffix = mp4.split('.')[-1]
        file_path = f'{SAVE_DIR}/{j}.{suffix}'
        video = requests.get(url=mp4, headers=UA, timeout=TIMEOUT)
        # Never write an error body to disk as if it were a video.
        video.raise_for_status()
        with open(file_path, 'wb') as fp:
            fp.write(video.content)
        print(f'第{j}个视频','下载成功!!!')
复制代码
把总页数直接从页面里爬出来就行了:先请求第一页,取倒数第二个 a/span 的文本作为总页数,再用它作为循环的上限:
import os
from urllib.parse import urljoin

import requests
from lxml import etree

# Download every video from all listing pages of qiushibaike.com/video/ into
# SAVE_DIR, naming the files 1.<ext>, 2.<ext>, ... in the order they are
# encountered. The number of pages is read from the first page instead of
# being hard-coded.

SAVE_DIR = 'c:/视频'
BASE_URL = 'https://www.qiushibaike.com/video/'
UA = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36'}
# Seconds to wait on the network; requests blocks indefinitely without one.
TIMEOUT = 30

# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(SAVE_DIR, exist_ok=True)

first_page = requests.get(url=BASE_URL, headers=UA, timeout=TIMEOUT)
# Stop on an HTTP error instead of silently parsing an error page.
first_page.raise_for_status()
first_tree = etree.HTML(first_page.text)

# The page count is taken from the second-to-last <a><span> text node in the
# document, with one character dropped from each end before int().
# NOTE(review): presumably that node is the pager's last numbered link and the
# dropped characters are surrounding whitespace - confirm against the live page.
span_texts = first_tree.xpath(r'//a/span/text()')
try:
    total_pages = int(span_texts[-2][1:-1])
except (IndexError, ValueError) as err:
    # Fail with a readable message rather than a bare indexing/parsing error.
    raise RuntimeError('could not read the total page count from the first page') from err

j = 0  # running count of videos, doubles as the output file name
for i in range(1, total_pages + 1):
    if i == 1:
        # Page 1 was already fetched to read the pager; reuse it.
        url, e1 = BASE_URL, first_tree
    else:
        url = f'{BASE_URL}page/{i}/'
        page = requests.get(url=url, headers=UA, timeout=TIMEOUT)
        page.raise_for_status()
        e1 = etree.HTML(page.text)
    sources = e1.xpath('//div[@class = "col1 old-style-col1"]//source/@src')
    for src in sources:
        # urljoin resolves protocol-relative ("//host/x.mp4") links against
        # the page URL and leaves already-absolute links untouched.
        mp4 = urljoin(url, src)
        j += 1
        suffix = mp4.split('.')[-1]
        file_path = f'{SAVE_DIR}/{j}.{suffix}'
        video = requests.get(url=mp4, headers=UA, timeout=TIMEOUT)
        # Never write an error body to disk as if it were a video.
        video.raise_for_status()
        with open(file_path, 'wb') as fp:
            fp.write(video.content)
        print(f'第{j}个视频','下载成功!!!')
复制代码
|
|