|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- from requests import get
- from lxml import etree
- import os
# Keep every download in a dedicated "Video" folder: create it if it does not
# exist yet, then make it the working directory so the relative file names
# written later land inside it. exist_ok=True replaces the former bare
# `except:`, which hid every error just to tolerate an existing folder.
os.makedirs("Video", exist_ok=True)
os.chdir("Video")
def open_url(url, timeout=10):
    """Fetch *url* with a desktop-browser User-Agent and return the response.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script indefinitely.

    Returns:
        The response object produced by ``requests.get``; callers in this
        file read ``.text``, ``.json()`` or ``.content`` from it.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/81.0.4044.129 Safari/537.36'
        ),
    }
    return get(url, headers=headers, timeout=timeout)
def get_xs(res):
    """Build the numbered album menu from a listing-page response.

    Args:
        res: Response object whose ``.text`` holds the page HTML.

    Returns:
        A dict mapping 1-based positions to ``(name, url)`` tuples, in the
        order the entries appear on the page.
    """
    base = 'https://www.ximalaya.com/xiangsheng'
    tree = etree.HTML(res.text)
    titles = tree.xpath("//*[@class='general-album-list']/div[@class='content']/ul/li/div/a/span/text()")
    paths = tree.xpath("//*[@class='general-album-list']/div[@class='content']/ul/li/div/a[1]/@href")
    links = [base + path for path in paths]
    # zip() pairs names with links by position; enumerate numbers them from 1.
    return dict(enumerate(zip(titles, links), start=1))
def get_Videourl(nm, hf):
    """Collect the play-API URL and the title of every track on an album page.

    Args:
        nm: Album name. Not used here; kept so existing callers keep working.
        hf: URL of the album page to scan.

    Returns:
        A ``(vdurl, name)`` tuple: the list of play-API URLs (one per track
        link found) and the list of track titles scraped from the same list.
    """
    html = etree.HTML(open_url(hf).text)
    hrefs = html.xpath('//*[@class="sound-list _Qp"]/ul/li/div[2]/a/@href')
    name = html.xpath('//*[@class="sound-list _Qp"]/ul/li/div[2]/a/span/text()')
    # Assumes each link ends with the track id as its last path segment.
    # Splitting on "/" instead of slicing a fixed nine characters keeps ids
    # of any length intact (a shorter id used to drag the "/" along, a longer
    # one was truncated).
    track_ids = [href.rstrip('/').rsplit('/', 1)[-1] for href in hrefs]
    vdurl = [
        'https://www.ximalaya.com/revision/play/v1/audio?id=%s&ptype=1' % track_id
        for track_id in track_ids
    ]
    return vdurl, name
def get_Video(vdurl, nm):
    """Download every track to ``<title>.m4a`` in the current directory.

    Args:
        vdurl: Play-API URLs, one per track.
        nm: Track titles, paired with ``vdurl`` by position. If the two
            lists differ in length, only the pairs present in both are
            processed.

    Each API response is parsed as JSON and the audio address is taken from
    the ``src`` key of a nested object. A track whose response carries no
    usable ``src`` is reported and skipped rather than crashing or reusing
    the previous track's address.
    """
    for i, (url, title) in enumerate(zip(vdurl, nm)):
        res = open_url(url).json()
        # Reset per track so an earlier track's address can never leak in.
        tempurl = None
        for value in res.values():
            # Scalar fields (status codes, messages) cannot hold 'src';
            # only a nested mapping is inspected.
            if isinstance(value, dict) and value.get('src'):
                tempurl = value['src']
        if not tempurl:
            print("未找到音频地址,已跳过:", title)
            continue
        video = open_url(tempurl)
        filename = f"{title}.m4a"
        print("正在下载:", filename)
        print(i)
        with open(filename, 'wb') as f:
            f.write(video.content)
def main():
    """List the available albums, ask which one to fetch, and download it.

    The album menu is scraped from a fixed listing page. The prompt repeats
    until the user enters one of the listed numbers, so a typo no longer
    aborts the script with ValueError or KeyError.
    """
    url = 'https://www.ximalaya.com/xiangsheng/xiangsheng/mr132t2722/'
    result = get_xs(open_url(url))
    if not result:
        # Nothing to choose from; prompting would loop forever.
        print("没有找到任何专辑")
        return
    for index, (album, _) in result.items():
        print(index, end=' ')
        print(album)
    while True:
        try:
            choice = int(input("请选择您要听的专辑序号:"))
        except ValueError:
            print("请输入数字序号")
            continue
        if choice in result:
            break
        print("序号不在列表中,请重新输入")
    album, hf = result[choice]
    vdurl, titles = get_Videourl(album, hf)
    get_Video(vdurl, titles)


if __name__ == "__main__":
    main()
复制代码
我待会把思路发出来,顺便说几个目前的缺陷:
1. 翻页功能还没有写
2. 能爬取到的相声数量太少
这些我都会一一改进,如果有好的建议,请回复我,谢谢 |
评分
-
查看全部评分
|