|
发表于 2023-3-4 12:32:34
|
显示全部楼层
你先- import requests
- import re
- from time import sleep
- from lxml import html
- etree = html.etree
# Browser-like request headers sent with every HTTP request in this script.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
}

# Site root; episode links on the listing page are site-relative paths.
BASE_URL = 'https://www.ting456.com'
# Book page listing every episode (title + link to its play page).
url_01 = 'https://www.ting456.com/book/d45049.html'
# Endpoint that answers with a snippet containing the mp3 address.
url_03 = 'https://www.ting456.com/js/player/play.php'

# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Splits a play path such as "/play/45049-0-12.html" into
# (prefix, episode index, suffix). The index is matched with \d+ so that
# indexes of 10 and above are handled as well as single digits.
EPISODE_PATH_RE = re.compile(r'(/play/\d+?-\d-)(\d+)(\.html)')
# Characters stripped from a title before it is used as a file name.
ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def next_episode_path(play_path):
    """Return *play_path* with its trailing episode index incremented by one.

    Returns None when *play_path* does not look like
    ``/play/<digits>-<digit>-<digits>.html``.
    """
    match = EPISODE_PATH_RE.search(play_path)
    if match is None:
        return None
    prefix, index, suffix = match.groups()
    return f"{prefix}{int(index) + 1}{suffix}"


def safe_filename(title):
    """Return *title* with the characters ``\\ / : * ? " < > |`` removed."""
    return ILLEGAL_FILENAME_CHARS.sub("", title)


def fetch_text(url, params=None):
    """GET *url* with the shared headers and return the response body as text.

    Raises the ``requests`` exception for a timeout or an HTTP error status
    instead of handing an error page to the parsing code.
    """
    response = requests.get(url=url, headers=header, params=params,
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def download_episode(li):
    """Download the audio for one ``<li>`` entry of the episode listing.

    The file is written to the current working directory as
    ``<sanitized title>.mp3``. Entries whose link does not match the expected
    play-path pattern are reported and skipped.
    """
    title = li.xpath('./a/text()')[0]
    play_path = li.xpath('./a/@href')[0]

    # The player endpoint is sent the play path with its index incremented.
    # NOTE(review): presumably this is the "next episode" path - confirm
    # against the site's player script.
    x_param = next_episode_path(play_path)
    if x_param is None:
        print(title, "skipped: unexpected link", play_path)
        return

    # The play page embeds the dynamic `now="..."` value in the first script
    # under the element with id "player".
    play_tree = etree.HTML(fetch_text(BASE_URL + play_path))
    script = play_tree.xpath('//*[@id="player"]/script[1]/text()')[0]
    now_value = re.findall('now="(.+?)"', script, re.S)[0]

    # Ask the player endpoint for the actual audio address.
    parms = {
        "url": now_value,
        "from": "xima",
        "s": "undefined",
        "x": x_param,
    }
    result = fetch_text(url_03, params=parms)
    audio_url = re.findall('mp3:"(.*?)"', result, re.S)[0]

    # Fetch the audio bytes and save them.
    audio = requests.get(url=audio_url, headers=header,
                         timeout=REQUEST_TIMEOUT)
    audio.raise_for_status()
    title = safe_filename(title)
    # The context manager closes the file even if the write fails.
    with open(title + '.mp3', 'wb') as f:
        f.write(audio.content)
    print(title, "保存完毕")


def main():
    """Parse the book page and download every episode it lists."""
    listing = etree.HTML(fetch_text(url_01))
    for li in listing.xpath('//*[@id="xima"]/div/li'):
        download_episode(li)


if __name__ == "__main__":
    main()
复制代码 爬一爬,我这边没出错。你试试,要是出错了我给你爬。 |
|