# Try the following script first:
import requests
import re
from time import sleep
from lxml import html
etree = html.etree
# Browser-like User-Agent sent with every request made by this script.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
}
# Book page whose "xima" list holds one <li> per episode (title + play-page link).
url_01 = 'https://www.ting456.com/book/d45049.html'

SITE_ROOT = 'https://www.ting456.com'
PLAYER_ENDPOINT = 'https://www.ting456.com/js/player/play.php'
# Seconds to wait on any single HTTP request before giving up, so one stalled
# connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 30

# Compiled once instead of on every loop iteration.  The episode number group
# is `\d+` (not a single `\d`) so that episodes 10 and above still match.
EPISODE_HREF_RE = re.compile(r'(/play/\d+?-\d-)(\d+)(\.html)')
NOW_RE = re.compile(r'now="(.+?)"', re.S)
MP3_RE = re.compile(r'mp3:"(.*?)"', re.S)
UNSAFE_FILENAME_RE = re.compile(r'[\\/:*?"<>|]')


def next_episode_path(href):
    """Return the play-page path in *href* with its episode number plus one.

    The result is sent as the ``x`` parameter of the player request.
    Returns ``None`` when *href* contains no ``/play/<id>-<n>-<episode>.html``
    path.
    """
    match = EPISODE_HREF_RE.search(href)
    if match is None:
        return None
    prefix, number, suffix = match.groups()
    return f"{prefix}{int(number) + 1}{suffix}"


def safe_filename(title):
    """Return *title* with the characters ``\\ / : * ? " < > |`` removed."""
    return UNSAFE_FILENAME_RE.sub("", title)


def first_or_raise(items, what):
    """Return ``items[0]``, or raise ``ValueError`` naming *what* if it is empty."""
    if not items:
        raise ValueError(f"{what} not found")
    return items[0]


def fetch(url, **kwargs):
    """GET *url* with the shared headers and timeout; raise on an HTTP error status."""
    response = requests.get(url=url, headers=header, timeout=REQUEST_TIMEOUT, **kwargs)
    # Without this check an error page would be parsed (or saved as .mp3) as
    # if it were the real payload.
    response.raise_for_status()
    return response


def download_episode(title, href):
    """Download the episode linked by *href* and save it as ``<title>.mp3``.

    Args:
        title: Episode title; unsafe filename characters are stripped.
        href: Site-relative link to the episode's play page.

    Returns:
        The name of the file that was written.

    Raises:
        ValueError: If an expected value cannot be found in *href* or in a
            response.
        requests.RequestException: If any HTTP request fails.
    """
    don_02 = next_episode_path(href)
    if don_02 is None:
        raise ValueError(f"unexpected episode link {href!r}")

    # The play page embeds a `now="..."` value in the first <script> under
    # the element with id "player".
    play_tree = etree.HTML(fetch(SITE_ROOT + href).text)
    script = first_or_raise(
        play_tree.xpath('//*[@id="player"]/script[1]/text()'), "player script"
    )
    don_01 = first_or_raise(NOW_RE.findall(script), "'now' value")

    # The player endpoint's response text contains `mp3:"<audio url>"`.
    parms = {
        "url": don_01,
        "from": "xima",
        "s": "undefined",
        "x": don_02,
    }
    result = fetch(PLAYER_ENDPOINT, params=parms).text
    url_04 = first_or_raise(MP3_RE.findall(result), "mp3 URL")

    con = fetch(url_04).content
    name = safe_filename(title) + '.mp3'
    # `with` guarantees the file is flushed and closed for every episode.
    with open(name, 'wb') as f:
        f.write(con)
    return name


def main():
    """Download every episode listed on the book page at ``url_01``."""
    tree = etree.HTML(fetch(url_01).text)
    for li in tree.xpath('//*[@id="xima"]/div/li'):
        titles = li.xpath('./a/text()')
        hrefs = li.xpath('./a/@href')
        if not titles or not hrefs:
            print("skipped a list entry without a title or link")
            continue
        title = titles[0]
        try:
            download_episode(title, hrefs[0])
        except (requests.RequestException, ValueError, OSError) as exc:
            # One bad episode should not abort the remaining downloads.
            print(title, "failed:", exc)
            continue
        print(safe_filename(title), "保存完毕")


if __name__ == "__main__":
    main()
# Give it a run: it works without errors on my side. Try it, and if it fails for you, I can run the scrape for you.