很抱歉,我给出的代码存在一些错误。下面是修复过的版本:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import time
def get_data_of_music_list_detail_page():
    """Scrape the detail page of every playlist listed in ``music_list.csv``.

    Reads ``music_data/new/music_list.csv`` (no header row); the first column
    of each row is appended to ``https://music.163.com`` to form the page URL
    (presumably a relative playlist link -- confirm against whatever writes
    that file).

    For each page that can be fetched and parsed, one row
    ``title, tags, description, favourites, plays, song count, comment count``
    is appended to ``music_data/new/music_detail.csv`` and every song name is
    appended, one per line, to ``music_data/new/music_name.csv``.

    Pages that fail to download, or that lack one of the required elements,
    are reported on stdout and skipped so one bad playlist does not abort the
    whole run.

    Returns:
        None. The results are the two CSV files described above.
    """
    # Function-scope import so this block does not depend on an edit to the
    # file's import section.
    import csv

    # 'skip' drops malformed rows. The previous value (None) is not an
    # accepted option and makes read_csv raise ValueError.
    df = pd.read_csv('music_data/new/music_list.csv', header=None, on_bad_lines='skip')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    print("正在获取歌单详情页的信息...")

    # Cosmetic start-up progress bar: it is driven by a fixed sleep, not by
    # the real scraping progress below.
    t = 60
    start = time.perf_counter()
    for i in range(t + 1):
        finsh = "=" * i
        need_do = "-" * (t - i)
        progress = (i / t) * 100
        dur = time.perf_counter() - start
        print("\r{:^3.0f}%[{}->{}]{:.2f}s".format(progress, finsh, need_do, dur), end="")
        time.sleep(0.02)

    for _, row in df.iterrows():
        # Pause between requests to avoid hammering the server.
        time.sleep(2)
        url = 'https://music.163.com' + row[0]
        try:
            # A timeout keeps one stalled connection from hanging the run.
            response = requests.get(url=url, headers=headers, timeout=10)
            soup = BeautifulSoup(response.text, 'html.parser')

            # Playlist title.
            title = soup.select('.tit h2')[0].get_text()
            # Tags joined with '-'; a playlist without tags gets the same
            # placeholder the description uses instead of raising IndexError.
            tags = [tag.get_text() for tag in soup.select('.tags .u-tag')]
            tag = '-'.join(tags) if tags else '无'
            # Playlist description, flattened to a single line.
            desc_nodes = soup.select('#album-desc-more')
            text = desc_nodes[0].get_text().replace('\n', '') if desc_nodes else '无'
            # Favourite count, with the surrounding parentheses stripped.
            collection = (soup.select('#content-operation span')[1]
                          .get_text().replace('(', '').replace(')', ''))
            # Play count.
            play = soup.select('.play span')[0].get_text()
            # Number of songs in the playlist.
            songs = soup.select('.sub.s-fc3')[0].get_text()
            # Number of comments.
            comments = soup.select('#cnt_comment_count')[0].get_text()
        except (requests.RequestException, IndexError) as exc:
            # IndexError means an expected element is missing from the page.
            print('\n跳过 {}: {!r}'.format(url, exc))
            continue

        # Echo the scraped details on the current console line.
        print('\r', title, tag, text, collection, play, songs, comments, end='', flush=True)

        # csv.writer quotes fields containing commas or quotes, so free text
        # in the title or description cannot shift the columns.
        with open('music_data/new/music_detail.csv', 'a+', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(
                [title, tag, text, collection, play, songs, comments])

        # Song names of this playlist.
        music_names = [li.get_text() for li in soup.select('.f-hide li a')]
        # Terminate every name with a newline so the last song of one
        # playlist does not run into the first song of the next.
        with open('music_data/new/music_name.csv', 'a+', encoding='utf-8-sig') as f:
            f.writelines(name + '\n' for name in music_names)

    print("\n已获取歌单详情页的信息,保存至 music_data/new/music_name.csv")
此优化版本对于循环中的进度条输出进行了改进,同时修正了一些数据提取和写入CSV文件的问题。注意,在运行之前请确保已安装所需的库(beautifulsoup4、pandas、requests)。
以上回复来自 FishC AI 小助理,如未能正确解答您的问题,请继续追问。