关于爬取网易云音乐的精彩评论
代码哪里错了呀?res.txt 里面写着 {"msg":"资源不存在","code":404}
import requests
import json
def get_comments(url):
    """Fetch the raw comment response for the song referenced by *url*.

    Only the numeric ``id`` query parameter of *url* is used to build the
    request, so additional parameters such as ``&market=baiduqk`` do not
    leak into the resource path.

    Args:
        url: Address of a song page, for example
            ``https://music.163.com/#/song?id=4466775&market=baiduqk``.

    Returns:
        The ``requests.Response`` returned by the POST to the comments
        endpoint; the caller decides how to decode it.

    Raises:
        ValueError: If *url* contains no numeric ``id`` parameter.
    """
    # Imported locally so this function does not rely on the file's import header.
    import re

    # ``url.split("=")`` yields a list; formatting that list (or a piece that
    # still carries "&market") into the URL gives a malformed resource path.
    # Pick out just the digits that follow ``id=`` instead.
    id_match = re.search(r"(?:^|[?&])id=(\d+)", url)
    if id_match is None:
        raise ValueError(f"no numeric song id found in URL: {url!r}")
    name_id = id_match.group(1)

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
        "referer": f"https://music.163.com/song?id={name_id}&market=baiduqk",
    }
    # Fixed form fields, reproduced verbatim.
    # NOTE(review): presumably an encrypted payload captured from the web
    # client -- confirm what it encodes before changing it.
    params = "YgwTgn7hjNHHvsS+rauOW8BY9oAq7A3TKPW0f43JxhWM7Of3PfrHecnSmf6OM10XIAhQnXJSn9gLPJcY6CYn7K9VY/O5DBPH9jI5aN8jx0Ina07EFFIhpp9OGLhPcCkBsHJEoEUoJJs8lWDi0malGxIL0zm5FO13B3bQEZWAPAIoIJ+l6a1BBPU2Zg+RnZwO"
    encSecKey = "2891949074c6f52a42c9498e29597d80d9dc1f267361dc64b7ba4fc5c300e546347fece88ac7b2eeba6713a0bc601a99d720cc96845d062d41970de17fcde4632ae590b5d1c9d8ac39a585f80507e78f344a023153f452ea100a9295a15ac6df08b6ff4dbdba41da7e656b8fd371333d529d6c21d1e7c8285e2e3439eb693639"
    data = {
        "params": params,
        "encSecKey": encSecKey,
    }
    target_url = f"https://music.163.com/weapi/v1/resource/comments/R_SO_4_{name_id}?csrf_token="
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.post(target_url, headers=headers, data=data, timeout=10)
    return res
def main():
    """Ask for a song link, fetch its comments and save the response text.

    The text of the response returned by ``get_comments`` is written to
    ``res.txt`` (UTF-8), replacing any previous content.
    """
    link = input("请输入链接地址:")
    response = get_comments(link)
    with open("res.txt", mode="w", encoding="utf-8") as out_file:
        out_file.write(response.text)
if __name__ == "__main__":
    main()

关键你的输入链接是什么呢
e:\>python ex13.py
请输入链接地址:https://music.163.com/#/playlist?id=442920217
e:\>type res.txt
{"isMusician":false,"userId":-1,"topComments":[],"moreHot":false,"hotComments":[],"code":200,"comments":[],"total":0,"more":false}
e:\> wp231957 发表于 2020-2-10 18:28
关键你的输入链接是什么呢
e:\>python ex13.py
我输入的是:https://music.163.com/#/song?id=4466775&market=baiduqk wp231957 发表于 2020-2-10 18:28
关键你的输入链接是什么呢
e:\>python ex13.py
但是里面也没有评论啊 我这里输入 https://music.163.com/#/song?id=4466775 正常 zltzlt 发表于 2020-2-10 18:44
我这里输入 https://music.163.com/#/song?id=4466775 正常
把 res.txt 的内容贴上来 一个账号 发表于 2020-2-10 18:49
把 res.txt 的内容贴上来
{"isMusician":false,"userId":-1,"topComments":[],"moreHot":true,"hotComments":[{"user":{"locationInfo":null,"liveInfo":null,"experts":null,"authStatus":0,"vipRights":{"associator":{"vipCode":100,"rights":true},"musicPackage":null,"redVipAnnualCount":-1},"userId":2681655,"userType":0,"nickname":"生榨椰子汁-","vipType":11,"remarkName":null,"expertTags":null,"avatarUrl":"https://p1.music.126.net/m3fDd47thwDPeUzrozF-GQ==/109951164044193670.jpg"},"beReplied":[],"pendantData":null,"showFloorComment":null,"status":0,"commentId":5188239,"content":"一个便秘者的自我挣扎","time":1414331909833,"likedCount":559762,"expressionUrl":null,"commentLocationType":0,"parentCommentId":0,"decoration":null,"repliedMark":null,"liked":false},{"user":{"locationInfo":null,"liveInfo":null,"experts":null,"authStatus":0,"vipRights":{"associator":{"vipCode":100,"rights":true},"musicPackage":null,"redVipAnnualCount":1},"userId":32984063,"userType":0,"nickname":"凡城","vipType":11,"remarkName":null,"expertTags":null,"avatarUrl":"https://p2.music.126.net/vYJGqvVQ8hn8WjMP61_ykg==/8915939789756522.jpg"},"beReplied":[],"pendantData":{"id":6004,"imageUrl":"http://p1.music.126.net/yf9RxFQt9GEJYvEprUcjfw==/109951163313127249.jpg"},"showFloorComment":null,"status":0,"commentId":5669637,"content":"第一:不要自己一个人听;第二:不要在深夜听;第三:用音箱的不要吵到邻居;第四:有心脑血管疾病的人不要听;第五:怕鬼的不要听;第六:我相信这首纯音乐对于一部分人来说真的很好听,是绝对的天籁之音,但是请原谅我听歌少没文化,当她唱第一个音的时候我以为我音箱坏了,之后我意识到,此乃神人。","time":1415681299230,"likedCount":352343,"expressionUrl":null,"commentLocationType":0,"parentCommentId":0,"decoration":null,"repliedMark":null,"liked":false},{"user":{"locationInfo":null,"liveInfo":null,"experts":null,"authStatus":0,"vipRights":null,"userId":41849588,"userType":0,"nickname":"Sehunrise","vipType":0,"remarkName":null,"expertTags":null,"avatarUrl":"https://p2.music.126.net/GgU4rYOEEE-9GIQM9Prpqg==/10995...
后面的太长,省略了 zltzlt 发表于 2020-2-10 18:49
后面的太长,省略了
我之前输入 https://music.163.com/#/song?id=4466775&market=baiduqk 不行,现在输入 https://music.163.com/#/song?id=4466775 就正常了,这是为什么? 一个账号 发表于 2020-2-10 18:52
我之前输入 https://music.163.com/#/song?id=4466775&market=baiduqk 不行,现在输入 https://music.163 ...
可能是多了一个 market=baiduqk 参数吧。 zltzlt 发表于 2020-2-10 18:53
可能是多了一个 market=baiduqk 参数吧。
改成这样就不会了:
import requests
def get_comments(url):
    """Request the comments of the song identified by the ``id`` in *url*.

    The song id is taken from the ``id=<digits>`` query parameter, so the
    link may carry further parameters (e.g. ``&market=baiduqk``) in any
    order without affecting the request.

    Args:
        url: Song page link such as
            ``https://music.163.com/#/song?id=4466775&market=baiduqk``.

    Returns:
        The ``requests.Response`` object produced by the POST request.

    Raises:
        ValueError: If no numeric ``id`` parameter can be found in *url*.
    """
    # Function-scope import: keeps the block usable with the existing file header.
    import re

    # ``url.split("=")`` is a list, which has no ``isdigit`` method, and
    # filtering that list with ``str.isdigit`` keeps whole all-digit items
    # rather than digit characters. Match the id directly instead.
    found = re.search(r"(?:^|[?&])id=(\d+)", url)
    if found is None:
        raise ValueError(f"no numeric song id found in URL: {url!r}")
    name_id = found.group(1)

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
        "referer": f"https://music.163.com/song?id={name_id}&market=baiduqk",
    }
    # Form fields sent unchanged with every request.
    # NOTE(review): looks like an encrypted payload copied from the browser --
    # verify its meaning before editing.
    params = "YgwTgn7hjNHHvsS+rauOW8BY9oAq7A3TKPW0f43JxhWM7Of3PfrHecnSmf6OM10XIAhQnXJSn9gLPJcY6CYn7K9VY/O5DBPH9jI5aN8jx0Ina07EFFIhpp9OGLhPcCkBsHJEoEUoJJs8lWDi0malGxIL0zm5FO13B3bQEZWAPAIoIJ+l6a1BBPU2Zg+RnZwO"
    encSecKey = "2891949074c6f52a42c9498e29597d80d9dc1f267361dc64b7ba4fc5c300e546347fece88ac7b2eeba6713a0bc601a99d720cc96845d062d41970de17fcde4632ae590b5d1c9d8ac39a585f80507e78f344a023153f452ea100a9295a15ac6df08b6ff4dbdba41da7e656b8fd371333d529d6c21d1e7c8285e2e3439eb693639"
    data = {
        "params": params,
        "encSecKey": encSecKey,
    }
    target_url = f"https://music.163.com/weapi/v1/resource/comments/R_SO_4_{name_id}?csrf_token="
    # Bound the wait so a stalled connection cannot block the script indefinitely.
    res = requests.post(target_url, headers=headers, data=data, timeout=10)
    return res
def main():
    """Read a link from the user and store the fetched response in res.txt.

    The link is passed to ``get_comments``; the ``text`` of the returned
    response is written to ``res.txt`` using UTF-8, overwriting the file.
    """
    response = get_comments(input("请输入链接地址:"))
    body_text = response.text
    with open("res.txt", "w", encoding="utf-8") as dump:
        dump.write(body_text)
if __name__ == "__main__":
    main()

target_url = f"https://music.163.com/weapi/v1/resource/comments/R_SO_4_{name_id}?csrf_token="
这个网址前边为啥+f呀?????
页:
[1]