|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
大家好,我按照小甲鱼《零基础入门学习Python》那本书上的方法试图爬取网易云音乐的热评,但是按照小甲鱼的做法,加入请求头 User-Agent 和 Referer 以及请求参数 params 和 encSecKey 之后,爬取出来的文档依然和之前一样;我也试着把 url 里的 weapi 改成了 api,得到的文档依然爬取不到评论,这是为什么?
以下是代码:
import requests
def get_url(url):
    """Request the comments endpoint for a NetEase Cloud Music song page URL.

    The numeric song id is taken from the ``id=`` query parameter of *url*,
    substituted into the comments endpoint address, and that address (not the
    song page itself) is requested with POST using browser-like headers and
    the captured ``params`` / ``encSecKey`` form fields.

    Args:
        url: Song page address, e.g.
            ``https://music.163.com/song?id=1451218149``.

    Returns:
        The ``requests.Response`` object returned for the comments endpoint.

    Raises:
        ValueError: If *url* contains no numeric ``id=`` query parameter.
        requests.RequestException: If the request fails or times out.
    """
    import re  # local import: only used here to pull the song id out of the URL

    # Match the id parameter explicitly so extra parameters after it
    # (e.g. "...?id=123&userid=456") do not leak into the song id.
    id_match = re.search(r'[?&]id=(\d+)', url)
    if id_match is None:
        raise ValueError('no numeric "id=" parameter found in URL: {0!r}'.format(url))
    name_id = id_match.group(1)

    # NOTE(review): this cookie is a captured login session (it also carries
    # the __csrf value used in target_url below). It is kept as pasted, but a
    # credential like this should not live in source and will expire.
    # The paste inserted spaces around '=', '%' and '-'; they are stripped
    # below so the header carries the cookie in its intended form.
    raw_cookie = 'WM_TID = v4hnbfHbm7VAFBAEVANt % 2F % 2FCEAtBw0QIv;__gads = ID = 9df887e268c87b5d: T = 1552790821:S = ALNI_MZPvwNXrqaXSZWwnvbfVcjGdHQ_cg;_ngd_tid = LIDhA5UA % 2Bo405ordyNVBAJK4CyVxgzeG;_ntes_nnid = b5dbf1f150da07d75530adc76299beba, 1588903655730;_ntes_nuid = 352446f43074e5123625a4bd227787b4;vinfo_n_f_l_n3 = b93e53fbdd61674d.1.3.1552790823174.1568028277918.1575093384414;MUSICIAN_COMPANY_LAST_ENTRY = 132217143_musician;MUSICIAN_COMPANY_LAST_ENTRY.sig = PTJcCbBEclLCUPV3qRXELXV8OUiN64VtZOaAOZhSTCc;ntes_kaola_ad = 1;MUSIC_U = abdca9af4324f26e5084e54a5498df708672ffefed2613721443ea9104bca3ef33a649814e309366;__remember_me = true;__csrf = 05567fdd039dbec08fbbe6e34ea0bbc6;_iuqxldmzr_ = 32;JSESSIONID - WYYY = T % 2FSzCGrFp8 % 2FNjrVGzhWG % 2BtnFYa % 2BiHe3evBHYtNwSiZCH0h % 5CfMnghUpDNSQXXvqBml % 2FU1Xc1FEj6dqAp9qiQY56BrOy985zADetwBovRkEPsOjtzR4veslisXgnKN4gKt5GoKGGYItM5ZvB1pACtKNcNaweEnKlKkgiAx % 2BPdiG % 2FwgKyVB % 3A1591532258872;WM_NI = IS0i9e8YQAUU7IXzO % 2FfmYc1VXADdVTMvxK % 2FktguK7mgSyUlwSmuLwckItCwgyY7279HxmnoSU0Yf4iZ6I % 2B % 2BcP3 % 2FlxUqPqlaw3WIfeqfhh6Qb4HA1 % 2B8jtoslLnjkpxoLaRWc % 3D;WM_NIKE = 9ca17ae2e6ffcda170e2e6eeb4d574a6b99b87dc3af6a88ba7d14b929e8aaff5509a99fb88f37a8cb1abdacf2af0fea7c3b92a8b8d008abc6ffc879fa2d85ff597989bce25a6b39784b45e9c9f8c85d5608faab8acae61b8b9bbacb667a58db7b6ef7d91b3bbadf669a997bca4f66dbb87f9d3b6338ca8a38fee5997f09787b7639aea87ccaa6994ede18dfb5fb390bf8de47df1898787e87ba6b8a8d7d46af8988fd2aa34aaaca194ef48aeb6a591fb6590bf9eb8e237e2a3'

    # Header names and values are written without the stray spaces the paste
    # introduced (e.g. 'accept - encoding'), which made them meaningless.
    # No content-length is set: requests computes it from the form body, and
    # the captured value (528) belonged to a different payload.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36 Edg/83.0.478.45',
        'referer': 'https://music.163.com/song?id=1451218149',
        'accept': '*/*',
        # 'br' is left out on purpose: a brotli-compressed reply can only be
        # decoded when a brotli package is installed, otherwise res.text is
        # unreadable.
        'accept-encoding': 'gzip, deflate',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'content-type': 'application/x-www-form-urlencoded',
        'cookie': raw_cookie.replace(' ', ''),
        'origin': 'https://music.163.com',
        'pragma': 'no-cache',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
    }

    # Form fields captured from the browser request; sent unchanged.
    data = {
        'params': 'tD6sLYIqR89JQWb03QO/Yoiga0O80c3cum20/UVmaTqIfjNoJDWSGo/ew5iCuonKF5FJ1L46OG+y0D1xePGFhthV2au7MtpyD7u8os8F/bYRINPBlvKGfnmUPuGyHr1Ai56RpRmefdJcd3cuPvWcZNQmv/y2fRGym2XqSordWmny+LmMQwUxgt6rpQ8isxNhtfx+fbUMABRgTrzaAUJoCiceLwzcP9xL2d7T3MZbzBA=',
        'encSecKey': 'bfacf0eb9433fd15f03f9537860559e513e0a88e869cb7227c76b70f265373b8b4ca1d64f35ff88c8f1cbcca2a3f2fabbbb94fd7505d15c7688f19ba0c9d279810e35ae08e508c809d3d8761804b3d239364b360d4b5da6865b1b0058df65ba23cd3fae2a4746cfeb246690fd90233069de29d69cf9de95ae001f86596b9e9b5',
    }

    target_url = 'https://music.163.com/api/v1/resource/comments/R_SO_4_{0}?csrf_token=05567fdd039dbec08fbbe6e34ea0bbc6'.format(name_id)
    print(target_url)

    # Post to the comments endpoint. The original posted to `url` (the song
    # page), which is why the saved document never contained any comments.
    # The timeout keeps a stalled connection from hanging the script forever.
    res = requests.post(target_url, headers=headers, data=data, timeout=10)
    return res
def main():
    """Prompt for a song page URL, fetch it with get_url, and save the reply.

    The response text is written to ``q.txt`` in the current working
    directory, replacing any existing file of that name.
    """
    page_url = input('请输入网址:')
    response = get_url(page_url)
    # Mode 'w' truncates the file first; the text is stored as UTF-8.
    with open('q.txt', 'w', encoding='utf-8') as out_file:
        out_file.write(response.text)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|