马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 Stubborn 于 2019-4-14 04:09 编辑
这里话不多说,接上一个帖子,用了一些jsonpath的方法,抓取评论
# -*- coding: utf-8 -*-
# @Time : 2019-04-08 00:08
# @Author : Ein
# @File : Jsonpath-淘宝评论.py
# @Software: PyCharm
import json

import jsonpath
import requests
# Raw value sent as the `Cookie` request header by main().
# NOTE(review): this looks like a session cookie captured from a browser;
# presumably it expires and has to be replaced with a fresh one before the
# script works again - confirm. Real session cookies should not be committed
# or posted publicly.
COOKIE = 'thw=cn; cna=MsgjFQjusQICATsqbz+EqQnp; t=f7dd5ba1920f8f8d1073a0578ddd191a; cookie2=11db44742a4b63d88b21b2d5865812d7; _tb_token_=e7f5495003e07; _cc_=V32FPkk%2Fhw%3D%3D; tg=0; enc=GeRVzabt5E1wETGR23fpBaRcJHDXCSf2TfqKH%2FSMRW9qqLMo7NAnamB0ogz%2B5dVVecYMMX%2BOyb67mCh%2Fn5OMiA%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; mt=ci=0_0; v=0; uc1=cookie14=UoTZ4Mn%2BO0mr1A%3D%3D; _m_h5_tk=26f291651bdf878bbacdd979958f0c28_1554672819450; _m_h5_tk_enc=e238ec088024e28c6f090baaa0e6865d; x5sec=7b22726174656d616e616765723b32223a223539363864363064373935353564626463383335393430366466373739616531434f796871655546454c72373776652f6a7375427851453d227d; isg=BIWF8Aqnda6nDlHELahBZUx2lMd_6jm89Rqi1IfqQbzLHqWQT5JJpBN8LAJNRVGM; l=bBEoNHAVv4TUzZ89BOCanurza77OSIRYYuPzaNbMi_5ZV6T1-Z_OlG8EVF96Vj5R_O8B4fxkHvv9-etbq'
'''
接口:currentPageNum 翻页,pageSize 每页数量
https://rate.taobao.com/feedRateList.htm?auctionNumId=521603418228&userNumId=651615231&currentPageNum=2&pageSize=20
'''
def main():
    """Fetch one page of Taobao item reviews and print each review as a dict.

    Requests page 1 (20 reviews per page) of the review-list endpoint for a
    hard-coded item, strips the parenthesised wrapper around the JSON body,
    and prints one dict per review with the avatar URL, nickname, review
    text, review date and purchased package (SKU) type.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status.
        ValueError: if the response body is not valid JSON after stripping.
        KeyError: if the decoded payload has no 'comments' entry.
    """
    # The page parameter must be spelled `&currentPageNum`; the pasted source
    # had it mangled to `¤tPageNum` (the `&curren` HTML entity got decoded),
    # which silently dropped the paging parameter from the query string.
    url = (
        'https://rate.taobao.com/feedRateList.htm'
        '?auctionNumId=521603418228&userNumId=651615231'
        '&currentPageNum=1&pageSize=20'
    )
    headers = {
        # Header name must be exactly 'User-Agent'; 'User - Agent' is sent as
        # an unrelated header and leaves the default requests UA in place.
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
        'Cookie': COOKIE,
    }
    # Without a timeout requests can block forever on a stalled connection.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()

    # Strip the surrounding parentheses and whitespace so that only the JSON
    # text remains before decoding.
    payload = json.loads(response.text.strip('() \n\t\r'))

    # A null 'comments' value is treated as "no reviews on this page".
    comments_list = payload['comments'] or []

    # For every review collect: avatar, user name, review text, review time
    # and package type.
    for comments in comments_list:
        # jsonpath.jsonpath() returns False (not an empty list) when nothing
        # matches, so guard before indexing.
        avatar_matches = jsonpath.jsonpath(comments, '$..avatar')
        # Presumably the avatar URL is protocol-relative ("//host/...") and
        # therefore needs the scheme prepended - confirm against a response.
        avatar = 'https:' + avatar_matches[0] if avatar_matches else None

        name = jsonpath.jsonpath(comments, '$..nick')        # user name
        comment = jsonpath.jsonpath(comments, '$..content')  # review text
        date = jsonpath.jsonpath(comments, '$..date')        # review time
        info = jsonpath.jsonpath(comments, '$..sku')         # package type

        item = {
            '头像': avatar,
            '名字': name,
            '评论内容': comment,
            '评论时间': date,
            '套餐类型': info,
        }
        print(item)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|