|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 Stubborn 于 2019-4-14 04:09 编辑
这里话不多说,接上一个帖子,这次用了一些 jsonpath 的方法来抓取评论
- # -*- coding: utf-8 -*-
- # @Time : 2019-04-08 00:08
- # @Author : Ein
- # @File : Jsonpath-淘宝评论.py
- # @Software: PyCharm
- import requests
- import jsonpath
# Raw value for the HTTP ``Cookie`` request header that main() sends.
# NOTE(review): presumably captured from a logged-in browser session; such
# values expire and are account-specific, so replace it with your own cookie
# before running and avoid committing real session cookies to shared code.
COOKIE = 'thw=cn; cna=MsgjFQjusQICATsqbz+EqQnp; t=f7dd5ba1920f8f8d1073a0578ddd191a; cookie2=11db44742a4b63d88b21b2d5865812d7; _tb_token_=e7f5495003e07; _cc_=V32FPkk%2Fhw%3D%3D; tg=0; enc=GeRVzabt5E1wETGR23fpBaRcJHDXCSf2TfqKH%2FSMRW9qqLMo7NAnamB0ogz%2B5dVVecYMMX%2BOyb67mCh%2Fn5OMiA%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; mt=ci=0_0; v=0; uc1=cookie14=UoTZ4Mn%2BO0mr1A%3D%3D; _m_h5_tk=26f291651bdf878bbacdd979958f0c28_1554672819450; _m_h5_tk_enc=e238ec088024e28c6f090baaa0e6865d; x5sec=7b22726174656d616e616765723b32223a223539363864363064373935353564626463383335393430366466373739616531434f796871655546454c72373776652f6a7375427851453d227d; isg=BIWF8Aqnda6nDlHELahBZUx2lMd_6jm89Rqi1IfqQbzLHqWQT5JJpBN8LAJNRVGM; l=bBEoNHAVv4TUzZ89BOCanurza77OSIRYYuPzaNbMi_5ZV6T1-Z_OlG8EVF96Vj5R_O8B4fxkHvv9-etbq'
- '''
- 接口:currentPageNum 翻页,pageSize 每页数量
- https://rate.taobao.com/feedRateList.htm?auctionNumId=521603418228&userNumId=651615231&currentPageNum=2&pageSize=20
- '''
def main():
    """Fetch one page of Taobao item reviews and print one dict per review.

    Requests the ``feedRateList.htm`` endpoint using the module-level
    ``COOKIE``, strips the parentheses and whitespace that surround the
    response text, parses the remainder as JSON, and for every entry of the
    ``comments`` list prints a dict holding the avatar URL, nick, content,
    date and sku values located with jsonpath.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
        json.JSONDecodeError: if the stripped response body is not valid JSON.
        KeyError: if the parsed payload has no ``comments`` key.
    """
    # Imported locally: the file's import block does not bring ``json`` into
    # scope, so the original ``json.loads`` call raised NameError.
    import json

    # currentPageNum selects the page, pageSize the number of reviews per page.
    url = (
        'https://rate.taobao.com/feedRateList.htm'
        '?auctionNumId=521603418228&userNumId=651615231'
        '&currentPageNum=1&pageSize=20'
    )
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
        'Cookie': COOKIE,
    }

    # A timeout keeps the script from hanging forever on a stalled connection.
    raw_text = requests.get(url=url, headers=headers, timeout=10).text

    # The body is wrapped in parentheses; strip them (plus surrounding
    # whitespace) so that what is left parses as plain JSON.
    payload = json.loads(raw_text.strip('() \n\t\r'))
    comments_list = payload['comments']

    # Collect avatar, user name, review text, review date and package type
    # for each entry of the comments list.
    for comments in comments_list:
        # jsonpath.jsonpath() returns False when nothing matches, so guard the
        # [0] lookup instead of letting it raise TypeError.
        avatar_matches = jsonpath.jsonpath(comments, '$..avatar')
        avatar = ('https:' + avatar_matches[0]) if avatar_matches else None

        # The remaining fields keep the full jsonpath result (every match).
        item = {
            '头像': avatar,                                         # avatar URL
            '名字': jsonpath.jsonpath(comments, '$..nick'),         # user name
            '评论内容': jsonpath.jsonpath(comments, '$..content'),  # review text
            '评论时间': jsonpath.jsonpath(comments, '$..date'),     # review date
            '套餐类型': jsonpath.jsonpath(comments, '$..sku'),      # package type
        }
        print(item)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
复制代码 |
|