小小鹏 发表于 2020-9-3 10:19:57

爬虫中request payload

import requests
import json

def get_comments(url):
   headers = {
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "zh-CN,zh;q=0.9",
    "content-length": "278",
    "content-type": "application/json",
    "cookie":"_abtest_userid=0f540837-6c5a-4629-aa88-93ba1bfec570; _RSG=pHqDcQKOqkD92.7L6HsBL8; _RDG=28b4f4d49352f22e56153739a68cf82eae; _RGUID=dbb46146-a637-4473-854b-d911951fe089; MKT_CKID=1591713956511.w9k7w.9opk; _ga=GA1.2.1020029856.1591713957; _RF1=119.39.127.107; Session=smartlinkcode=U130709&smartlinklanguage=zh&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=; Union=AllianceID=4901&SID=130709&OUID=&createtime=1599095310&Expires=1599700110468; MKT_CKID_LMT=1599095310492; MKT_Pagesource=PC; nfes_isSupportWebP=1; _jzqco=%7C%7C%7C%7C1599095310630%7C1.1301033936.1591713956505.1599095310501.1599095863824.1599095310501.1599095863824.0.0.0.3.3; __zpspc=9.2.1599095310.1599095863.2%233%7Cwww.sogou.com%7C%7C%7C%7C%23; _bfi=p1%3D290510%26p2%3D290510%26v1%3D4%26v2%3D3; _bfa=1.1591713953612.3xeul0.1.1591713953612.1599095307538.2.5; _bfs=1.4",
    "cookieorigin":"https://you.ctrip.com",
    "origin": "https://you.ctrip.com",
    "referer": "https://you.ctrip.com/sight/lijiang32/3049.html",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0"
    }
   data = {"_fxpcqlniredt": "09031090311113380393"}
   proxies = {"http":"http://103.146.184.77",
            "https:":"https://103.146.184.77"
         }
   payload = {
      "channelType": "2" ,                  
      "collapseType": "0",
      "commentTagId": "0",
      "pageIndex":"1",
      "pageSize": "10",
      "poiId": "75919",
      "sortType":" 3",
      "sourceType": "1",
      "starType": "0",
      "auth": '""',
      "ctok": '""',
      "cid": "09031090311113380393",
      "cver": "1.0",
      "extension": "[]",
      "lang": "01",
      "sid": "8888",
      "syscode": "09",
      "xsid": '""'
      }   
   res = requests.post(url,headers = headers,params = data,data = json.dumps(payload))
   return res
   
   
   
def main():
    url = "https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList?_fxpcqlniredt=09031090311113380393" #headers>general>request url
    res = get_comments(url)
    with open("data.txt","w",encoding="utf-8") as f:
      f.write(res.text)
   
if __name__ == "__main__":
    main()

打印出来的data.txt
https://pic.downk.cc/item/5f5052a4160a154a67197395.png

请问是request payload 解析错误了吗?

1q23w31 发表于 2020-9-3 10:33:40

浏览器直接访问,就是这个字符串啊

小小鹏 发表于 2020-9-3 10:36:21

1q23w31 发表于 2020-9-3 10:33
浏览器直接访问,就是这个字符串啊

我把payload里面的head去掉了。然后就可以得到data.txt数据了,虽然我也不知道为什么会这样

1q23w31 发表于 2020-9-3 11:43:20

小小鹏 发表于 2020-9-3 10:36
我把payload里面的head去掉了。然后就可以得到data.txt数据了,虽然我也不知道为什么会这样

什么意思,建议发个图说明

小小鹏 发表于 2020-9-3 14:35:13

1q23w31 发表于 2020-9-3 11:43
什么意思,建议发个图说明

https://pic.downk.cc/item/5f508e93160a154a67270837.png

YunGuo 发表于 2020-9-3 14:54:04

import requests
import json


url = 'https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList?_fxpcqlniredt=09031100412019912485'

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36'
}

data = {
    'arg': {'channelType': 2,
            'collapseType': 0,
            'commentTagId': 0,
            'pageIndex': 1,
            'pageSize': 10,
            'poiId': 75919,
            'sortType': 3,
            'sourceType': 1,
            'starType': 0
            },
    'head': {
      'auth': '',
      'cid': '09031100412019912485',
      'ctok': '',
      'cver': '1.0',
      'extension': [],
      'lang': '01',
      'sid': '8888',
      'syscode': '09',
      'xsid': ''
      }
    }

re = requests.post(url, headers=headers, data=json.dumps(data))


你那个payload写错了,里面应该写成两个字典。这样写就好了。
页: [1]
查看完整版本: 爬虫中request payload