药监局的数据爬取问题
本帖最后由 wzdyjn 于 2021-12-14 21:15 编辑
import requests
# The site's landing page is http://scxk.nmpa.gov.cn:81/xk/; the request below
# goes to its getXkzsList endpoint.
url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList'
headers = {
    'Cookie': 'JSESSIONID=3D601AF0C505380F376862A7E8D301DA; acw_tc=276aede916382887245685343e30e944678bb2fad6d67fafbc15f99b8ef02f; JSESSIONID=7FBB3D50981FE6D05AD83E1049402680',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36 Edg/96.0.1054.34',
}
# Form fields posted to the endpoint; 'page' is the requested page number.
data = {
    'on': 'true',
    'page': '2',
    'pageSize': '15',
    'productName': '',
    'conditionType': '1',
    'applyname': '',
    'applysn': '',
}
# requests.post's positional parameters are (url, data, json). Passing
# `headers` and `data` positionally sent the headers dict as the form body and
# handed the real form to `json`, so the 'page' field never reached the
# server. Both must be passed by keyword.
r = requests.post(url, headers=headers, data=data).json()
print(r)
只能获取第一页的数据,我想获取后面几页的数据。改 page 没有用,始终就是第一页的数据。郁闷中……哪位大神帮帮我? 浏览器打不开。 用你的代码运行有返回内容,有什么问题吗? 第二页、第三页就不行了。
wzdyjn 发表于 2021-12-10 15:41
第二页,,第三页就不行了
你获取url的方式发出来 瑞数解决再说。 http://scxk.nmpa.gov.cn:81/xk 本帖最后由 wzdyjn 于 2021-12-14 21:15 编辑
import requests
# Endpoint that the list query is posted to.
url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList'
headers = {
    'Cookie': 'JSESSIONID=3D601AF0C505380F376862A7E8D301DA; acw_tc=276aede916382887245685343e30e944678bb2fad6d67fafbc15f99b8ef02f; JSESSIONID=7FBB3D50981FE6D05AD83E1049402680',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36 Edg/96.0.1054.34',
}
# Form fields posted to the endpoint; 'page' is the requested page number.
data = {
    'on': 'true',
    'page': '2',
    'pageSize': '15',
    'productName': '',
    'conditionType': '1',
    'applyname': '',
    'applysn': '',
}
# requests.post's positional parameters are (url, data, json). Passing
# `headers` and `data` positionally sent the headers dict as the form body and
# handed the real form to `json`, so the 'page' field never reached the
# server. Both must be passed by keyword.
r = requests.post(url, headers=headers, data=data).json()
print(r)
只能获取第一页的数据,我想获取后面几页的数据。改 page 没有用,始终就是第一页的数据。郁闷中……哪位大神帮帮我?
#前段时间刚学的,你试试
import requests as ch
import json
headers = {
    # The original value had the next raw header line ("Accept: */*") pasted
    # onto the end of the User-Agent string; they are two separate headers.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:94.0) Gecko/20100101 Firefox/94.0',
    'Accept': '*/*',
}
url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList'
# Post the query once per page number; range(1, 3) yields pages 1 and 2.
for num in range(1, 3):
    # Form fields for this page; only 'page' changes between iterations.
    data = {
        'on': 'true',
        'page': str(num),
        'pageSize': '15',
        'productName': '',
        'conditionType': '1',
        'applyname': '',
        'applysn': '',
    }
    a = ch.post(url=url, headers=headers, data=data)
a = a.json()
本帖最后由 johnnyb 于 2021-12-19 00:29 编辑
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# Author : RomanData : 2021-12-19 00:20
import requests
def test(page=1):
    """Post the getXkzsList query for one page and print each entry's EPS_NAME.

    Args:
        page: Value sent as the 'page' form field. Defaults to 1.

    Raises:
        requests.exceptions.RequestException: If the request fails or does not
            complete within the timeout.
        ValueError: If the response body is not valid JSON.
    """
    # Session cookies captured from a browser visit; they are hard-coded here.
    cookies = {
        'JSESSIONID': '1E3604C0D0EA1FEE2F9E1BE3D4D37922',
        'acw_tc': '276aedf516398441177493889e52d8385c3e2a0ca6105fdc508fcbd3cc792b',
        '__tins__21053225': '%7B%22sid%22%3A%201639844117108%2C%20%22vd%22%3A%201%2C%20%22expires%22%3A%201639845917108%7D',
        '__51cke__': '',
        '__51laig__': '1',
        'SL_G_WPT_TO': 'zh-CN',
        'SL_GWPT_Show_Hide_tmp': '1',
        'SL_wptGlobTipTmp': '1',
    }
    headers = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'Accept': '*/*',
        'DNT': '1',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded;utf-8',
        'Origin': 'http://scxk.nmpa.gov.cn:81',
        'Referer': 'http://scxk.nmpa.gov.cn:81/xk/',
        'Accept-Language': 'zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7',
    }
    # Sent as the query string (?method=getXkzsList).
    params = (
        ('method', 'getXkzsList'),
    )
    data = {
        'on': 'true',
        'page': page,
        'pageSize': '15',
        'productName': '',
        'conditionType': '1',
        'applyname': '',
        'applysn': '',
    }
    # requests applies no timeout by default, so an unresponsive server would
    # block this call forever; bound the wait explicitly.
    response = requests.post(
        'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do',
        headers=headers,
        params=params,
        cookies=cookies,
        data=data,
        verify=False,
        timeout=30,
    )
    # .get('list') returns None when the key is absent; fall back to an empty
    # list so the loop below does not raise TypeError.
    data_list = response.json().get('list') or []
    for i in data_list:
        # Print the company name.
        print(i.get('EPS_NAME'))
if __name__ == '__main__':
    # Walk pages 1 through 9, printing a banner line before each page's output.
    for i in range(1, 10):
        # The original print( call was missing its closing parenthesis.
        print(f"第{i}页数据.>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
        test(i)
页:
[1]