|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
想实现的功能是查询所有日期和考试科目,看有没有考试信息,因为很多年以前考的,日期和科目都不记得了。所以把所有考试日期和对应科目全部爬一遍,找出有成绩的结果。
http://search.neea.edu.cn/QueryMarkUpAction.do?act=doQueryCond&pram=certi&community=Home&sid=300 查询考试成绩,这是主链接
方法是先从 "http://search.neea.edu.cn/Imgs.do?act=verify&t=0.18753489364159748" 取验证码图片,下载到本地并识别,然后再向 request_url = 'http://search.neea.edu.cn/QueryMarkUpAction.do?act=doQueryResults' 发起查询请求,结果提示验证码错误
错误信息:
verify: nx2w
36dR7httt1zr5VYDaxYdQ1 14 身份证 姓名
<script type='text/javascript' src='/tea/neea/chaxun/js/neea.js' ></script>
<script>parent.neea.show('抱歉,验证码错误!');location.href='/QueryMarkUpAction.do?act=doQueryCond&sid=300&pram=certi&ksnf=36
代码如下:
请求模块:
import requests
import json
import yanzhengma
# Default HTTP headers for the score-query POST; they imitate a desktop
# Chrome browser. No Referer or Cookie header is set here.
headers = dict(
    [
        ("Accept", "application/json, text/javascript, */*"),
        ("DNT", "1"),
        ("Accept-Encoding", "gzip, deflate"),
        ("Accept-Language", "zh-CN,zh;q=0.9"),
        (
            "User-Agent",
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
        ),
    ]
)
# Load the lookup table from the local file 'find_dict'. The loop below
# treats it as a mapping whose keys are used as `ksnf` and whose values are
# iterables of `bkjb` codes.
# NOTE(security): eval() executes whatever the file contains. That is only
# acceptable because the file is authored locally; ast.literal_eval would be
# the safe choice if the file could ever come from somewhere else.
with open('find_dict', 'r') as valuesfile:
    find_dict = eval(valuesfile.read())

zhengjian = 'xxxxxx'  # ID-card number submitted as "sfzh"
myname = 'xxxx'       # candidate name submitted as "name"
pram = 'certi'
ksxm = '300'

request_url = 'http://search.neea.edu.cn/QueryMarkUpAction.do?act=doQueryResults'
# Same captcha URL and file path that yanzhengma.goon() / getVerify() use;
# getVerify() reads the image back from this exact path.
captcha_url = "http://search.neea.edu.cn/Imgs.do?act=verify&t=0.18753489364159748"
captcha_path = "E:/爬虫练习/verify.png"

# One Session for BOTH the captcha download and the query POST, so cookies
# set by the captcha response are sent back with the answer. Fetching the
# image with a bare requests.get() and posting with a bare requests.post()
# shares no cookies between the two requests.
# NOTE(review): the site appears to bind the expected captcha to cookies
# issued with the image (a browser sends a `verify` cookie) -- confirm.
session = requests.Session()
session.headers.update(headers)

for ksnf, kemulist in find_dict.items():
    for bkjb in kemulist:
        # Download the captcha through the shared session (instead of
        # yanzhengma.goon(), which cannot see this session's cookies) and
        # store it where yanzhengma.getVerify() expects to find it.
        captcha = session.get(captcha_url, headers=yanzhengma.headers, timeout=10)
        captcha.raise_for_status()  # do not send an error page to the solver
        with open(captcha_path, "wb") as image_file:
            image_file.write(captcha.content)

        # Recognise the captcha text.
        verify = yanzhengma.getVerify()['pic_str']
        print('verify:', verify)
        print(ksnf, bkjb, zhengjian, myname)

        data = {
            "pram": "certi",
            "ksxm": "300",
            "sf": '',
            "zkzh": '',
            "nexturl": '/QueryMarkUpAction.do?act=doQueryCond&sid=%s&pram=%s&ksnf=%s&sf=&bkjb=%s&sfzh=%s&name=%s' % (ksxm, pram, ksnf, bkjb, zhengjian, myname),
            "ksnf": ksnf,
            "bkjb": bkjb,
            "sfzh": zhengjian,
            'name': myname,
            'verify': verify,
        }
        page = session.post(request_url, data=data, timeout=10)
        print(page.text)
        # Debug guard: stop after the first subject. Remove both `break`
        # statements to query every date/subject combination.
        break
    break
图片请求验证:
from chaojiying import Chaojiying_Client
import requests
# HTTP headers sent with the captcha-image request; they imitate a desktop
# Chrome browser loading the image from the score-query page (hence the
# image-oriented Accept value and the Referer pointing at that page).
headers = dict(
    [
        ("Accept", "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8"),
        (
            "Referer",
            "http://search.neea.edu.cn/QueryMarkUpAction.do"
            "?act=doQueryCond&pram=certi&community=Home&sid=300",
        ),
        ("DNT", "1"),
        ("Accept-Encoding", "gzip, deflate"),
        ("Accept-Language", "zh-CN,zh;q=0.9"),
        (
            "User-Agent",
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
        ),
    ]
)
def goon(session=None):
    """Download the captcha image and save it as E:/爬虫练习/verify.png.

    Args:
        session: Optional ``requests.Session`` (or any object exposing a
            requests-compatible ``get``). Pass the same session that later
            submits the captcha answer so both requests share cookies.
            When omitted, the module-level ``requests`` API is used and no
            cookies are kept between requests.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            that an error page is never saved as the captcha image.
        requests.Timeout: If the server does not answer within 10 seconds.
    """
    http = requests if session is None else session
    image_pic = "http://search.neea.edu.cn/Imgs.do?act=verify&t=0.18753489364159748"
    response = http.get(url=image_pic, headers=headers, timeout=10)
    response.raise_for_status()
    image_data = response.content
    #image_data = urllib.request.urlopen(urllib.request.Request(url=image_pic, headers=headers))
    print('two-image_data:', image_data)
    # getVerify() reads the image back from this same path.
    with open("E:/爬虫练习/verify.png", "wb") as f:
        f.write(image_data)
def getVerify():
    """Send the saved captcha image to Chaojiying and return its reply.

    Reads E:/爬虫练习/verify.png, submits the bytes through
    ``Chaojiying_Client.PostPic`` with code 1902, prints the reply and
    returns it unchanged.

    Returns:
        Whatever ``PostPic`` returns.
        NOTE(review): presumably a dict with a 'pic_str' entry holding the
        recognised text -- confirm against the Chaojiying client.
    """
    client = Chaojiying_Client('xxxx', 'xxxx', 'xxxxx')
    with open("E:/爬虫练习/verify.png", "rb") as image_file:
        image_bytes = image_file.read()
    # 1902 is the code passed as PostPic's second argument; its meaning is
    # defined by the Chaojiying service, not by this file.
    result = client.PostPic(image_bytes, 1902)
    print('结果验证码:', result)
    return result
考试日期和对应科目(字典,find_dict)
|
|