太久没玩了,请教一个登录问题
# coding=utf-8
import os
import re
import time
from urllib.parse import urljoin

import pymysql
import requests
from lxml import etree
# Directory named after the target site; created up front if it is missing.
down_path = '新余快速人才网'
os.makedirs(down_path, exist_ok=True)

# Company home-page URLs collected by nextpage(); shared module-level state.
url3 = []
_BASE_URL = 'https://www.xinyurc.com'
_JOBS_TABLE = 'ims_lshd_zhaopinhign_zpxx'
_REQUEST_TIMEOUT = 15  # seconds; requests waits forever when no timeout is given
# Exact gender-requirement text -> ``sexindex`` code stored in the database.
_SEX_INDEX = {'不限': 0, '男': 1, '女': 2}


def _last_page_number(lastpage):
    """Coerce *lastpage* to a page count of at least 1.

    Accepts an int, a numeric string, or an iterable of numeric strings (the
    shape ``re.findall`` returns). Anything unusable falls back to 1.
    """
    if lastpage is None or isinstance(lastpage, (str, bytes, int, float)):
        candidates = [lastpage]
    else:
        candidates = list(lastpage)
    numbers = [1]
    for candidate in candidates:
        try:
            numbers.append(int(candidate))
        except (TypeError, ValueError, OverflowError):
            continue
    return max(numbers)


def _fetch(url):
    """GET *url* through the shared session ``s``; return the body or None."""
    try:
        return s.get(url=url, timeout=_REQUEST_TIMEOUT).text
    except requests.RequestException as exc:
        # One bad link (relative, javascript:, timeout...) must not end the crawl.
        print('Request failed:', url, exc)
        return None


def _parse_html(text):
    """Parse *text* with lxml; return None when there is nothing parseable."""
    if not text or not text.strip():
        return None
    try:
        return etree.HTML(text)
    except (etree.LxmlError, ValueError):
        return None


def _first_text(nodes, default=''):
    """Return the first non-blank string in *nodes*, stripped, else *default*."""
    for node in nodes:
        stripped = node.strip()
        if stripped:
            return stripped
    return default


def _salary_index(wage_text):
    """Map a wage string such as ``'3K-5K'`` to the ``salaryindex`` code.

    The first ``<number>K`` in the text (the lower bound) selects the bucket:
    1K-4K -> 2-5, 5K-7K -> 6, 8K-12K -> 7, 13K-20K -> 8, anything else -> 0.
    Parsing the number avoids substring false hits such as '5K' in '15K'.
    """
    match = re.search(r'(\d+(?:\.\d+)?)\s*[Kk]', wage_text)
    if match is None:
        return 0
    lower = int(float(match.group(1)))
    if 1 <= lower <= 4:
        return lower + 1
    if 5 <= lower <= 7:
        return 6
    if 8 <= lower <= 12:
        return 7
    if 13 <= lower <= 20:
        return 8
    return 0


def _insert_job(db, data):
    """Insert one row built from *data* (column -> value) into the jobs table."""
    keys = ','.join(data)
    values = ','.join(['%s'] * len(data))
    sql = 'INSERT INTO {table}({keys}) VALUES ({values})'.format(
        table=_JOBS_TABLE, keys=keys, values=values)
    try:
        with db.cursor() as cursor:
            if cursor.execute(sql, tuple(data.values())):
                print('Successful')
                db.commit()
    except pymysql.MySQLError as exc:
        print('Failed', exc)
        db.rollback()


def _scrape_job(url, db):
    """Scrape one job-detail page and store it when a phone number is shown."""
    url = urljoin(_BASE_URL, url)
    print('Url:', url)
    html = _parse_html(_fetch(url))
    if html is None:
        return

    # NOTE(review): the four generic XPaths below are identical, so they all
    # yield the same text. The positional predicates (e.g. div[3]) appear to
    # have been lost from this copy of the script -- restore them per field.
    job = _first_text(
        html.xpath('/html/body/div/div/div/div/div/text()'))  # job title
    jobnum = _first_text(
        html.xpath('/html/body/div/div/div/div/div/text()'))  # head count
    sex_text = _first_text(
        html.xpath('/html/body/div/div/div/div/div/text()'))  # gender requirement
    jobaddress = _first_text(
        html.xpath('/html/body/div/div/div/div/div/text()'))  # work location
    andtel = _first_text(
        html.xpath('/html/body/div/div/div/div/div/span/text()'))  # contact phone
    description_parts = html.xpath(
        '/html/body//div[@class="describe"]/div[@class="txt"]/text()')
    classjob = '\n'.join(
        part.strip() for part in description_parts if part.strip())
    wage_text = ''.join(html.xpath(
        '/html/body//div[@class="jobstit"]/div[@class="wage"]/text()'))

    if not andtel:
        # Postings without a visible phone number are not stored.
        return

    # TODO: education, experience, age, department and contact name are not
    # scraped yet; the matching index columns are written as 0.
    data = {
        'uniacid': 1,
        'uid': 24,
        'isc': 2,
        'cid': 0,
        'title': job,
        'jobcatindex': 0,
        'salaryindex': _salary_index(wage_text),
        'flow': 0,
        # NOTE(review): raw page text; confirm the column accepts it.
        'recruitnum': jobnum,
        'sexindex': _SEX_INDEX.get(sex_text, 0),
        'degreesindex': 0,
        'experiencesindex': 0,
        'des': classjob,
        # NOTE(review): the leading space looks accidental and would likely
        # break PHP unserialize() -- confirm before changing stored data.
        'chosewelfare': ' a:1:{i:0;s:12:"其他补助";}',
        'imgs': 'a:1:{i:0;s:0:"";}',
        'time': int(time.time()),
        'status': 1,
        'istop': 0,
        # NOTE(review): this stores the 4-character string 'NULL', not SQL
        # NULL; use None if the column is nullable.
        'toptime': 'NULL',
        'istou': 0,
        'isting': 0,
        'citycode': '360500',
        'telnum': andtel,
        'address': jobaddress,
        'state': '1',
    }
    _insert_job(db, data)


def _crawl_company(company_url, db):
    """Follow a company page to its "all jobs" list and scrape every job."""
    page = _fetch(urljoin(_BASE_URL, company_url))
    if page is None:
        return
    # [^"]* keeps the capture inside a single href attribute.
    links = re.findall('<a href="([^"]*)">全部职位', page)
    if not links:
        return
    jobs = urljoin(_BASE_URL, links[0])
    print('jobs是:', jobs)
    listing = _parse_html(_fetch(jobs))
    if listing is None:
        return
    for url in listing.xpath('/html/body//div[@class="jname"]/a/@href'):
        _scrape_job(url, db)


def nextpage(lastpage):
    """Crawl job-list pages 1..lastpage and store every posting in MySQL.

    For each list page the company links are collected into the module-level
    ``url3`` (left as a de-duplicated set). Each company page is then followed
    to its "all jobs" list, and every job-detail page that shows a phone
    number is inserted into the jobs table.

    Args:
        lastpage: Number of the last list page. May be an int, a numeric
            string, or a list of numeric strings; unusable values mean 1.

    Relies on the module-level ``requests`` session ``s`` being logged in.
    """
    global url3

    company_urls = set(url3)
    for i in range(1, _last_page_number(lastpage) + 1):
        print('页码:', i)
        nexturl = f'https://www.xinyurc.com/index.php?m=&c=jobs&a=jobs_list&page={i}'
        html = _parse_html(_fetch(nexturl))
        if html is None:
            continue
        # NOTE(review): without positional predicates this XPath matches every
        # link at that depth, not only company home pages -- tighten it.
        company_urls.update(html.xpath('/html/body/div/div/div/div/div/a/@href'))
    url3 = company_urls
    print(url3)

    # NOTE(review): database credentials are hard-coded; move them to
    # configuration or environment variables.
    db = pymysql.connect(host='localhost', user='root', password='6330055',
                         port=3306, db='spiders', charset='utf8mb4')
    try:
        for h in url3:
            print('hurl:', h)
            time.sleep(1)  # be polite to the server between companies
            _crawl_company(h, db)
    finally:
        db.close()
if __name__ == '__main__':
    # Log in, read the highest page number from the job list, then crawl.
    url = 'https://www.xinyurc.com/index.php?m=Home&c=Members&a=login'
    url2 = 'https://www.xinyurc.com/index.php?m=Home&c=jobs&a=jobs_list'

    # Sent with every request made through the session.
    session_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        'Accept-Language': 'zh-CN, zh;q=0.9, en;q=0.8, zh-TW;q=0.7',
    }
    # Sent only with the AJAX login POST. Host, Content-Length, Connection and
    # Accept-Encoding are left to requests; the response-only headers (Date,
    # Expires, Vary, Transfer-Encoding) must never be sent. A forced
    # "Transfer-Encoding: chunked" next to a Content-Length is an invalid
    # request and a likely reason the login was rejected.
    headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'Origin': 'https://www.xinyurc.com',
        'Referer': 'https://www.xinyurc.com/members/login',
        'Accept': 'application/json,text/javascript, */*; q=0.01',
        'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
    }
    # NOTE(review): account credentials are hard-coded; move them to
    # configuration or environment variables.
    data = {
        'username': '15579001118',
        'password': '6330055',
        'expire': '1',
    }

    s = requests.session()
    s.headers.update(session_headers)
    r = s.post(url=url, data=data, headers=headers, allow_redirects=True)
    # Show enough of the login result to tell whether it actually succeeded.
    print('login status:', r.status_code)
    print('login response:', r.text[:200])
    print('session cookies:', s.cookies.get_dict())
    r.raise_for_status()

    r = s.get(url=url2).text
    html = etree.HTML(r)
    # NOTE(review): this XPath looks like it lost its positional predicates;
    # as written it returns every link at that depth.
    hrefs = html.xpath('/html/body/div/div/div/div/a/@href') if html is not None else []
    page_numbers = [
        int(number)
        for href in hrefs
        for number in re.findall(r'page=(\d+)', href)
    ]
    # Fall back to a single page when no pagination link is found.
    lastpage = max(page_numbers, default=1)
    print('last page:', lastpage)
    nextpage(lastpage)
这里研究了蛮久,不知道为什么现在好像登录不成功了。 盘子开得太大,应先研究能否成功登录。 加个登录后的 cookie 试试。
页:
[1]