#coding=utf-8
import requests,re,os,time,pymysql,time
from lxml import etree
# Output directory, created when the module is loaded.
down_path = '新余快速人才网'
# exist_ok avoids the check-then-create race of "if not exists: makedirs".
os.makedirs(down_path, exist_ok=True)

# Company page URLs collected by nextpage(); shared at module level.
url3 = []
_SITE_ROOT = 'https://www.xinyurc.com'

# Gender label on the job page -> value written to the ``sexindex`` column.
_SEX_INDEX = {'不限': 0, '男': 1, '女': 2}

# (lowest K, highest K, value written to the ``salaryindex`` column).
_SALARY_BUCKETS = (
    (1, 1, 2),
    (2, 2, 3),
    (3, 3, 4),
    (4, 4, 5),
    (5, 7, 6),
    (8, 12, 7),
    (13, 20, 8),
)


def _salary_index(wage):
    """Return the ``salaryindex`` bucket for a wage label such as ``'3K-5K'``.

    The bucket is chosen from the smallest ``<number>K`` amount in the label.
    Whole amounts are parsed (rather than testing ``'1K' in wage``) so that
    ``'12K'`` is not mistaken for ``'2K'`` and ``'15K'`` for ``'5K'``.
    Labels without a ``K`` amount, or outside 1K-20K, map to ``0``.
    """
    amounts = [float(num) for num in re.findall(r'(\d+(?:\.\d+)?)K', wage)]
    if not amounts:
        return 0
    lowest = int(min(amounts))
    for low, high, index in _SALARY_BUCKETS:
        if low <= lowest <= high:
            return index
    return 0


def _parse_job(page):
    """Build the database row for one parsed job-detail page.

    Returns ``None`` when the page shows no contact phone (such jobs are not
    stored).  Raises ``IndexError`` when a required field is missing and
    ``AttributeError`` when ``page`` is ``None``.
    """
    phones = page.xpath('/html/body/div[4]/div[1]/div[1]/div[2]/div[4]/span/text()')  # contact phone
    if not phones:
        return None

    title = page.xpath('/html/body/div[3]/div/div/div[2]/div[1]/text()')[0]  # job title
    headcount = page.xpath('/html/body/div[4]/div[1]/div[1]/div[1]/div[4]/text()')[0]  # number of hires
    sex_label = page.xpath('/html/body/div[4]/div[1]/div[1]/div[1]/div[8]/text()')[0]  # gender requirement
    address = page.xpath('/html/body/div[4]/div[1]/div[1]/div[1]/div[13]/text()')[0]  # work location
    description = page.xpath('/html/body//div[@class="describe"]/div[@class="txt"]/text()')  # job description
    wage = page.xpath('/html/body//div[@class="jobstit"]/div[@class="wage"]/text()')[0]  # salary label

    return {
        'uniacid': 1,
        'uid': 24,
        'isc': 2,
        'cid': 0,
        'title': title,
        'jobcatindex': 0,
        'salaryindex': _salary_index(wage),
        'flow': 0,
        'recruitnum': headcount,
        # Unknown labels are passed through unchanged.
        'sexindex': _SEX_INDEX.get(sex_label, sex_label),
        'degreesindex': 0,
        'experiencesindex': 0,
        # xpath returns a list of text nodes; a list parameter would be
        # rendered by PyMySQL as a parenthesised SQL list, so store one string.
        'des': ''.join(description),
        # NOTE(review): the leading space is kept from the original value;
        # confirm the consumer of this serialized string tolerates it.
        'chosewelfare': ' a:1:{i:0;s:12:"其他补助";}',
        'imgs': 'a:1:{i:0;s:0:"";}',
        'time': int(time.time()),
        'status': 1,
        'istop': 0,
        # NOTE(review): this stores the text 'NULL', not SQL NULL - confirm
        # against the column definition.
        'toptime': 'NULL',
        'istou': 0,
        'isting': 0,
        'citycode': '360500',
        'telnum': phones[0],
        'address': address,
        'state': '1',
    }


def _insert_job(record):
    """Insert one row into ``ims_lshd_zhaopinhign_zpxx`` and print the outcome."""
    table = 'ims_lshd_zhaopinhign_zpxx'
    columns = ','.join(record)
    placeholders = ','.join(['%s'] * len(record))
    sql = f'INSERT INTO {table}({columns}) VALUES ({placeholders})'
    # NOTE(review): connection settings and password are hard-coded.
    db = pymysql.connect(host='localhost', user='root', password='6330055', port=3306, db='spiders')
    try:
        with db.cursor() as cursor:
            if cursor.execute(sql, tuple(record.values())):
                print('Successful')
            db.commit()
    except Exception as exc:  # best-effort crawl: report and move on
        print('Failed', exc)
        db.rollback()
    finally:
        # Always release the connection, even when the insert fails.
        db.close()


def nextpage(lastpage):
    """Crawl the job list, visit every company's job pages and store the jobs.

    Uses the module-level session ``s`` for all requests and leaves the
    de-duplicated company URLs in the module-level ``url3`` (as a set).
    Jobs without a contact phone are not stored; pages that cannot be parsed
    are reported and skipped instead of aborting the crawl.

    Args:
        lastpage: last page number of the job list.  Currently not read.
    """
    global url3
    # Work on a set copy so the function also works when url3 is already a
    # set (second page or second call); list.extend would fail on it.
    company_urls = set(url3)

    # NOTE(review): only page 1 is fetched and ``lastpage`` is ignored; this
    # looks like a debugging limit - confirm before widening the range.
    for page_no in range(1, 2):
        print('页码:', page_no)
        list_url = f'https://www.xinyurc.com/index.php?m=&c=jobs&a=jobs_list&page={page_no}'
        listing = etree.HTML(s.get(url=list_url).text)
        # Links to company home pages.
        company_urls.update(listing.xpath('/html/body/div[5]/div[1]/div[2]/div/div[3]/a/@href'))

    url3 = company_urls
    print(url3)

    for company_url in url3:
        print('hurl:', company_url)
        time.sleep(1)
        company_html = s.get(url=company_url).text
        # Link labelled "all positions" on the company page.
        all_jobs_links = re.findall('<a href="(.*?)">全部职位', company_html)
        if not all_jobs_links:
            print('Skipped company (no job-list link):', company_url)
            continue
        jobs = _SITE_ROOT + all_jobs_links[0]
        jobs_html = s.get(url=jobs).text
        print('jobs是:', jobs)
        try:
            # URLs of every job posted by this company.
            job_urls = etree.HTML(jobs_html).xpath('/html/body//div[@class="jname"]/a/@href')
        except Exception as exc:  # unparsable page: skip this company only
            print('Skipped company (job list not parsable):', jobs, exc)
            continue

        for job_url in job_urls:
            job_html = s.get(job_url).text
            print('Url:', job_url)
            try:
                record = _parse_job(etree.HTML(job_html))
            except (IndexError, AttributeError) as exc:
                print('Skipped job (missing field):', job_url, exc)
                continue
            if record is not None:
                _insert_job(record)
            # A plain-text export into ``down_path`` was disabled here
            # previously; only the database insert is active.
if __name__ == '__main__':
    # Script entry: log in with a shared session ``s`` (read as a global by
    # nextpage), look up the last page number of the job list, then crawl.
    url = 'https://www.xinyurc.com/index.php?m=Home&c=Members&a=login'
    url2 = 'https://www.xinyurc.com/index.php?m=Home&c=jobs&a=jobs_list'
    # Headers for the login POST only.  Response-only headers (Date, Expires,
    # Vary), 'Transfer-Encoding: chunked' (the form body is not chunked) and
    # the fixed Content-Length (requests computes it) are not sent; Origin no
    # longer contains stray spaces.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'Sec-Fetch-Site': 'same-origin',
        'Origin': 'https://www.xinyurc.com',
        'Referer': 'https://www.xinyurc.com/members/login',
        'Accept-Encoding': 'gzip, deflate, br',
        'sec-ch-ua-mobile': '?0',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'sec-ch-ua': '" Not;A Brand";v = "99", "Google Chrome";v = "91", "Chromium";v = "91"',
        'Host': 'www.xinyurc.com',
        'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
        'Connection': 'keep-alive',
        'Accept-Language': 'zh-CN, zh;q=0.9, en;q=0.8, zh-TW;q=0.7',
        'Accept': 'application/json,text/javascript, */*; q=0.01',
    }
    # NOTE(review): account credentials are hard-coded in the source.
    data = {
        'username': '15579001118',
        'password': '6330055',
        'expire': '1',
    }
    s = requests.session()
    # NOTE(review): the login headers are deliberately not copied onto the
    # session (a bare s.headers.update() changed nothing); later GETs use the
    # session defaults.
    r = s.post(url=url, data=data, headers=headers, allow_redirects=True)
    r = s.get(url=url2).text
    print(r, '*****' * 10)
    html = etree.HTML(r)
    # href of the pager link expected to point at the last page.
    last_links = html.xpath('/html/body/div[5]/div[1]/div[2]/div[17]/a[7]/@href')
    # Raw string avoids the invalid '\d' escape; \d+ accepts any page count,
    # not only three-digit ones.
    page_numbers = re.findall(r'page=(\d+)', last_links[0]) if last_links else []
    if page_numbers:
        lastpage = page_numbers[0]
    else:
        # Pager link missing: fall back to a single page instead of crashing.
        print('Last page link not found; assuming 1 page')
        lastpage = '1'
    nextpage(lastpage)