太久没玩了，请教一个登录问题,Python交流,编程语言专区,鱼C论坛

nanrenne 发表于 2021-6-12 11:06:04

太久没玩了，请教一个登录问题

#coding=utf-8

import requests,re,os,time,pymysql,time
from lxml import etree
# Local working directory for this crawler, created on startup if absent.
down_path = '新余快速人才网'
os.makedirs(down_path, exist_ok=True)

# Company home-page URLs collected by nextpage(); shared as a module global.
url3 = []
# Maps the gender text shown on a job page to the numeric code stored in the DB.
_SEX_INDEX = {'不限': 0, '男': 1, '女': 2}


def _first_text(nodes, default=''):
    """Return the first non-blank entry of an XPath ``text()`` result, stripped."""
    for node in nodes:
        text = str(node).strip()
        if text:
            return text
    return default


def _joined_text(nodes):
    """Join every non-blank entry of an XPath ``text()`` result with newlines."""
    return '\n'.join(str(node).strip() for node in nodes if str(node).strip())


def _salary_index(wage_text):
    """Translate a wage string such as ``'3K-5K'`` into the site's salary index.

    The first "<number>K" figure in the text decides the bucket:
    1K-4K -> 2..5, 5K-7K -> 6, 8K-12K -> 7, 13K-20K -> 8, anything else -> 0.
    Parsing the number (instead of substring checks) keeps e.g. "15K" from
    being mistaken for "5K".
    """
    match = re.search(r'(\d+)(?:\.\d+)?K', wage_text)
    if match is None:
        return 0
    thousands = int(match.group(1))
    if thousands < 1 or thousands > 20:
        return 0
    if thousands <= 4:
        return thousands + 1
    if thousands <= 7:
        return 6
    if thousands <= 12:
        return 7
    return 8


def _save_job(record):
    """Insert one job record (column name -> value) into the MySQL table."""
    table = 'ims_lshd_zhaopinhign_zpxx'
    keys = ','.join(record)
    values = ','.join(['%s'] * len(record))
    sql = 'INSERT INTO {table}({keys}) VALUES ({values})'.format(table=table, keys=keys, values=values)

    # NOTE(review): credentials are hard-coded; consider loading them from
    # configuration or the environment instead of keeping them in source.
    db = pymysql.connect(host='localhost', user='root', password='6330055', port=3306, db='spiders')
    try:
        with db.cursor() as cursor:
            if cursor.execute(sql, tuple(record.values())):
                print('Successful')
                db.commit()
    except pymysql.MySQLError as exc:
        print('Failed', exc)
        db.rollback()
    finally:
        # Always release the connection, even if the rollback itself fails.
        db.close()


def nextpage(lastpage):
    """Crawl job postings from www.xinyurc.com and store them in MySQL.

    Walks the job-list pages to collect company home-page links, follows each
    company's "全部职位" (all positions) link, scrapes every job page found
    there and inserts one row per job that exposes a contact phone number.

    Relies on two module globals: ``s`` (the logged-in ``requests`` session)
    and ``url3`` (the accumulated company URLs, rebound to a ``set`` here).

    Args:
        lastpage: Number of the last list page. NOTE(review): currently not
            used; only page 1 is crawled, exactly as before. Wire it into the
            ``range`` below once the login and parsing are confirmed to work.
    """
    global url3

    # Start from whatever was collected before so repeated calls keep working
    # regardless of whether url3 is still a list or already a set.
    company_urls = set(url3)
    for page in range(1, 2):
        print('页码:', page)
        nexturl = f'https://www.xinyurc.com/index.php?m=&c=jobs&a=jobs_list&page={page}'
        listing = etree.HTML(s.get(url=nexturl).text)
        if listing is None:  # nothing parseable came back for this page
            continue
        # Company home pages.
        company_urls.update(listing.xpath('/html/body/div/div/div/div/div/a/@href'))
    url3 = company_urls
    print(url3)

    for h in url3:
        print('hurl:', h)
        time.sleep(1)  # throttle requests to the site
        company_page = s.get(url=h).text

        # re.findall returns a list; use the first "all positions" link, if any.
        links = re.findall('<a href="(.*?)">全部职位', company_page)
        if not links:
            continue
        jobs = 'https://www.xinyurc.com' + links[0]
        print('jobs是:', jobs)

        html2 = etree.HTML(s.get(url=jobs).text)
        if html2 is None:
            continue
        # URLs of every job posted by this company.
        allurl = html2.xpath('/html/body//div[@class="jname"]/a/@href')

        for url in allurl:
            page_text = s.get(url).text
            print('Url:', url)
            html = etree.HTML(page_text)
            if html is None:
                continue

            # NOTE(review): the next four XPaths are identical, so they all
            # yield the same text; the positional predicates (e.g. div[3])
            # appear to have been lost and need to be restored per field.
            job = _first_text(html.xpath('/html/body/div/div/div/div/div/text()'))  # job title
            jobnum = _first_text(html.xpath('/html/body/div/div/div/div/div/text()'))  # number of openings
            sex_text = _first_text(html.xpath('/html/body/div/div/div/div/div/text()'))  # gender requirement
            jobaddress = _first_text(html.xpath('/html/body/div/div/div/div/div/text()'))  # work location

            andtel = _first_text(html.xpath('/html/body/div/div/div/div/div/span/text()'))  # contact phone
            classjob = _joined_text(
                html.xpath('/html/body//div[@class="describe"]/div[@class="txt"]/text()'))  # job description
            wage = _first_text(
                html.xpath('/html/body//div[@class="jobstit"]/div[@class="wage"]/text()'))  # salary

            # Jobs without a contact phone are not stored.
            if not andtel:
                continue

            data = {
                'uniacid': 1,
                'uid': 24,
                'isc': 2,
                'cid': 0,
                'title': job,
                'jobcatindex': 0,
                'salaryindex': _salary_index(wage),
                'flow': 0,
                'recruitnum': jobnum,
                # Unrecognised gender text falls back to 0, the code used for '不限'.
                'sexindex': _SEX_INDEX.get(sex_text, 0),
                'degreesindex': 0,
                'experiencesindex': 0,
                'des': classjob,
                'chosewelfare': ' a:1:{i:0;s:12:"其他补助";}',
                'imgs': 'a:1:{i:0;s:0:"";}',
                'time': int(time.time()),
                'status': 1,
                'istop': 0,
                # NOTE(review): this stores the literal string 'NULL', not SQL
                # NULL; use None if a real NULL is intended.
                'toptime': 'NULL',
                'istou': 0,
                'isting': 0,
                'citycode': '360500',
                'telnum': andtel,
                'address': jobaddress,
                'state': '1',
            }
            _save_job(data)

if __name__ == '__main__':
    # Script entry point: log in, fetch the job list, then start the crawl.
    url = 'https://www.xinyurc.com/index.php?m=Home&c=Members&a=login'
    url2 = 'https://www.xinyurc.com/index.php?m=Home&c=jobs&a=jobs_list'

    # Request headers for the AJAX login call. The previous version also sent
    # response-only headers (Date, Expires, Vary, Transfer-Encoding: chunked),
    # a hard-coded Content-Length/Host and an Origin containing spaces.
    # requests fills in Host, Content-Length and Accept-Encoding itself, and a
    # "chunked" Transfer-Encoding on a normal form body is malformed, so those
    # entries are dropped; they are likely contributors to the failed login.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'Origin': 'https://www.xinyurc.com',
        'Referer': 'https://www.xinyurc.com/members/login',
        'Accept': 'application/json,text/javascript, */*; q=0.01',
        'Accept-Language': 'zh-CN, zh;q=0.9, en;q=0.8, zh-TW;q=0.7',
        'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
    }

    # NOTE(review): real credentials are hard-coded here (and were posted
    # publicly); change the password and load these from configuration.
    data = {
        'username': '15579001118',
        'password': '6330055',
        'expire': '1',
    }

    # One session for the whole run so the login cookies are reused by every
    # later request, including those made inside nextpage().
    s = requests.session()
    # Only the User-Agent is made session-wide; the AJAX-specific headers are
    # sent with the login POST alone so ordinary page fetches look like normal
    # browser navigation.
    s.headers.update({'User-Agent': headers['User-Agent']})

    r = s.post(url=url, data=data, headers=headers, allow_redirects=True)
    # Show what the server answered so a rejected login is visible right away.
    print('login:', r.status_code, r.text[:200])

    r = s.get(url=url2).text
    print(r, '*****' * 10)
    html = etree.HTML(r)

    # Pagination links; xpath returns a list of href strings, so each one is
    # searched individually for its page number.
    hrefs = html.xpath('/html/body/div/div/div/div/a/@href') if html is not None else []
    page_numbers = [int(num) for href in hrefs for num in re.findall(r'page=(\d+)', href)]
    # Fall back to a single page when no pagination link was found.
    lastpage = max(page_numbers, default=1)
    nextpage(lastpage)

这里研究了蛮久，不知道为什么现在好像登录不成功了

wp231957 发表于 2021-6-12 11:15:38

盘子开的太大，应先研究能否成功登录

xiaosi4081 发表于 2021-6-12 13:32:53

加个登录后的cookie试试

页: [1]

鱼C论坛's Archiver

太久没玩了，请教一个登录问题