|
发表于 2020-3-11 16:20:59
|
显示全部楼层
- import requests
- import re
- import json
# Browser-like User-Agent sent with the request below.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
# JavaScript file that carries the fault-category table as a JSON payload.
# The page probably changes rarely, so caching it in a local text file would
# load faster, and turning the content into a dict would make lookups faster.
# (The original literal ended with a stray space inside the quotes.)
url = "http://www.12365auto.com/js/cTypeInfo.js?version=20200311"
# Always pass a timeout: without one, requests can block indefinitely.
response = requests.get(url, headers=headers, timeout=10)
# Fail here on an HTTP error instead of later inside json.loads on an error page.
response.raise_for_status()
# Drop the first 16 characters, which precede the JSON payload
# (presumably a JavaScript `var name = ` style prefix -- confirm against the live file).
response_1 = response.text[16:]
# NOTE: this deliberately rebinds the name `json` from the module to the parsed
# table, because daima() looks the table up through the global name `json`.
json = json.loads(response_1)
def daima(dm, categories=None):
    """Translate a comma-separated fault-code string into readable text.

    Each code consists of one category character followed by a numeric item
    id (for example ``"A12"``). The category character is matched against
    the ``"value"`` of every category entry and the number against the
    ``"id"`` of that category's ``"items"``.

    Args:
        dm: Fault codes separated by commas. A trailing comma is accepted
            but no longer required; blank tokens and tokens whose numeric
            part is not an integer are skipped instead of raising.
        categories: Optional category table, a sequence of dicts with the
            keys ``"value"``, ``"name"`` and ``"items"`` (each item a dict
            with ``"id"`` and ``"title"``). When omitted, the module-level
            table bound to the global name ``json`` is used, as before.

    Returns:
        The concatenation of ``"<name>--<title> "`` for every match, in the
        order the codes appear; an empty string when nothing matches.
    """
    # Resolve the global lazily so callers that pass their own table do not
    # depend on the module-level download having happened.
    table = json if categories is None else categories

    parts = []
    for raw_code in dm.split(","):
        code = raw_code.strip()
        if not code:
            # Empty token, e.g. the one produced by a trailing comma.
            continue
        prefix = code[:1]
        try:
            item_id = int(code[1:])
        except ValueError:
            # Malformed code (no numeric part): skip it, keep the rest.
            continue
        for category in table:
            if category["value"] != prefix:  # wrong top-level category
                continue
            for item in category["items"]:
                # Item ids are compared as integers, as in the original code.
                if item["id"] == item_id:
                    parts.append(category["name"] + "--" + item["title"] + " ")
    return "".join(parts)
# Main complaint listing page.
url = "http://www.12365auto.com/zlts/0-0-0-0-0-0_0-0-1.shtml"
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
# Always pass a timeout: without one, requests can block indefinitely.
response = requests.get(url, headers=headers, timeout=10)
# Stop on an HTTP error rather than silently matching nothing in an error page.
response.raise_for_status()
# One match per table row; re.S lets `.` span newlines inside a row.
regex = re.compile(r'<td>(.*?)</td><td bid=(.*?)>(.*?)</td><td>(.*?)</td><td class="tsjs"><a href="(.*?)" target="_blank">(.*?)</a></td><td class="tsgztj">(.*?)</td><td>(.*?)</td>', re.S)
# findall returns one 8-tuple per row, one entry per capture group.
r_1 = regex.findall(response.text)
for row in r_1:
    # Print groups 2, 3, 5 and 7 as-is; group 6 (the "tsgztj" cell) holds the
    # fault codes and is decoded to text by daima().
    print(row[2], "---", row[3], "---", row[5], "---", daima(row[6]), "---", row[7])
复制代码 |
|