import requests
import re
import json
# Browser-like User-Agent sent with every request made by this script.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}

# JavaScript file that carries the fault-code catalog as JSON.
# Original author's note: this page probably changes rarely, so keeping a
# local text copy would be faster to read, and turning the content into a
# dict would make lookups faster.
url = "http://www.12365auto.com/js/cTypeInfo.js?version=20200311"

# Without a timeout requests waits indefinitely on an unresponsive server.
response = requests.get(url, headers=headers, timeout=30)
# Fail with a clear HTTP error instead of a confusing JSON parse error.
response.raise_for_status()

# Drop the 16 leading characters that precede the JSON payload.
response_1 = response.text[16:]

# NOTE: this rebinds the name ``json`` from the standard-library module to the
# parsed catalog. daima() reads the catalog through this global, so the name
# is kept; the ``json`` module is no longer reachable after this line.
json = json.loads(response_1)
def daima(dm, catalog=None):
    """Translate a comma-separated string of fault codes into readable text.

    Each code is one category character followed by a numeric item id, for
    example ``"A12"``. The category character is matched against the
    ``"value"`` of each catalog entry and the number against the ``"id"`` of
    that entry's items.

    Args:
        dm: Comma-separated fault codes, e.g. ``"A12,B3,"``. Empty entries
            (such as the one left by a trailing comma) are ignored, and so
            are codes whose numeric part cannot be parsed.
        catalog: Optional list of category dicts, each with ``"value"``,
            ``"name"`` and ``"items"`` keys, where every item has an ``"id"``
            and a ``"title"``. Defaults to the module-level ``json`` list.

    Returns:
        The concatenation of ``"<category name>--<item title> "`` for every
        code found in the catalog, in input order. Empty string when nothing
        matches.
    """
    if catalog is None:
        # At module level ``json`` has been rebound to the parsed catalog.
        catalog = json

    matches = []
    for raw_code in dm.split(","):
        code = raw_code.strip()
        if not code:
            # Skip blanks wherever they occur instead of assuming that only
            # the last entry is empty.
            continue
        try:
            item_id = int(code[1:])
        except ValueError:
            # A code without a numeric part cannot match any catalog item.
            continue
        category_key = code[0]
        for category in catalog:
            if category["value"] != category_key:
                continue
            for item in category["items"]:
                if item["id"] == item_id:
                    matches.append(category["name"] + "--" + item["title"] + " ")
    return "".join(matches)
# Complaint list page (the original comment calls it the main page).
url = "http://www.12365auto.com/zlts/0-0-0-0-0-0_0-0-1.shtml"
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}

# Without a timeout requests waits indefinitely on an unresponsive server.
response = requests.get(url, headers=headers, timeout=30)
# Stop on an HTTP error rather than silently matching nothing in an error page.
response.raise_for_status()

# One match per table row; the eight groups are the captured cell contents.
# The pattern is split per cell for readability only; the pieces concatenate
# to the exact original expression.
regex = re.compile(
    r'<td>(.*?)</td>'
    r'<td bid=(.*?)>(.*?)</td>'
    r'<td>(.*?)</td>'
    r'<td class="tsjs"><a href="(.*?)" target="_blank">(.*?)</a></td>'
    r'<td class="tsgztj">(.*?)</td>'
    r'<td>(.*?)</td>',
    re.S,
)
r_1 = regex.findall(response.text)

for row in r_1:
    # Group 6 (the "tsgztj" cell) is passed to daima() as the fault-code
    # string; the other printed groups are shown as captured.
    print(row[2], "---", row[3], "---", row[5], "---", daima(row[6]), "---", row[7])
|