# Forum post banner: last edited by YunGuo on 2021-3-16 15:10.
import re
import csv
import requests
def get_js():
    """Download the Audi dealer-map script and return its data section.

    Returns:
        str: The text located between ``var dealers = [];`` and the first
        following ``type[0]`` in the downloaded script.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status.
        IndexError: If the expected markers are not present in the
            response body.
    """
    url = 'https://contact.audi.cn/dictionary_js/map_dealer.js?v=0.3'
    # Without a timeout a stalled connection would block the script forever.
    res = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    res.raise_for_status()
    # re.S lets ``.`` span newlines: the data section covers many lines.
    match = re.search(r'var dealers = \[];(.*?)type\[0]', res.text, re.S)
    if match is None:
        # IndexError is what the previous ``findall(...)[0]`` raised here.
        raise IndexError('dealer data section not found in map_dealer.js')
    return match.group(1)
def get_city(js):
    """Build a province-name -> {city code: city name} mapping.

    Args:
        js (str): Script text containing ``dprovinces[<n>]=['<code>','<label>'];``
            and ``dcitys['<code>']=<array literal>;`` assignments.

    Returns:
        dict: Maps each province name to a dict of ``{city code: city name}``
        built from the ``dcitys`` entry that carries the province's code.

    Raises:
        KeyError: If a ``dcitys`` entry uses a code with no matching
            ``dprovinces`` entry.
        ValueError, SyntaxError: If a ``dcitys`` array is not a plain
            Python-compatible literal.
    """
    # Local import keeps this function self-contained within the file.
    import ast

    # Provinces: the ``\w`` skips one leading character of the label before
    # the captured name (presumably a sort-letter prefix -- confirm against
    # the live data).
    provinces = re.findall(r"dprovinces\[\d+\]=\['(\d+)','\w(.*?)'\];", js)
    province_dic = dict(provinces)

    # Cities: one array literal per province code.
    citys_dic = {}
    for c_code, citys in re.findall(r"dcitys\['(\d+)'\]=(.*?);", js):
        # NOTE(security): this text comes from the network. It used to be
        # passed to eval(); literal_eval only accepts literals, so a
        # tampered response cannot execute code.
        city_rows = ast.literal_eval(citys)
        # row[0] is the city code; row[1][2:] drops the first two characters
        # of the label (presumably a prefix letter plus separator -- confirm).
        citys_dic[province_dic[c_code]] = {row[0]: row[1][2:] for row in city_rows}
    return citys_dic
def get_distributor(js, citys_dic):
    """Extract dealer rows from the script and tag them with province/city.

    Args:
        js (str): Script text containing ``dealers['<city code>']=<array literal>;``
            assignments.
        citys_dic (dict): Mapping of province name to ``{city code: city name}``.

    Returns:
        list: One ``[province, city, name, address, phone]`` list per dealer.
        ``province`` and ``city`` are empty strings when the dealer group's
        city code is not found in ``citys_dic``. ``name`` is element 1 of the
        dealer record, ``address`` is element 4, and ``phone`` is
        ``"<last element>-<element 3>"`` (presumably area code and local
        number -- confirm against the live data).

    Raises:
        ValueError, SyntaxError: If a ``dealers`` array is not a plain
            Python-compatible literal.
        IndexError: If a dealer record has fewer than five elements.
    """
    # Local import keeps this function self-contained within the file.
    import ast

    # Reverse index built once instead of rescanning every province for each
    # dealer group. Later provinces overwrite earlier ones, so a duplicated
    # city code resolves to the last match, as the original scan did.
    location_by_code = {}
    for province_name, cities in citys_dic.items():
        for city_code, city_name in cities.items():
            location_by_code[city_code] = (province_name, city_name)

    distributor_lis = []
    for codes, infos in re.findall(r"dealers\['(\d+)'\]=(.*?);", js):
        province, city = location_by_code.get(codes, ('', ''))
        # NOTE(security): this text comes from the network. It used to be
        # passed to eval(); literal_eval only accepts literals, so a
        # tampered response cannot execute code.
        for info in ast.literal_eval(infos):
            distributor_name = info[1]
            distributor_add = info[4]
            distributor_tel = f'{info[-1]}-{info[3]}'
            distributor_lis.append(
                [province, city, distributor_name, distributor_add, distributor_tel]
            )
    return distributor_lis
def save(item):
    """Append dealer rows to ``audi.csv`` in the current working directory.

    The header row is written only when the file is empty, so running the
    script repeatedly no longer inserts duplicate header rows between the
    data rows.

    Args:
        item: Iterable of rows; each row is a sequence of five values
            matching the header columns.
    """
    # newline='' lets the csv module control line endings itself.
    # NOTE(review): no encoding is given, so the platform default is used --
    # confirm that is what consumers of the file expect.
    with open('audi.csv', 'a', newline='') as f:
        writer = csv.writer(f)
        # In append mode the position starts at the end of the file, so 0
        # means the file is new or empty and still needs its header.
        if f.tell() == 0:
            writer.writerow(['省份', '城市', '经销商', '地址', '联系方式'])
        writer.writerows(item)
    print('保存完成')
if __name__ == '__main__':
    # Pipeline: download the script, parse the province/city tables, parse
    # the dealer records, then write everything to the CSV file.
    js_code = get_js()
    city_info = get_city(js_code)
    distributor_lis = get_distributor(js_code, city_info)
    save(distributor_lis)