|
发表于 2021-3-16 00:25:16
|
显示全部楼层
本楼为最佳答案
本帖最后由 YunGuo 于 2021-3-16 15:10 编辑
- import re
- import csv
- import requests
def get_js():
    """Download Audi's dealer-map script and return its data section.

    Returns:
        str: The text found between ``var dealers = [];`` and the first
        following ``type[0]`` in the downloaded script.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        IndexError: If the expected markers are not present in the response.
    """
    url = 'https://contact.audi.cn/dictionary_js/map_dealer.js?v=0.3'
    # requests applies no timeout by default; without one a stalled server
    # would block this call forever.
    res = requests.get(url, timeout=10)
    # Fail on HTTP errors here instead of regex-searching an error page.
    res.raise_for_status()
    match = re.search(r'var dealers = \[];(.*?)type\[0]', res.text, re.S)
    if match is None:
        # IndexError is kept for backward compatibility with callers that
        # treat a missing section as an index failure.
        raise IndexError('dealer data section not found in map_dealer.js')
    return match.group(1)
def get_city(js):
    """Build a province -> {city code: city name} mapping from the script.

    Args:
        js: Script text containing ``dprovinces[i]=['code','Xname'];`` and
            ``dcitys['code']=[[city_code, raw_name], ...];`` assignments.

    Returns:
        dict: Province name mapped to a dict of city code -> city name.

    Raises:
        KeyError: If a ``dcitys`` entry uses a province code that has no
            ``dprovinces`` entry.
        ValueError, SyntaxError: If a ``dcitys`` value is not a plain literal.
    """
    # Function-scope import: keeps this block self-contained.
    import ast

    # Provinces. Raw string avoids invalid-escape warnings. The single \w in
    # front of the capture group drops one leading character of the name
    # (presumably an index letter -- verify against the live data).
    provinces = re.findall(r"dprovinces\[\d+\]=\['(\d+)','\w(.*?)'\];", js)
    province_dic = dict(provinces)

    # Cities, grouped under their province code.
    city_infos = re.findall(r"dcitys\['(\d+)'\]=(.*?);", js)
    citys_dic = {}
    for c_code, citys in city_infos:
        # SECURITY: this text comes from the network. It is parsed with
        # ast.literal_eval (literals only) instead of eval(), so the remote
        # server cannot make this process execute arbitrary code.
        rows = ast.literal_eval(citys)
        # [2:] strips the first two characters of each raw city name
        # (presumably a letter prefix plus separator -- TODO confirm).
        citys_dic[province_dic[c_code]] = {row[0]: row[1][2:] for row in rows}
    return citys_dic
def get_distributor(js, citys_dic):
    """Extract dealer rows from the script and tag them with province/city.

    Args:
        js: Script text containing ``dealers['city_code']=[[...], ...];``
            assignments.
        citys_dic: Province name -> {city code: city name}, as returned by
            ``get_city``.

    Returns:
        list: One ``[province, city, name, address, phone]`` list per dealer.
        Province and city are empty strings when the dealer group's code is
        not present in ``citys_dic``.

    Raises:
        ValueError, SyntaxError: If a ``dealers`` value is not a plain literal.
        IndexError: If a dealer record has fewer than five fields.
    """
    # Function-scope import: keeps this block self-contained.
    import ast

    # Flatten the nested mapping once so each dealer group needs a single
    # dict lookup. If a city code occurs under several provinces, the last
    # province in iteration order wins.
    city_lookup = {}
    for province_name, cities in citys_dic.items():
        for city_code, city_name in cities.items():
            city_lookup[city_code] = (province_name, city_name)

    # Dealer records, grouped by city code.
    distributor_infos = re.findall(r"dealers\['(\d+)'\]=(.*?);", js)
    distributor_lis = []
    for codes, infos in distributor_infos:
        province, city = city_lookup.get(codes, ('', ''))
        # SECURITY: this text comes from the network. It is parsed with
        # ast.literal_eval (literals only) instead of eval(), so the remote
        # server cannot make this process execute arbitrary code.
        for info in ast.literal_eval(infos):
            distributor_name = info[1]
            distributor_add = info[4]
            # Last field joined with the fourth field (presumably area code
            # and local number -- TODO confirm against the data).
            distributor_tel = f'{info[-1]}-{info[3]}'
            distributor_lis.append(
                [province, city, distributor_name, distributor_add, distributor_tel]
            )
    return distributor_lis
def save(item):
    """Append dealer rows to ``audi.csv`` in the current working directory.

    The header row is written only when the file is empty, so calling this
    function repeatedly does not insert duplicate header rows between data.

    Args:
        item: Iterable of rows; each row is a sequence of five values in the
            order province, city, dealer, address, contact.
    """
    # Explicit encoding so the Chinese text never depends on the locale
    # codec; utf-8-sig adds a BOM at the start of a new file so spreadsheet
    # tools detect UTF-8.
    with open('audi.csv', 'a', newline='', encoding='utf-8-sig') as f:
        # Move to the end (whence=2) and check the offset: a zero offset
        # means the file is empty and still needs its header.
        f.seek(0, 2)
        needs_header = f.tell() == 0
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(['省份', '城市', '经销商', '地址', '联系方式'])
        writer.writerows(item)
    print('保存完成')
if __name__ == '__main__':
    # Download the script once; both lookup tables are parsed from the same
    # text, then the combined dealer rows are written to the CSV file.
    script_text = get_js()
    save(get_distributor(script_text, get_city(script_text)))
复制代码 |
|