|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 johnnyb 于 2020-7-11 20:09 编辑
新手第一个简单爬虫.
爬取全国KFC门店地址. 输入城市名称即可.
请指点一二,感谢感谢!
- import requests
- import json
def getJSON_text(url, num='10', city='北京'):
    """POST a store-list query to ``url`` and return the response body as text.

    Args:
        url: Endpoint the form is posted to.
        num: Value sent as the ``pageSize`` form field (default ``'10'``).
        city: Value sent as the ``cname`` form field (default ``'北京'``).

    Returns:
        The decoded response text, or ``None`` when the request fails
        (connection error, timeout, or a non-2xx status). A message is
        printed in the failure case.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
    }
    data = {
        "cname": city,
        "pid": "",
        "pageIndex": "1",
        "pageSize": num,
    }
    try:
        response = requests.post(url=url, data=data, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are handled here; a bare ``except`` would
        # also hide KeyboardInterrupt and genuine programming errors.
        print('出错了,请检查城市名称!')
        return None
    # Use the encoding detected from the body rather than the header value.
    response.encoding = response.apparent_encoding
    return response.text
def user_input():
    """Prompt for the target city and return it.

    Leading and trailing whitespace is stripped from what the user types, so
    a whitespace-only answer counts as empty and stray spaces are never sent
    as part of the city name.

    Returns:
        The city name entered by the user, or ``'北京'`` when the answer is
        empty.
    """
    city = input("请输入目标城市(回车默认'北京'):").strip()
    return city or '北京'
def main():
    """Ask for a city, then fetch and print every store returned for it.

    Two requests are made: the first (default page size) is used only to read
    the total row count from ``Table[0]['rowcount']``; the second asks for
    that many rows and prints each entry of ``Table1``.

    Returns early, without raising, when a request fails or when the reported
    row count is zero.
    """
    url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
    city = user_input()  # city chosen by the user

    # First call: only needed to learn how many rows exist in total.
    first_page = getJSON_text(url, city=city)
    if first_page is None:
        # getJSON_text has already reported the failure; json.loads(None)
        # would otherwise raise TypeError.
        return
    num = json.loads(first_page)["Table"][0]['rowcount']
    print("含有地址{}个.如下!".format(num))
    if not num:
        # Nothing to list, so skip the second request entirely.
        return

    # Second call: request all rows at once by using the total as page size.
    all_rows = getJSON_text(url, num, city)
    if all_rows is None:
        return
    for store in json.loads(all_rows)['Table1']:
        print("{}{}{} 辅助标记地点:({}), 特色: {}".format(
            store['provinceName'],
            store['cityName'],
            store['addressDetail'],
            store['storeName'],
            store['pro'],
        ))
# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
- '''
- 这是返回的json内容, 对照最后的格式化输出,方便理解.
- {
- "Table": [
- {
- "rowcount": 83
- }
- ],
- "Table1": [
- {
- "rownum": 1,
- "storeName": "车百餐厅",
- "addressDetail": "东风大街38号欧亚车百一楼肯德基",
- "pro": "Wi-Fi,店内参观,礼品卡,生日餐会",
- "provinceName": "吉林省",
- "cityName": "长春市"
- },
- {
- "rownum": 2,
- "storeName": "福林",
- "addressDetail": "同志街68号",
- "pro": "24小时,Wi-Fi,礼品卡",
- "provinceName": "吉林省",
- "cityName": "长春市"
- }
- ]
-
- }
- '''
复制代码 |
评分
-
查看全部评分
|