马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 johnnyb 于 2020-7-11 20:09 编辑
新手第一个简单爬虫.
爬取全国KFC门店地址. 输入城市名称即可.
请指点一二,感谢感谢!
import requests
import json
def getJSON_text(url, num=str(10), city='北京'):
    """POST a store-list query to ``url`` and return the response body as text.

    Args:
        url: Endpoint that receives the form-encoded POST request.
        num: Value sent as the ``pageSize`` form field (defaults to ``"10"``).
        city: Value sent as the ``cname`` form field (defaults to ``'北京'``).

    Returns:
        The decoded response text, or ``None`` when the request fails
        (connection error, timeout, or an HTTP error status).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
    }
    data = {
        "cname": city,
        "pid": "",
        "pageIndex": "1",
        "pageSize": num,
    }
    try:
        response = requests.post(url=url, data=data, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        # Handle only network/HTTP failures. A bare ``except`` would also
        # swallow KeyboardInterrupt, SystemExit and genuine programming errors.
        print('出错了,请检查城市名称!')
        return None
    # Let requests guess the charset from the body before decoding it.
    response.encoding = response.apparent_encoding
    return response.text
def user_input():
    """Prompt for the target city; an empty reply falls back to '北京'."""
    answer = input("请输入目标城市(回车默认'北京'):")
    # An empty string is falsy, so pressing Enter alone selects the default.
    return answer or '北京'
def main():
    """Ask for a city and print every KFC store address returned for it.

    Two requests are made: the first reads the total store count
    (``rowcount``) from the response, the second asks for that many rows so
    that all stores arrive in a single page.
    """
    url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
    city = user_input()  # city chosen by the user
    first_page = getJSON_text(url, city=city)  # first call: fetch JSON to learn the total
    if first_page is None:
        # The request failed and was already reported by getJSON_text;
        # json.loads(None) would raise TypeError, so stop here.
        return
    num = json.loads(first_page)["Table"][0]['rowcount']  # total number of addresses
    print("含有地址{}个.如下!".format(num))
    str_json = getJSON_text(url, num, city)  # second call: request all rows at once
    if str_json is None:
        return
    for store in json.loads(str_json)['Table1']:  # one entry per store
        print("{}{}{} 辅助标记地点:({}), 特色: {}".format(
            store['provinceName'],
            store['cityName'],
            store['addressDetail'],
            store['storeName'],
            store['pro'],
        ))
# Entry point: run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
'''
这是返回的json内容, 对照最后的格式化输出,方便理解.
{
"Table": [
{
"rowcount": 83
}
],
"Table1": [
{
"rownum": 1,
"storeName": "车百餐厅",
"addressDetail": "东风大街38号欧亚车百一楼肯德基",
"pro": "Wi-Fi,店内参观,礼品卡,生日餐会",
"provinceName": "吉林省",
"cityName": "长春市"
},
{
"rownum": 2,
"storeName": "福林",
"addressDetail": "同志街68号",
"pro": "24小时,Wi-Fi,礼品卡",
"provinceName": "吉林省",
"cityName": "长春市"
}
]
}
'''
|