冷回清风暖 发表于 2020-4-1 15:18:31

微博疫情数据爬取

import requests
import xlwt

def get_json():
    """Fetch the Sina COVID-19 map data feed and return it parsed.

    Returns:
        dict: the parsed JSON payload on success.
        None: when the response body is empty or the request fails.
    """
    url = "https://interface.sina.cn/news/wap/fymap2020_data.d.json"
    headers = {
      'cookie': 'Apache=2885827998838.657.1585714305110; SINAGLOBAL=2885827998838.657.1585714305110; ULV=1585714305112:1:1:1:2885827998838.657.1585714305110:; NCP-SINA-CN=; genTime=1585715444; statuid=__10.41.1.25_1585715444_0.40395200; statuidsrc=Mozilla%2F5.0+%28Linux%3B+Android+10%3B+VOG-AL00+Build%2FHUAWEIVOG-AL00%3B+wv%29+AppleWebKit%2F537.36+%28KHTML%2C+like+Gecko%29+Version%2F4.0+Chrome%2F74.0.3729.186+Mobile+Safari%2F537.36%3B+HUAWEI-VOG-AL00__sinanews__7.33.5__android__10__525%3B+hybrid__0.8__online%6010.41.1.25%60http%3A%2F%2Finterface.sina.cn%2Fnews%2Fwap%2Ffyzt_newsv2.d.json%3Fcallback%3D_aLocalNewsFunction%26_%3D1585715443935%26type%3Dprovince%26code%3Dhenan%26page%3D1%60%60__10.41.1.25_1585715444_0.40395200; ustat=__10.41.1.25_1585715444_0.40395200; historyRecord={"href":"https://news.sina.cn/zt_d/yiqing0121","refer":"https://cn.bing.com/"}; vt=4',
      'referer': 'https://news.sina.cn/zt_d/yiqing0121',
      'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36 Edg/80.0.361.69',
    }
    try:
        # timeout keeps the script from hanging forever on a dead server
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        if response.content:
            return response.json()
        return None
    except requests.RequestException:
        # BUG FIX: this previously returned the string "连接网页失败",
        # which callers then indexed like a dict and crashed on.
        # None is the single, documented "no data" value.
        return None

def get_data(json):
    """Yield one record per city from the parsed Sina feed.

    Parameters:
        json: the full payload returned by get_json(); cities live under
              json['data']['list'][i]['city'].

    Yields:
        dict with keys:
            'city'     - city name
            'conNum'   - cumulative confirmed cases
            'cureNum'  - cured cases
            'deathNum' - deaths
        The aggregate '境外输入人员' (imported cases) row is skipped.
    """
    for province in json['data']['list']:
        for city in province['city']:
            name = city['name']
            if name == '境外输入人员':
                continue
            yield {
                'city': name,
                'conNum': city['conNum'],
                'cureNum': city['cureNum'],
                'deathNum': city['deathNum'],
            }
    # BUG FIX: the original ended with "return data" after the yields;
    # in a generator that value is silently stuffed into StopIteration
    # and never used, so it has been removed.

def save_data(data, filename='疫情0401.xls'):
    """Write city records to an .xls workbook.

    Parameters:
        data: iterable of dicts with keys 'city', 'conNum', 'cureNum',
              'deathNum' (as produced by get_data()).
        filename: output path; defaults to the original hard-coded name
                  so existing callers are unaffected.
    """
    book = xlwt.Workbook()
    sheet = book.add_sheet(u'20200401', cell_overwrite_ok=True)
    # Header row, then one numbered row per record.
    for col, header in enumerate(("FID", "city", "conNum", "cureNum", "deathNum")):
        sheet.write(0, col, header)
    for row, item in enumerate(data, start=1):
        sheet.write(row, 0, row)  # FID: 1-based sequence number
        sheet.write(row, 1, item['city'])
        sheet.write(row, 2, item['conNum'])
        sheet.write(row, 3, item['cureNum'])
        sheet.write(row, 4, item['deathNum'])
    book.save(filename)


if __name__ == '__main__':
    payload = get_json()
    # BUG FIX: get_data() returns a generator; the original passed it to
    # save_data() first, which exhausted it, so the print loop afterwards
    # printed nothing. Materialize it once and reuse the list.
    # (Also renamed the variable: 'json' shadowed the stdlib module name.)
    records = list(get_data(payload))
    save_data(records)
    for record in records:
        print(record)



爬虫小白 由于作业要用到这些数据 所以做了下
然后给大家分享一下

风陵渡口杨过 发表于 2020-4-2 21:15:47

厉害

妈妈l我错乐 发表于 2021-12-9 08:20:42

牛啊
页: [1]
查看完整版本: 微博疫情数据爬取