30 鱼币
本帖最后由 黑夜之惑 于 2020-6-24 10:13 编辑
import time
import json
import pandas as pd
import requests
import re
header = {'content-type': 'application/json; charset=utf-8', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'}
Cookie = {'Cookie': '_T_WM: 16302431710; WEIBOCN_FROM=1110006030; ALF: 1595143722; SUB: _2A25z6Bl7DeRhGeBN7FcS8CbMyD-IHXVREqczrDV6PUNbktANLVH9kW1NRCJy5XgYpfllO6KntJ0z9VlG3WS8Dk5B;SUBP: 0033WrSXqPxfM725Ws9jqgMF55529P9D9Wh-s9cYGJn3yMhQAD89z7Ve5JpX5KzhUgL.Foq0S0-0ehn7e0e2dJLoIpjLxK-LBo5LBo.LxKqL1-eL1h.LxKqLBozLB.zt; SUHB: 0auQfAd6YqJgiP; SSOLoginState=1592205572; MLOGIN=1; M_WEIBOCN_PARAMS: luicode%3D10000011%26lfid%3D102803%26fid%3D231522type%253D1%2526t%253D10%2526q%253D%2523%25E5%25A4%2596%25E4%25BA%25A4%25E9%2583%25A8%25E5%259B%259E%25E5%25BA%2594%25E4%25B8%25AD%25E5%258D%25B0%25E8%25BE%25B9%25E5%25A2%2583%25E5%2586%25B2%25E7%25AA%2581%2523%26uicode%3D10000011; XSRF-TOKEN: 49dc89'}
for ii in range(100):
url_base = 'https://m.weibo.cn/api/container/getIndex?containerid=231522type%3D1%26t%3D10%26q%3D%23%E5%A4%96%E4%BA%A4%E9%83%A8%E5%9B%9E%E5%BA%94%E4%B8%AD%E5%8D%B0%E8%BE%B9%E5%A2%83%E5%86%B2%E7%AA%81%23&extparam=%23%E5%A4%96%E4%BA%A4%E9%83%A8%E5%9B%9E%E5%BA%94%E4%B8%AD%E5%8D%B0%E8%BE%B9%E5%A2%83%E5%86%B2%E7%AA%81%23&luicode=10000011&lfid=102803&page_type=searchall'
url = url_base + str(ii + 1)
# print(url)
html = requests.get(url, headers=header, cookies=Cookie)
print(html.text)
try:
for jj in range(len(html.json()['data']['cards'])):
print(html.json()['data']['cards'][jj]['mblog']['isLongText'])
if not html.json()['data']['cards'][jj]['mblog']['isLongText']:
data1 = [(html.json()['data']['cards'][jj]['mblog']['user']['id'],
html.json()['data']['cards'][jj]['mblog']['user']['screen_name'],
html.json()['data']['cards'][jj]['mblog']['reposts_count'],
html.json()['data']['cards'][jj]['mblog']['comments_count'],
html.json()['data']['cards'][jj]['mblog']['attitudes_count'],
html.json()['data']['cards'][jj]['mblog']['text'],
html.json()['data']['cards'][jj]['mblog']['created_at'],
html.json()['data']['cards'][jj]['mblog']['source'])]
else:
data1 = [(html.json()['data']['cards'][jj]['mblog']['user']['id'],
html.json()['data']['cards'][jj]['mblog']['user']['screen_name'],
html.json()['data']['cards'][jj]['mblog']['reposts_count'],
html.json()['data']['cards'][jj]['mblog']['comments_count'],
html.json()['data']['cards'][jj]['mblog']['attitudes_count'],
html.json()['data']['cards'][jj]['mblog']['longText']['longTextContent'],
html.json()['data']['cards'][jj]['mblog']['created_at'],
html.json()['data']['cards'][jj]['mblog']['source'])]
data2 = pd.DataFrame(data1)
data2.to_csv('微博评论.csv', header=False, index=False, mode='a+')
except:
print("抓取失败")
print('page ' + str(ii + 1) + ' has done')
time.sleep(3)
1、import json显示没有引用
2、try except一直运行抓取失败
我来回答