|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
初学 Python 半个月,写了这样一个脚本:通过豆瓣 API 获取用户的关注列表,并据此组成关注网络(我是跟着老师做社会网络、数据挖掘方向的)。发上来给大家看看,希望大家能给点意见;刚初学,肯定有很多需要修改的地方。
- # python 3.4 gui
import json
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
# import easygui as g
- '''
- 函数定义
- '''
def _fetch_contacts_page(user_name, api_key, start_index, max_results,
                         max_retries=None, retry_delay=1.0):
    """Download and decode one page of a user's Douban contacts feed.

    The user name is percent-encoded into the URL path and the query string
    is built with ``urlencode`` so that non-ASCII or reserved characters do
    not break the request.

    Args:
        user_name: Douban user id/uid whose contacts are requested.
        api_key: API key sent as the ``apikey`` query parameter.
        start_index: 1-based index of the first contact on this page.
        max_results: Maximum number of contacts requested per page.
        max_retries: Number of retries allowed after a failed request, or
            ``None`` to keep retrying indefinitely.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        The decoded JSON document (a dict).

    Raises:
        TimeoutError, urllib.error.URLError: When ``max_retries`` is set and
            has been exhausted.
    """
    query = urllib.parse.urlencode([
        ('apikey', api_key),
        ('alt', 'json'),
        ('start-index', start_index),
        ('max-results', max_results),
    ])
    url = 'http://api.douban.com/people/{0}/contacts?{1}'.format(
        urllib.parse.quote(str(user_name), safe=''), query)

    failures = 0
    while True:
        try:
            # The context manager closes the HTTP response on every path.
            with urllib.request.urlopen(url) as response:
                return json.loads(response.read().decode('utf-8'))
        except (TimeoutError, urllib.error.URLError):
            # NOTE: HTTPError (e.g. 404 for an unknown user) is a URLError
            # subclass, so it is retried as well; pass max_retries to bound it.
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            print('网络不稳定,正在重试...')
            # Pause so a dead connection does not turn into a busy loop.
            time.sleep(retry_delay)


def _contact_title_uid(contact):
    """Return ``(title, uid)`` for one item of the feed's ``entry`` list."""
    return (contact['title']['$t'], contact['db:uid']['$t'])


def user_follow(user_name, file_name='E:\\follow_file.txt',
                max_retries=None, retry_delay=1.0):
    """Collect the uids of everyone ``user_name`` follows on Douban.

    Pages through the contacts feed 50 entries at a time, prints progress,
    appends one ``"<user_name> <uid>\\n"`` line per contact to ``file_name``
    and returns the list of contact uids.

    Args:
        user_name: Douban user id/uid to query.
        file_name: File the ``user uid`` pairs are appended to (UTF-8).
        max_retries: Retries allowed per page request; ``None`` retries
            forever.
        retry_delay: Seconds to wait between failed request attempts.

    Returns:
        List of uid strings, in feed order.
    """
    api_key = '回复不可见'  # placeholder from the post; substitute a real API key
    max_results = 50  # largest page size, per the original author's note
    start_index = 1

    # The first page doubles as the metadata source, so it is fetched once.
    page = _fetch_contacts_page(user_name, api_key, start_index, max_results,
                                max_retries, retry_delay)
    items_per_page = page['openSearch:itemsPerPage']['$t']
    author_name = page['author']['name']['$t']
    total_results = int(page['openSearch:totalResults']['$t'])

    print('关注人数:', total_results)
    print('每次获取的关注人数:', items_per_page)
    print('对象名称:', author_name)

    follow_list_pair = []
    follow_list_solo = []

    while start_index <= total_results:
        end_index = min(start_index + max_results - 1, total_results)
        print(start_index, '-', end_index)

        # Iterate over what the server actually returned: the page may be
        # shorter than computed, or the 'entry' key may be absent entirely.
        expected = end_index - start_index + 1
        contacts = page.get('entry', [])[:expected]
        for offset, contact in enumerate(contacts):
            title, uid = _contact_title_uid(contact)
            follow_list_pair.append(user_name + ' ' + uid + '\n')
            follow_list_solo.append(uid)
            print('第' + str(start_index + offset) + '个关注:',
                  (title, uid), ',已保存')

        start_index += max_results
        if start_index <= total_results:
            page = _fetch_contacts_page(user_name, api_key, start_index,
                                        max_results, max_retries, retry_delay)

    # Append mode keeps results from earlier calls; 'with' guarantees close.
    with open(file_name, 'a', encoding='utf-8') as follow_file:
        follow_file.writelines(follow_list_pair)

    return follow_list_solo
# Driver: crawl the seed user's contacts, then each contact's own contacts.
# Disabled easygui alternative to the console prompt:
# user_input = g.enterbox(msg='请输入用户id或uid:', title='查询用户关注网络', default='ahbei', strip=True, image=None, root=None)
user_input = input('请输入用户名称:')
done_user = [user_input]  # users whose contact lists were already requested
user_input_follow = user_follow(user_input)
for followers in user_input_follow:  # second level
    if str(followers) in done_user:
        continue  # already crawled; skip
    done_user.append(str(followers))
    user_follow_follow = user_follow(str(followers))
    # Disabled third level:
    # for followers_2 in user_follow_follow:
    #     if str(followers_2) not in done_user:
    #         done_user.append(str(followers_2))
    #         user_follow_follow_follow = user_follow(str(followers_2))
    # time.sleep(2)  # wait 2 s
复制代码
后期会使用 igraph 对这个关注网络进行分析(igraph 我以前在 R 语言里用过)。
douban_gui.zip
(1.66 KB, 下载次数: 7, 售价: 1 鱼币)
|
评分
-
参与人数 1 | 荣誉 +10 |
鱼币 +10 |
贡献 +5 |
收起
理由
|
小甲鱼
| + 10 |
+ 10 |
+ 5 |
感谢楼主无私奉献! |
查看全部评分
|