转发层级蒲公英图怎么写?
在博文转发传播的过程中,如何对每一转发层级进行科学抽样,以保留各转发层级的数据特征?思路是从最后一层转发层级开始,逐层向上抽取:先从最后一层抽取200条博文(不足200条则全部抽取),再根据每条博文的父ID字段找到上一层的父博文;如果找到的父博文不足200条,则在同一层中随机抽取其他博文补足200条,然后一起继续查找再上一层的父博文,如此循环直到第一层。这样抽取出来的博文,用蒲公英图展示会是什么样子?有大神可以帮忙修改一下下面的代码吗?
import matplotlib.pyplot as plt
import networkx as nx
import random
import numpy as np
# Sample data: maps each post ID to the ID of the post it reposts,
# i.e. {child_id: parent_id}. Real data should come from a database or API.
tweets = {
    # The first-level (original) post has no parent. It is marked with None,
    # the value build_hierarchy skips; the previous marker 0 was treated as a
    # real post ID and produced a phantom root node.
    1: None,
    2: 1,
    3: 1,
    4: 2,
    5: 2,
    6: 3,
    7: 3,
    8: 2,
    9: 3,
    10: 4,
    11: 2,
    12: 3,
    13: 6,
    14: 4,
    15: 6,
    16: 7,
    18: 4,
    19: 4,
    20: 4,
    21: 4,
    22: 4,
    23: 4,
    24: 6,
    25: 4,
    26: 9,
    # ... more data
}
# Build the parent -> children relationship
def build_hierarchy(tweets):
    """Group posts by the post they repost.

    Args:
        tweets: Mapping of ``child_id -> parent_id``. A parent of ``None``
            marks an original (first-level) post.

    Returns:
        A dict mapping each parent ID to the list of its direct children, in
        the iteration order of ``tweets``. Original posts always appear as
        keys, with an empty list when nothing reposted them.
    """
    hierarchy = {}
    for child, parent in tweets.items():
        if parent is None:
            # Register the root even when it has no reposts, so a lone
            # original post does not vanish from the hierarchy.
            hierarchy.setdefault(child, [])
        else:
            hierarchy.setdefault(parent, []).append(child)
    return hierarchy
# Module-level hierarchy built from the sample data. It has to be bound at
# module scope: plot_dandelion_diagram looks up `hierarchy` as a global.
hierarchy = build_hierarchy(tweets)
def _levels_and_parents(hierarchy):
    """Walk ``hierarchy`` breadth-first and return ``(levels, parent_of)``.

    ``levels[k]`` lists the posts at repost level ``k`` (roots are level 1;
    ``levels[0]`` is always empty) and ``parent_of`` maps each non-root post
    to the parent through which it was first reached.
    """
    has_parent = {child for children in hierarchy.values() for child in children}
    frontier = [post for post in hierarchy if post not in has_parent]
    seen = set(frontier)  # guards against cycles and duplicated children
    levels = [[]]
    parent_of = {}
    while frontier:
        levels.append(frontier)
        following = []
        for post in frontier:
            for child in hierarchy.get(post) or ():
                if child not in seen:
                    seen.add(child)
                    parent_of[child] = post
                    following.append(child)
        frontier = following
    return levels, parent_of


def sample_tweets(hierarchy, sample_size=200):
    """Sample posts level by level, from the deepest repost level upwards.

    At the deepest level up to ``sample_size`` posts are drawn at random (all
    of them when there are fewer). At every level above, the parents of the
    posts chosen one level below are always kept; when they number fewer than
    ``sample_size``, other posts of the same level are drawn at random to top
    the level up to ``sample_size`` (or to the whole level if it is smaller).

    Args:
        hierarchy: Mapping of ``parent_id -> list of child ids``.
        sample_size: Maximum number of posts to keep per level.

    Returns:
        A list ``layers`` where ``layers[k]`` is the list of sampled post IDs
        at level ``k``. Roots are level 1 and ``layers[0]`` is always empty,
        so an empty hierarchy yields ``[[]]``.
    """
    levels, parent_of = _levels_and_parents(hierarchy)
    layers = [[] for _ in levels]
    required = []  # parents of the posts chosen one level below
    for level in range(len(levels) - 1, 0, -1):
        candidates = levels[level]
        chosen = list(required)
        shortfall = min(sample_size, len(candidates)) - len(chosen)
        if shortfall > 0:
            already = set(chosen)
            pool = [post for post in candidates if post not in already]
            chosen.extend(random.sample(pool, shortfall))
        layers[level] = chosen
        # dict.fromkeys de-duplicates shared parents while keeping order.
        required = list(
            dict.fromkeys(parent_of[post] for post in chosen if post in parent_of)
        )
    return layers


# NOTE: the premature calls to sample_tweets/plot_dandelion_diagram and the
# orphaned sampling loop (with a module-level `return`) that used to follow
# this function were removed. They ran before plot_dandelion_diagram was
# defined; the driver at the end of the file performs the same calls.
def max_depth(hierarchy):
    """Return the number of repost levels in ``hierarchy``.

    Roots (posts that are nobody's child) form level 1, their children
    level 2, and so on. Every branch is explored, not only the first child
    of each post.

    Args:
        hierarchy: Mapping of ``parent_id -> list of child ids``.

    Returns:
        The deepest level number, or 0 for an empty hierarchy.
    """
    has_parent = {child for children in hierarchy.values() for child in children}
    frontier = [post for post in hierarchy if post not in has_parent]
    seen = set(frontier)  # guards against cycles in malformed data
    depth = 0
    while frontier:
        depth += 1
        following = []
        for post in frontier:
            for child in hierarchy.get(post) or ():
                if child not in seen:
                    seen.add(child)
                    following.append(child)
        frontier = following
    return depth
def get_tweets_at_level(hierarchy, level):
    """Return the posts located at a given repost level.

    Roots (posts that are nobody's child) are level 1; the children of a
    level ``k`` post are level ``k + 1``. Leaf posts are included even though
    they are not keys of ``hierarchy``.

    Args:
        hierarchy: Mapping of ``parent_id -> list of child ids``.
        level: 1-based level number.

    Returns:
        A list of post IDs at ``level``; empty when ``level`` is below 1 or
        deeper than the hierarchy.
    """
    if level < 1:
        return []
    has_parent = {child for children in hierarchy.values() for child in children}
    current = [post for post in hierarchy if post not in has_parent]
    seen = set(current)  # guards against cycles in malformed data
    for _ in range(level - 1):
        if not current:
            break
        following = []
        for post in current:
            for child in hierarchy.get(post) or ():
                if child not in seen:
                    seen.add(child)
                    following.append(child)
        current = following
    return current
# Draw the dandelion diagram
def plot_dandelion_diagram(layers):
    """Draw the sampled repost layers as a dandelion-style network graph.

    Args:
        layers: List of lists of post IDs, where the list index is the
            repost level (the shape returned by sample_tweets). ``None``
            entries are skipped.

    Reads the module-level ``hierarchy`` mapping (parent ID -> child IDs) to
    connect the posts, and displays the figure with ``plt.show()``.
    """
    G = nx.DiGraph()
    for level, posts in enumerate(layers):
        for post in posts:
            if post is not None:
                G.add_node(post, level=level)

    # Only connect posts that were both sampled. add_edge would otherwise
    # create child nodes without a 'level' attribute, which breaks the
    # per-level grouping below.
    for post in list(G.nodes):
        for child in hierarchy.get(post) or ():
            if child in G:
                G.add_edge(post, child)

    pos = nx.spring_layout(G)
    node_levels = nx.get_node_attributes(G, 'level')
    levels = sorted(set(node_levels.values()))
    colors = plt.cm.rainbow(np.linspace(0, 1, len(levels)))
    # Pair each level with its own colour by position: level numbers need not
    # start at 0, so they cannot be used as indices into `colors`.
    for color, level in zip(colors, levels):
        nx.draw_networkx_nodes(
            G,
            pos,
            nodelist=[node for node, lv in node_levels.items() if lv == level],
            # A one-row list keeps the RGBA value from being read as four
            # separate per-node scalars.
            node_color=[color],
            node_size=100,
        )
    nx.draw_networkx_edges(G, pos, edgelist=G.edges, width=0.5, alpha=0.5)
    plt.show()
# Driver: build the hierarchy from the sample data, sample every repost
# level, then draw the result. `hierarchy` must be assigned at module level
# before plotting because plot_dandelion_diagram reads it as a global.
hierarchy = build_hierarchy(tweets)
layers = sample_tweets(hierarchy)
plot_dandelion_diagram(layers)
页:
[1]