|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import numpy as np
# 冗余序列不影响k_mer数量
def getk_mer(str):
global k
n = len(str)
k_mer = []
step = n - k + 1
for i in range(step):
k_mer.append(str[i:i + k])
return k_mer
# 对这条reads的反向互补序列再取一次Kmer
def getinverst(str):
return str[::-1]
k = int(input("输入k值:"))
seq = input("输入序列:")
list_seq = seq.split(" ")
list_kmer = []
for s in list_seq:
k_mer = getk_mer(s)
list_kmer.append(k_mer[:])
new_list = np.array(list_kmer).flatten() #flatten 扁平化,压成一维
new_list = new_list.tolist()
print(new_list)
list_kmer_inv = []
inverst = getinverst(seq)
list_inv = inverst.split(" ")
# print(inverst)
for i in list_inv:
k_mer_inv = getk_mer(i)
list_kmer_inv.append(k_mer_inv[:])
new_list_inv = np.array(list_kmer_inv).flatten() #flatten 扁平化,压成一维
new_list_inv = new_list_inv.tolist()
print(new_list_inv)
# 将reads分割成若干k_mer,可以统计各k_mer出现的次数,
# 最多的次数为深度,深度可以估计基因组大小
def getdbg(seq):
global k
length = len(seq)
overlap = k - 1
minigraph = {}
graph = {}
for i in range(length):
if i == length - 1:
break
mininode = seq[i][:overlap]
mininextnode = seq[i][1:overlap + 1]
node = seq[i][:k]
nextnode = seq[i + 1][:k + 1]
if mininode not in minigraph:
minigraph[mininode] = []
if node not in graph:
graph[node] = []
minigraph[mininode].append(mininextnode)
graph[node].append(nextnode)
return minigraph,graph
minigraph = getdbg(new_list)[0]
graph = getdbg(new_list)[1]
print(graph)
# ACTGCCTGACTA
for node in graph:
print(node, '->', ', '.join(graph[node]))
# print(list(graph.keys())[0],"-->",graph[0],"-->",)
def getnum(dic):
listlen = []
for _,value in dic.items():
m = len(value)
listlen.append(m)
return listlen
a = []
n = len(new_list)
listlen = getnum(graph)
keys = list(graph.keys())
lastkey = keys[-1]
lastvalue = graph[lastkey]
# keys.append()
for i in range(n - 1):
if i == 0:
temp = keys[0]
a.append(temp)
m = keys.index(temp) # 修改:将该节点在字典中的位置赋值给 m
t = temp # 修改:将 temp 赋值给 t(上一节点)
# if a[-1] == lastvalue:
# break
print('1',t)
# else: # 更新变量 t 和 m
t = a[-1] # 修改:将 a 的最后一个元素(上一节点)赋值给 t
# print('1',t)
m = keys.index(t) # 修改:将 t 在字典中的位置赋值给 m
num_kids = listlen[m] # 该节点的子节点个数
a.append(graph[t][0])
if num_kids > 1:
graph[t].pop(0)
listlen[m] = num_kids - 1
print(a)
import networkx as nx
import matplotlib.pyplot as plt
# 创建有向图
G = nx.DiGraph()
# 添加节点和边
nodes = a
# nodes1 = ['A', 'B', 'C']
# nodes2 = [1, 2, 3]
edges = [(a[i],a[i + 1])for i in range(len(a) - 1) ]
# edges = [('A', 1), ('B', 2), ('C', 3)]
G.add_nodes_from(nodes)
# G.add_nodes_from(nodes2)
G.add_edges_from(edges)
# 绘制有向图
pos = nx.spring_layout(G) # 设置节点位置
nx.draw_networkx_nodes(G, pos, nodelist=nodes, node_color='r', node_size=500) # 绘制节点1
# nx.draw_networkx_nodes(G, pos, nodelist=nodes2, node_color='b', node_size=500) # 绘制节点2
nx.draw_networkx_labels(G, pos) # 绘制节点标签
nx.draw_networkx_edges(G, pos, edgelist=edges, edge_color='black', arrows=True) # 绘制边
plt.axis('off') # 关闭坐标轴
plt.show() # 显示图形
# CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCTGCGAGATATAG
nodes = list(graph.keys())
edges = []
for node, targets in graph.items():
edges.extend((node, target) for target in targets)
# 创建图形对象
fig, ax = plt.subplots(figsize=(10, 10))
# 绘制节点和边
for edge in edges:
ax.annotate("", xy=(nodes.index(edge[1]), nodes.index(edge[0])), xycoords='data',
xytext=(0, 10), textcoords='offset points',
arrowprops=dict(arrowstyle="->", color='black'))
# 绘制节点标签
for i, node in enumerate(nodes):
ax.annotate(node, xy=(i, 0), xycoords='data',
xytext=(0, -30), textcoords='offset points',
ha='center', va='center', fontsize=12, fontweight='bold')
所有print都有,图片也有,但关掉图片之后报错:
Traceback (most recent call last):
File "E:\Pycharm\pythonProject\ROSALIND\DBG.py", line 154, in <module>
ax.annotate("", xy=(nodes.index(edge[1]), nodes.index(edge[0])), xycoords='data',
ValueError: 'CTA' is not in list
这个错误是由于节点列表nodes中没有包含边列表edges中的某个节点导致的。在这个特定的例子中,节点列表nodes是由字典graph的键组成的,而edges是由字典graph的键和对应值组成的元组列表。因此,出现这个错误的原因是edges中有一个元组包含一个不在nodes中的节点。检查字典graph中的节点是否正确,并确保节点列表nodes包含所有字典graph中的节点。
|
|