|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import numpy as np
- # 冗余序列不影响k_mer数量
- def getk_mer(str):
- global k
- n = len(str)
- k_mer = []
- step = n - k + 1
- for i in range(step):
- k_mer.append(str[i:i + k])
- return k_mer
- # 对这条reads的反向互补序列再取一次Kmer
- def getinverst(str):
- return str[::-1]
- k = int(input("输入k值:"))
- seq = input("输入序列:")
- list_seq = seq.split(" ")
- list_kmer = []
- for s in list_seq:
- k_mer = getk_mer(s)
- list_kmer.append(k_mer[:])
- new_list = np.array(list_kmer).flatten() #flatten 扁平化,压成一维
- new_list = new_list.tolist()
- print(new_list)
- list_kmer_inv = []
- inverst = getinverst(seq)
- list_inv = inverst.split(" ")
- # print(inverst)
- for i in list_inv:
- k_mer_inv = getk_mer(i)
- list_kmer_inv.append(k_mer_inv[:])
- new_list_inv = np.array(list_kmer_inv).flatten() #flatten 扁平化,压成一维
- new_list_inv = new_list_inv.tolist()
- print(new_list_inv)
- # 将reads分割成若干k_mer,可以统计各k_mer出现的次数,
- # 最多的次数为深度,深度可以估计基因组大小
- def getdbg(seq):
- global k
- length = len(seq)
- overlap = k - 1
- minigraph = {}
- graph = {}
- for i in range(length):
- if i == length - 1:
- break
- mininode = seq[i][:overlap]
- mininextnode = seq[i][1:overlap + 1]
- node = seq[i][:k]
- nextnode = seq[i + 1][:k + 1]
- if mininode not in minigraph:
- minigraph[mininode] = []
- if node not in graph:
- graph[node] = []
- minigraph[mininode].append(mininextnode)
- graph[node].append(nextnode)
- return minigraph,graph
- minigraph = getdbg(new_list)[0]
- graph = getdbg(new_list)[1]
- print(graph)
- # ACTGCCTGACTA
- for node in graph:
- print(node, '->', ', '.join(graph[node]))
- # print(list(graph.keys())[0],"-->",graph[0],"-->",)
- def getnum(dic):
- listlen = []
- for _,value in dic.items():
- m = len(value)
- listlen.append(m)
- return listlen
- a = []
- n = len(new_list)
- listlen = getnum(graph)
- keys = list(graph.keys())
- lastkey = keys[-1]
- lastvalue = graph[lastkey]
- # keys.append()
- for i in range(n - 1):
- if i == 0:
- temp = keys[0]
- a.append(temp)
- m = keys.index(temp) # 修改:将该节点在字典中的位置赋值给 m
- t = temp # 修改:将 temp 赋值给 t(上一节点)
- # if a[-1] == lastvalue:
- # break
- print('1',t)
- # else: # 更新变量 t 和 m
- t = a[-1] # 修改:将 a 的最后一个元素(上一节点)赋值给 t
- # print('1',t)
- m = keys.index(t) # 修改:将 t 在字典中的位置赋值给 m
- num_kids = listlen[m] # 该节点的子节点个数
- a.append(graph[t][0])
- if num_kids > 1:
- graph[t].pop(0)
- listlen[m] = num_kids - 1
- print(a)
- import networkx as nx
- import matplotlib.pyplot as plt
- # 创建有向图
- G = nx.DiGraph()
- # 添加节点和边
- nodes = a
- # nodes1 = ['A', 'B', 'C']
- # nodes2 = [1, 2, 3]
- edges = [(a[i],a[i + 1])for i in range(len(a) - 1) ]
- # edges = [('A', 1), ('B', 2), ('C', 3)]
- G.add_nodes_from(nodes)
- # G.add_nodes_from(nodes2)
- G.add_edges_from(edges)
- # 绘制有向图
- pos = nx.spring_layout(G) # 设置节点位置
- nx.draw_networkx_nodes(G, pos, nodelist=nodes, node_color='r', node_size=500) # 绘制节点1
- # nx.draw_networkx_nodes(G, pos, nodelist=nodes2, node_color='b', node_size=500) # 绘制节点2
- nx.draw_networkx_labels(G, pos) # 绘制节点标签
- nx.draw_networkx_edges(G, pos, edgelist=edges, edge_color='black', arrows=True) # 绘制边
- plt.axis('off') # 关闭坐标轴
- plt.show() # 显示图形
- # CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCTGCGAGATATAG
- nodes = list(graph.keys())
- edges = []
- for node, targets in graph.items():
- edges.extend((node, target) for target in targets)
- # 创建图形对象
- fig, ax = plt.subplots(figsize=(10, 10))
- # 绘制节点和边
- for edge in edges:
- ax.annotate("", xy=(nodes.index(edge[1]), nodes.index(edge[0])), xycoords='data',
- xytext=(0, 10), textcoords='offset points',
- arrowprops=dict(arrowstyle="->", color='black'))
- # 绘制节点标签
- for i, node in enumerate(nodes):
- ax.annotate(node, xy=(i, 0), xycoords='data',
- xytext=(0, -30), textcoords='offset points',
- ha='center', va='center', fontsize=12, fontweight='bold')
复制代码
所有print都有,图片也有,但关掉图片之后报错:
Traceback (most recent call last):
File "E:\Pycharm\pythonProject\ROSALIND\DBG.py", line 154, in <module>
ax.annotate("", xy=(nodes.index(edge[1]), nodes.index(edge[0])), xycoords='data',
ValueError: 'CTA' is not in list
这个错误是由于节点列表nodes中没有包含边列表edges中的某个节点导致的。在这个特定的例子中,节点列表nodes是由字典graph的键组成的,而edges是由字典graph的键和对应值组成的元组列表。因此,出现这个错误的原因是edges中有一个元组包含一个不在nodes中的节点。检查字典graph中的节点是否正确,并确保节点列表nodes包含所有字典graph中的节点。
|
|