|
楼主 |
发表于 2019-7-23 16:19:27
|
显示全部楼层
通过请教各位大神,终于在今日写出了能正常运行的程序。
如下:
import os
# The 20 standard amino acids (one-letter codes); their order fixes the column order.
aa = ['G','A','S','T','C','V','L','I','M','P','F','Y','W','D','E','N','Q','H','K','R']

# All 400 ordered dipeptides, in the column order used by the output table.
dipeptide = [first + second for first in aa for second in aa]

path = 'G:\\Protein probability\\viral_split\\'  # directory holding the FASTA files
text = 'text'  # name of the output table, created inside `path`


def _count_dipeptides(lines):
    """Count every dipeptide occurring in the FASTA records given by *lines*.

    *lines* is any iterable of text lines (for example an open file).
    Lines starting with '>' are record headers.  A record's sequence may be
    wrapped over several lines, so the previous residue is carried across
    line breaks; it is reset at each header so that no pair is formed
    between two different records.

    Returns a dict mapping each entry of ``dipeptide`` to its count.
    Pairs containing a character outside ``aa`` are ignored.
    """
    counts = dict.fromkeys(dipeptide, 0)
    previous = ''  # last residue seen in the current record
    for line in lines:
        line = line.strip()  # drop the newline and surrounding whitespace
        if line.startswith('>'):
            previous = ''  # new record: do not pair across the boundary
            continue
        for residue in line:
            pair = previous + residue
            if pair in counts:  # dict lookup instead of scanning a list
                counts[pair] += 1
            previous = residue
    return counts


def _write_row(out, label, cells):
    """Write one table row to *out*; every field is followed by a tab."""
    fields = [label] + [str(cell) for cell in cells]
    out.write(''.join(field + '\t' for field in fields) + '\n')


def main(folder=path, out_name=text):
    """Write a dipeptide count table for every FASTA file in *folder*.

    The table is written to ``out_name`` inside *folder*.  Its first row is
    an empty cell followed by the 400 dipeptides; each following row is a
    file name followed by that file's counts.  A file is treated as FASTA
    when its extension contains '.fa'.
    """
    # List the directory before the output file is created so that the
    # output never appears in its own input list; sort for a stable row order.
    names = sorted(os.listdir(folder))
    with open(os.path.join(folder, out_name), 'w') as out:
        _write_row(out, '', dipeptide)
        for name in names:
            if '.fa' not in os.path.splitext(name)[1]:
                continue
            with open(os.path.join(folder, name), 'r') as fasta:
                counts = _count_dipeptides(fasta)
            _write_row(out, name, [counts[pair] for pair in dipeptide])


if __name__ == '__main__':
    main()
在此本小白向各位大神致敬! |
|