|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
非常感谢你在百忙之中抽空为我解答。
以下代码作用是在EXCEL表ABCDE列中选三列随机抽取4行做比较,由于效率很低,如果想通过调用GPU的并行运算去提高效率,该如何改呢?
- import xlrd
- import multiprocessing as mp
- from functools import partial
- import csv
- import random as rd
def read_file(file):
    """Load every sheet of the workbook *file* and reshape its rows.

    Each row is turned into a six-item list:
    ``[cell0, cell1, cell2, key, cell4, cell3]`` where ``key`` is a tuple of
    (at most) the second and third pieces obtained by splitting ``cell3`` on
    the letter ``'A'``.  The untouched ``cell3`` is kept as the last item.

    Returns:
        A ``(sheets, names)`` tuple: ``sheets`` is a list with one list of
        reshaped rows per sheet, ``names`` is the list of sheet names in the
        same order.
    """
    workbook = xlrd.open_workbook(file)
    names = workbook.sheet_names()

    # Phase 1: pull the raw cell values of every sheet.
    raw_sheets = []
    for sheet_name in names:
        sheet = workbook.sheet_by_name(sheet_name)
        raw_sheets.append(
            [sheet.row_values(row_no) for row_no in range(sheet.nrows)]
        )

    # Phase 2: reshape each row (cell3 must be a string, it is split on 'A').
    res = []
    for raw_rows in raw_sheets:
        reshaped = []
        for row in raw_rows:
            reshaped.append([
                row[0],
                row[1],
                row[2],
                tuple(row[3].split('A')[1:3]),
                row[4],
                row[3],
            ])
        res.append(reshaped)

    return (res, names)
def write_file(file, res, names, maxlength):
    """Write the per-sheet result rows side by side into a CSV file.

    Args:
        file: Path of the CSV file to create (overwritten if it exists).
        res: One list of rows per sheet; every list must hold at least
            ``maxlength`` rows (callers pad shorter ones).  Each row
            contributes five cells: its first three items followed by its
            last two items in reverse order.
        names: Sheet names; each becomes a header cell followed by four
            empty cells so that it spans the sheet's five columns.
        maxlength: Number of data rows to write.
    """
    head = []
    for name in names:
        head.extend([name, '', '', '', ''])

    # ``with`` guarantees the handle is closed even if a row fails to write;
    # newline='' is what the csv module requires to avoid blank lines.
    with open(file, 'w', newline='') as handle:
        writer = csv.writer(handle)
        writer.writerow(head)
        for row_no in range(maxlength):
            line = []
            for sheet_rows in res:
                row = sheet_rows[row_no]
                # row[:-3:-1] is the last two items, reversed.
                line.extend(row[:3] + row[:-3:-1])
            writer.writerow(line)
def check(sample, data2, index, around):
    """Find, for every sampled row, its first matching row in *data2*.

    Two rows match when they are equal in columns ``index[0]`` and
    ``index[1]`` and, in column ``index[2]``, either

    * are equal (``around == -1``), or
    * differ by at most ``around`` in absolute value (any other ``around``).

    Args:
        sample: Rows to look up.
        data2: Candidate rows, searched in order.
        index: Sequence whose first three items are the column positions
            to compare.
        around: Tolerance for the third column, or ``-1`` for exact equality.

    Returns:
        The list of first matches, one per sampled row and in the same order,
        when *every* sampled row has a match; otherwise an empty list.
    """
    exact = around == -1

    def is_match(row, candidate):
        if not (row[index[0]] == candidate[index[0]]
                and row[index[1]] == candidate[index[1]]):
            return False
        if exact:
            return row[index[2]] == candidate[index[2]]
        return abs(row[index[2]] - candidate[index[2]]) <= around

    matches = []
    for row in sample:
        # Only the first match is ever used, so stop scanning as soon as it
        # is found instead of collecting every matching candidate.
        first = next(
            (candidate for candidate in data2 if is_match(row, candidate)),
            None,
        )
        if first is None:
            # One unmatched row invalidates the whole sample.
            return []
        matches.append(first)
    return matches
-
def cal(label, data, index, num, around):
    """Sample rows from one sheet and look them up in all other sheets.

    Args:
        label: Position in *data* of the sheet to sample from.
        data: List of sheets, each a list of rows.
        index: Column positions handed on to ``check``.
        num: Number of rows to draw at random; a sheet with fewer rows is
            used in full (and unshuffled).
        around: Tolerance handed on to ``check``.

    Returns:
        ``[sample, matches_1, matches_2, ...]`` with one ``check`` result per
        other sheet, or an empty list when no other sheet produced a match.
    """
    own_rows = data[label]
    if len(own_rows) < num:
        sample = own_rows
    else:
        sample = rd.sample(own_rows, num)

    other_sheets = data[:label] + data[label + 1:]
    temp = []
    for sheet_rows in other_sheets:
        temp.append(check(sample, sheet_rows, index, around))

    # Every entry is a (possibly empty) list of matched rows.
    if not any(temp):
        return []
    return [sample] + temp
if __name__ == '__main__':
    # Script entry point: repeatedly sample rows from every sheet of
    # "test.xlsx", compare them against the other sheets on three
    # user-chosen columns, and write each successful round to resN.csv
    # until `filenum` files exist.

    # Map column letters 'A'..'Z' to zero-based positions.
    alphatdict = {chr(code): code - 65 for code in range(65, 91)}
    file = "test.xlsx"
    index = sorted(alphatdict[letter] for letter in input('需要比对的三列:').upper())
    # Position 4 (column E) is compared with a tolerance of 0 instead of
    # plain equality; -1 tells check() to use equality.
    around = 0 if 4 in index else -1
    num = 4
    filenum = 10000
    label = 0

    data, names = read_file(file)
    if len(data) < 2:
        # With a single sheet there is nothing to compare against, so no
        # round could ever produce output and the loop below would never end
        # (and with no sheet at all max() would fail on an empty sequence).
        raise SystemExit('工作簿中至少需要两个工作表才能进行比对')

    ff = partial(cal, data=data, index=index, num=num, around=around)

    # Create the worker pool once and reuse it for every round; spawning and
    # joining a fresh pool per iteration dominated the running time.
    with mp.Pool(mp.cpu_count()) as pool:
        while label < filenum:
            res = pool.map(ff, range(len(data)))
            # Flatten [sample, matches, ...] into one list of rows per sheet.
            res = [[row for group in groups for row in group] for groups in res]
            maxlength = max(len(rows) for rows in res)
            if not maxlength:
                # No sheet found a match this round; draw new samples.
                continue
            # Pad shorter columns with blank rows so write_file can index them.
            res = [rows + [[''] * 6] * (maxlength - len(rows)) for rows in res]
            write_file('res' + str(label + 1) + '.csv', res, names, maxlength)
            label += 1
复制代码 |
|