|
|

楼主 |
发表于 2019-12-12 14:16:23
|
显示全部楼层
对了大佬请教一个问题
我想读取19个表格,分别用多进程和单进程写了程序。单进程就是一个个读取,多进程用Pool一起读取。但为什么单进程耗时365s。多进程不设置pool居然要370s,设置pool(19)也就349s。运行中感觉cpu没有被充分利用,怎么办
import pandas as pd
import time
from multiprocessing import Pool
def df_read(filename, header1=0, index_col1=None, sheet_name1=0):
df = pd.read_excel(filename, header=header1, index_col=index_col1, sheet_name=sheet_name1)
return df
if __name__ == '__main__':
start = time.time()
pool = Pool()
df1 = pool.apply_async(df_read, ('数据源1台账.xlsx',)).get()
df2 = pool.apply_async(df_read, ('数据源2财务报表.xlsx',), dict(sheet_name1='19统计报表明细', index_col1='项目')).get()
df2ly = pool.apply_async(df_read, ('数据源2财务报表(去年).xlsx',), dict(sheet_name1='18统计报表明细', index_col1='项目')).get()
df3 = pool.apply_async(df_read, ('数据源3维保站点.xlsx',)).get()
df4 = pool.apply_async(df_read, ('数据源4维保人员.xlsx',)).get()
df5 = pool.apply_async(df_read, ('数据源5远程监视.xlsx',)).get()
df6 = pool.apply_async(df_read, ('数据源6急修工单.xlsx',)).get()
df7 = pool.apply_async(df_read, ('数据源7用户满意度.xlsx',)).get()
df8 = pool.apply_async(df_read, ('数据源8移动终端.xlsx',)).get()
df9 = pool.apply_async(df_read, ('数据源9市场投放量.xlsx',), dict(header1=1)).get()
df10 = pool.apply_async(df_read, ('数据源10指标.xlsx',), dict(sheet_name1='表6 - 总保养台数分解', header1=[1, 2])).get()
df10a = pool.apply_async(df_read, ('数据源10指标.xlsx',), dict(sheet_name1='表15 - 保养收入毛利分解', header1=[1, 2])).get()
df10b = pool.apply_async(df_read, ('数据源10指标.xlsx',), dict(sheet_name1='表20 - 备件收入毛利分解', header1=[1, 2])).get()
df11 = pool.apply_async(df_read, ('数据源11应收账款.xlsx',), dict(header1=[1, 2])).get()
df12 = pool.apply_async(df_read, ('数据源12保养经营情况.xlsx',), dict(header1=[2])).get()
df13 = pool.apply_async(df_read, ('数据源13安装预留费用.xlsx',)).get()
df14 = pool.apply_async(df_read, ('数据源14LEHY-MRL.xlsx',)).get()
df15 = pool.apply_async(df_read, ('数据源15MESE.xlsx',)).get()
df16 = pool.apply_async(df_read, ('数据源16LEHY-III.xlsx',)).get()
pool.close()
pool.join()
end = time.time()
print('多进程19张表耗时', int(end - start), 's', sep='')
"""
df4 = pd.read_excel('数据源4维保人员.xlsx')
df8 = pd.read_excel('数据源8移动终端.xlsx')
df11 = pd.read_excel('数据源11应收账款.xlsx', header=[1, 2])
# df1是数据源台账,df是每个数据可能的返回值,wb是所有程序所有列表的汇总表
# df2是数据源2财务报表
start = time.time()
this_year = time.localtime(time.time()).tm_year # this_year指的是2019
this_year1 = this_year - 2000 # this_year指的是19
this_year2 = this_year1 - 1 # this_year指的是18
year1 = str(this_year1)
year2 = str(this_year2)
df2 = pd.read_excel('数据源2财务报表.xlsx', sheet_name=(year1 + '统计报表明细'), index_col='项目')
df3 = pd.read_excel('数据源3维保站点.xlsx')
df5 = pd.read_excel('数据源5远程监视.xlsx')
df6 = pd.read_excel('数据源6急修工单.xlsx')
df7 = pd.read_excel('数据源7用户满意度.xlsx')
df9 = pd.read_excel('数据源9市场投放量.xlsx', header=1)
df10 = pd.read_excel('数据源10指标.xlsx', sheet_name='表6 - 总保养台数分解', header=[1, 2])
df10a = pd.read_excel('数据源10指标.xlsx', sheet_name='表15 - 保养收入毛利分解', header=[1, 2])
df10b = pd.read_excel('数据源10指标.xlsx', sheet_name='表20 - 备件收入毛利分解', header=[1, 2])
df12 = pd.read_excel('数据源12保养经营情况.xlsx', header=[2])
df13 = pd.read_excel('数据源13安装预留费用.xlsx')
df14 = pd.read_excel('数据源14LEHY-MRL.xlsx')
df15 = pd.read_excel('数据源15MESE.xlsx')
df16 = pd.read_excel('数据源16LEHY-III.xlsx')
end = time.time()
print('单进程读取耗时', (end - start), 's', sep='')
"""
|
|