|
楼主 |
发表于 2023-5-18 10:25:02
|
显示全部楼层
import pandas as pd
# 读取 Excel 文件
file_path = "58730.xls"
df = pd.read_excel(file_path, sheet_name=None, dtype='object')
# 将数据写入新的 Excel 文件中
writer = pd.ExcelWriter('N58730.xlsx', engine='xlsxwriter')
for sheet_name in df.keys():
# 转换文件格式
df[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)
# 自动设置数字格式为数值,保留一位小数
workbook = writer.book
worksheet = writer.sheets[sheet_name]
num_format = workbook.add_format({'num_format': '0.0'})
for i, width in enumerate(get_col_widths(df[sheet_name])):
worksheet.set_column(i, i, width, num_format)
writer.save()
# 添加 time 列
df['Sheet1']['time'] = pd.to_datetime(df['Sheet1'][['年', '月', '日']])
df['Sheet1']['time'] = df['Sheet1']['time'].dt.strftime('%Y-%m-%d')
# 删除年月日列
df['Sheet1'] = df['Sheet1'].drop(columns=['年', '月', '日'])
# 提取年月信息,然后按照年月分组,将 D 列数据求和
df['Sheet2'] = df['Sheet1'].groupby(['年', '月'])['D'].sum().reset_index()
# 在 Sheet2 中添加表头和相关指标数据
df['Sheet2'].columns = ['年', '月', '月降水']
df['Sheet2']['time'] = pd.to_datetime(df['Sheet2'][['年', '月']].assign(day=1))
df['Sheet2']['月降水均值'] = df['Sheet2']['月降水'].mean()
df['Sheet2']['月降水中值'] = df['Sheet2']['月降水'].median()
df['Sheet2']['月降水极大值'] = df['Sheet2']['月降水'].max()
df['Sheet2']['月降水极小值'] = df['Sheet2']['月降水'].min()
# 设置时间列格式为 yyyy-(/)mm-(/)dd
df['Sheet2']['time'] = df['Sheet2']['time'].dt.strftime('%Y-%m-%d')
# 重新排列 Sheet2 中的列的顺序
df['Sheet2'] = df['Sheet2'][['time', '月降水', '月降水均值', '月降水中值', '月降水极大值', '月降水极小值']]
# 将新的 Excel 文件保存
new_file_path = '新文件路径/N' + file_path.split('/')[-1].replace('.xls', '') + '.xlsx'
writer = pd.ExcelWriter(new_file_path, engine='xlsxwriter')
for sheet_name in df.keys():
df[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)
# 自动设置数字格式为数值,保留一位小数
workbook = writer.book
worksheet = writer.sheets[sheet_name]
num_format = workbook.add_format({'num_format': '0.0'})
for i, width in enumerate(get_col_widths(df[sheet_name])):
worksheet.set_column(i, i, width, num_format)
writer.save()
NameError: name 'get_col_widths' is not defined
请问该如何修改? |
|