|

楼主 |
发表于 2024-7-24 10:23:46
|
显示全部楼层
还是不行
原始表格 已经被修正为 只有一列的数据 含标题行
代码如下:
- import pandas as pd
- import numpy as np
- import re
- #导入数据
- dfa = pd.read_excel('shujufenge2.xlsx')
- dfa.columns=["value"]
- def process_row(row):
- # 获取当前行的数据
- current_value = str(row['value']) if pd.notna(row) else ""
- # 使用正则表达式提取所需的信息
- first_part = re.match(r"\d+", current_value).group() # 提取第一个数字
- text_part = re.split(",", current_value)[1] # 提取第二部分文本
- numeric_parts = re.findall(r"\d{1,3}(?:,\d{3})*(?:\.\d{2})|\d+\.\d{2}", current_value) # 提取数值部分
- while len(numeric_parts) < 4:
- numeric_parts.append(np.nan)
- # 将千分位格式化数值去掉逗号
- numeric_parts = [part.replace(',', '') for part in numeric_parts]
- return [first_part, text_part] + numeric_parts
- # 应用处理函数到每一行
- df2 = dfa.apply(process_row, axis=1, result_type="expand")
- # 打印结果
- #print(df2)
复制代码
错误信息如下:
PS D:\wp> & D:/Python39/python.exe d:/wp/提取带千分位的数值.py
Traceback (most recent call last):
File "d:\wp\提取带千分位的数值.py", line 22, in <module>
df2 = dfa.apply(process_row, axis=1, result_type="expand")
File "D:\Python39\lib\site-packages\pandas\core\frame.py", line 10374, in apply
return op.apply().__finalize__(self, method="apply")
File "D:\Python39\lib\site-packages\pandas\core\apply.py", line 916, in apply
return self.apply_standard()
File "D:\Python39\lib\site-packages\pandas\core\apply.py", line 1063, in apply_standard
results, res_index = self.apply_series_generator()
File "D:\Python39\lib\site-packages\pandas\core\apply.py", line 1081, in apply_series_generator
results[i] = self.func(v, *self.args, **self.kwargs)
File "d:\wp\提取带千分位的数值.py", line 10, in process_row
current_value = str(row['value']) if pd.notna(row) else ""
File "D:\Python39\lib\site-packages\pandas\core\generic.py", line 1577, in __nonzero__
raise ValueError(
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
PS D:\wp> |
|