dataframe使用groupby出现以下异常
import pandas as pd

# Purchased quantity per fruit; defined here but not used by the grouping below.
buycount = pd.DataFrame([['苹果',9000],['香蕉',4000],['梨子',2000],['菠萝',3000]],columns=['种类','购入数量'])
data = [['苹果','2022-02-09',300],
['苹果','2022-02-25',150],
['苹果','2022-09-05',550],
['苹果','2022-09-29',154],
['苹果','2023-01-29',354],
['苹果','2023-04-19',950],
['菠萝','2022-09-29',250],
['梨子','2023-09-20',350],
['香蕉','2022-07-29',650],
['菠萝','2023-12-22',900]]
columns = ['种类','时间','卖出数量']
df1 = pd.DataFrame(data=data,columns=columns)
# print(df1)
# Convert the '时间' column to datetime dtype
df1['时间'] = pd.to_datetime(df1['时间'])
# df1['时间'] = df1['时间'].dt.strftime('%Y-%m-%d')
# df1['时间'] = df1['时间'].astype(int)
# Group by year and month.
# NOTE: the group keys are Series derived from '时间', so the '时间' column
# itself stays among the columns that .sum() aggregates. As the traceback
# reported for this script shows, that raises
# "TypeError: datetime64 type does not support sum operations".
# This line is kept as asked; it is the statement the question is about.
df1_grouped = df1.groupby([df1['时间'].dt.year, df1['时间'].dt.month]).sum()
# Print the result
print(df1_grouped)
Traceback (most recent call last):
File "C:\Users\K\PycharmProjects\pythonProject\666.py", line 22, in <module>
df1_grouped = df1.groupby([df1['时间'].dt.year, df1['时间'].dt.month]).sum()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Python311\Lib\site-packages\pandas\core\groupby\groupby.py", line 2263, in sum
result = self._agg_general(
^^^^^^^^^^^^^^^^^^
File "C:\Python\Python311\Lib\site-packages\pandas\core\groupby\groupby.py", line 1422, in _agg_general
result = self._cython_agg_general(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Python311\Lib\site-packages\pandas\core\groupby\groupby.py", line 1507, in _cython_agg_general
new_mgr = data.grouped_reduce(array_func)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Python311\Lib\site-packages\pandas\core\internals\managers.py", line 1506, in grouped_reduce
applied = blk.apply(func)
^^^^^^^^^^^^^^^
File "C:\Python\Python311\Lib\site-packages\pandas\core\internals\blocks.py", line 329, in apply
result = func(self.values, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Python311\Lib\site-packages\pandas\core\groupby\groupby.py", line 1490, in array_func
result = self.grouper._cython_operation(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Python311\Lib\site-packages\pandas\core\groupby\ops.py", line 959, in _cython_operation
return cy_op.cython_operation(
^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Python311\Lib\site-packages\pandas\core\groupby\ops.py", line 645, in cython_operation
self._disallow_invalid_ops(dtype, is_numeric)
File "C:\Python\Python311\Lib\site-packages\pandas\core\groupby\ops.py", line 257, in _disallow_invalid_ops
raise TypeError(f"datetime64 type does not support {how} operations")
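TypeError: datetime64 type does not support sum operations
问题原因是尝试对datetime64类型的列进行求和操作,而datetime64类型不支持求和操作:groupby(...).sum() 会对所有未参与分组的列求和,其中包括已转换为datetime64类型的'时间'列。
解决方法是只对需要求和的数值类型或者整数类型的列进行求和操作(在 sum() 之前选取该列,或使用 sum(numeric_only=True)),而不是对包含datetime64列的整个分组结果求和。
在这个案例中,需要求和的是'卖出数量'列。可以使用astype()方法确保该列为整数类型,然后在分组后选取该列再进行求和操作。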
以下是修改后的代码:
import pandas as pd

# Purchased quantity per fruit; defined here but not used by the grouping below.
buycount = pd.DataFrame([['苹果',9000],['香蕉',4000],['梨子',2000],['菠萝',3000]],columns=['种类','购入数量'])
data = [['苹果','2022-02-09',300],
['苹果','2022-02-25',150],
['苹果','2022-09-05',550],
['苹果','2022-09-29',154],
['苹果','2023-01-29',354],
['苹果','2023-04-19',950],
['菠萝','2022-09-29',250],
['梨子','2023-09-20',350],
['香蕉','2022-07-29',650],
['菠萝','2023-12-22',900]]
columns = ['种类','时间','卖出数量']
df1 = pd.DataFrame(data=data,columns=columns)
# Convert the '时间' column to datetime dtype
df1['时间'] = pd.to_datetime(df1['时间'])
# Make sure the '卖出数量' column is integer-typed
df1['卖出数量'] = df1['卖出数量'].astype(int)
# Group by year and month, then sum ONLY the '卖出数量' column.
# Without the column selection, .sum() would also try to aggregate the
# datetime64 '时间' column and raise
# "TypeError: datetime64 type does not support sum operations".
# The double brackets keep the result a DataFrame with one '卖出数量' column.
df1_grouped = df1.groupby([df1['时间'].dt.year, df1['时间'].dt.month])[['卖出数量']].sum()
# Print the result
print(df1_grouped)
运行以上代码,即可得到按年份和月份分组后的求和结果。
isdkz 发表于 2023-8-13 08:04
问题原因是尝试对datetime64类型的列进行求和操作,而datetime64类型不支持求和操作。
解决方法是将需要 ...
pycharm还是出现一样的异常
加多一个指定求和的数据列,即可
>>> data = [['苹果','2022-02-09',300],
['苹果','2022-02-25',150],
['苹果','2022-09-05',550],
['苹果','2022-09-29',154],
['苹果','2023-01-29',354],
['苹果','2023-04-19',950],
['菠萝','2022-09-29',250],
['梨子','2023-09-20',350],
['香蕉','2022-07-29',650],
['菠萝','2023-12-22',900]]
>>> columns = ['种类','时间','卖出数量']
>>> import pandas as pd
>>> df = pd.DataFrame(data, columns=columns)
>>> df
   种类          时间  卖出数量
0  苹果  2022-02-09   300
1  苹果  2022-02-25   150
2  苹果  2022-09-05   550
3  苹果  2022-09-29   154
4  苹果  2023-01-29   354
5  苹果  2023-04-19   950
6  菠萝  2022-09-29   250
7  梨子  2023-09-20   350
8  香蕉  2022-07-29   650
9  菠萝  2023-12-22   900
>>> df['时间'] = pd.to_datetime(df['时间'])
>>> df.dtypes
种类              object
时间      datetime64[ns]
卖出数量             int64
dtype: object
>>> df.groupby(df['时间'].dt.year)['卖出数量'].sum()
时间
2022 2054
2023 2554
Name: 卖出数量, dtype: int64
>>> df.groupby([df['时间'].dt.year, df['时间'].dt.month])['卖出数量'].sum()
时间    时间
2022  2     450
      7     650
      9     954
2023  1     354
      4     950
      9     350
      12    900
Name: 卖出数量, dtype: int64
>>>
hahadaxiao 发表于 2023-8-13 08:06
pycharm还是出现一样的异常
代码是没有问题的,看看你的库版本对不对吧。实在不行重新安装下库,并指定下版本。
页:
[1]