gzj137070928 发表于 2020-12-11 17:29:52

pandas的DataFrame的创建、增删改

pandas的DataFrame的创建和增删改
import pandas as pd
import time

# Demo: storing a repetitive string column as a pandas ``category`` and then
# renaming, adding and removing categories.
#
# NOTE(review): the list literals for ``idx``, ``price``, the watermelon
# prices/index and the boolean mask were missing from the source (the file
# did not parse). The values below are illustrative reconstructions; only
# their lengths are dictated by the surrounding code -- confirm against the
# original data if it matters.
idx = [1, 2, 3, 5, 6, 7, 9, 4, 8]
name = ["apple", "pearl", "orange", "apple", "orange", "orange", "apple", "pearl", "orange"]
price = [5.20, 3.50, 7.30, 5.00, 7.50, 7.30, 5.20, 3.70, 7.30]
N = 1  # the larger the data, the larger the memory difference
df = pd.DataFrame({"fruit": name * N, "price": price * N}, index=idx * N)
print(df, "\n")
print('memory_usage', df.memory_usage(), "\n")
print(df.dtypes)
print("*" * 20)

# Convert the fruit column to the category dtype; its values are then
# represented by integer ``codes`` indexing into ``categories``.
df['fruit'] = df['fruit'].astype('category')
# Alternative way to build it:
# cat = pd.Categorical(name)
# df['fruit'] = cat
print(df)
print('memory_usage', df.memory_usage(), "\n")
print(df.dtypes)
print('fruit.values:', df.fruit.values)
print('fruit.values.codes:', df.fruit.values.codes)
print('fruit.values.categories:', df.fruit.values.categories)

# --- Renaming categories ---------------------------------------------------
# The ``categories`` setter and ``inplace=True`` were removed in pandas 2.0,
# so every category operation below returns a new Series that is assigned
# back. A list rename is positional: the first call deliberately mislabels
# (apple -> "Pearl", pearl -> "Apple"), the second one restores the intended
# capitalised labels.
df['fruit'] = df['fruit'].cat.rename_categories(["Pearl", "Orange", "Apple"])
df['fruit'] = df['fruit'].cat.rename_categories(["Apple", "Orange", "Pearl"])

# --- Adding a category -------------------------------------------------------
df_new = pd.DataFrame(
    {"fruit": ["watermelon"] * 3, "price": [2.0, 2.1, 2.2]},
    index=[10, 11, 12],
)
# The new category must be registered BEFORE the rows are inserted; otherwise
# the new values have no matching category and end up as missing (code -1).
df['fruit'] = df['fruit'].cat.add_categories("watermelon")
# Give the new rows the same categorical dtype so the concatenation keeps the
# column categorical instead of silently falling back to object dtype.
df_new['fruit'] = df_new['fruit'].astype(df['fruit'].dtype)
# ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` replaces it.
df = pd.concat([df, df_new])

# --- Removing a category -----------------------------------------------------
# Drop every "Apple" row with a boolean mask. The label is "Apple" (not
# "apple") because of the rename above. ``.copy()`` makes the filtered frame
# independent so the column assignments below do not warn.
df = df[df.fruit != "Apple"].copy()
# Remove "Apple" from the category list as well; without this step the codes
# would still be based on the old category list.
df['fruit'] = df['fruit'].cat.remove_categories("Apple")
# Drop any categories that no longer occur in the data.
df['fruit'] = df['fruit'].cat.remove_unused_categories()
print('总的数据个数:', df.fruit.count())
print('categories分别出现的次数:', df.fruit.value_counts())
页: [1]
查看完整版本: pandas的DataFrame的创建、增删改