贝叶斯
import pandas as pd
import numpy as np
d= {'Gender':['male','male','male','male','female','female'],
'Height':,
'Weight':,
'Size':,
'Team':['i100','i100','i500','i100','i500','i100']}
data=pd.DataFrame(d)
n_i100 = data['Team'] == 'i100'].count()
n_i500 = data['Team'] == 'i500'].count()
total_ppl = data['Team'].count()
P_i100 = n_i100 * 1.0 / total_ppl
P_i500 = n_i500 * 1.0 / total_ppl
df1 = data.groupby(['Team', 'Gender']).size().rename('cnt').reset_index().set_index('Team')
df2 = pd.DataFrame(data.groupby(['Team']).size().rename('total'))
df3 = df1.merge(df2, left_index=True, right_index=True)
df3['p'] = df3['cnt'] * 1.0 / df3['total']
data_means = data.groupby('Team').mean()
data_variance = data.groupby('Team').var()
i100_height_mean = data_means['Height'].values
i100_weight_mean = data_means['Weight'].values
i100_size_mean = data_means['Size'].values
i100_height_variance = data_variance['Height'].values
i100_weight_variance = data_variance['Weight'].values
i100_size_variance = data_variance['Size'].values
i500_height_mean = data_means['Height'].values
i500_weight_mean = data_means['Weight'].values
i500_size_mean = data_means['Size'].values
i500_height_variance = data_variance['Height'].values
i500_weight_variance = data_variance['Weight'].values
i500_size_variance = data_variance['Size'].values
def p_x_given_y_1(team, gender):
return df3['p'] ==team] == gender].values
def p_x_given_y_2(x, mean_y, variance_y):
p = 1 / (np.sqrt(2 * np.pi * variance_y)) * np.exp((-(x - mean_y) **2) / (2 * variance_y))
return p
per ={'Gender':['female'],
'Height':,
'Weight':,
'Size':
}
person=pd.DataFrame(per)
P_i100 * p_x_given_y_1('i100', person['Gender']) *\
p_x_given_y_2(person['Height'], i100_height_mean, i100_height_variance) *\
p_x_given_y_2(person['Weight'], i100_weight_mean, i100_weight_variance) *\
p_x_given_y_2(person['Size'], i100_size_mean, i100_size_variance)
问题
raceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2895, in get_loc
return self._engine.get_loc(casted_key)
File "pandas\_libs\index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 101, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1675, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1683, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Team '
求解 数学战5渣渣,
但是,
报错信息提示你键值错误
请根据你所学的,验证字典键值为: 'Team' 的使用地方,是否异常. z5560636 发表于 2021-12-3 10:29
数学战5渣渣,
但是,
报错信息提示你键值错误
使用地方 z5560636 发表于 2021-12-3 10:29
数学战5渣渣,
但是,
报错信息提示你键值错误
找不到问题啊 顶顶顶顶一 发表于 2021-12-3 21:35
找不到问题啊
p_x_given_y_1这个函数里的返回的 'Team '错了,多了个空格!!!
不信你自己在当前页面Ctrl+F查一下,到底是 'Team',还是 'Team ' ? 你的df3没有team这一列,df1,df2也没有
页:
[1]