abalone年龄预测案例,Python交流,编程语言专区,鱼C论坛

yinda_peng 发表于 2024-4-12 17:14:57

abalone年龄预测案例

本帖最后由 yinda_peng 于 2024-4-12 17:24 编辑

由abalone的特征、标签数据来预测其年龄。

import pandas as pd

# Load the abalone data set, inspect it, and plot how the variables relate.

# Column names for the headerless data file (first column is the sex code,
# last column is the prediction target).
columns = ['Abalone sex','Longest shell length','Vertical length','Whole height','Whole weight','Shucked weight','Viscera weight','Shell weight','age']
# NOTE: remember to change this path to wherever your copy of the data lives.
df = pd.read_csv('./input/abalone-data.txt',names=columns, header=None, sep=",")
# Show the first 5 rows.
df.head()
# Show basic information about the data set (dtypes, non-null counts).
df.info()
# Summary statistics for every numeric column.
df.describe()
# Author's observation: the data look approximately normally distributed.

import seaborn as sb

# Pairwise relationships between all variables; the diagonal shows each
# variable's own histogram.
sb.pairplot(df)

# Heat map of the linear correlation between variables, measured with the
# default Pearson correlation coefficient (range [-1, 1]).
# numeric_only=True leaves out the string-valued sex column, which newer
# pandas versions refuse to correlate instead of silently dropping it.
# NOTE(review): heatmap draws on the currently active Axes; when this runs as
# one script rather than separate notebook cells, open a new figure first.
sb.heatmap(df.corr(numeric_only=True),annot=True,cmap='Greens')
接下来对数据做一些处理，求出线性回归的闭式解，并对得到的预测结果进行验证。
# 处理数据

def loadDataset(filepath):
    """Load the abalone data file into feature rows and labels.

    Each non-empty line of *filepath* is expected to be comma-separated, with
    the sex code in the first field and the label in the last field.

    Args:
        filepath: Path to the comma-separated data file.

    Returns:
        A ``(data, label)`` tuple. ``data`` is a list of rows, each a list of
        floats holding the encoded sex followed by the remaining feature
        columns; ``label`` is a list of floats taken from the last column.

    Raises:
        ValueError: If a field other than the sex code is not a valid float.
    """
    # Sex is categorical text; encode it as a number so the whole row can be
    # converted to float. Any value other than 'M' or 'F' maps to 2.
    sex_codes = {'M': 0, 'F': 1}
    data = []
    label = []

    # Open the data set read-only; the with-statement closes it automatically.
    with open(filepath, 'r') as f:
        for raw in f:
            # Each line ends with a newline that must be removed; blank lines
            # (e.g. a trailing empty line) carry no sample and are skipped.
            raw = raw.strip()
            if not raw:
                continue
            fields = raw.split(',')
            fields[0] = sex_codes.get(fields[0], 2)
            # Convert every field from text to float.
            row = [float(value) for value in fields]
            # Split into features (all but the last column) and label (last).
            data.append(row[:-1])
            label.append(row[-1])
    return data, label

# Fit a linear model to the abalone data in closed form and evaluate it.
import numpy as np
from sklearn.metrics import mean_absolute_error,mean_squared_error
from sklearn.model_selection import train_test_split

abalonedata,abalonelabel = loadDataset('./input/abalone-data.txt')

# Split the data: 80% for training, 20% for testing.
x_train,x_test,y_train,y_test = train_test_split(np.array(abalonedata),np.array(abalonelabel),test_size=0.2)
print(x_train)
print(y_train)

# Append a column of ones so that the last weight acts as the bias term.
x_train_1 = np.c_[x_train, np.ones(len(x_train))]
x_test_1 = np.c_[x_test, np.ones(len(x_test))]

# Closed-form least-squares solution via the pseudo-inverse.
w=np.linalg.pinv(x_train_1).dot(y_train)
print(w)

# Spot-check the model: draw one random sample from the test set and compare
# its prediction with its true label.
a = np.random.choice(list(range(len(y_test))))

# Pick the sample and label at that index.
test_data,test_label = x_test[a],y_test[a]
print(test_label)
# The sample needs the same trailing bias input of 1 as the training rows.
predict_label = np.append(test_data, 1).dot(w)
print(predict_label)

# Evaluation metrics: predict the whole test set and report the mean squared
# error and the mean absolute error against the test labels.
predict_label = x_test_1.dot(w)
print(mean_squared_error(y_test,predict_label),mean_absolute_error(y_test,predict_label))

FishC 发表于 2024-4-12 17:15:23

这是一个用于预测abalone（鲍鱼）年龄的案例。我们首先要导入必要的包，并读取数据集。

import pandas as pd

# Names for the nine columns of the headerless abalone data file.
columns = [
    'Sex',
    'Length',
    'Diameter',
    'Height',
    'Whole weight',
    'Shucked weight',
    'Viscera weight',
    'Shell weight',
    'Rings',
]
# Read the comma-separated file, labelling the columns with the names above.
df = pd.read_csv(
    './input/abalone-data.txt',
    sep=",",
    header=None,
    names=columns,
)
# Preview the first 5 rows.
df.head()
# Print the basic information of the data set.
df.info()
# Compute summary statistics for the data.
df.describe()

接下来，我们可以使用seaborn库绘制各变量两两之间的关系图和热力图，以了解它们之间的线性相关性。

import seaborn as sb

# Pairwise relationships between all variables; the diagonal shows each
# variable's own histogram.
sb.pairplot(df)
# Heat map of the linear correlation between variables, measured with the
# default Pearson correlation coefficient (range [-1, 1]).
# numeric_only=True leaves out the string-valued 'Sex' column, which newer
# pandas versions refuse to correlate instead of silently dropping it.
# NOTE(review): heatmap draws on the currently active Axes; when this runs as
# one script rather than separate notebook cells, open a new figure first.
sb.heatmap(df.corr(numeric_only=True),annot=True)

接下来，我们需要对数据进行处理，并求出线性回归的闭式解。

import numpy as np

# 处理数据
def loadDataset(filepath):
    """Load the abalone data file into feature rows and labels.

    Each non-empty line of *filepath* is expected to be comma-separated, with
    the sex code in the first field and the label in the last field.

    Args:
        filepath: Path to the comma-separated data file.

    Returns:
        A ``(data, label)`` tuple. ``data`` is a list of rows, each a list of
        floats holding the encoded sex followed by the remaining feature
        columns; ``label`` is a list of floats taken from the last column.

    Raises:
        ValueError: If a field other than the sex code is not a valid float.
    """
    # Sex is categorical text; encode it as a number so the whole row can be
    # converted to float. Any value other than 'M' or 'F' maps to 2.
    sex_codes = {'M': 0, 'F': 1}
    data = []
    label = []

    with open(filepath, 'r') as f:
        for raw in f:
            # Drop the line ending; blank lines carry no sample.
            raw = raw.strip()
            if not raw:
                continue
            fields = raw.split(',')
            fields[0] = sex_codes.get(fields[0], 2)
            row = [float(value) for value in fields]
            # Features are every column but the last; the label is the last.
            data.append(row[:-1])
            label.append(row[-1])
    return data, label

# Load the samples, split them, and fit a linear model in closed form.
abalonedata,abalonelabel = loadDataset('./input/abalone-data.txt')

from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing.
x_train,x_test,y_train,y_test = train_test_split(np.array(abalonedata),np.array(abalonelabel),test_size=0.2)

# Append a column of ones so that the last weight acts as the bias term.
x_train_1 = np.c_[x_train, np.ones(len(x_train))]
x_test_1 = np.c_[x_test, np.ones(len(x_test))]

# Closed-form least-squares solution via the pseudo-inverse.
w=np.linalg.pinv(x_train_1).dot(y_train)

最后，我们可以验证预测结果，并计算均值绝对误差和均方误差作为评价指标。

# Spot-check the model: draw one random sample from the test set and compare
# its prediction with its true label.
a = np.random.choice(list(range(len(y_test))))
test_data,test_label = x_test[a],y_test[a]
# Row `a` of x_test_1 is that same sample with the bias input appended.
predict_label = x_test_1[a].dot(w)

# Evaluation metrics: mean absolute error and mean squared error.
from sklearn.metrics import mean_absolute_error,mean_squared_error

# Predict the whole test set and report both errors against the test labels.
predict_label = x_test_1.dot(w)
print(mean_squared_error(y_test,predict_label),mean_absolute_error(y_test,predict_label))

希望这个案例能帮助到你！

以上回复来自 FishC AI 小助理，如未能正确解答您的问题，请继续追问。

页: [1]

鱼C论坛's Archiver

abalone年龄预测案例