zmllzw 发表于 2024-6-14 17:31:46

代码有什么问题吗

使用sklearn的函数对Diabetes数据集进行加载,并按照自己设定的比例将数据集进行训练集与测试集的划分。线性回归算法对训练集进行训练,并基于测试集进行评估。
# 读取相关数据包
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

def visualize_feature(X, Y, name_data):
    plt.figure(figsize=(20, 18))
    for i in range(len(name_data)):
      plt.subplot(5, 2, i + 1)
      plt.scatter(X[:, i], Y[:, i])
      plt.title(name_data)
    plt.savefig("./Feature_Visualization.jpg")
    plt.show()

def create_dataset():
    # 加载数据集
    diabetes = datasets.load_diabetes()
    X = diabetes.data
    Y = diabetes.target
    name_data = diabetes.feature_names
    visualize_feature(X, Y, name_data)
    # 划分训练集和测试集
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=11)
    return x_train, y_train, x_test, y_test

x_train, y_train, x_test, y_test = create_dataset()

def Regression(x_train, y_train, x_test, y_test):
    # 初始化参数
    rng = np.random.RandomState(10)
    w = rng.randn(1, x_train.shape).reshape(-1, )
    b = 0
    w_grad_sum = np.zeros((1, x_train.shape)).reshape(-1, )
    b_grad_sum = np.zeros((1, 1))
    iteration = 30000
    lr = 1
    lv = 0.0001
    train_acc_list, test_acc_list, epoch_list = [], [], []

    # 训练和预测
    for i in range(iteration):
      w_grad = np.zeros((1, x_train.shape)).reshape(-1, )
      b_grad = np.zeros((1, 1))
      y = (np.dot(x_train, w) + b).reshape(-1, 1)

      for j in range(y_train.size):
            w_grad = w_grad - 2 * (y_train - y) * x_train - 2 * lv * np.sum(w)
            b_grad = b_grad - 2 * (y_train - y)
      w_grad_sum += w_grad ** 2
      b_grad_sum += b_grad ** 2

      w = w - lr * w_grad / (w_grad_sum ** 0.5)
      b = b - lr * b_grad / (b_grad_sum ** 0.5)
      if i % 200 == 0:
            print("-" * 20)
            train_pred = np.array(np.dot(x_train, w) + b).reshape(-1, 1)
            test_pred = np.array(np.dot(x_test, w) + b).reshape(-1, 1)
            score1 = np.clip(r2_score(y_train, train_pred), 0, 100)
            train_acc_list.append(score1)
            print("Iteration:[{}/{}] Train acc: {}".format(i, iteration, score1))
            score2 = np.clip(r2_score(y_test, test_pred), 0, 100)
            test_acc_list.append(score2)
            print("Iteration:[{}/{}] Test acc: {}".format(i, iteration, score2))
            epoch_list.append(i)

    return train_acc_list, test_acc_list, epoch_list


# %%

# 结果可视化
def visualize_results(epoch, train_acc, test_acc):
    plt.figure()
    plt.plot(epoch, train_acc, c='red', label='train')
    plt.plot(epoch, test_acc, c='blue', label='test')
    plt.xlabel('Epoch')
    plt.ylabel('Acc')
    plt.title('Regression')
    plt.legend()
    plt.savefig("./Regression_results.jpg")
    plt.show()


if __name__ == '__main__':
    train_acc, test_acc, epoch = Regression(x_train, y_train, x_test, y_test)
    visualize_results(epoch, train_acc, test_acc)
页: [1]
查看完整版本: 代码有什么问题吗