|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
使用sklearn的函数加载Diabetes数据集,并按照自己设定的比例将数据集划分为训练集与测试集。然后使用线性回归算法对训练集进行训练,并基于测试集进行评估。
- # 读取相关数据包
- from sklearn import datasets
- from sklearn.model_selection import train_test_split
- import numpy as np
- from sklearn.metrics import r2_score
- import matplotlib.pyplot as plt
def visualize_feature(X, Y, name_data):
    """Scatter-plot every feature column of ``X`` against the target ``Y``.

    One subplot is drawn per entry of ``name_data`` on a two-column grid.
    The finished figure is written to ``./Feature_Visualization.jpg`` and
    then shown.

    Args:
        X: 2-D array; column ``i`` holds the values of feature
            ``name_data[i]``.
        Y: Target values. A 1-D array (or a single-column 2-D array) is
            plotted against every feature. A 2-D array with several columns
            is read column-wise, i.e. column ``i`` is paired with feature
            ``i``.
        name_data: Sequence of feature names, used as subplot titles.
    """
    targets = np.asarray(Y)
    # The original code always indexed Y[:, i], which raises IndexError for
    # the usual 1-D regression target; only index by column when Y really
    # has one column per feature.
    column_wise = targets.ndim == 2 and targets.shape[1] > 1
    shared_target = None if column_wise else targets.ravel()

    # Two plots per row; at least one row so an empty name list still
    # produces a (blank) figure instead of failing inside matplotlib.
    n_rows = max(1, (len(name_data) + 1) // 2)

    plt.figure(figsize=(20, 18))
    for i, feature_name in enumerate(name_data):
        plt.subplot(n_rows, 2, i + 1)
        y_values = targets[:, i] if column_wise else shared_target
        plt.scatter(X[:, i], y_values)
        plt.title(feature_name)
    plt.savefig("./Feature_Visualization.jpg")
    plt.show()
def create_dataset():
    """Load the diabetes data, plot its features and split it 70/30.

    The feature matrix and target are passed to ``visualize_feature``
    before the split. The split uses ``test_size=0.3`` and
    ``random_state=11``.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test)``. Note that this order
        differs from the order returned by ``train_test_split``.
    """
    bunch = datasets.load_diabetes()
    features, targets = bunch.data, bunch.target
    feature_names = bunch.feature_names

    # Plot each feature against the target before splitting.
    visualize_feature(features, targets, feature_names)

    # train_test_split yields (X_train, X_test, y_train, y_test); the
    # return statement regroups them as train pair first, test pair second.
    train_x, test_x, train_y, test_y = train_test_split(
        features,
        targets,
        test_size=0.3,
        random_state=11,
    )
    return train_x, train_y, test_x, test_y
- x_train, y_train, x_test, y_test = create_dataset()  # module-level call: runs outside the __main__ guard; the __main__ block reads these four names
def Regression(x_train, y_train, x_test, y_test, *, iteration=30000, lr=1,
               lv=0.0001, log_every=200):
    """Fit an L2-regularised linear model with full-batch Adagrad.

    The model is ``y_hat = x @ w + b``. Each iteration computes the gradient
    of the summed squared error plus the L2 penalty on ``w`` over the whole
    training set and takes one Adagrad step. Every ``log_every`` iterations
    the R^2 score on both the training and the test set is printed and
    recorded.

    Args:
        x_train: 2-D array of training features, shape (n_samples, n_features).
        y_train: Training targets; flattened to 1-D.
        x_test: 2-D array of test features.
        y_test: Test targets; flattened to 1-D.
        iteration: Number of gradient steps.
        lr: Adagrad base learning rate.
        lv: L2 regularisation strength. The penalty gradient is multiplied
            by the number of training samples, because the original
            implementation added the term once per sample.
        log_every: Evaluate and print the scores every this many iterations.

    Returns:
        A tuple ``(train_acc_list, test_acc_list, epoch_list)``. The two
        score lists hold R^2 values clipped to the range [0, 100], so
        negative R^2 is reported as 0. ``epoch_list`` holds the iteration
        indices at which the scores were taken.
    """
    x_train = np.asarray(x_train, dtype=float)
    x_test = np.asarray(x_test, dtype=float)
    y_train = np.asarray(y_train, dtype=float).ravel()
    y_test = np.asarray(y_test, dtype=float).ravel()
    n_samples, n_features = x_train.shape

    # Fixed seed so the initial weights are reproducible.
    rng = np.random.RandomState(10)
    w = rng.randn(1, n_features).reshape(-1)
    b = 0.0

    # Adagrad accumulators: running sums of squared gradients.
    w_grad_sum = np.zeros(n_features)
    b_grad_sum = 0.0
    # Guards the Adagrad division when an accumulated gradient is exactly 0.
    eps = 1e-12

    train_acc_list, test_acc_list, epoch_list = [], [], []

    for i in range(iteration):
        residual = y_train - (x_train @ w + b)

        # d/dw of sum((y - y_hat)^2) + n * lv * ||w||^2. The penalty
        # gradient is +2 * lv * w per weight; the original subtracted the
        # scalar 2 * lv * sum(w), which is not the gradient of an L2 penalty.
        w_grad = -2.0 * (x_train.T @ residual) + 2.0 * lv * n_samples * w
        b_grad = -2.0 * residual.sum()

        w_grad_sum += w_grad ** 2
        b_grad_sum += b_grad ** 2
        w = w - lr * w_grad / (np.sqrt(w_grad_sum) + eps)
        b = b - lr * b_grad / (np.sqrt(b_grad_sum) + eps)

        if i % log_every == 0:
            print("-" * 20)
            train_pred = x_train @ w + b
            test_pred = x_test @ w + b
            score1 = np.clip(r2_score(y_train, train_pred), 0, 100)
            train_acc_list.append(score1)
            print("Iteration:[{}/{}] Train acc: {}".format(i, iteration, score1))
            score2 = np.clip(r2_score(y_test, test_pred), 0, 100)
            test_acc_list.append(score2)
            print("Iteration:[{}/{}] Test acc: {}".format(i, iteration, score2))
            epoch_list.append(i)

    return train_acc_list, test_acc_list, epoch_list
- # %%
- # 结果可视化
def visualize_results(epoch, train_acc, test_acc):
    """Plot the recorded train and test scores against the iteration index.

    The train curve is drawn in red and the test curve in blue on a new
    figure, which is saved to ``./Regression_results.jpg`` and then shown.

    Args:
        epoch: Sequence of x-axis values (iteration indices).
        train_acc: Scores on the training set, one per entry of ``epoch``.
        test_acc: Scores on the test set, one per entry of ``epoch``.
    """
    curves = (
        (train_acc, 'red', 'train'),
        (test_acc, 'blue', 'test'),
    )

    plt.figure()
    for scores, colour, series_label in curves:
        plt.plot(epoch, scores, c=colour, label=series_label)

    # Axis labels, title and legend.
    plt.xlabel('Epoch')
    plt.ylabel('Acc')
    plt.title('Regression')
    plt.legend()

    plt.savefig("./Regression_results.jpg")
    plt.show()
if __name__ == '__main__':
    # Train on the module-level split, then plot the recorded score curves.
    train_acc, test_acc, epoch = Regression(
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
    )
    visualize_results(epoch=epoch, train_acc=train_acc, test_acc=test_acc)
复制代码 |
|