# Import the required packages
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
def visualize_feature(X, Y, name_data):
    """Scatter-plot every feature column of ``X`` against the target ``Y``.

    One subplot is drawn per entry of ``name_data``. The figure is written to
    ``./Feature_Visualization.jpg`` and then displayed with ``plt.show()``.

    Args:
        X: 2-D array indexed as ``X[:, i]``, one column per feature name.
        Y: Target values. A 1-D array (or a single column) is plotted against
            every feature; a 2-D array with at least ``len(name_data)``
            columns is plotted column by column.
        name_data: Sequence of feature names, used as subplot titles.
    """
    Y = np.asarray(Y)
    n_features = len(name_data)

    # A 1-D target cannot be indexed as Y[:, i] (that raises IndexError), so
    # only slice per column when Y really carries one column per feature.
    column_wise = Y.ndim == 2 and Y.shape[1] >= n_features
    flat_target = None if column_wise else Y.reshape(-1)

    # Two columns of subplots; keep at least five rows so the layout for up
    # to ten features is unchanged, and grow the grid for wider datasets.
    n_cols = 2
    n_rows = max(5, (n_features + n_cols - 1) // n_cols)

    plt.figure(figsize=(20, 18))
    for i, feature_name in enumerate(name_data):
        plt.subplot(n_rows, n_cols, i + 1)
        target = Y[:, i] if column_wise else flat_target
        plt.scatter(X[:, i], target)
        plt.title(feature_name)
    plt.savefig("./Feature_Visualization.jpg")
    plt.show()
def create_dataset():
    """Load the scikit-learn diabetes data, plot it, and split it.

    The features are plotted against the target via ``visualize_feature``
    before the data is split 70/30 with ``random_state=11``.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.
    """
    # Load the dataset
    bunch = datasets.load_diabetes()
    features, targets = bunch.data, bunch.target
    visualize_feature(features, targets, bunch.feature_names)

    # Split into training and test sets
    split = train_test_split(features, targets, test_size=0.3, random_state=11)
    x_train, x_test, y_train, y_test = split
    return x_train, y_train, x_test, y_test
# Build the train/test split when the module is loaded; the names bound here
# are module-level globals that the ``__main__`` block passes to Regression.
# Note that create_dataset() also calls visualize_feature, so plotting runs
# at this point as well.
x_train, y_train, x_test, y_test = create_dataset()
def Regression(x_train, y_train, x_test, y_test, *, iteration=30000, lr=1,
               lv=0.0001, log_every=200):
    """Fit an L2-regularised linear model with Adagrad and log R^2 scores.

    The model is ``y = x @ w + b``. The minimised loss is the sum over the
    training samples of ``(y_j - y_hat_j) ** 2 + lv * ||w||^2``, i.e. the
    penalty is counted once per sample. Every ``log_every`` iterations the
    R^2 score on both sets is computed, clipped to ``[0, 100]``, printed and
    recorded.

    Args:
        x_train: Training features, shape ``(n_samples, n_features)``.
        y_train: Training targets, one value per training sample.
        x_test: Test features, shape ``(n_test, n_features)``.
        y_test: Test targets, one value per test sample.
        iteration: Number of full-batch gradient steps.
        lr: Adagrad base learning rate.
        lv: L2 regularisation strength.
        log_every: Score and print every this many iterations (must be > 0).

    Returns:
        Tuple ``(train_acc_list, test_acc_list, epoch_list)``: the clipped
        R^2 scores on the training and test sets and the iteration indices
        at which they were taken.

    Raises:
        ValueError: If ``log_every`` is not positive.
    """
    if log_every <= 0:
        raise ValueError("log_every must be a positive integer")

    x_train = np.asarray(x_train, dtype=float)
    x_test = np.asarray(x_test, dtype=float)
    # Flatten the targets so residuals stay 1-D and never broadcast to 2-D.
    y_train = np.asarray(y_train, dtype=float).reshape(-1)
    y_test = np.asarray(y_test, dtype=float).reshape(-1)
    n_samples, n_features = x_train.shape

    # Initialise parameters (fixed seed keeps runs reproducible)
    rng = np.random.RandomState(10)
    w = rng.randn(1, n_features).reshape(-1)
    b = 0.0
    w_grad_sum = np.zeros(n_features)
    b_grad_sum = 0.0
    # Guards the Adagrad division when an accumulated gradient is exactly 0.
    eps = 1e-12
    train_acc_list, test_acc_list, epoch_list = [], [], []

    # Train and predict
    for i in range(iteration):
        residual = y_train - (x_train @ w + b)

        # Full-batch gradient in one matrix product instead of a Python loop
        # over samples. The derivative of lv * ||w||^2 is 2 * lv * w; it is
        # scaled by n_samples because the penalty is applied per sample.
        w_grad = -2.0 * (x_train.T @ residual) + 2.0 * lv * n_samples * w
        b_grad = -2.0 * residual.sum()

        # Adagrad: scale each step by the root of the summed squared gradients.
        w_grad_sum += w_grad ** 2
        b_grad_sum += b_grad ** 2
        w = w - lr * w_grad / (np.sqrt(w_grad_sum) + eps)
        b = b - lr * b_grad / (np.sqrt(b_grad_sum) + eps)

        if i % log_every == 0:
            print("-" * 20)
            train_pred = x_train @ w + b
            test_pred = x_test @ w + b
            # R^2 can be negative for a poor fit; clip so the curve starts at 0.
            score1 = np.clip(r2_score(y_train, train_pred), 0, 100)
            train_acc_list.append(score1)
            print("Iteration:[{}/{}] Train acc: {}".format(i, iteration, score1))
            score2 = np.clip(r2_score(y_test, test_pred), 0, 100)
            test_acc_list.append(score2)
            print("Iteration:[{}/{}] Test acc: {}".format(i, iteration, score2))
            epoch_list.append(i)
    return train_acc_list, test_acc_list, epoch_list
# %%
# Visualize the results
def visualize_results(epoch, train_acc, test_acc):
    """Plot the train and test score curves, save the figure, and show it.

    Args:
        epoch: X-axis values (iteration indices at which scores were taken).
        train_acc: Training scores, one per entry of ``epoch``.
        test_acc: Test scores, one per entry of ``epoch``.
    """
    plt.figure()
    # (scores, line colour, legend label), drawn in this order
    curves = (
        (train_acc, 'red', 'train'),
        (test_acc, 'blue', 'test'),
    )
    for scores, colour, tag in curves:
        plt.plot(epoch, scores, c=colour, label=tag)
    plt.xlabel('Epoch')
    plt.ylabel('Acc')
    plt.title('Regression')
    plt.legend()
    plt.savefig("./Regression_results.jpg")
    plt.show()
if __name__ == '__main__':
    # Train on the module-level split, then chart the logged scores.
    train_acc, test_acc, epoch = Regression(
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
    )
    visualize_results(epoch=epoch, train_acc=train_acc, test_acc=test_acc)