本帖最后由 方大侠 于 2021-5-14 12:33 编辑
train.rar
(8.87 KB, 下载次数: 0)
我写了一段简单的代码,用来寻找能让 SVM(支持向量机)预测误差最小的参数组合,不过运行一段时间后内存就爆了。
我用 del 删除了 list,但好像没什么用。可能的原因是 split 返回的数据没有被 Python 自动释放,这该怎么办呢?
变量名指向新的内存之后,旧的内存空间不是应该自动释放吗?
这里的 x_train、y_train、x_valid、y_valid 在每次循环中都会指向新的内存,旧的内存按理说应该被自动释放才对。
# -*- coding: utf-8 -*-
import numpy as np # linear algebra
import pandas as pd # data processing
from sklearn import svm
# Load the training table from 'train.csv' in the current working directory.
train_data = pd.read_csv('train.csv')
# Report the table's shape; the trailing '\n' plus print's own newline
# leaves one blank line after the message.
shape_message = 'The shape of the train_data is: {}\n'.format(train_data.shape)
print(shape_message)
def split(data, pt, size):
    """Split *data* into a training part and a contiguous validation fold.

    Rows at positions ``pt`` .. ``pt + size - 1`` become the validation fold;
    all remaining rows, in their original order, become the training set.

    Args:
        data: DataFrame with a ``Survived`` label column. Every other column
            is treated as a feature.
        pt: Zero-based position of the first validation row.
        size: Number of rows in the validation fold.

    Returns:
        A tuple ``(x_train, y_train, x_valid, y_valid)`` of NumPy arrays.
        The ``y_*`` arrays are column vectors of shape ``(n, 1)``.
    """
    # .iloc makes the slicing explicitly positional, independent of the index.
    valid_part = data.iloc[pt:pt + size]
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # way to stitch the rows before and after the validation fold together.
    train_part = pd.concat([data.iloc[:pt], data.iloc[pt + size:]])

    y_valid = valid_part.Survived.values.reshape(-1, 1)
    y_train = train_part.Survived.values.reshape(-1, 1)
    x_train = train_part.drop(columns=['Survived']).values
    x_valid = valid_part.drop(columns=['Survived']).values
    return x_train, y_train, x_valid, y_valid
# Grid search over polynomial-kernel SVM hyper-parameters (degree d and
# penalty C = 2 ** (exp * 0.5)), scored by contiguous k-fold cross-validation
# on ``train_data``.
N_FOLDS = 10
m = train_data.shape[0]
# Rows per validation fold. The trailing ``m % N_FOLDS`` rows are never held
# out; they stay in the training part of every fold.
sub_size = m // N_FOLDS
# Results per degree d:
#   ec_dict['error<d>'] -> mean validation error for each exponent tried
#   ec_dict['c<d>']     -> the matching exponents (C = 2 ** (exp * 0.5))
ec_dict = {}
for d in range(1, 5):
    list_error = []
    list_c = []
    for exp in range(-20, 21):
        c_value = 2 ** (exp * 0.5)
        total_error = 0.0
        for i in range(1, N_FOLDS + 1):
            # Derive the fold start from sub_size rather than a hard-coded
            # row count, so the folds stay aligned for any dataset size.
            pt = (i - 1) * sub_size
            x_train, y_train, x_valid, y_valid = split(train_data, pt, sub_size)
            svc = svm.SVC(C=c_value, kernel='poly', degree=d)
            svc.fit(x_train, y_train.ravel())
            # svc.score returns accuracy; 1 - accuracy is the error rate.
            total_error += 1 - svc.score(x_valid, y_valid.ravel())
            print('i:', i)
        list_error.append(total_error / N_FOLDS)
        list_c.append(exp)
        # Progress marker: the value printed here is the exponent.
        print('c:', exp)
    min_error = min(list_error)
    index = list_error.index(min_error)
    min_exp = list_c[index]
    # Report the actual C value, not the exponent it was derived from.
    min_c = 2 ** (min_exp * 0.5)
    print("d: %d, c: %.10f, error: %f" % (d, min_c, min_error))
    # ec_dict keeps references to both lists, so a ``del`` on the local names
    # would not free any memory; the names are simply rebound next iteration.
    ec_dict['error' + str(d)] = list_error
    ec_dict['c' + str(d)] = list_c
|