# --- Feature / label preparation -------------------------------------------
# Labels come straight from the training frame; the row count is kept so the
# combined feature table can be split back into its train and test parts.
n_train = len(train)
train_y = train['Survived'].values

# Stack train (without the label column) on top of test so both receive the
# same column selection and the same imputation.
data_set = pd.concat([train.drop(columns=['Survived']), test], axis=0)
data_set = data_set.drop(
    columns=['PassengerId', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked']
)

# Mean imputation: every missing value is replaced by its column mean.
# NOTE: the means are computed over train and test rows together.
data_set = data_set.fillna(data_set.mean())

# The first n_train rows are the training features, the rest the test features;
# both are handed on as plain NumPy arrays.
train_x = data_set.iloc[:n_train].values
test_x = data_set.iloc[n_train:].values
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader, SubsetRandomSampler
from sklearn.model_selection import KFold
# --- Cross-validation setup ------------------------------------------------
# Dataset built from the full training features and labels (SimpleDataset is
# defined elsewhere); the folds below index into it.
simple_dataset = SimpleDataset(train_x, train_y)

# Shuffled K-fold splitter. No random_state is passed, so the fold assignment
# differs from run to run.
k_folds = 5
kfold = KFold(shuffle=True, n_splits=k_folds)

# One validation loss / accuracy entry is collected per fold.
loss_list, acc_list = [], []
def _run_epoch(model, loader, criterion, n_samples, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: Network called on each float batch; its outputs are compared
            against a 0.5 threshold to obtain class predictions.
        loader: Iterable yielding ``(features, labels)`` batches.
        criterion: Loss called as ``criterion(output, target)`` with the
            target reshaped to ``(-1, 1)``.
        n_samples: Number of samples the loader visits; used to turn the
            summed loss and the hit count into per-sample averages.
        optimizer: When given, the pass is a training pass (train mode,
            gradients enabled, one optimizer step per batch). When ``None``
            the pass is evaluation only (eval mode, gradients disabled).

    Returns:
        Tuple ``(mean_loss, accuracy)`` over the ``n_samples`` samples.
    """
    training = optimizer is not None
    # Switch layers such as dropout / batch-norm to the matching mode; this is
    # a no-op for models without mode-dependent layers.
    model.train(training)

    total_loss = 0.0
    num_right = 0
    with torch.set_grad_enabled(training):
        for tensor_x, tensor_y in loader:
            tensor_x = tensor_x.float()
            tensor_y = tensor_y.float().reshape(-1, 1)

            if training:
                optimizer.zero_grad()
            output = model(tensor_x)
            loss = criterion(output, tensor_y)
            if training:
                loss.backward()
                optimizer.step()

            # The batch loss is a mean, so weight it by the batch size before
            # averaging over the whole subset.
            total_loss += loss.item() * len(tensor_x)
            # Vectorised 0.5 threshold instead of a per-element Python loop.
            predicted = (output.detach() >= 0.5).float()
            num_right += (predicted == tensor_y).sum().item()

    return total_loss / n_samples, num_right / n_samples


# NOTE(review): the indentation of this script was lost; the nesting below
# (metrics recorded once per fold, summary printed once at the very end) is
# reconstructed -- confirm against the original.
for fold, (train_ids, valid_ids) in enumerate(kfold.split(simple_dataset)):
    print(f'FOLD {fold}')
    print('--------------------------------')

    # Both loaders read from the same dataset; the samplers restrict each one
    # to the indices of its side of the split.
    train_subsampler = SubsetRandomSampler(train_ids)
    valid_subsampler = SubsetRandomSampler(valid_ids)
    train_loader = DataLoader(simple_dataset, batch_size=99, sampler=train_subsampler)
    valid_loader = DataLoader(simple_dataset, batch_size=99, sampler=valid_subsampler)

    # A fresh model and optimizer per fold, so no weights carry over.
    simple_nn = SimpleNN()
    optimizer = optim.Adam(simple_nn.parameters(), lr=0.01)
    error = nn.BCELoss()

    for epoch in range(300):
        # Validation runs before the training pass, so the reported validation
        # metrics describe the model as it was at the start of the epoch.
        valid_loss, valid_accuracy = _run_epoch(
            simple_nn, valid_loader, error, len(valid_ids)
        )
        if epoch % 50 == 0:
            print('Valid Loss: {} Accuracy: {} Epoch:{}'.format(valid_loss, valid_accuracy, epoch))

        train_loss, accuracy = _run_epoch(
            simple_nn, train_loader, error, len(train_ids), optimizer
        )
        if epoch % 50 == 0:
            print('Loss: {} Accuracy: {} Epoch:{}'.format(train_loss, accuracy, epoch))

    # Keep the last validation metrics of this fold for the final average.
    loss_list.append(valid_loss)
    acc_list.append(valid_accuracy)

print('Training Ended')
print('Average Loss: {} Average Accuracy: {}'.format(np.mean(loss_list), np.mean(acc_list)))
帮我解读一下以上代码。