从提供的错误信息和代码来看,主要问题出现在自定义数据集 `SimpleDataset` 的 `__getitem__` 方法中。 ...
#dnn模型(train_test_split)
import torch
import torch.nn as nn
from torch.utils import data
from torch.utils.data import Dataset,DataLoader
from torch import optim
#定义神经网络模型
dropout1, dropout2 = 0.3, 0.6
class SimpleNN(nn.Module):
def __init__(self):
super(SimpleNN, self).__init__()# 继承需要用 SimpleNN
self.dense = nn.Sequential(
nn.Flatten(),
nn.Linear(12, 128),
nn.ReLU(),
nn.Dropout(dropout1),
nn.Linear(128, 256),
nn.ReLU(),
nn.Dropout(dropout2),
nn.Linear(256, 1),
)
def forward(self, X):
x = self.dense(X)
output = torch.sigmoid(x)
return output
#初始化模型和优化器
simple_dnn_model = SimpleNN()
loss = nn.BCELoss() #定义损失函数
optimizer = optim.Adam(nn_model.parameters(),lr=0.0001) #定义优化器
#初始化列表
acc_list = []
loss_list = []
# 开始训练
batch_size = 99
num_epochs = 1000
#创建数据集
train_dataset = data.TensorDataset(X_train, y_train)
valid_dataset = data.TensorDataset(X_valid, y_valid)
# 获取一个数据迭代器
train_iter = DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True,num_workers=2)#shuffle=True相当于sampler=RandomSampler(dataset)
valid_iter = DataLoader(dataset=valid_dataset,batch_size=batch_size,shuffle=True,num_workers=2)
#开始迭代
for epoch in range(num_epochs):
train_loss = 0
num_right = 0
for tensor_x, tensor_y in train_iter:#训练集执行梯度更新
tensor_x = tensor_x.float()
tensor_y = tensor_y.float().reshape(-1, 1)
optimizer.zero_grad() #梯度清零
pre_train = simple_dnn_model(tensor_x)
train_l = loss(pre_train, tensor_y) #损失应避免与全局变量loss重名
train_l.backward()#前向传播
optimizer.step()#梯度下降
train_loss += train_l.item() * len(tensor_x)#批量损失
result =
num_right += np.sum(np.array(result) == tensor_y.numpy().reshape(-1))
train_loss = train_loss / len(train_iter.dataset)
train_accuracy = num_right / len(train_iter.dataset)
if epoch % 200 == 0:
print('Loss: {} Accuracy: {} Epoch:{}'.format(train_loss, train_accuracy, epoch))
with torch.no_grad():
valid_loss = 0
num_right = 0
for tensor_x, tensor_y in valid_iter:
tensor_x = tensor_x.float()
tensor_y = tensor_y.float().reshape(-1, 1)
pre_valid = simple_dnn_model(tensor_x)
valid_l = loss(pre_valid, tensor_y)
valid_loss += valid_l.item() * len(tensor_x)
result =
num_right += np.sum(np.array(result) == tensor_y.numpy().reshape(-1))
valid_loss = valid_loss / len(valid_iter.dataset)
valid_accuracy = num_right / len(valid_iter.dataset)
if epoch % 200 == 0:
print('Valid Loss: {} Accuracy: {} Epoch:{}'.format(valid_loss, valid_accuracy, epoch))
#将每次迭代的结果写入列表
loss_list.append(valid_loss)
acc_list.append(valid_accuracy)
print('Training Ended')
print('Average Loss: {} Average Accuracy: {}'.format(np.mean(loss_list), np.mean(acc_list)))
数据的特征和标签类型是tensor,转换为numpy后形状分别是X_train.values.shape,y_train.values.shape,X_valid.values.shape,y_valid.values.shape
((569, 12), (569,), (143, 12), (143,))
训练模型代码报错如下---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In, line 47
43 num_epochs = 1000
46 #创建数据集
---> 47 train_dataset = data.TensorDataset(X_train, y_train)
48 valid_dataset = data.TensorDataset(X_valid, y_valid)
50 # 获取一个数据迭代器
File /opt/conda/lib/python3.10/site-packages/torch/utils/data/dataset.py:204, in TensorDataset.__init__(self, *tensors)
203 def __init__(self, *tensors: Tensor) -> None:
--> 204 assert all(tensors.size(0) == tensor.size(0) for tensor in tensors), "Size mismatch between tensors"
205 self.tensors = tensors
File /opt/conda/lib/python3.10/site-packages/torch/utils/data/dataset.py:204, in <genexpr>(.0)
203 def __init__(self, *tensors: Tensor) -> None:
--> 204 assert all(tensors.size(0) == tensor.size(0) for tensor in tensors), "Size mismatch between tensors"
205 self.tensors = tensors
TypeError: 'int' object is not callable Terence888 发表于 2024-10-22 22:21
#dnn模型(train_test_split)
import torch
import...
您遇到的错误是因为 `data.TensorDataset` 的参数需要是 PyTorch 张量类型。在构造 `TensorDataset` 时,请确保 `X_train` 和 `y_train` 是 PyTorch 张量,而不是 NumPy 数组或其他类型。以下是解决方案:
1. **确保数据是 PyTorch 张量**:
X_train = torch.tensor(X_train.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
X_valid = torch.tensor(X_valid.values, dtype=torch.float32)
y_valid = torch.tensor(y_valid.values, dtype=torch.float32)
2. **创建数据集**:
train_dataset = data.TensorDataset(X_train, y_train)
valid_dataset = data.TensorDataset(X_valid, y_valid)
3. **检查一致性**:
确保 `X_train` 和 `y_train` 的第一个维度大小相同,即样本数量保持一致。
请按照这些步骤确保数据类型和维度一致性。
以上回复来自 FishC AI 小助理,如未能正确解答您的问题,请继续追问。 FishC 发表于 2024-10-22 22:24
您遇到的错误是因为 `data.TensorDataset` 的参数需要是 PyTorch 张量类型。在构造 `TensorDataset` 时 ...
#dnn模型(train_test_split)
import torch
import torch.nn as nn
from torch.utils import data
from torch.utils.data import Dataset,DataLoader
from torch import optim
#定义神经网络模型
dropout1, dropout2 = 0.3, 0.6
class SimpleNN(nn.Module):
def __init__(self):
super(SimpleNN, self).__init__()# 继承需要用 SimpleNN
self.dense = nn.Sequential(
nn.Flatten(),
nn.Linear(12, 128),
nn.ReLU(),
nn.Dropout(dropout1),
nn.Linear(128, 256),
nn.ReLU(),
nn.Dropout(dropout2),
nn.Linear(256, 1),
)
def forward(self, X):
x = self.dense(X)
output = torch.sigmoid(x)
return output
#初始化模型和优化器
simple_dnn_model = SimpleNN()
loss = nn.BCELoss() #定义损失函数
optimizer = optim.Adam(nn_model.parameters(),lr=0.0001) #定义优化器
#初始化列表
acc_list = []
loss_list = []
#将DataFrame数据转换为NumPy数组,然后再转换为PyTorch张量
X_train = torch.tensor(X_train.astype(np.float32).values, dtype=torch.float32)
y_train = torch.tensor(y_train.astype(np.float32).values, dtype=torch.float32)
X_valid = torch.tensor(X_valid.astype(np.float32).values, dtype=torch.float32)
y_valid = torch.tensor(y_valid.astype(np.float32).values, dtype=torch.float32)
#创建数据集
train_dataset = data.TensorDataset(X_train, y_train)
valid_dataset = data.TensorDataset(X_valid, y_valid)
# 获取一个数据迭代器
train_iter = DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True,num_workers=2)#shuffle=True相当于sampler=RandomSampler(dataset)
valid_iter = DataLoader(dataset=valid_dataset,batch_size=batch_size,shuffle=True,num_workers=2)
# 开始训练
batch_size = 99
num_epochs = 1000
#开始迭代
for epoch in range(num_epochs):
train_loss = 0
num_right = 0
for tensor_x, tensor_y in train_iter:#训练集执行梯度更新
tensor_x = tensor_x.float()
tensor_y = tensor_y.float().reshape(-1, 1)
optimizer.zero_grad() #梯度清零
pre_train = simple_dnn_model(tensor_x)
train_l = loss(pre_train, tensor_y) #损失应避免与全局变量loss重名
train_l.backward()#前向传播
optimizer.step()#梯度下降
train_loss += train_l.item() * len(tensor_x)#批量损失
result =
num_right += np.sum(np.array(result) == tensor_y.numpy().reshape(-1))
train_loss = train_loss / len(train_iter.dataset)
train_accuracy = num_right / len(train_iter.dataset)
if epoch % 200 == 0:
print('Loss: {} Accuracy: {} Epoch:{}'.format(train_loss, train_accuracy, epoch))
with torch.no_grad():
valid_loss = 0
num_right = 0
for tensor_x, tensor_y in valid_iter:
tensor_x = tensor_x.float()
tensor_y = tensor_y.float().reshape(-1, 1)
pre_valid = simple_dnn_model(tensor_x)
valid_l = loss(pre_valid, tensor_y)
valid_loss += valid_l.item() * len(tensor_x)
result =
num_right += np.sum(np.array(result) == tensor_y.numpy().reshape(-1))
valid_loss = valid_loss / len(valid_iter.dataset)
valid_accuracy = num_right / len(valid_iter.dataset)
if epoch % 200 == 0:
print('Valid Loss: {} Accuracy: {} Epoch:{}'.format(valid_loss, valid_accuracy, epoch))
#将每次迭代的结果写入列表
loss_list.append(valid_loss)
acc_list.append(valid_accuracy)
print('Training Ended')
print('Average Loss: {} Average Accuracy: {}'.format(np.mean(loss_list), np.mean(acc_list)))
上述代码存在什么问题导致预测准确率过低 Terence888 发表于 2024-10-22 22:40
#dnn模型(train_test_split)
import torch
import...
在该代码中,有一些地方可能会影响模型的性能和准确性。以下是检查和改进的建议:
1. **初始化列表**:
- 确保 `acc_list` 和 `loss_list` 被正确初始化,例如使用空列表 `acc_list = []` 和 `loss_list = []`。
2. **未定义变量**:
- `nn_model` 应替换为 `simple_dnn_model`。
3. **结果计算**:
- 需要实现对 `result` 的计算。可以将预测值转化为二进制类别,例如:`result = (pre_train > 0.5).float()`。
4. **数据标准化**:
- 确保输入数据经过标准化或归一化,以便网络更好地学习。
5. **学习率调整**:
- 调整 `optimizer` 的学习率(`lr=0.0001`),可能较低或较高的学习率都会影响网络的收敛。
6. **批量大小和迭代次数**:
- 尝试不同的 `batch_size` 和 `num_epochs` 以找到最佳组合。
7. **网络架构和过拟合**:
- 使用较小的网络或正则化技术,减少过拟合。
如果尝试这些改进后,仍发现准确度低,可以尝试进一步探索数据集特性或网络结构。
以上回复来自 FishC AI 小助理,如未能正确解答您的问题,请继续追问。
页:
1
[2]