Below is an implementation of a multi-label classification network based on the VGGNet architecture. The code and explanation for each .py file are as follows:
1. model.py
import torch
import torch.nn as nn

# VGG model configuration
cfgs = {
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
}

class VGG(nn.Module):
    def __init__(self, vgg_name, num_classes):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfgs[vgg_name])
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
            nn.Sigmoid()  # independent per-class probabilities for multi-label output
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        # AdaptiveAvgPool2d (instead of a no-op AvgPool2d(1, 1)) fixes the feature map
        # to 7x7 so the 512 * 7 * 7 classifier works for any input resolution
        # (32x32 CIFAR-10 crops during training, 224x224 images at prediction time).
        layers += [nn.AdaptiveAvgPool2d((7, 7))]
        return nn.Sequential(*layers)

def VGG16(num_classes):
    return VGG('VGG16', num_classes)
model.py defines the VGG architecture. VGG16 is a predefined configuration of this architecture, which we use for the multi-label classification task. The final output layer uses a Sigmoid activation so that the network outputs an independent probability for each label.
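As a quick sanity check (not part of the original files), you can instantiate the model and run a dummy forward pass to confirm that there is one probability per class and that every value lies in (0, 1) because of the final Sigmoid:

import torch
from model import VGG16

# build an untrained model and feed it two random 32x32 RGB images
model = VGG16(num_classes=10)
model.eval()
with torch.no_grad():
    out = model(torch.randn(2, 3, 32, 32))
print(out.shape)                            # torch.Size([2, 10])
print(out.min().item(), out.max().item())   # both values fall inside (0, 1)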
2. train.py
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from model import VGG16
import matplotlib.pyplot as plt

# Hyperparameters
num_classes = 10
batch_size = 100
epochs = 100
learning_rate = 0.001

# Data preprocessing and augmentation
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the training and validation sets
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Build the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = VGG16(num_classes).to(device)

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

# Training function
def train(net, dataloader, criterion, optimizer, device):
    net.train()
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # Convert class indices to multi-label (multi-hot) format
        labels = nn.functional.one_hot(labels, num_classes).float()
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / (i + 1)

# Validation function
def validate(net, dataloader, criterion, device):
    net.eval()
    running_loss = 0.0
    with torch.no_grad():
        for i, data in enumerate(dataloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # Convert class indices to multi-label (multi-hot) format
            labels = nn.functional.one_hot(labels, num_classes).float()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
    return running_loss / (i + 1)

# Training and validation loop
train_losses = []
val_losses = []
for epoch in range(epochs):
    train_loss = train(net, train_loader, criterion, optimizer, device)
    val_loss = validate(net, val_loader, criterion, device)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

# Save the model weights
torch.save(net.state_dict(), 'vgg16_multilabel.pth')

# Plot the loss curves
plt.plot(range(1, epochs + 1), train_losses, label='Train Loss')
plt.plot(range(1, epochs + 1), val_losses, label='Val Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
train.py contains the training procedure for the VGG model. It first sets the hyperparameters and the data preprocessing, then loads the training and validation sets and defines the loss function (BCELoss) and optimizer (Adam). The train and validate functions each run one pass over their loader, and the main loop records the per-epoch loss values. Finally, the model weights are saved and the loss curves are plotted.
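One common design variant, not used in the original code, is to remove the final Sigmoid from the model and train with nn.BCEWithLogitsLoss instead of nn.BCELoss; it combines the sigmoid and the binary cross-entropy in a single, numerically more stable operation. A minimal sketch of the loss computation (the logits and targets below are random placeholders):

import torch
import torch.nn as nn

# hypothetical raw scores for a batch of 4 images and 10 classes (model WITHOUT the final Sigmoid)
logits = torch.randn(4, 10)
targets = torch.randint(0, 2, (4, 10)).float()  # multi-hot targets

criterion = nn.BCEWithLogitsLoss()
loss = criterion(logits, targets)
# equivalent to nn.BCELoss()(torch.sigmoid(logits), targets), but more stable numerically
print(loss.item())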
3. predict.py
import torch
import cv2
import numpy as np
from model import VGG16
import matplotlib.pyplot as plt

# Load the model
num_classes = 10
model = VGG16(num_classes)
model.load_state_dict(torch.load('vgg16_multilabel.pth', map_location='cpu'))
model.eval()

# Load and preprocess the image
image_path = 'test_image.jpg'
image = cv2.imread(image_path)
image = cv2.resize(image, (224, 224))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Same normalization as during training: scale to [0, 1], then (x - 0.5) / 0.5
image_normalized = (image / 255.0 - 0.5) / 0.5
input_tensor = torch.from_numpy(image_normalized.transpose(2, 0, 1)).float().unsqueeze(0)

# Predict and visualize the result
with torch.no_grad():
    output = model(input_tensor).squeeze().numpy()

class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Annotate the image with each class name and its predicted probability
fig, ax = plt.subplots()
ax.imshow(image)
for idx, prob in enumerate(output):
    plt.text(5, 15 + 20 * idx, f'{class_names[idx]}: {prob:.2f}', fontsize=12, color='red')
plt.show()
predict.py performs classification prediction on a single input image. It first loads the trained weights and puts the model in evaluation mode, then reads the image and applies the same preprocessing as during training. The image is passed through the network, and the predicted probability for every class is drawn on top of the image.
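If discrete label decisions are needed instead of raw probabilities, a common convention (assumed here, not part of the original script) is to keep every class whose sigmoid output exceeds 0.5. For example, with a hypothetical output array standing in for the one produced above:

import numpy as np

class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# hypothetical sigmoid outputs, one probability per class
output = np.array([0.02, 0.91, 0.10, 0.05, 0.03, 0.04, 0.01, 0.08, 0.12, 0.76])
threshold = 0.5
predicted_labels = [class_names[i] for i, prob in enumerate(output) if prob > threshold]
print('Predicted labels:', predicted_labels)  # ['automobile', 'truck']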
4. evaluate.py
import torch
import torch.nn as nn
from model import VGG16
from sklearn.metrics import hamming_loss, accuracy_score, precision_score, recall_score, f1_score
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

def evaluate(model, dataloader, device, num_classes):
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for data in dataloader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # Convert class indices to multi-hot vectors so that y_true and y_pred
            # share the same multi-label indicator format expected by sklearn
            labels = nn.functional.one_hot(labels, num_classes).float()
            outputs = model(inputs)
            preds = torch.round(outputs)  # threshold probabilities at 0.5
            y_true.extend(labels.cpu().numpy().tolist())
            y_pred.extend(preds.cpu().numpy().tolist())
    return y_true, y_pred

# Load the model
num_classes = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = VGG16(num_classes)
model.load_state_dict(torch.load('vgg16_multilabel.pth', map_location=device))
model = model.to(device)

# Data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the test set
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

# Evaluate the model
y_true, y_pred = evaluate(model, test_loader, device, num_classes)

# Compute the evaluation metrics
hl = hamming_loss(y_true, y_pred)
acc = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='samples')
recall = recall_score(y_true, y_pred, average='samples')
f1 = f1_score(y_true, y_pred, average='samples')
print(f'Hamming Loss: {hl:.4f}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
evaluate.py evaluates the trained VGG model. It loads the saved weights, puts the model in evaluation mode, and loads the test set. The predictions are thresholded at 0.5 (via torch.round) and compared with the multi-hot ground-truth labels to compute the evaluation metrics (Hamming Loss, Accuracy, Precision, Recall, and F1 Score), which are printed at the end.
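To make these multi-label metrics concrete, here is a tiny hand-checked example with hypothetical labels (independent of the CIFAR-10 run), in the same multi-hot format that evaluate.py produces:

from sklearn.metrics import hamming_loss, accuracy_score, precision_score, recall_score, f1_score

# two samples, three classes, multi-hot format; the prediction for sample 1 misses class 2
y_true = [[1, 0, 1], [0, 1, 0]]
y_pred = [[1, 0, 0], [0, 1, 0]]
print(hamming_loss(y_true, y_pred))                        # 1 wrong bit out of 6 -> 0.1667
print(accuracy_score(y_true, y_pred))                      # exact-match (subset) accuracy -> 0.5
print(precision_score(y_true, y_pred, average='samples'))  # precision averaged per sample
print(recall_score(y_true, y_pred, average='samples'))     # recall averaged per sample
print(f1_score(y_true, y_pred, average='samples'))         # F1 averaged per sample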
5. Dataset preparation
In this example we use the CIFAR-10 dataset, a classic image-recognition benchmark. CIFAR-10 contains 60,000 32x32 color images in 10 classes, with 6,000 images per class; the dataset is split into 50,000 training images and 10,000 test images.
In train.py the dataset is loaded with the following code:
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
In evaluate.py the test set is loaded with:
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)
When loading the data we also apply data-augmentation techniques (random horizontal flipping and random cropping with padding) to improve the model's ability to generalize. These preprocessing operations are defined in the `transform` variables of train.py and evaluate.py; the augmentation is part of the transform in train.py, while evaluate.py uses plain normalization only.
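Note that CIFAR-10 is a single-label dataset, so the one_hot conversion in train.py only simulates multi-label targets. For a genuinely multi-label dataset you would usually write a small custom Dataset that returns a multi-hot vector per image. A minimal sketch, assuming a hypothetical CSV file where each row holds an image path and its label ids separated by semicolons (the file format and the MultiLabelDataset class are illustrative, not part of the original files):

import csv
import torch
from PIL import Image
from torch.utils.data import Dataset

class MultiLabelDataset(Dataset):
    # hypothetical CSV row format: "images/img_001.jpg,2;5" (path, then label ids joined by ';')
    def __init__(self, csv_file, num_classes, transform=None):
        self.samples = []
        self.num_classes = num_classes
        self.transform = transform
        with open(csv_file) as f:
            for path, label_str in csv.reader(f):
                labels = [int(x) for x in label_str.split(';')]
                self.samples.append((path, labels))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, labels = self.samples[idx]
        image = Image.open(path).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        target = torch.zeros(self.num_classes)
        target[labels] = 1.0  # multi-hot vector, directly usable with nn.BCELoss
        return image, target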
That is the complete implementation of a VGGNet-based multi-label classification network in Python and PyTorch. With these .py files we can train, run predictions with, and evaluate a multi-label classification network.