Below is an implementation of a multi-label classification network based on the VGGNet architecture. The code and an explanation for each .py file follow:
1. model.py
import torch
import torch.nn as nn

# VGG configuration: numbers are conv output channels, 'M' marks a max-pooling layer
cfgs = {
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
}

class VGG(nn.Module):
    def __init__(self, vgg_name, num_classes):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfgs[vgg_name])
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
            nn.Sigmoid()  # independent per-label probabilities for multi-label classification
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        # Pool the feature maps to a fixed 7x7 so the 512*7*7 classifier works for any
        # input size (32x32 CIFAR-10 images in train.py, 224x224 images in predict.py)
        layers += [nn.AdaptiveAvgPool2d((7, 7))]
        return nn.Sequential(*layers)

def VGG16(num_classes):
    return VGG('VGG16', num_classes)
model.py defines the VGG architecture. VGG16 is a standard, pre-defined configuration, adapted here for the multi-label classification task: the final output layer uses a Sigmoid activation so that the network emits an independent probability for each label.
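As a quick sanity check, the snippet below (an illustrative sketch, assuming model.py is importable from the working directory) instantiates the model and runs a random batch through it. Because the head ends in Sigmoid rather than Softmax, each of the 10 outputs is an independent probability in [0, 1], and the values do not need to sum to 1.

import torch
from model import VGG16

# Hypothetical sanity check with a random batch of shape (N, C, H, W)
model = VGG16(num_classes=10)
model.eval()
dummy = torch.randn(2, 3, 32, 32)  # two fake 32x32 RGB images

with torch.no_grad():
    probs = model(dummy)

print(probs.shape)                                            # torch.Size([2, 10])
print(probs.min().item() >= 0.0, probs.max().item() <= 1.0)   # True True: sigmoid outputs
print(probs.sum(dim=1))                                       # generally not 1.0, unlike softmax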
2. train.py
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from model import VGG16
import matplotlib.pyplot as plt

# Hyperparameters
num_classes = 10
batch_size = 100
epochs = 100
learning_rate = 0.001

# Data preprocessing and augmentation
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the training and validation sets
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Build the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = VGG16(num_classes).to(device)

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

# Training function
def train(net, dataloader, criterion, optimizer, device):
    net.train()
    running_loss = 0.0
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        # Convert class indices to multi-hot target vectors for the multi-label loss
        labels = nn.functional.one_hot(labels, num_classes).float()
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(dataloader)

# Validation function
def validate(net, dataloader, criterion, device):
    net.eval()
    running_loss = 0.0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            # Convert class indices to multi-hot target vectors
            labels = nn.functional.one_hot(labels, num_classes).float()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
    return running_loss / len(dataloader)

# Training and validation loop
train_losses = []
val_losses = []
for epoch in range(epochs):
    train_loss = train(net, train_loader, criterion, optimizer, device)
    val_loss = validate(net, val_loader, criterion, device)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

# Save the model
torch.save(net.state_dict(), 'vgg16_multilabel.pth')

# Plot the loss curves
plt.plot(range(1, epochs + 1), train_losses, label='Train Loss')
plt.plot(range(1, epochs + 1), val_losses, label='Val Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
train.py contains the training procedure for the VGG model. It first sets the hyperparameters and data preprocessing, then loads the training and validation sets and defines the loss function (BCELoss) and optimizer (Adam). The training and validation functions run each epoch and record the losses; because the network ends in a Sigmoid, the class-index labels are converted to multi-hot vectors with one_hot before the BCE loss is computed. Finally, the model weights are saved and the loss curves are plotted.
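To make the label-conversion step concrete, here is a small illustrative sketch of what happens inside the training loop: a batch of CIFAR-10 class indices is turned into multi-hot target vectors, and BCELoss then averages the binary cross-entropy over every label position independently (the tensor values below are made up for illustration).

import torch
import torch.nn as nn

# Toy example: a batch of 3 samples with CIFAR-10 class indices 2, 0 and 7
labels = torch.tensor([2, 0, 7])
targets = nn.functional.one_hot(labels, num_classes=10).float()
print(targets)
# tensor([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
#         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
#         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]])

# Fake sigmoid outputs from the network, values in (0, 1)
outputs = torch.sigmoid(torch.randn(3, 10))

# BCELoss treats every one of the 3 * 10 label positions as an independent
# binary classification problem and averages the per-position losses
criterion = nn.BCELoss()
loss = criterion(outputs, targets)
print(loss.item())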
3. predict.py
import torch
import cv2
import numpy as np
from model import VGG16
import matplotlib.pyplot as plt

# Load the trained model
num_classes = 10
model = VGG16(num_classes)
model.load_state_dict(torch.load('vgg16_multilabel.pth', map_location='cpu'))
model.eval()

# Load and preprocess the image
image_path = 'test_image.jpg'
image = cv2.imread(image_path)
image = cv2.resize(image, (224, 224))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Same normalization as training: (x / 255 - 0.5) / 0.5
image_normalized = (image / 255.0 - 0.5) / 0.5
input_tensor = torch.from_numpy(image_normalized.transpose(2, 0, 1)).float().unsqueeze(0)

# Predict and visualize the result
with torch.no_grad():
    output = model(input_tensor).squeeze().numpy()

class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Annotate the image with each class name and its predicted probability
fig, ax = plt.subplots()
ax.imshow(image)
for idx, prob in enumerate(output):
    plt.text(5, 15 + 20 * idx, f'{class_names[idx]}: {prob:.2f}', fontsize=12, color='red')
plt.show()
predict.py runs classification on a single input image. It first loads the trained weights and puts the model in evaluation mode, then reads and preprocesses the image (resize, BGR-to-RGB conversion, and the same normalization used during training). The model outputs one probability per class, and the result is visualized by drawing each class name and its probability onto the image.
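predict.py prints a probability for every class; in a genuinely multi-label setting you would usually also threshold those probabilities to obtain the final set of predicted labels. A minimal sketch (the probability values are made up, and the 0.5 threshold is a common default rather than something fixed by the code above):

import numpy as np

# `output` stands for the length-10 probability vector produced by predict.py
output = np.array([0.02, 0.91, 0.10, 0.64, 0.05, 0.03, 0.01, 0.08, 0.12, 0.02])
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

threshold = 0.5  # assumed cut-off; can be tuned per class on a validation set
predicted_labels = [name for name, p in zip(class_names, output) if p >= threshold]
print(predicted_labels)  # ['automobile', 'cat']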
4. evaluate.py
import torch
import torch.nn as nn
from model import VGG16
from sklearn.metrics import hamming_loss, accuracy_score, precision_score, recall_score, f1_score
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

num_classes = 10

def evaluate(model, dataloader, device):
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            # Convert class indices to multi-hot targets so they match the multi-label predictions
            labels = nn.functional.one_hot(labels, num_classes).float()
            outputs = model(inputs)
            # Threshold the sigmoid probabilities at 0.5
            preds = torch.round(outputs)
            y_true.extend(labels.cpu().numpy().tolist())
            y_pred.extend(preds.cpu().numpy().tolist())
    return y_true, y_pred

# Load the trained model
model = VGG16(num_classes)
model.load_state_dict(torch.load('vgg16_multilabel.pth', map_location='cpu'))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Data preprocessing (no augmentation at evaluation time)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the test set
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

# Evaluate the model
y_true, y_pred = evaluate(model, test_loader, device)

# Compute the evaluation metrics
hl = hamming_loss(y_true, y_pred)
acc = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='samples')
recall = recall_score(y_true, y_pred, average='samples')
f1 = f1_score(y_true, y_pred, average='samples')

print(f'Hamming Loss: {hl:.4f}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
evaluate.py evaluates the trained VGG model. It loads the weights, switches the model to evaluation mode, and loads the test set. The predicted probabilities are thresholded with torch.round (i.e. a 0.5 cut-off) to obtain binary label vectors, which are compared against the multi-hot ground truth to compute Hamming Loss, Accuracy, Precision, Recall, and F1 Score. The results are printed at the end.
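These metrics behave differently on multi-label data than their familiar single-label versions. The tiny illustrative example below (made-up label matrices) shows that hamming_loss counts the fraction of individual label positions that are wrong, while accuracy_score in this setting is subset accuracy, requiring the whole label vector to match exactly.

import numpy as np
from sklearn.metrics import hamming_loss, accuracy_score

# Two samples, three labels each
y_true = np.array([[1, 0, 1],
                   [0, 1, 0]])
y_pred = np.array([[1, 0, 0],    # one of three positions wrong
                   [0, 1, 0]])   # fully correct

print(hamming_loss(y_true, y_pred))    # 1 wrong position out of 6 -> about 0.1667
print(accuracy_score(y_true, y_pred))  # only the second sample matches exactly -> 0.5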
5. Dataset preparation
This example uses the CIFAR-10 dataset, a classic benchmark for image recognition. CIFAR-10 contains 60,000 32x32 color images in 10 classes, with 6,000 images per class; the dataset is split into 50,000 training images and 10,000 test images.
In train.py, the dataset is loaded with the following code:
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
In evaluate.py, the test set is loaded with:
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)
When loading the data, a few augmentation techniques (random horizontal flips and random crops) are applied to improve the model's ability to generalize. The preprocessing pipelines are defined in the `transform` variables of train.py and evaluate.py; one possible refinement is shown in the sketch below.
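Note that train.py applies the same augmented transform to both the training and the validation set. A common refinement (not required by the code above, just standard practice) is to keep augmentation for training only and use a deterministic pipeline for validation and testing, matching what evaluate.py already does:

import torchvision.transforms as transforms

# Augmented pipeline for training only
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Deterministic pipeline for validation / test
eval_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])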
That covers the full process of building a multi-label classification network based on the VGGNet architecture with Python and PyTorch. With these .py files you can train, run predictions with, and evaluate a multi-label classification network.