Below is an implementation of a multi-label classification network based on the VGGNet architecture. The code and explanation for each .py file are as follows:
 
1. model.py 
 
import torch
import torch.nn as nn

# VGG configurations: numbers are output channels, 'M' marks a max-pooling layer
cfgs = {
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
}

class VGG(nn.Module):
    def __init__(self, vgg_name, num_classes):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfgs[vgg_name])
        # Adaptive pooling fixes the classifier input at 512 * 7 * 7 features,
        # so the model accepts both 32x32 CIFAR-10 images and larger inputs (e.g. 224x224).
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
            nn.Sigmoid()  # independent probability per label
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        return nn.Sequential(*layers)

def VGG16(num_classes):
    return VGG('VGG16', num_classes)
  
 
model.py defines the VGG architecture. VGG16 is a predefined configuration, which we adapt here for the multi-label classification task. An adaptive average-pooling layer fixes the classifier input at 512 * 7 * 7 features, so the network accepts both the 32x32 CIFAR-10 training images and larger images at inference time. The final output layer uses a Sigmoid activation, so the network emits an independent probability for each label.
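
As a quick sanity check (a minimal sketch, assuming model.py above is importable), the following instantiates the model and confirms that the output contains one Sigmoid probability per class:

import torch
from model import VGG16

# Smoke test with a dummy batch of two 32x32 RGB images (CIFAR-10 resolution)
model = VGG16(num_classes=10)
model.eval()
with torch.no_grad():
    out = model(torch.randn(2, 3, 32, 32))

print(out.shape)                           # torch.Size([2, 10])
print(out.min().item(), out.max().item())  # every value lies in [0, 1] because of the Sigmoid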
 
2. train.py 
 
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from model import VGG16
import matplotlib.pyplot as plt

# Hyperparameters
num_classes = 10
batch_size = 100
epochs = 100
learning_rate = 0.001

# Data preprocessing and augmentation
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the training and validation sets
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Build the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = VGG16(num_classes).to(device)

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

# Training function
def train(net, dataloader, criterion, optimizer, device):
    net.train()
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Convert class indices to multi-label (one-hot) targets
        labels = nn.functional.one_hot(labels, num_classes).float()

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    return running_loss / (i + 1)

# Validation function
def validate(net, dataloader, criterion, device):
    net.eval()
    running_loss = 0.0
    with torch.no_grad():
        for i, data in enumerate(dataloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # Convert class indices to multi-label (one-hot) targets
            labels = nn.functional.one_hot(labels, num_classes).float()

            outputs = net(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
    return running_loss / (i + 1)

# Training and validation loop
train_losses = []
val_losses = []
for epoch in range(epochs):
    train_loss = train(net, train_loader, criterion, optimizer, device)
    val_loss = validate(net, val_loader, criterion, device)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

# Save the model
torch.save(net.state_dict(), 'vgg16_multilabel.pth')

# Plot the loss curves
plt.plot(range(1, epochs + 1), train_losses, label='Train Loss')
plt.plot(range(1, epochs + 1), val_losses, label='Val Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
 
 
train.py contains the training procedure for the VGG model. It first sets the hyperparameters and data preprocessing, then loads the training and validation sets and defines the loss function (BCELoss) and optimizer (Adam). Training and validation functions are then defined and run in a loop while the losses are recorded. Finally, the model is saved and the loss curves are plotted.
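
To make the label handling concrete, here is a minimal, self-contained sketch (toy tensors only, no dataset or network) of how the integer CIFAR-10 labels are converted to multi-label targets and scored with BCELoss:

import torch
import torch.nn as nn

num_classes = 10
labels = torch.tensor([3, 7])                                 # integer class indices from CIFAR-10
targets = nn.functional.one_hot(labels, num_classes).float()  # shape (2, 10), a single 1 per row

# Dummy sigmoid outputs standing in for net(inputs); BCELoss expects values in (0, 1)
outputs = torch.full((2, num_classes), 0.1)
outputs[0, 3] = 0.9
outputs[1, 7] = 0.8

criterion = nn.BCELoss()
loss = criterion(outputs, targets)  # averaged over all 2 x 10 label entries
print(loss.item())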
 
3. predict.py 
 
import torch
import cv2
import numpy as np
from model import VGG16
import matplotlib.pyplot as plt

# Load the trained model
num_classes = 10
model = VGG16(num_classes)
model.load_state_dict(torch.load('vgg16_multilabel.pth', map_location='cpu'))
model.eval()

# Load and preprocess the image
image_path = 'test_image.jpg'
image = cv2.imread(image_path)
image = cv2.resize(image, (224, 224))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Same normalization as in training: scale to [0, 1], then (x - 0.5) / 0.5
image_normalized = (image / 255.0 - 0.5) / 0.5
input_tensor = torch.from_numpy(image_normalized.transpose(2, 0, 1)).float().unsqueeze(0)

# Predict and visualize the result
with torch.no_grad():
    output = model(input_tensor).squeeze().numpy()

class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Overlay each class name and its predicted probability on the image
fig, ax = plt.subplots()
ax.imshow(image)
for idx, prob in enumerate(output):
    plt.text(5, 15 + 20 * idx, f'{class_names[idx]}: {prob:.2f}', fontsize=12, color='red')

plt.show()
 
 
 
predict.py contains the inference procedure for a single input image. The model is first loaded and switched to evaluation mode. The image is then loaded and preprocessed, the model produces per-label probabilities, and the predictions are finally drawn on the image and displayed.
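
predict.py only overlays the raw probabilities; if a hard label set is needed, a common follow-up step (sketched below with a made-up probability vector in place of the real output) is to threshold the Sigmoid outputs at 0.5:

import numpy as np

class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Toy probabilities standing in for the 'output' array computed in predict.py
output = np.array([0.02, 0.91, 0.05, 0.10, 0.01, 0.03, 0.02, 0.04, 0.12, 0.64])

threshold = 0.5   # assumed cut-off; in practice it can be tuned on validation data
predicted = [(name, float(p)) for name, p in zip(class_names, output) if p >= threshold]
print(predicted)  # [('automobile', 0.91), ('truck', 0.64)]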
 
4. evaluate.py 
 
import torch
import torch.nn.functional as F
from model import VGG16
from sklearn.metrics import hamming_loss, accuracy_score, precision_score, recall_score, f1_score
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

def evaluate(model, dataloader, device, num_classes):
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for data in dataloader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Threshold the sigmoid outputs at 0.5 to get binary label predictions
            preds = torch.round(outputs)

            # Convert the ground-truth class indices to the same multi-label format
            labels = F.one_hot(labels, num_classes).float()

            y_true.extend(labels.cpu().numpy().tolist())
            y_pred.extend(preds.cpu().numpy().tolist())
    return y_true, y_pred

# Load the trained model
num_classes = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = VGG16(num_classes)
model.load_state_dict(torch.load('vgg16_multilabel.pth', map_location=device))
model = model.to(device)

# Data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the test set
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

# Evaluate the model
y_true, y_pred = evaluate(model, test_loader, device, num_classes)

# Compute the multi-label evaluation metrics
hl = hamming_loss(y_true, y_pred)
acc = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='samples', zero_division=0)
recall = recall_score(y_true, y_pred, average='samples', zero_division=0)
f1 = f1_score(y_true, y_pred, average='samples', zero_division=0)

print(f'Hamming Loss: {hl:.4f}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
 
 
evaluate.py contains the evaluation procedure for the trained VGG model. The model is loaded and set to evaluation mode, the test set is loaded, and the model's thresholded predictions are collected. The ground-truth labels are converted to the same multi-label format, and the metrics (Hamming Loss, Accuracy, Precision, Recall, and F1 Score) are then computed and printed.
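
For intuition about what these multi-label metrics measure, here is a tiny worked example on hand-written label matrices (not model output):

from sklearn.metrics import hamming_loss, accuracy_score, precision_score, recall_score, f1_score

# Two samples, three labels each, in multilabel-indicator format
y_true = [[1, 0, 1],
          [0, 1, 0]]
y_pred = [[1, 0, 0],   # misses one of the two true labels
          [0, 1, 0]]   # exact match

print(hamming_loss(y_true, y_pred))                        # 1 wrong bit out of 6 -> 0.1667
print(accuracy_score(y_true, y_pred))                      # subset accuracy: 1 exact match of 2 -> 0.5
print(precision_score(y_true, y_pred, average='samples'))  # mean per-sample precision -> 1.0
print(recall_score(y_true, y_pred, average='samples'))     # mean per-sample recall -> 0.75
print(f1_score(y_true, y_pred, average='samples'))         # mean per-sample F1 -> 0.8333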
 
5. Dataset preparation
 
In this example we use the CIFAR-10 dataset, a classic image-recognition benchmark. CIFAR-10 contains 60,000 32x32 color images in 10 classes, with 6,000 images per class, split into 50,000 training images and 10,000 test images.
 
In train.py, the dataset is loaded with the following code:
 
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
 
 
 
In evaluate.py, the test set is loaded with the following code:
 
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)
 
 
 
When loading the data we also apply data augmentation (random horizontal flips and random crops) to improve the model's generalization. The preprocessing pipeline is defined in the `transform` variable in train.py and evaluate.py.
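
A quick way to confirm the preprocessing behaves as expected is to pull one batch through the same pipeline; the sketch below simply repeats the train.py transform and loader and inspects the shapes and value range:

import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
loader = DataLoader(dataset, batch_size=100, shuffle=True)

images, labels = next(iter(loader))
print(len(dataset))                              # 50000 training images
print(images.shape)                              # torch.Size([100, 3, 32, 32])
print(images.min().item(), images.max().item())  # roughly within [-1, 1] after Normalize
print(labels[:10])                               # integer class indices 0..9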
 
That covers everything needed to build a multi-label classification network based on VGGNet with Python and PyTorch. With these .py files we can train, run predictions with, and evaluate a multi-label classification network.
 
 
 