
Deep Learning Week 9: Implementing the YOLOv5 C3 Module (PyTorch)


The goal is to understand the structure of the C3 module as a foundation for later study of the YOLOv5 algorithm. The dataset used is a weather-recognition dataset.

I. Preparation

1. Set up the GPU

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, datasets
import os, PIL, pathlib, warnings

warnings.filterwarnings("ignore")  # suppress warnings

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

Output: cuda

2. Import the data

import os, PIL, random, pathlib

data_dir = './data/'
data_dir = pathlib.Path(data_dir)
data_paths = list(data_dir.glob('*'))
classeNames = [str(path).split("\\")[1] for path in data_paths]  # splits on the Windows path separator; path.name works on any OS
print(classeNames)

Apply image transforms with torchvision.transforms.Compose(), then print the class-to-index mapping:

train_transforms = transforms.Compose([
    transforms.Resize([224, 224]),        # resize input images to a uniform size
    # transforms.RandomHorizontalFlip(),  # random horizontal flip
    transforms.ToTensor(),                # convert a PIL Image or numpy.ndarray to a tensor scaled to [0, 1]
    transforms.Normalize(                 # normalize towards a standard normal (Gaussian) distribution so the model converges more easily
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225])        # the mean and std values were computed by random sampling from the dataset
])

test_transform = transforms.Compose([
    transforms.Resize([224, 224]),        # resize input images to a uniform size
    transforms.ToTensor(),                # convert a PIL Image or numpy.ndarray to a tensor scaled to [0, 1]
    transforms.Normalize(                 # normalize towards a standard normal (Gaussian) distribution so the model converges more easily
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225])        # the mean and std values were computed by random sampling from the dataset
])

total_data = datasets.ImageFolder("./data/", transform=train_transforms)
print(total_data.class_to_idx)

Output: {'cloudy': 0, 'rain': 1, 'shine': 2, 'sunrise': 3}

3. Split the dataset

train_size = int(0.8 * len(total_data))
test_size = len(total_data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(total_data, [train_size, test_size])
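random_split draws a fresh random permutation on every run, so the train/test membership changes each time the script is executed. If a reproducible split is wanted, a seeded generator can be passed instead (a minimal sketch; the seed value 42 is arbitrary):

train_dataset, test_dataset = torch.utils.data.random_split(
    total_data,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42)  # fixed seed -> identical split every run
)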

  
batch_size = 32

train_dl = torch.utils.data.DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=0)
test_dl = torch.utils.data.DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=0)

for X, y in test_dl:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break

II. Building the YOLOv5-C3 Model

1. Build the model


  
import torch.nn.functional as F

def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        return self.act(self.conv(x))

class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))

class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))

class model_K(nn.Module):
    def __init__(self):
        super(model_K, self).__init__()
        # convolution module
        self.Conv = Conv(3, 32, 3, 2)
        # C3 module 1
        self.C3_1 = C3(32, 64, 3, 2)  # n=3 stacked Bottlenecks; the 4th positional argument lands on shortcut (any truthy value enables the residual add)
        # fully connected layers for classification
        self.classifier = nn.Sequential(
            nn.Linear(in_features=802816, out_features=100),
            nn.ReLU(),
            nn.Linear(in_features=100, out_features=4)
        )

    def forward(self, x):
        x = self.Conv(x)
        x = self.C3_1(x)
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

model = model_K().to(device)
print(model)
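For reference, the in_features=802816 of the first Linear layer comes from the feature-map shape: the stride-2 Conv turns a 3x224x224 input into 32x112x112, the C3 block keeps the spatial size while raising the channels to 64, and 64 * 112 * 112 = 802816. A minimal shape check (assuming the 224x224 input size used above):

x = torch.randn(1, 3, 224, 224).to(device)
feat = model.C3_1(model.Conv(x))        # pass a dummy batch through the two feature modules
print(feat.shape)                       # torch.Size([1, 64, 112, 112])
print(feat.flatten(start_dim=1).shape)  # torch.Size([1, 802816])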

2. View model details

Count the model's parameters and other statistics.


  
import torchsummary as summary
summary.summary(model, (3, 224, 224))

III. Training the Model

1. Write the training and test functions

These are the same as in the earlier CNN and VGG experiments.


  
# training loop
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)  # size of the training set
    num_batches = len(dataloader)   # number of batches (size / batch_size, rounded up)
    train_loss, train_acc = 0, 0    # initialize training loss and accuracy

    for X, y in dataloader:         # fetch images and labels
        X, y = X.to(device), y.to(device)

        # compute prediction error
        pred = model(X)             # network output
        loss = loss_fn(pred, y)     # loss between the network output and the ground-truth targets

        # backpropagation
        optimizer.zero_grad()       # reset gradients
        loss.backward()             # backpropagate
        optimizer.step()            # update parameters

        # record acc and loss
        train_acc += (pred.argmax(1) == y).type(torch.float).sum().item()
        train_loss += loss.item()

    train_acc /= size
    train_loss /= num_batches

    return train_acc, train_loss

  
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)  # size of the test set
    num_batches = len(dataloader)   # number of batches
    test_loss, test_acc = 0, 0

    # stop gradient tracking while not training, saving memory and compute
    with torch.no_grad():
        for imgs, target in dataloader:
            imgs, target = imgs.to(device), target.to(device)

            # compute loss
            target_pred = model(imgs)
            loss = loss_fn(target_pred, target)

            test_loss += loss.item()
            test_acc += (target_pred.argmax(1) == target).type(torch.float).sum().item()

    test_acc /= size
    test_loss /= num_batches

    return test_acc, test_loss

2. Train

The optimizer is also set up here; based on experience from the previous experiments, Adam is used.


  
import copy

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss()  # create the loss function

epochs = 20

train_loss = []
train_acc = []
test_loss = []
test_acc = []

best_acc = 0  # track the best accuracy, used to pick the best model

for epoch in range(epochs):
    # update the learning rate (when using a custom schedule)
    # adjust_learning_rate(optimizer, epoch, learn_rate)

    model.train()
    epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, optimizer)
    # scheduler.step()  # update the learning rate (when using an official scheduler)

    model.eval()
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)

    # keep the best model in best_model
    if epoch_test_acc > best_acc:
        best_acc = epoch_test_acc
        best_model = copy.deepcopy(model)

    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)

    # get the current learning rate
    lr = optimizer.state_dict()['param_groups'][0]['lr']

    template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}, Lr:{:.2E}')
    print(template.format(epoch + 1, epoch_train_acc * 100, epoch_train_loss,
                          epoch_test_acc * 100, epoch_test_loss, lr))

# save the best model's weights to a file
PATH = './best_model.pth'  # file name for the saved weights
torch.save(best_model.state_dict(), PATH)

print('Done')

I ran into a problem: RuntimeError: CUDA out of memory. I have hit this before; my GPU (a 3050 Ti) is fairly modest, but halving the batch size fixes it. For this experiment, changing batch_size from 32 to 16 was enough to run, as in the sketch below.
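A minimal sketch of that fix (reusing the datasets defined in section I; only batch_size changes):

batch_size = 16  # halved from 32 to fit into GPU memory

train_dl = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=0)
test_dl = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                      shuffle=True, num_workers=0)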

IV. Visualizing the Results

1. Loss and accuracy curves


  
import matplotlib.pyplot as plt
# hide warnings
import warnings
warnings.filterwarnings("ignore")             # suppress warnings

plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False    # display minus signs correctly
plt.rcParams['figure.dpi'] = 100              # resolution

epochs_range = range(epochs)

plt.figure(figsize=(12, 3))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, train_acc, label='Training Accuracy')
plt.plot(epochs_range, test_acc, label='Test Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, train_loss, label='Training Loss')
plt.plot(epochs_range, test_loss, label='Test Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

plt.show()

2. Predict on a specified image


  
from PIL import Image

classes = list(total_data.class_to_idx)

def predict_one_image(image_path, model, transform, classes):
    test_img = Image.open(image_path).convert('RGB')
    plt.imshow(test_img)  # show the image being predicted

    test_img = transform(test_img)
    img = test_img.to(device).unsqueeze(0)

    model.eval()
    output = model(img)

    _, pred = torch.max(output, 1)
    pred_class = classes[pred]
    print(f'Predicted class: {pred_class}')

# predict one photo from the training set
predict_one_image(image_path='./data/sunrise/sunrise8.jpg',
                  model=model,
                  transform=train_transforms,
                  classes=classes)

3. Model evaluation

Previously we just read off the accuracy of the last few epochs, but since the values fluctuate quite a bit it is hard to pick out the best epoch that way. Instead, we run the test function on the saved best model and compare its returned values.


  
best_model.eval()
epoch_test_acc, epoch_test_loss = test(test_dl, best_model, loss_fn)
print(epoch_test_acc, epoch_test_loss)
print(epoch_test_acc)

V. Improving the Model

The C3 module

Role:

1. In newer versions of YOLOv5 the author replaced the BottleneckCSP module with the C3 module. Both follow the same CSP architecture and serve the same purpose; they differ only in the choice of the correction unit. C3 consists of three standard convolution layers plus several Bottleneck modules (the number is determined by the product of the n and depth_multiple parameters in the .yaml configuration file).

2. Compared with BottleneckCSP, C3 removes the Conv module after the residual output, and the activation function in the standard convolution after the concat changes from LeakyReLU to SiLU.

3. This module is the main block for learning residual features. It has two branches: one passes through the stack of Bottleneck modules mentioned above together with the standard convolution layers, while the other goes through only a single basic convolution module; the two branches are then concatenated.


  
class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))

Improvement: change the number of Bottleneck layers from 3 to 4, as in the sketch below.

With this change the final accuracy improved by roughly 7%.
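A minimal sketch of that modification (the class name model_K4 is just illustrative; apart from the n argument of C3 it matches model_K above):

class model_K4(nn.Module):
    def __init__(self):
        super().__init__()
        self.Conv = Conv(3, 32, 3, 2)
        self.C3_1 = C3(32, 64, 4)  # n=4 stacked Bottleneck layers instead of 3
        self.classifier = nn.Sequential(
            nn.Linear(in_features=802816, out_features=100),
            nn.ReLU(),
            nn.Linear(in_features=100, out_features=4)
        )

    def forward(self, x):
        x = self.Conv(x)
        x = self.C3_1(x)
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

model = model_K4().to(device)  # retrain with the same loop as above to compare accuracy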


Reposted from: https://blog.csdn.net/m0_62237233/article/details/128135260