
Accelerating neural network inference: the principle of merging convolution and BN layers, with experiments


Reposted from: https://blog.csdn.net/u014114990/article/details/90665141

1. Why merge the BN layer

When training deep networks, a BN (Batch Normalization) layer speeds up convergence and helps control overfitting; it is usually placed right after a convolution layer. By normalizing the data, BN also effectively mitigates the vanishing and exploding gradient problems. But while BN helps during training, at inference time it adds extra per-layer computation, which hurts forward-pass performance, and its parameters take up additional memory or GPU memory. Since many state-of-the-art architectures (ResNet, MobileNet, Xception, ShuffleNet, etc.) use BN, it is worth folding the BN parameters into the preceding convolution layer to speed up forward inference.

2. The math behind merging a BN layer into a convolution layer

In a convolution layer, each output channel computes

    y = W * x + b

At inference time, BN applies a fixed per-channel affine transform using the stored running mean μ and variance σ²; in Caffe, the learned scale γ and shift β live in a separate Scale layer that follows the BatchNorm layer:

    y_bn = γ · (y − μ) / √(σ² + ε) + β

Substituting y = W * x + b and writing α = γ / √(σ² + ε), this is again an ordinary convolution with merged parameters:

    W_merged = α · W
    b_merged = α · (b − μ) + β

So BN (and Scale) can be folded into the preceding convolution exactly, with no change to the network's output. One Caffe-specific detail: the BatchNorm layer stores three blobs: the accumulated mean, the accumulated variance, and a moving-average scale factor, so the true running statistics are the first two blobs divided by that factor. This is why the script below divides by `num_bn_samples[0]`, and ε = 1e-5 is Caffe's default BN epsilon.
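As a sanity check (not part of the original post), here is a minimal numpy sketch verifying that the folding is exact. The "convolution" is reduced to a per-output-channel dot product, which has the same algebra as a real convolution; all shapes and values are illustrative.

```python
# Verify that folding BN + Scale into the preceding conv is exact.
import numpy as np

rng = np.random.RandomState(0)
C_out, C_in, eps = 4, 3, 1e-5

W = rng.randn(C_out, C_in)       # conv weights
b = rng.randn(C_out)             # conv bias
mean = rng.randn(C_out)          # BN running mean
var = rng.rand(C_out) + 0.5      # BN running variance (kept positive)
gamma = rng.randn(C_out)         # Scale layer weight
beta = rng.randn(C_out)          # Scale layer bias

x = rng.randn(C_in)

# Reference path: conv followed by BN + Scale
y_conv = W.dot(x) + b
y_bn = gamma * (y_conv - mean) / np.sqrt(var + eps) + beta

# Folded path: absorb BN/Scale into the conv parameters
alpha = gamma / np.sqrt(var + eps)
W_folded = W * alpha[:, None]
b_folded = alpha * (b - mean) + beta
y_folded = W_folded.dot(x) + b_folded

assert np.allclose(y_bn, y_folded)  # identical up to float rounding
```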

3. Experimental results

Hardware: GTX 1080Ti GPU, Intel i7 CPU

The experiment compares a ResNet50 model before and after merging the BN layers: classification accuracy is unchanged, while inference speed improves noticeably.

| Model | CPU forward time | GPU forward time |
|---|---|---|
| ResNet50 (before merging) | 176.17 ms | 11.03 ms |
| ResNet50 (after merging) | 161.69 ms | 7.3 ms |
| Speedup | 10% | 51% |
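For reference, timings like these can be reproduced with pycaffe by averaging repeated forward passes after a warm-up. The sketch below is not from the original post; the model file names and iteration count are placeholders.

```python
# Rough forward-pass timing with pycaffe (illustrative paths and counts).
import time
import caffe

caffe.set_mode_gpu()   # use caffe.set_mode_cpu() for the CPU numbers
net = caffe.Net('resnet50_deploy.prototxt', 'resnet50.caffemodel', caffe.TEST)

net.forward()          # warm-up pass, excluded from the measurement

n_iter = 100
start = time.time()
for _ in range(n_iter):
    net.forward()
print('average forward time: {:.2f} ms'.format(
    (time.time() - start) / n_iter * 1000))
```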

4. The Python merge script

The script requires Caffe's Python interface (pycaffe) and is written for Python 2.


 
```python
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Merge BatchNorm (and Scale) layers into the preceding Convolution layers
# of a Caffe model, then remove the now-redundant BN/Scale layers.

import numpy as np
import os.path as osp
import google.protobuf as pb
import google.protobuf.text_format
from argparse import ArgumentParser
import caffe

caffe.set_mode_cpu()


def load_and_fill_biases(src_model, src_weights, dst_model, dst_weights):
    """Rewrite the prototxt so every Convolution layer has a bias term
    (the merged BN offset is written into it later), then copy the original
    weights into the new net."""
    with open(src_model) as f:
        model = caffe.proto.caffe_pb2.NetParameter()
        pb.text_format.Merge(f.read(), model)

    for i, layer in enumerate(model.layer):
        if layer.type == 'Convolution':  # or layer.type == 'Scale':
            # Add a zero-initialized bias if the layer has none
            if layer.convolution_param.bias_term == False:
                layer.convolution_param.bias_term = True
                layer.convolution_param.bias_filler.type = 'constant'
                layer.convolution_param.bias_filler.value = 0.0

    with open(dst_model, 'w') as f:
        f.write(pb.text_format.MessageToString(model))

    caffe.set_mode_cpu()
    net_src = caffe.Net(src_model, src_weights, caffe.TEST)
    net_dst = caffe.Net(dst_model, caffe.TEST)
    for key in net_src.params.keys():
        for i in range(len(net_src.params[key])):
            net_dst.params[key][i].data[:] = net_src.params[key][i].data[:]

    if dst_weights is not None:
        # Store params
        pass

    return net_dst


def merge_conv_and_bn(net, i_conv, i_bn, i_scale):
    # This is based on Kyeheyon's work
    assert i_conv is not None
    assert i_bn is not None

    def copy_double(data):
        return np.array(data, copy=True, dtype=np.double)

    key_conv = net._layer_names[i_conv]
    key_bn = net._layer_names[i_bn]
    key_scale = net._layer_names[i_scale] if i_scale is not None else None

    # Copy the BN statistics. Caffe's BatchNorm stores the accumulated mean,
    # the accumulated variance, and a moving-average scale factor (blob 2).
    bn_mean = copy_double(net.params[key_bn][0].data)
    bn_variance = copy_double(net.params[key_bn][1].data)
    num_bn_samples = copy_double(net.params[key_bn][2].data)

    # ... and neutralize the BN layer so it becomes an identity
    net.params[key_bn][0].data[:] = 0
    net.params[key_bn][1].data[:] = 1
    net.params[key_bn][2].data[:] = 1

    if num_bn_samples[0] == 0:
        num_bn_samples[0] = 1

    if key_scale in net.params:
        print 'Combine {:s} + {:s} + {:s}'.format(key_conv, key_bn, key_scale)
        scale_weight = copy_double(net.params[key_scale][0].data)
        scale_bias = copy_double(net.params[key_scale][1].data)
        # Neutralize the Scale layer as well
        net.params[key_scale][0].data[:] = 1
        net.params[key_scale][1].data[:] = 0
    else:
        print 'Combine {:s} + {:s}'.format(key_conv, key_bn)
        scale_weight = 1
        scale_bias = 0

    weight = copy_double(net.params[key_conv][0].data)
    bias = copy_double(net.params[key_conv][1].data)

    # alpha = gamma / sqrt(var + eps); 1e-5 is Caffe's default BN epsilon
    alpha = scale_weight / np.sqrt(bn_variance / num_bn_samples[0] + 1e-5)
    net.params[key_conv][1].data[:] = bias * alpha + (scale_bias - (bn_mean / num_bn_samples[0]) * alpha)
    for i in range(len(alpha)):
        net.params[key_conv][0].data[i] = weight[i] * alpha[i]


def merge_batchnorms_in_net(net):
    # For each BN layer, find the Convolution that feeds it and fold it in
    for i, layer in enumerate(net.layers):
        if layer.type != 'BatchNorm':
            continue

        l_name = net._layer_names[i]

        l_bottom = net.bottom_names[l_name]
        assert len(l_bottom) == 1
        l_bottom = l_bottom[0]
        l_top = net.top_names[l_name]
        assert len(l_top) == 1
        l_top = l_top[0]

        can_be_absorbed = True
        conv_ind = None

        # Search backwards for the layer producing this BN's bottom blob
        for j in xrange(i - 1, -1, -1):
            tops_of_j = net.top_names[net._layer_names[j]]
            if l_bottom in tops_of_j:
                if net.layers[j].type not in ['Convolution', 'InnerProduct']:
                    can_be_absorbed = False
                else:
                    # There must be only one such layer
                    conv_ind = j
                break

        if not can_be_absorbed or conv_ind is None:
            continue

        # Find the Scale layer that follows this BN, if any
        scale_ind = None
        for j in xrange(i + 1, len(net.layers)):
            bottoms_of_j = net.bottom_names[net._layer_names[j]]
            if l_top in bottoms_of_j:
                if scale_ind is not None:
                    # Followed by two or more layers
                    scale_ind = None
                    break

                if net.layers[j].type in ['Scale']:
                    scale_ind = j

                    top_of_j = net.top_names[net._layer_names[j]][0]
                    if top_of_j == bottoms_of_j[0]:
                        # In-place Scale => can be merged
                        break
                else:
                    # Followed by a layer which is not 'Scale'
                    scale_ind = None
                    break

        merge_conv_and_bn(net, conv_ind, i, scale_ind)

    return net


def process_model(net, src_model, dst_model, func_loop, func_finally):
    """Apply every function in func_loop to each layer of the prototxt,
    then every function in func_finally, and write the result."""
    with open(src_model) as f:
        model = caffe.proto.caffe_pb2.NetParameter()
        pb.text_format.Merge(f.read(), model)

    for i, layer in enumerate(model.layer):
        for func in func_loop:
            func(layer, net, model, i)

    for func in func_finally:
        func(net, model)

    with open(dst_model, 'w') as f:
        f.write(pb.text_format.MessageToString(model))


# Functions to remove the (now redundant) BN and Scale layers
to_delete_empty = []
def pick_empty_layers(layer, net, model, i):
    if layer.type not in ['BatchNorm', 'Scale']:
        return

    bottom = layer.bottom[0]
    top = layer.top[0]

    if bottom != top:
        # Not supported yet
        return

    if layer.type == 'BatchNorm':
        zero_mean = np.all(net.params[layer.name][0].data == 0)
        one_var = np.all(net.params[layer.name][1].data == 1)

        if zero_mean and one_var:
            print 'Delete layer: {}'.format(layer.name)
            to_delete_empty.append(layer)

    if layer.type == 'Scale':
        no_scaling = np.all(net.params[layer.name][0].data == 1)
        zero_bias = np.all(net.params[layer.name][1].data == 0)

        if no_scaling and zero_bias:
            print 'Delete layer: {}'.format(layer.name)
            to_delete_empty.append(layer)


def remove_empty_layers(net, model):
    for layer in to_delete_empty:
        model.layer.remove(layer)


# A function to add 'engine: CAFFE' param into 1x1 convolutions
def set_engine_caffe(layer, net, model, i):
    if layer.type == 'Convolution':
        # kernel_size is a repeated field in the Caffe proto
        ks = layer.convolution_param.kernel_size
        if (len(ks) > 0 and all(k == 1 for k in ks)) \
                or (layer.convolution_param.kernel_h == layer.convolution_param.kernel_w == 1):
            layer.convolution_param.engine = dict(layer.convolution_param.Engine.items())['CAFFE']


def main():
    # Set default output file names
    if args.output_model is None:
        file_name = osp.splitext(args.model)[0]
        args.output_model = file_name + '_inference.prototxt'
    if args.output_weights is None:
        file_name = osp.splitext(args.weights)[0]
        args.output_weights = file_name + '_inference.caffemodel'

    net = load_and_fill_biases(args.model, args.weights, args.model + '.temp.pt', None)
    net = merge_batchnorms_in_net(net)

    process_model(net, args.model + '.temp.pt', args.output_model,
                  [pick_empty_layers, set_engine_caffe],
                  [remove_empty_layers])

    # Store params
    net.save(args.output_weights)


if __name__ == '__main__':
    parser = ArgumentParser(
        description="Generate Batch Normalized model for inference")
    parser.add_argument('--model', default="MobileNetSSD_deploy.prototxt", help="The net definition prototxt")
    parser.add_argument('--weights', default="MobileNetSSD_deploy.caffemodel", help="The weights caffemodel")
    parser.add_argument('--output_model')
    parser.add_argument('--output_weights')
    args = parser.parse_args()
    main()
```
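To use it, point the script at a deploy prototxt and the matching caffemodel; by default it writes the merged network next to the inputs with an `_inference` suffix (see `main()` above). For example (the script and model file names here are just placeholders): `python merge_bn.py --model ResNet-50-deploy.prototxt --weights ResNet-50-model.caffemodel`.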

Script download:

https://download.csdn.net/download/kangdi7547/10578152


Reposted from: https://blog.csdn.net/haima1998/article/details/101386218