I. Adding the center loss layer
You can refer to the caffe-face project on GitHub: https://github.com/ydwen/caffe-face
Add the layer's hpp, cpp, and cu files, modify caffe.proto accordingly, and recompile Caffe.
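For reference, registering the new layer in caffe.proto follows the standard pattern: add a parameter message and hook it into LayerParameter. The sketch below mirrors the caffe-face repository; the LayerParameter field ID is an example and must be checked against your own caffe.proto so it does not collide with an existing ID.

// Sketch of the caffe.proto additions (after caffe-face; verify field IDs yourself).
message CenterLossParameter {
  optional uint32 num_output = 1;              // number of classes (one center per class)
  optional FillerParameter center_filler = 2;  // initializer for the centers
  optional int32 axis = 3 [default = 1];       // axis holding the feature dimension
}

// Inside message LayerParameter, add a line such as:
//   optional CenterLossParameter center_loss_param = 147;  // ID must be unused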
II. Training a model with center loss on the MNIST dataset and testing the effect
All of the training and test files below come from the GitHub repository above.
1. Training/testing network definition: mnist_train_test.prototxt
name: "LeNet++"
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
#mean_value: 127.5
scale: 0.00390625
}
data_param {
source: "D:/caffe-master_extend/examples/mnist/mnist_train_lmdb"
batch_size: 128
backend: LMDB
}
}
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
#mean_value: 127.5
scale: 0.00390625
}
data_param {
source: "D:/caffe-master_extend/examples/mnist/mnist_test_lmdb"
batch_size: 100
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 32
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1"
type: "PReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "conv1+"
type: "Convolution"
bottom: "conv1"
top: "conv1+"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 32
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1+"
type: "PReLU"
bottom: "conv1+"
top: "conv1+"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1+"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2"
type: "PReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "conv2+"
type: "Convolution"
bottom: "conv2"
top: "conv2+"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2+"
type: "PReLU"
bottom: "conv2+"
top: "conv2+"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2+"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3"
type: "PReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv3+"
type: "Convolution"
bottom: "conv3"
top: "conv3+"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3+"
type: "PReLU"
bottom: "conv3+"
top: "conv3+"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3+"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool3"
top: "ip1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "preluip1"
type: "PReLU"
bottom: "ip1"
top: "ip1"
}
################## train ##################
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1
decay_mult: 1
}
inner_product_param {
num_output: 10
weight_filler {
type: "xavier"
}
bias_term: false
}
}
############# softmax loss ###############
layer {
name: "softmax_loss"
type: "SoftmaxWithLoss"
bottom: "ip2"
bottom: "label"
top: "softmax_loss"
}
############# center loss ###############
layer {
name: "center_loss"
type: "CenterLoss"
bottom: "ip1"
bottom: "label"
top: "center_loss"
param {
lr_mult: 1
decay_mult: 0
}
center_loss_param {
num_output: 10
center_filler {
type: "xavier"
}
}
loss_weight: 1 # the most important center loss hyperparameter: it sets the weight of the center loss relative to the softmax loss in the total loss
}
In the training prototxt, the original SoftmaxWithLoss layer is kept and the CenterLoss layer is added alongside it; both must be present, because the final loss is the softmax loss plus lambda times the center loss. This CenterLoss implementation lets you specify the number of classes directly, so unlike the softmax loss it does not need a preceding fully connected layer whose output size equals the number of classes. That is why center_loss takes ip1 as its bottom while softmax_loss takes ip2.
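Concretely, writing λ for loss_weight, the joint objective from the center loss paper (Wen et al., ECCV 2016) is

\[
L = L_S + \lambda L_C
  = -\sum_{i=1}^{m} \log \frac{e^{W_{y_i}^{\top} x_i}}{\sum_{j=1}^{n} e^{W_j^{\top} x_i}}
  + \frac{\lambda}{2} \sum_{i=1}^{m} \lVert x_i - c_{y_i} \rVert_2^2
\]

where x_i is the ip1 feature of sample i, c_{y_i} is the learned center of its class, m is the batch size, and n is the number of classes (the bias terms are omitted because ip2 is defined with bias_term: false).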
2. Solver file: mnist_solver.prototxt
net: "mnist_train_test.prototxt"
test_iter: 100
test_interval: 1000
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
lr_policy: "multistep"
gamma: 0.8
stepvalue: 5000
stepvalue: 8000
stepvalue: 10000
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 1000
snapshot_prefix: "D:/c_workspace/centerloss/model/center_1/"
# solver mode: CPU or GPU
solver_mode: GPU
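Training can then be launched with the standard command-line tool (caffe train --solver=mnist_solver.prototxt) or from Python. A minimal pycaffe sketch, assuming the solver and net prototxt paths above are valid on your machine:

# Minimal training driver via pycaffe (a sketch; paths taken from the solver above).
import caffe

caffe.set_mode_gpu()                               # matches solver_mode: GPU
solver = caffe.SGDSolver('mnist_solver.prototxt')  # reads net/snapshot settings from the solver
solver.solve()                                     # runs to max_iter, snapshotting every 1000 iterations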
3. Deployment file: mnist_deploy.prototxt
input: "data"
input_dim: 1000
input_dim: 1
input_dim: 28
input_dim: 28
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 32
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1"
type: "PReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "conv1+"
type: "Convolution"
bottom: "conv1"
top: "conv1+"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 32
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1+"
type: "PReLU"
bottom: "conv1+"
top: "conv1+"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1+"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2"
type: "PReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "conv2+"
type: "Convolution"
bottom: "conv2"
top: "conv2+"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2+"
type: "PReLU"
bottom: "conv2+"
top: "conv2+"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2+"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3"
type: "PReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv3+"
type: "Convolution"
bottom: "conv3"
top: "conv3+"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
kernel_size: 5
stride: 1
pad: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3+"
type: "PReLU"
bottom: "conv3+"
top: "conv3+"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3+"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool3"
top: "ip1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "preluip1"
type: "PReLU"
bottom: "ip1"
top: "ip1"
}
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
inner_product_param {
num_output: 10
bias_term: false
}
}
The deploy file does not need the loss layers at the end. When testing the model, we extract the features of the ip1 layer (a 2-D vector, which makes them easy to plot).
4. Testing the trained model
# -*- coding: utf-8 -*-
import caffe
import numpy as np
import matplotlib.pyplot as plt
import os
import json
cnames = {
'aliceblue': '#F0F8FF',
'gold': '#FFD700',
'goldenrod': '#DAA520',
'gray': '#808080',
'green': '#008000',
'greenyellow': '#ADFF2F',
'honeydew': '#F0FFF0',
'hotpink': '#FF69B4',
'indianred': '#CD5C5C',
'indigo': '#4B0082',
'ivory': '#FFFFF0',
'khaki': '#F0E68C',
'lavender': '#E6E6FA',
'beige': '#F5F5DC',
'purple': '#800080',
'red': '#FF0000',
'rosybrown': '#BC8F8F',
'royalblue': '#4169E1',
'saddlebrown': '#8B4513',
'salmon': '#FA8072',
'sandybrown': '#FAA460',}
def get_class(net, img_path):
    '''
    Predict the class of an image with the trained network.
    :param net: a caffe.Net loaded in TEST mode
    :param img_path: path to a 28x28 grayscale test image
    :return: predicted class label as a string
    '''
    transformer = data_transformer(net)
    im = caffe.io.load_image(img_path)
    im = im[:, :, 0]             # MNIST is grayscale: keep a single channel
    im = im.reshape(28, 28, 1)
    net.blobs['data'].data[...] = transformer.preprocess('data', im)
    out = net.forward()
    scores = np.squeeze(out["ip2"])   # class scores (ip2 is the deploy net's output)
    pred_class = str(scores.argmax())
    print(pred_class)
    return pred_class
# Feed an image through the network and return its ip1-layer feature.
def get_feat(net, img_path):
    transformer = data_transformer(net)
    im = caffe.io.load_image(img_path)
    im = im[:, :, 0]
    im = im.reshape(28, 28, 1)
    net.blobs['data'].data[...] = transformer.preprocess('data', im)
    net.forward()
    feat = net.blobs["ip1"].data.copy()  # copy the 2-D feature out of the ip1 blob
    feat = np.squeeze(feat)
    return feat
# Load the deploy definition and the trained weights.
model = "center_1"
deploy = 'D:/c_workspace/centerloss/mnist_deploy.prototxt'  # deploy file
caffe_model = 'D:/c_workspace/centerloss/model/' + model + '/_iter_10000.caffemodel'  # trained caffemodel
caffe.set_mode_gpu()
net = caffe.Net(deploy, caffe_model, caffe.TEST)
net.blobs['data'].reshape(1, 1, 28, 28)  # process one image at a time
feats = []  # collected 2-D ip1 features
def data_transformer(net):
    '''
    Build the input preprocessor (HWC float image -> CHW network input).
    '''
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))
    #transformer.set_mean("data", np.array([127.5]))
    #transformer.set_raw_scale('data', 255)
    #transformer.set_channel_swap('data', (2, 1, 0))
    return transformer
imgs_path = "E:/dataset/mnist/bmp/test"
test_imgs = os.listdir(imgs_path)
all_lb_ft = {}  # predicted class -> list of 2-D features
for index in range(len(test_imgs)):
    img = imgs_path + "/" + test_imgs[index]
    print(img)
    cls = get_class(net, img)
    feat = get_feat(net, img)
    if cls not in all_lb_ft.keys():
        all_lb_ft[cls] = []
    all_lb_ft[cls].append([float(feat[0]), float(feat[1])])
    feats.append([feat[0], feat[1]])
# (Optional) a raw 784-dimensional vector can be reshaped back to a 28x28
# image and displayed with plt.imshow(im, cmap='gray') for inspection.
print(feats)
print(all_lb_ft)
with open("D:/c_workspace/centerloss/test/record_" + model + ".json", "w") as f:
    json.dump(all_lb_ft, f)
print("Features saved to JSON.")
# Scatter-plot the 2-D ip1 features, one color per class.
for i, key in enumerate(all_lb_ft):
    feats = np.asarray(all_lb_ft[key])
    color = cnames[list(cnames.keys())[i]]
    plt.scatter(feats[:, 0], feats[:, 1], color=color)
plt.savefig("D:/c_workspace/centerloss/test/mnist_" + model + ".png")
plt.show()
Training with different values of loss_weight gives the following test results:
(1) No center loss, softmax loss only
(2) loss_weight = 0.01
(3) loss_weight = 0.05
(4) loss_weight = 0.1
(5) loss_weight = 1
The scatter plots show that with center loss, the samples of each class cluster much more tightly, and as a consequence the distances between classes also grow. The larger loss_weight is, the larger the inter-class separation becomes: center loss achieves the intended effect of enlarging inter-class distance while shrinking intra-class distance.
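To back up the visual impression with numbers, the record_*.json files saved by the test script can be used to measure cluster compactness and separation. A small sketch, assuming the JSON layout produced above (a dict mapping class label to a list of [x, y] features):

# Quantify clustering: mean intra-class spread vs. mean distance between class centers.
import json
import numpy as np

with open("D:/c_workspace/centerloss/test/record_center_1.json") as f:
    all_lb_ft = json.load(f)

centers = {}
intra = []
for cls, pts in all_lb_ft.items():
    pts = np.asarray(pts)
    c = pts.mean(axis=0)                                  # empirical class center
    centers[cls] = c
    intra.append(np.linalg.norm(pts - c, axis=1).mean())  # avg distance to own center

cs = np.asarray(list(centers.values()))
inter = [np.linalg.norm(cs[i] - cs[j])                    # pairwise center distances
         for i in range(len(cs)) for j in range(i + 1, len(cs))]
print("mean intra-class spread:", np.mean(intra))
print("mean inter-center distance:", np.mean(inter))

A smaller intra-class spread together with a larger inter-center distance indicates a stronger center loss effect.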
Reposted from: https://blog.csdn.net/sinat_33486980/article/details/101214447