
Cat and Dog Image Recognition Based on a CNN (Convolutional Neural Network)


Contents

Part 1: Dataset Preparation

Part 2: Reading Your Own Dataset

Part 3: Building the Network and Training the Model

Part 4: Cat and Dog Image Recognition


Part 1: Dataset Preparation

Downloading the dataset from the official site is a bit of a hassle; you can get it from Baidu Netdisk via the link below instead:

https://pan.baidu.com/s/13hw4LK8ihR6-6-8mpjLKDA

Password: dmp4

The download contains the cat-and-dog image recognition dataset: a folder of JPEG images named cat.<n>.jpg and dog.<n>.jpg.
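
Once the archive is extracted, a quick file count is a handy sanity check. The sketch below is my own addition; it assumes the images sit in ./train/train (the path used later in this post) and are named cat.<n>.jpg / dog.<n>.jpg, which is exactly what the labeling code in Part 2 relies on.

# Count the cat and dog images in the extracted training folder.
import os

files = os.listdir("./train/train")
cats = sum(f.startswith("cat.") for f in files)
dogs = sum(f.startswith("dog.") for f in files)
print("cats:", cats, "dogs:", dogs, "total:", len(files))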

Part 2: Reading Your Own Dataset


  
import os
import cv2
import numpy as np
from tqdm import tqdm       # progress bar
from random import shuffle  # random shuffling

IMAGE_SIZE = 50

def label_img(img_name):
    # File names look like "cat.123.jpg" / "dog.456.jpg"
    label_name = img_name.split('.')[0]
    if label_name == 'cat':
        return [1, 0]
    elif label_name == 'dog':
        return [0, 1]
    return []

def create_train_data(dir_path):
    train_data = []
    # Walk the directory and collect every labeled image
    for img_path in tqdm(os.listdir(dir_path)):
        label = label_img(img_path)
        if len(label) != 0:
            img_path = os.path.join(dir_path, img_path)
            # Read as a single-channel (grayscale) image
            img_mat = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img_mat is not None:
                img = cv2.resize(img_mat, (IMAGE_SIZE, IMAGE_SIZE))
                train_data.append([np.array(img), np.array(label)])
    shuffle(train_data)
    # dtype=object keeps the ragged (image, label) pairs; load with allow_pickle=True
    np.save('mark_catDog_TrainData.npy', np.array(train_data, dtype=object))

create_train_data("./train/train")

Wait while the images are processed, until the progress bar reaches 100%.
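
If you want to verify what was written to disk, a small check like this (my own sketch, assuming the file name used above) confirms that each saved entry is a (50x50 grayscale image, one-hot label) pair:

# Inspect the saved training data.
import numpy as np

data = np.load('mark_catDog_TrainData.npy', allow_pickle=True)
print(len(data))          # number of labeled images
print(data[0][0].shape)   # (50, 50) grayscale image
print(data[0][1])         # [1 0] for a cat, [0 1] for a dog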

Part 3: Building the Network and Training the Model

1. Import the libraries


  
import tflearn
import numpy as np
from tflearn.layers.estimator import regression
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, fully_connected, dropout

2. Build the network


  
IMAGE_SIZE = 50  # same size used when building the dataset in Part 2

# Build the network: convolution -> activation -> pooling -> fully connected
# Input layer
conv_input = input_data([None, IMAGE_SIZE, IMAGE_SIZE, 1], name='input')
# First convolutional layer
conv1 = conv_2d(conv_input, 32, 5, activation='relu')
conv1 = max_pool_2d(conv1, 2)
# Second convolutional layer
conv2 = conv_2d(conv1, 64, 5, activation='relu')
conv2 = max_pool_2d(conv2, 2)
# Third convolutional layer
conv3 = conv_2d(conv2, 1024, 5, activation='relu')
conv3 = max_pool_2d(conv3, 2)
# Fully connected layer 1
fully_layer1 = fully_connected(conv3, 1024, activation='relu')
# Dropout to reduce overfitting
fully_layer1 = dropout(fully_layer1, 0.6)
# Fully connected layer 2 (output)
fully_layer2 = fully_connected(fully_layer1, 2, activation='softmax')
# Loss function and optimizer
model_net = regression(fully_layer2, optimizer='adam', learning_rate=0.0005,
                       loss='categorical_crossentropy', name='model_net')
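
For intuition: the convolutions use TFLearn's default 'same' padding with stride 1, so only the 2x2 max-pooling layers shrink the feature maps, roughly halving the spatial size each time. A quick check of the arithmetic (my own sketch):

# Spatial size after each 2x2 max pooling ('same' padding -> ceil(n / 2)).
import math

size = 50  # IMAGE_SIZE
for layer in range(1, 4):
    size = math.ceil(size / 2)
    print(f"after pooling layer {layer}: {size}x{size}")
# Prints 25, 13, 7: the final 7x7x1024 feature maps feed fully_layer1.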

3. Split the dataset


  
# Create the model (TensorBoard logs go to ./log)
model = tflearn.DNN(model_net, tensorboard_dir='log')
# Load the saved .npy training data
train_data = np.load('mark_catDog_TrainData.npy', allow_pickle=True)
print(train_data, len(train_data), type(train_data))
# Split the data: hold out the last 30 samples for validation
train = train_data[:-30]
test = train_data[-30:]
X = np.array([i[0] for i in train]).reshape((-1, IMAGE_SIZE, IMAGE_SIZE, 1))
Y = [i[1] for i in train]
x_test = np.array([i[0] for i in test]).reshape((-1, IMAGE_SIZE, IMAGE_SIZE, 1))
y_test = [i[1] for i in test]
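
Note that only the last 30 samples are held out here, so the validation accuracy will be noisy. A percentage-based split (my own variation, not part of the original post) gives a more stable estimate:

# Alternative split: hold out 10% of the shuffled data instead of a fixed 30 samples.
split_idx = int(len(train_data) * 0.9)
train, test = train_data[:split_idx], train_data[split_idx:]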

4. Train and save the model


  
# Train the model
model.fit({'input': X},
          {'model_net': Y},
          n_epoch=2,
          validation_set=({'input': x_test}, {'model_net': y_test}),
          snapshot_step=10,
          show_metric=True,
          run_id="model_class")
# Save the model (create the model/ folder first if it does not exist)
model.save("model/cat-dog.model")

Training takes quite a while, so be patient.

Once training finishes, the model is saved.
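
Before moving on, you can check the trained model against the held-out samples with TFLearn's DNN.evaluate (a minimal sketch reusing the x_test / y_test arrays built above; with only 30 samples, treat the number as a rough estimate):

# Accuracy on the 30 held-out samples.
print(model.evaluate({'input': x_test}, {'model_net': y_test}))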

Full source code for training the model:


  
import tflearn
import numpy as np
from tflearn.layers.estimator import regression
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, fully_connected, dropout

IMAGE_SIZE = 50

# Build the network: convolution -> activation -> pooling -> fully connected
# Input layer
conv_input = input_data([None, IMAGE_SIZE, IMAGE_SIZE, 1], name='input')
# First convolutional layer
conv1 = conv_2d(conv_input, 32, 5, activation='relu')
conv1 = max_pool_2d(conv1, 2)
# Second convolutional layer
conv2 = conv_2d(conv1, 64, 5, activation='relu')
conv2 = max_pool_2d(conv2, 2)
# Third convolutional layer
conv3 = conv_2d(conv2, 1024, 5, activation='relu')
conv3 = max_pool_2d(conv3, 2)
# Fully connected layer 1
fully_layer1 = fully_connected(conv3, 1024, activation='relu')
# Dropout to reduce overfitting
fully_layer1 = dropout(fully_layer1, 0.6)
# Fully connected layer 2 (output)
fully_layer2 = fully_connected(fully_layer1, 2, activation='softmax')
# Loss function and optimizer
model_net = regression(fully_layer2, optimizer='adam', learning_rate=0.0001,
                       loss='categorical_crossentropy', name='model_net')
# Create the model (TensorBoard logs go to ./log)
model = tflearn.DNN(model_net, tensorboard_dir='log')
# Load the saved .npy training data
train_data = np.load('mark_catDog_TrainData.npy', allow_pickle=True)
print(train_data, len(train_data), type(train_data))
# Split the data: hold out the last 30 samples for validation
train = train_data[:-30]
test = train_data[-30:]
X = np.array([i[0] for i in train]).reshape((-1, IMAGE_SIZE, IMAGE_SIZE, 1))
Y = [i[1] for i in train]
x_test = np.array([i[0] for i in test]).reshape((-1, IMAGE_SIZE, IMAGE_SIZE, 1))
y_test = [i[1] for i in test]
# Train the model
model.fit({'input': X},
          {'model_net': Y},
          n_epoch=2,
          validation_set=({'input': x_test}, {'model_net': y_test}),
          snapshot_step=10,
          show_metric=True,
          run_id="model_class")
# Save the model (create the model/ folder first if it does not exist)
model.save("model/cat-dog.model")

Part 4: Cat and Dog Image Recognition

After saving the model, you can use it for testing to see how accurate the cat/dog recognition is.

You can copy a few images from the dataset into the project directory for testing:

Test 1


  
# Continues from the training script above (model and IMAGE_SIZE are already defined)
import cv2

# Convert the image to be classified into a numpy array
def classify(img_path):
    # Read as grayscale
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is not None:
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
        data = img.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
        return data

predict = model.predict(classify('dog.11392.jpg'))[0]
print(predict)
max_index = np.argmax(predict)
if max_index == 0:
    print("cat")
elif max_index == 1:
    print("dog")

The prediction identifies the image as a dog, which is correct.

Test 2


  
predict = model.predict(classify('cat.299.jpg'))[0]
print(predict)
max_index = np.argmax(predict)
if max_index == 0:
    print("cat")
elif max_index == 1:
    print("dog")

This time the prediction misclassifies the cat as a dog.

After several more tests, the accuracy turns out to be fairly low: the model is not trained well enough and needs hyperparameter tuning and more training time, for example in the optimizer settings:


  
# Loss function and optimizer
model_net = regression(fully_layer2, optimizer='adam', learning_rate=0.0005,
                       loss='categorical_crossentropy', name='model_net')

In deep learning, the learning rate (learning_rate) is usually set to 0.0001.

If you find this cat-vs-dog task interesting, try tuning the hyperparameters and stacking a few more layers yourself; if you train a noticeably better model, let me know (yes, you can read that as the author being lazy).
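
As one possible starting point for that kind of tuning (an illustrative sketch of my own, not the configuration used in this post), you could lower the learning rate to 0.0001, insert an extra convolution/pooling stage before the large 1024-filter layer, and train for more epochs:

# Hypothetical variant of the Part 3 network (reuses its imports and conv2 definition).
conv3 = conv_2d(conv2, 128, 5, activation='relu')   # extra stage
conv3 = max_pool_2d(conv3, 2)
conv4 = conv_2d(conv3, 1024, 5, activation='relu')
conv4 = max_pool_2d(conv4, 2)
fully_layer1 = fully_connected(conv4, 1024, activation='relu')
fully_layer1 = dropout(fully_layer1, 0.6)
fully_layer2 = fully_connected(fully_layer1, 2, activation='softmax')
model_net = regression(fully_layer2, optimizer='adam', learning_rate=0.0001,
                       loss='categorical_crossentropy', name='model_net')
# ...then train longer, e.g. model.fit(..., n_epoch=10, ...)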

If you are not yet familiar with loading and using a saved model, you can refer to the source code below (prepare the test image yourself and remember to change the file name):


  
import tflearn
import numpy as np
import cv2
from tflearn.layers.estimator import regression
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, fully_connected, dropout

IMAGE_SIZE = 50

# Rebuild the same network: convolution -> activation -> pooling -> fully connected
# Input layer
conv_input = input_data([None, IMAGE_SIZE, IMAGE_SIZE, 1], name='input')
# First convolutional layer
conv1 = conv_2d(conv_input, 32, 5, activation='relu')
conv1 = max_pool_2d(conv1, 2)
# Second convolutional layer
conv2 = conv_2d(conv1, 64, 5, activation='relu')
conv2 = max_pool_2d(conv2, 2)
# Third convolutional layer
conv3 = conv_2d(conv2, 1024, 5, activation='relu')
conv3 = max_pool_2d(conv3, 2)
# Fully connected layer 1
fully_layer1 = fully_connected(conv3, 1024, activation='relu')
# Dropout to reduce overfitting
fully_layer1 = dropout(fully_layer1, 0.6)
# Fully connected layer 2 (output)
fully_layer2 = fully_connected(fully_layer1, 2, activation='softmax')
# Loss function and optimizer
model_net = regression(fully_layer2, optimizer='adam', learning_rate=0.0005,
                       loss='categorical_crossentropy', name='model_net')
# Load the saved .npy training data (kept from the training script; not strictly needed for inference)
train_data = np.load('mark_catDog_TrainData.npy', allow_pickle=True)
print(train_data, len(train_data), type(train_data))
# Split the data: hold out the last 30 samples for validation
train = train_data[:-30]
test = train_data[-30:]
X = np.array([i[0] for i in train]).reshape((-1, IMAGE_SIZE, IMAGE_SIZE, 1))
Y = [i[1] for i in train]
x_test = np.array([i[0] for i in test]).reshape((-1, IMAGE_SIZE, IMAGE_SIZE, 1))
y_test = [i[1] for i in test]
# Create the model
model = tflearn.DNN(model_net, tensorboard_dir='log')
# Load the trained weights
model.load("./model/cat-dog.model")

# Convert the image to be classified into a numpy array
def classify(img_path):
    # Read as grayscale
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is not None:
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
        data = img.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
        return data

predict = model.predict(classify('cat.299.jpg'))[0]
print(predict)
max_index = np.argmax(predict)
if max_index == 0:
    print("cat")
elif max_index == 1:
    print("dog")

Reposted from: https://blog.csdn.net/m0_56051805/article/details/128420677