小言_互联网的博客

六、项目实战---识别猫和狗

328人阅读  评论(0)

一、准备数据集

kagglecatsanddogs网上一搜一大堆,这里我就不上传了,需要的话可以私信

导包

import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile

猫和狗的照片各12500张

print(len(os.listdir('./temp/cats/')))
print(len(os.listdir('./temp/dogs/')))
"""
12500
12500
"""

生成训练数据文件夹和测试数据文件夹

import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile


def create_dir(file_dir):
    if os.path.exists(file_dir):
        print("True")
        shutil.rmtree(file_dir)#删除再创建
        os.makedirs(file_dir)
    else:
        os.makedirs(file_dir)


cat_source_dir = "./temp/cats/"
train_cats_dir = "./temp/train/cats/"
test_cats_dir = "./temp/test/cats/"

dot_source_dir = "./temp/dogs/"
train_dogs_dir = "./temp/train/dogs/"
test_dogs_dir = "./temp/test/dogs/"


create_dir(train_cats_dir)#创建猫的训练集文件夹
create_dir(test_cats_dir)#创建猫的测试集文件夹
create_dir(train_dogs_dir)#创建狗的训练集文件夹
create_dir(test_dogs_dir)#创建狗的测试集文件夹

"""
True
True
True
True
"""


将总的猫狗图像按9:1分成训练集和测试集,猫和狗各12500张
最终temp/train/catstemp/train/dogs两个文件夹下各12500 * 0.9=11250张
temp/test/catstemp/test/dogs这两个文件夹下各12500 * 0.1=1250张
cats和dogs为总共的猫狗图像
test和train为准备的数据集文件

import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile


def split_data(source,train,test,split_size):
    files = []
    for filename in os.listdir(source):
        file = source + filename
        if os.path.getsize(file)>0:
            files.append(filename)
        else:
            print(filename + "is zero file,please ignoring")
            
        train_length = int(len(files)*split_size)
        test_length = int(len(files)-train_length)
        shuffled_set = random.sample(files,len(files))
        train_set = shuffled_set[0:train_length]
        test_set = shuffled_set[-test_length:]
        
        for filename in train_set:
            this_file = source + filename
            destination = train + filename
            copyfile(this_file,destination)
            
        for filename in test_set:
            this_file = source + filename
            destination = test + filename
            copyfile(this_file,destination)


cat_source_dir = "./temp/cats/"
train_cats_dir = "./temp/train/cats/"
test_cats_dir = "./temp/test/cats/"

dot_source_dir = "./temp/dogs/"
train_dogs_dir = "./temp/train/dogs/"
test_dogs_dir = "./temp/test/dogs/"


split_size = 0.9
split_data(cat_source_dir,train_cats_dir,test_cats_dir,split_size)
split_data(dog_source_dir,train_dogs_dir,test_dogs_dir,split_size)

二、模型的搭建和训练

先对数据进行归一化操作,预处理进行优化一下

import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile


train_dir = "./temp/train/"
train_datagen = ImageDataGenerator(rescale=1.0/255.0)#优化网络,先进行归一化操作
train_generator = train_datagen.flow_from_directory(train_dir,batch_size=100,class_mode='binary',target_size=(150,150))#二分类,训练样本的输入的要一致

validation_dir = "./temp/test/"
validation_datagen = ImageDataGenerator(rescale=1.0/255.0)
validation_generator = validation_datagen.flow_from_directory(validation_dir,batch_size=100,class_mode='binary',target_size=(150,150))
"""
Found 22500 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.
"""

搭建模型架构

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16,(3,3),activation='relu',input_shape=(150,150,3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32,(3,3),activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64,(3,3),activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512,activation='relu'),
    tf.keras.layers.Dense(1,activation='sigmoid')
])
model.compile(optimizer=RMSprop(lr=0.001),loss='binary_crossentropy',metrics=['acc'])

训练模型
225:因为数据一共22500张,猫和狗各12500张,其对于训练集个11250张,故训练集共22500张,在预处理第一段代码中,batch_size=100设置了一批100个,故总共应该有225批
epochs=2:两轮,也就是所有的样本全部训练一次
每轮包含225批,每一批有100张样本

history = model.fit_generator(train_generator,
                             epochs=2,#进行2轮训练,每轮255批
                             verbose=1,#要不记录每次训练的日志,1表示记录
                             validation_data=validation_generator)
                             
"""
Instructions for updating:
Use tf.cast instead.
Epoch 1/2
131/225 [================>.............] - ETA: 2:03 - loss: 0.7204 - acc: 0.6093
"""

history是模型运行过程的结果

三、分析训练结果

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

epoch太少了,导致是直线,多训练几轮实际应该是折线图
准确率

plt.plot(epochs,acc,'r',"training accuracy")
plt.plot(epochs,val_acc,'b',"validation accuracy")
plt.title("training and validation accuracy")
plt.figure()


损失值

plt.plot(epochs,loss,'r',"training loss")
plt.plot(epochs,val_loss,'b',"validation loss")
plt.figure()

四、模型的使用验证

import numpy as np
from google.colab import files
from tensorflow.keras.preprocessing import image

uploaded = files.upload()
for fn in uploaded.keys():
    path = 'G:/Juptyer_workspace/Tensorflow_mooc/sjj/test/' + fn#该路径为要用模型测试的路径
    img = image.load_img(path,target_size=(150,150))
    x = image.img_to_array(img)#多维数组
    x = np.expand_dims(x,axis=0)#拉伸
    
    images = np.vstack([x])#水平方向拉直
    classes = model.predict(images,batch_size=10)
    print(classes[0])
    if classes[0]>0.5:
        print(fn + "it is a dog")
    else:
        print(fn + "it is a cat")

转载:https://blog.csdn.net/qq_41264055/article/details/125460998
查看评论
* 以上用户言论只代表其个人观点,不代表本网站的观点或立场