tensorflow：使用TensorFlow实现神经网络模型的创建

2020-10-23 06:41 754人阅读评论(0)

神经网络

1.全连接层

输出向量为𝒐 = [𝑜1, 𝑜2]。整个网络层可以通过一次矩阵运算完成：

$O = X@W + \boldsymbol{b}$

1.1 张量方式实现

在 TensorFlow 中，要实现全连接层，只需要定义好权值张量 W 和偏置张量 b，并利用TensorFlow 提供的批量矩阵相乘函数 tf.matmul()即可完成网络层的计算。如下代码创建输入 X 矩阵为𝑏 = 2个样本，每个样本的输入特征长度为𝑑𝑖𝑛 = 784，输出节点数为𝑑𝑜𝑢𝑡 =256，故定义权值矩阵 W 的 shape 为[784,256]，并采用正态分布初始化 W；偏置向量 b 的 shape 定义为[256]，在计算完X@W后相加即可，最终全连接层的输出 O 的 shape 为 [2,256]，即 2个样本的特征，每个特征长度为 256。

import tensorflow as tf
from matplotlib import pyplot as plt
plt.rcParams['font.size'] = 16
plt.rcParams['font.family'] = ['STKaiti']
plt.rcParams['axes.unicode_minus'] = False

# 创建 W,b 张量
x = tf.random.normal([2,784])
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
# 线性变换
o1 = tf.matmul(x,w1) + b1  
# 激活函数
o1 = tf.nn.relu(o1)

1.2 层方式实现

TensorFlow 中有更加高层、使用更方便的层实现方式：layers.Dense(units, activation)，只需要指定输出节点数 units 和激活函数类型即可。输入节点数将根据第一次运算时的输入 shape 确定，同时根据输入、输出节点数自动创建并初始化权值矩阵 W 和偏置向量 b，使用非常方便。其中 activation 参数指定当前层的激活函数，可以为常见的激活函数或自定义激活函数，也可以指定为 None，即不使用激活函数。

x = tf.random.normal([4,28*28])
# 导入层模块
from tensorflow.keras import layers 
# 创建全连接层，指定输出节点数和激活函数
fc = layers.Dense(512, activation=tf.nn.relu)
# 通过 fc 类实例完成一次全连接层的计算，返回输出张量
h1 = fc(x)

上述通过一行代码即可以创建一层全连接层 fc，并指定输出节点数为 512，输入的节点数在fc(x)计算时自动获取，并创建内部权值张量 $W$ 和偏置张量 $\mathbf{b}$ 。我们可以通过类内部的成员名 kernel 和 bias 来获取权值张量 $W$ 和偏置张量 $\mathbf{b}$ 对象

# 获取 Dense 类的权值矩阵
fc.kernel

<tf.Variable 'dense_1/kernel:0' shape=(784, 512) dtype=float32, numpy=
array([[-0.06443337, -0.0205344 ,  0.0111495 , ...,  0.03467645,
         0.05734177, -0.04738677],
       [-0.0453011 , -0.0600119 , -0.01896609, ...,  0.00871194,
        -0.04120795, -0.05477473],
       [-0.00870857,  0.03563788, -0.06142728, ...,  0.0419993 ,
        -0.00972366, -0.00750636],
       ...,
       [-0.02801137, -0.0115794 ,  0.06600933, ..., -0.03404392,
        -0.03490314,  0.01931299],
       [-0.01084805,  0.05528106, -0.0051664 , ..., -0.0058347 ,
         0.02473629, -0.04545905],
       [ 0.04825485,  0.01886629,  0.00533567, ...,  0.02645993,
        -0.04923414, -0.05979132]], dtype=float32)>

# 获取 Dense 类的偏置向量
fc.bias

# 待优化参数列表
fc.trainable_variables

实际上，网络层除了保存了待优化张量 trainable_variables，还有部分层包含了不参与梯度优化的张量。如果希望获得所有参数列表，可以通过类的 variables 返回所有内部张量列表：

# 返回所有参数列表
fc.variables

对于全连接层，内部张量都参与梯度优化，故 variables 返回列表与 trainable_variables 一样。

利用网络层类对象进行前向计算时，只需要调用类的__call__方法即可，即写成 fc(x)方式，它会自动调用类的__call__方法，在__call__方法中自动调用 call 方法，全连接层类在 call 方法中实现了𝜎(𝑋@𝑊 + 𝒃)的运算逻辑，最后返回全连接层的输出张量。

2.神经网络

如下图所示，通过堆叠 4 个全连接层，可以获得层数为 4 的神经网络，由于每层均为全连接层，称为全连接网络。其中第 1~3 个全连接层在网络中间，称之为隐藏层 1,2,3，最后一个全连接层的输出作为网络的输出，称为输出层。隐藏层 1,2,3 的输出节点数分别为[256,128,64]，输出层的输出节点数为 10

2.1 张量方式实现

# Hidden layer 1 parameters: 784 inputs -> 256 outputs
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
# Hidden layer 2 parameters: 256 -> 128
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
# Hidden layer 3 parameters: 128 -> 64
w3 = tf.Variable(tf.random.truncated_normal([128, 64], stddev=0.1))
b3 = tf.Variable(tf.zeros([64]))
# Output layer parameters: 64 -> 10
w4 = tf.Variable(tf.random.truncated_normal([64, 10], stddev=0.1))
b4 = tf.Variable(tf.zeros([10]))

with tf.GradientTape() as tape:  # gradient recorder wrapping the forward pass
    # x: [b, 28*28]
    # Hidden layer 1 forward pass, [b, 28*28] => [b, 256].
    # The bias is broadcast over the batch dimension automatically, exactly
    # as in the layers below, so no explicit tf.broadcast_to is required.
    h1 = x@w1 + b1
    h1 = tf.nn.relu(h1)
    # Hidden layer 2 forward pass, [b, 256] => [b, 128]
    h2 = h1@w2 + b2
    h2 = tf.nn.relu(h2)
    # Hidden layer 3 forward pass, [b, 128] => [b, 64]
    h3 = h2@w3 + b3
    h3 = tf.nn.relu(h3)
    # Output layer forward pass, [b, 64] => [b, 10]; no activation is applied
    h4 = h3@w4 + b4

2.2 层方式实现

# 导入常用网络层 layers
from tensorflow.keras import layers
# 隐藏层 1
fc1 = layers.Dense(256, activation=tf.nn.relu) 
# 隐藏层 2
fc2 = layers.Dense(128, activation=tf.nn.relu) 
# 隐藏层 3
fc3 = layers.Dense(64, activation=tf.nn.relu) 
# 输出层
fc4 = layers.Dense(10, activation=None)

x = tf.random.normal([4,28*28])
# 通过隐藏层 1 得到输出
h1 = fc1(x) 
# 通过隐藏层 2 得到输出
h2 = fc2(h1) 
# 通过隐藏层 3 得到输出
h3 = fc3(h2) 
# 通过输出层得到网络输出
h4 = fc4(h3)

对于这种数据依次向前传播的网络，也可以通过 Sequential 容器封装成一个网络大类对象，调用大类的前向计算函数一次即可完成所有层的前向计算，使用起来更加方便。

# 导入 Sequential 容器
from tensorflow.keras import layers,Sequential
# 通过 Sequential 容器封装为一个网络类
model = Sequential([
    layers.Dense(256, activation=tf.nn.relu) , # 创建隐藏层 1
    layers.Dense(128, activation=tf.nn.relu) , # 创建隐藏层 2
    layers.Dense(64, activation=tf.nn.relu) , # 创建隐藏层 3
    layers.Dense(10, activation=None) , # 创建输出层
])

out = model(x) # 前向计算得到输出

3.激活函数

3.1 Sigmoid

$\text{Sigmoid}(x) \triangleq \frac{1}{1 + e^{-x}}$

# 构造-6~6 的输入向量
x = tf.linspace(-6.,6.,10)
x

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([-6.       , -4.6666665, -3.3333333, -2.       , -0.6666665,
        0.666667 ,  2.       ,  3.333334 ,  4.666667 ,  6.       ],
      dtype=float32)>

# 通过 Sigmoid 函数
sigmoid_y = tf.nn.sigmoid(x)
sigmoid_y

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([0.00247264, 0.00931591, 0.03444517, 0.11920291, 0.33924365,
       0.6607564 , 0.8807971 , 0.96555483, 0.99068403, 0.99752736],
      dtype=float32)>

def set_plt_ax():
    """Restyle the current matplotlib axes so both axis lines cross at the origin.

    The top and right spines are made invisible, ticks are drawn on the
    bottom and left spines only, and those two spines are moved to data
    coordinate 0.
    """
    axes = plt.gca()  # the axes that subsequent plt.plot calls draw on
    spines = axes.spines

    # A colour of 'none' effectively erases the right and top borders.
    for hidden_side in ('right', 'top'):
        spines[hidden_side].set_color('none')

    # The bottom spine acts as the x axis and the left spine as the y axis.
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')

    # Anchor both remaining spines at data value 0 of the other axis.
    for axis_side in ('bottom', 'left'):
        spines[axis_side].set_position(('data', 0))

set_plt_ax()
plt.plot(x, sigmoid_y, color='C4', label='Sigmoid')
plt.xlim(-6, 6)
plt.ylim(0, 1)
plt.legend(loc=2)
plt.show()

findfont: Font family ['STKaiti'] not found. Falling back to DejaVu Sans.

3.2 ReLU

$\text{ReLU}(x) \triangleq \max(0, x)$

# 通过 ReLU 激活函数
relu_y = tf.nn.relu(x) 
relu_y

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([0.      , 0.      , 0.      , 0.      , 0.      , 0.666667,
       2.      , 3.333334, 4.666667, 6.      ], dtype=float32)>

set_plt_ax()

plt.plot(x, relu_y, color='C4', label='ReLU')
plt.xlim(-6, 6)
plt.ylim(0, 6)
plt.legend(loc=2)
plt.show()

3.3 LeakyReLU

ReLU 函数在𝑥 < 0时梯度值恒为 0，也可能会造成梯度弥散现象，为了克服这个问题，LeakyReLU函数被提出：

$\text{LeakyReLU}(x) \triangleq \begin{cases} x, & x \geqslant 0 \\ px, & x < 0 \end{cases}$

其中𝑝为用户自行设置的某较小数值的超参数，如 0.02 等。当𝑝 = 0时，LeakyReLU 函数退化为 ReLU 函数；当𝑝 ≠ 0时，𝑥 < 0处能够获得较小的梯度值𝑝，从而避免出现梯度弥散现象。

# 通过 LeakyReLU 激活函数
leakyrelu_y = tf.nn.leaky_relu(x, alpha=0.1)
leakyrelu_y

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([-0.6       , -0.46666667, -0.33333334, -0.2       , -0.06666666,
        0.666667  ,  2.        ,  3.333334  ,  4.666667  ,  6.        ],
      dtype=float32)>

set_plt_ax()

plt.plot(x, leakyrelu_y, color='C4', label='LeakyReLU')
plt.xlim(-6, 6)
plt.ylim(-1, 6)
plt.legend(loc=2)
plt.show()

3.4 Tanh

Tanh 函数能够将𝑥 ∈ 𝑅的输入“压缩”到(−1,1)区间，定义为：

$\tanh(x)=\frac{e^x-e^{-x}}{e^x + e^{-x}}= 2 \cdot \text{sigmoid}(2x) - 1$

# 通过 tanh 激活函数
tanh_y = tf.nn.tanh(x)
tanh_y

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([-0.99998784, -0.99982315, -0.9974579 , -0.9640276 , -0.58278286,
        0.58278316,  0.9640276 ,  0.99745804,  0.99982315,  0.99998784],
      dtype=float32)>

set_plt_ax()

plt.plot(x, tanh_y, color='C4', label='Tanh')
plt.xlim(-6, 6)
plt.ylim(-1.5, 1.5)
plt.legend(loc=2)
plt.show()

4.输出层设计

4.1 Softmax

$Softmax(z_i) \triangleq \frac{e^{z_i}}{\sum_{j=1}^{d_{out}} e^{z_j}}$

z = tf.constant([2.,1.,0.1])
# 通过 Softmax 函数
tf.nn.softmax(z)

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0.6590012 , 0.24243298, 0.09856589], dtype=float32)>

# 构造输出层的输出
z = tf.random.normal([2,10]) 
# 构造真实值
y_onehot = tf.constant([1,3]) 
# one-hot 编码
y_onehot = tf.one_hot(y_onehot, depth=10) 
# 输出层未使用 Softmax 函数，故 from_logits 设置为 True
# 这样 categorical_crossentropy 函数在计算损失函数前，会先内部调用 Softmax 函数
loss = tf.keras.losses.categorical_crossentropy(y_onehot,z,from_logits=True)
loss = tf.reduce_mean(loss) # 计算平均交叉熵损失
loss

<tf.Tensor: shape=(), dtype=float32, numpy=3.1776986>

# 创建 Softmax 与交叉熵计算类，输出层的输出 z 未使用 softmax
criteon = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
loss = criteon(y_onehot,z) # 计算损失
loss

<tf.Tensor: shape=(), dtype=float32, numpy=3.1776986>

5.误差计算

常见的误差计算函数有均方差、交叉熵、KL 散度、Hinge Loss 函数等，其中均方差函数和交叉熵函数在深度学习中比较常见，均方差主要用于回归问题，交叉熵主要用于分类问题。

5.1 均方差误差函数

$\text{MSE}(y, o) \triangleq \frac{1}{d_{out}} \sum_{i=1}^{d_{out}}(y_i-o_i)^2$
MSE 误差函数的值总是大于等于 0，当 MSE 函数达到最小值 0 时，输出等于真实标签，此时神经网络的参数达到最优状态。

# 构造网络输出
o = tf.random.normal([2,10]) 
# 构造真实值
y_onehot = tf.constant([1,3]) 
y_onehot = tf.one_hot(y_onehot, depth=10)
# 计算均方差
loss = tf.keras.losses.MSE(y_onehot, o) 
loss

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0.87500876, 1.4305398 ], dtype=float32)>

# 计算 batch 均方差
loss = tf.reduce_mean(loss) 
loss

<tf.Tensor: shape=(), dtype=float32, numpy=1.1527743>

# 创建 MSE 类
criteon = tf.keras.losses.MeanSquaredError()
# 计算 batch 均方差
loss = criteon(y_onehot,o) 
loss

<tf.Tensor: shape=(), dtype=float32, numpy=1.1527743>

5.2 交叉熵误差函数

当真实标签 𝒚 采用 one-hot 编码（仅第 𝑖 类为 1，其余为 0）时，熵 $H(p)=0$，交叉熵 $H(p \| q) = H(p) + D_{KL}(p \| q)$ 退化为 KL 散度（约定 $0 \cdot \log 0 = 0$）：

$\begin{aligned} H(p \| q) &= D_{KL}(p \| q) \\ &= \sum_{j} y_{j} \log \left(\frac{y_j}{o_j}\right) \\ &= 1 \cdot \log \frac{1}{o_i} + \sum_{j \neq i} 0 \cdot \log \left(\frac{0}{o_j}\right) \\ &= -\log o_{i} \end{aligned}$

6.汽车油耗预测实战

我们采用 Auto MPG 数据集，它记录了各种汽车效能指标与气缸数、重量、马力等其他因子的真实数据，查看数据集的前5项：

导入我们要使用的库

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, losses

我们来下载数据集

def load_dataset():
    """Download the Auto MPG data file and parse it into a DataFrame.

    The file is fetched with ``keras.utils.get_file`` and the path it
    returns is read with ``pandas.read_csv``. ``"?"`` entries are treated
    as missing values.

    :return: a copy of the parsed DataFrame with one column per entry in
        ``column_names``.
    """
    source_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
    local_path = keras.utils.get_file("auto-mpg.data", source_url)

    # Fuel efficiency (miles per gallon), cylinder count, displacement,
    # horsepower, weight, acceleration, model year, and region of origin.
    column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                    'Acceleration', 'Model Year', 'Origin']

    # Fields are space separated; everything after a tab on a line is
    # treated as a comment and ignored.
    read_options = dict(names=column_names,
                        na_values="?", comment='\t',
                        sep=" ", skipinitialspace=True)
    frame = pd.read_csv(local_path, **read_options)

    return frame.copy()

dataset = load_dataset()
# 查看部分数据
dataset.head()

	MPG	Cylinders	Displacement	Horsepower	Weight	Acceleration	Model Year	Origin
0	18.0	8	307.0	130.0	3504.0	12.0	70	1
1	15.0	8	350.0	165.0	3693.0	11.5	70	1
2	18.0	8	318.0	150.0	3436.0	11.0	70	1
3	16.0	8	304.0	150.0	3433.0	12.0	70	1
4	17.0	8	302.0	140.0	3449.0	10.5	70	1

原始数据中的数据可能含有空字段(缺失值)的数据项，需要清除这些记录项：

def preprocess_dataset(dataset, train_frac=0.8, random_state=0):
    """Clean the raw Auto MPG frame and split it into train and test sets.

    Rows containing missing values are dropped, the categorical ``Origin``
    column (codes 1, 2, 3) is replaced by three one-hot float columns
    ``USA``, ``Europe`` and ``Japan``, and the result is split at random.

    :param dataset: DataFrame that contains an ``Origin`` column; it is
        not modified.
    :param train_frac: fraction of the cleaned rows sampled into the
        training set (defaults to the previously hard-coded 0.8).
    :param random_state: seed forwarded to ``DataFrame.sample`` so the
        split is reproducible (defaults to the previously hard-coded 0).
    :return: ``(train_dataset, test_dataset)``; the test set holds every
        cleaned row that was not sampled for training.
    :raises KeyError: if ``dataset`` has no ``Origin`` column.
    """
    # Work on an independent copy so the caller's frame is never modified.
    cleaned = dataset.dropna().copy()

    # Replace the numeric origin code by one indicator column per region.
    origin = cleaned.pop('Origin')
    for code, region in ((1, 'USA'), (2, 'Europe'), (3, 'Japan')):
        cleaned[region] = (origin == code) * 1.0

    # Sample the training rows, then keep the remaining rows for testing.
    train_dataset = cleaned.sample(frac=train_frac, random_state=random_state)
    test_dataset = cleaned.drop(train_dataset.index)
    return train_dataset, test_dataset

train_dataset, test_dataset = preprocess_dataset(dataset)

# Pairwise relationships between a few training columns, with a kernel
# density estimate on the diagonal.
sns_plot = sns.pairplot(train_dataset[["Cylinders", "Displacement", "Weight", "MPG"]], diag_kind="kde")
# pairplot draws on a figure of its own; calling plt.figure() here would only
# open an additional empty figure, so the plot is shown directly.
plt.show()

将 MPG 字段移出为标签数据：

# 查看训练集的输入X的统计数据
train_stats = train_dataset.describe()
train_stats.pop("MPG")
train_stats = train_stats.transpose()
train_stats

	count	mean	std	min	25%	50%	75%	max
Cylinders	314.0	5.477707	1.699788	3.0	4.00	4.0	8.00	8.0
Displacement	314.0	195.318471	104.331589	68.0	105.50	151.0	265.75	455.0
Horsepower	314.0	104.869427	38.096214	46.0	76.25	94.5	128.00	225.0
Weight	314.0	2990.251592	843.898596	1649.0	2256.50	2822.5	3608.00	5140.0
Acceleration	314.0	15.559236	2.789230	8.0	13.80	15.5	17.20	24.8
Model Year	314.0	75.898089	3.675642	70.0	73.00	76.0	79.00	82.0
USA	314.0	0.624204	0.485101	0.0	0.00	1.0	1.00	1.0
Europe	314.0	0.178344	0.383413	0.0	0.00	0.0	0.00	1.0
Japan	314.0	0.197452	0.398712	0.0	0.00	0.0	0.00	1.0

数据标准化

def norm(x, train_stats):
    """Standardize ``x`` with the per-feature mean and standard deviation.

    :param x: data to standardize; its columns are matched against the
        index of ``train_stats``.
    :param train_stats: statistics table indexed by feature name that
        provides a ``'mean'`` and a ``'std'`` column.
    :return: ``(x - mean) / std`` computed feature by feature.
    """
    center = train_stats['mean']
    scale = train_stats['std']
    return (x - center) / scale

# 移动MPG油耗效能这一列为真实标签Y
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')
# 进行标准化
normed_train_data = norm(train_dataset, train_stats)
normed_test_data = norm(test_dataset, train_stats)

print(normed_train_data.shape,train_labels.shape)
print(normed_test_data.shape, test_labels.shape)

(314, 9) (314,)
(78, 9) (78,)

class Network(keras.Model):
    """Regression network: two 64-unit ReLU dense layers and a 1-unit output."""

    def __init__(self):
        super().__init__()
        # Two hidden layers followed by a linear single-value output layer.
        self.fc1 = layers.Dense(64, activation='relu')
        self.fc2 = layers.Dense(64, activation='relu')
        self.fc3 = layers.Dense(1)

    def call(self, inputs):
        """Run ``inputs`` through the three dense layers in order."""
        hidden = self.fc1(inputs)
        hidden = self.fc2(hidden)
        return self.fc3(hidden)

def build_model(input_dim=9):
    """Create a ``Network``, build its weights and print a summary.

    :param input_dim: number of input features per sample; defaults to 9,
        the width of the normalized Auto MPG feature table.
    :return: the built ``Network`` instance.
    """
    model = Network()
    # build() creates the layer weights; the batch size 4 is arbitrary,
    # only the feature length matters.
    model.build(input_shape=(4, input_dim))
    model.summary()  # print the layer and parameter overview
    return model

model = build_model()
optimizer = tf.keras.optimizers.RMSprop(0.001) # 创建优化器，指定学习率
train_db = tf.data.Dataset.from_tensor_slices((normed_train_data.values, train_labels.values))
train_db = train_db.shuffle(100).batch(32)

Model: "network_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_3 (Dense)              multiple                  640       
_________________________________________________________________
dense_4 (Dense)              multiple                  4160      
_________________________________________________________________
dense_5 (Dense)              multiple                  65        
=================================================================
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________

接下来实现网络训练部分。通过 Epoch 和 Step 的双层循环训练网络，共训练 200 个 epoch:

def _align_labels(labels, predictions):
    """Cast ``labels`` to the dtype of ``predictions`` and reshape to its shape."""
    labels = tf.cast(labels, predictions.dtype)
    return tf.reshape(labels, tf.shape(predictions))


def train(model, train_db, optimizer, normed_test_data, test_labels, epochs=200):
    """Train ``model`` with an MSE loss and track the MAE once per epoch.

    Labels are reshaped to the shape of the model output before any loss is
    computed. Labels of shape [b] would otherwise broadcast against
    predictions of shape [b, 1] into a [b, b] matrix, so the loss would
    compare every prediction with every label.

    :param model: callable model exposing ``trainable_variables``.
    :param train_db: iterable of ``(x, y)`` training batches.
    :param optimizer: optimizer providing ``apply_gradients``.
    :param normed_test_data: normalized test features; ``.values`` is fed
        to the model.
    :param test_labels: ground-truth values for the test features.
    :param epochs: number of passes over ``train_db`` (defaults to the
        previously hard-coded 200).
    :return: ``(train_mae_losses, test_mae_losses)``, two lists of floats
        with one entry per epoch. The training entry is the MAE of the last
        batch of that epoch.
    """
    train_mae_losses = []
    test_mae_losses = []
    # The test features never change, so convert them to a tensor once.
    test_x = tf.constant(normed_test_data.values)

    for epoch in range(epochs):
        for step, (x, y) in enumerate(train_db):

            with tf.GradientTape() as tape:
                out = model(x)
                y = _align_labels(y, out)
                # Mean squared error: the quantity being optimized.
                loss = tf.reduce_mean(losses.MSE(y, out))
            # Mean absolute error is only reported, so it stays off the tape.
            mae_loss = tf.reduce_mean(losses.MAE(y, out))

            if step % 10 == 0:
                print(epoch, step, float(loss))

            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

        train_mae_losses.append(float(mae_loss))
        test_out = model(test_x)
        test_mae = tf.reduce_mean(
            losses.MAE(_align_labels(test_labels, test_out), test_out))
        # Store plain floats so both lists hold the same element type.
        test_mae_losses.append(float(test_mae))

    return train_mae_losses, test_mae_losses

def plot(train_mae_losses, test_mae_losses):
    """Plot the per-epoch training and test MAE curves on a new figure.

    :param train_mae_losses: sequence of training MAE values, one per epoch.
    :param test_mae_losses: sequence of test MAE values, one per epoch.
    """
    # Entries may be scalar tensors rather than floats; normalize them.
    train_curve = [float(value) for value in train_mae_losses]
    test_curve = [float(value) for value in test_mae_losses]

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('MAE')
    plt.plot(train_curve, label='Train')
    plt.plot(test_curve, label='Test')
    plt.legend()  # a single call is enough once both curves are drawn
    plt.show()

train_mae_losses, test_mae_losses = train(model, train_db, optimizer, normed_test_data, test_labels)

plot(train_mae_losses, test_mae_losses)

转载：https://blog.csdn.net/qq_43328040/article/details/109167503

查看评论

飞道的博客