watch: TF2 - PKU 04 | NN Framework Extension

4 网络八股扩展

4.1 自制数据集

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import tensorflow as tf
from PIL import Image
import numpy as np
import os

train_dir = './mnist_image_label/mnist_train_jpg_60000/'    # directory of training images
train_txt = './mnist_image_label/mnist_train_jpg_60000.txt' # "filename label" list for the training set
x_train_savepath = './mnist_image_label/mnist_x_train.npy'  # cached (flattened) training features
y_train_savepath = './mnist_image_label/mnist_y_train.npy'  # cached training labels

test_dir = './mnist_image_label/mnist_test_jpg_10000/'
# NOTE(review): unlike the train txt, this name has no _10000 suffix -- confirm it matches the file on disk
test_txt = './mnist_image_label/mnist_test_jpg.txt'
x_test_savepath = './mnist_image_label/mnist_x_test.npy'
y_test_savepath = './mnist_image_label/mnist_y_test.npy'

def generateDS(dir, labels_path):
    """Build a dataset from a label file.

    Each line of *labels_path* is "<image filename> <label>"; images are
    loaded from *dir*, converted to 8-bit grayscale and scaled to [0, 1].

    Returns:
        x  : np.ndarray of normalized grayscale images
        y_ : np.ndarray of int64 labels
    """
    # FIX: use a context manager so the label file is closed even on error.
    with open(labels_path, 'r') as f:
        contents = f.readlines()    # one "filename label" entry per line

    x, y_ = [], []                  # per-image grayscale arrays and labels
    for content in contents:
        value = content.split()     # split on whitespace -> ["filename", "label"]
        img_path = dir + value[0]
        img = Image.open(img_path)
        img = np.array(img.convert('L'))    # 'L' = 8-bit grayscale
        img = img/255.                      # normalize pixel values to [0, 1]
        x.append(img)
        y_.append(value[1])
        print('loading: '+content)          # content keeps its trailing newline

    x = np.array(x)                 # list of 2-D arrays -> single ndarray
    y_ = np.array(y_)
    y_ = y_.astype(np.int64)        # labels as 64-bit integers
    return x, y_

# Reuse the cached .npy files when all four exist; otherwise rebuild from images.
if os.path.exists(x_train_savepath) and os.path.exists(y_train_savepath) and os.path.exists(x_test_savepath) and os.path.exists(y_test_savepath):
    print('---Loading dataset---')
    x_train_save = np.load(x_train_savepath)    # flattened images
    y_train = np.load(y_train_savepath)
    x_test_save = np.load(x_test_savepath)
    y_test = np.load(y_test_savepath)
    # Restore the original 28x28 image shape.
    x_train = np.reshape(x_train_save, (len(x_train_save), 28, 28))
    x_test = np.reshape(x_test_save, (len(x_test_save), 28, 28))

else:   # cache missing: build the dataset from the raw images
    print('---Generating dataset---')
    # BUG FIX: the original called undefined `generate`; the function is `generateDS`.
    x_train, y_train = generateDS(train_dir, train_txt)
    x_test, y_test = generateDS(test_dir, test_txt)

    print('---Saving dataset---')   # message fixed: closing dashes, consistent with the other banners
    x_train_save = np.reshape(x_train, (len(x_train), -1))  # flatten each image for saving
    x_test_save = np.reshape(x_test, (len(x_test), -1))
    np.save(x_train_savepath, x_train_save)
    np.save(y_train_savepath, y_train)
    np.save(x_test_savepath, x_test_save)
    np.save(y_test_savepath, y_test)

4.2 数据增强

用于扩展数据集,对图像的增强就是对图像的简单形变,用来应对因拍照角度不同引起的图片变形

1
2
3
4
5
6
7
8
9
image_gen_train = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale = 所有数据将乘以该数值
    rotation_range = 随机旋转角度数范围
    width_shift_range = 随机宽度偏移量
    height_shift_range = 随机高度偏移量
    horizontal_flip = 是否随机水平翻转
    zoom_range = 随机缩放的范围[1-n, 1+n])

image_gen_train.fit(x_train)

其中 x_train 需要是四维,需要变形:x_train = x_train.reshape(x_train.shape[0], 28, 28, 1),把 (60000, 28, 28) -> (60000, 28, 28, 1)

例如:

1
2
3
4
5
6
7
8
9
image_gen_train = ImageDataGenerator(
    rescale = 1. / 1.,  # every pixel is multiplied by this; use 1/255 to normalize images to 0~1
    rotation_range = 45,    # random rotation of up to 45 degrees
    width_shift_range=.15,  # random horizontal shift
    height_shift_range=.15, # random vertical shift
    horizontal_flip=False,  # no random horizontal flipping
    zoom_range = 0.5        # random zoom within [1-0.5, 1+0.5]
    )
image_gen_train.fit(x_train)

模型训练也要改,把 model.fit(x_train, y_train, batch_size=32, ...) 改为 model.fit(image_gen_train.flow(x_train, y_train, batch_size=32), ...)

4.3 断点续训, 存取模型

读取模型:load_weights(文件路径)

1
2
3
4
checkpoint_save_path = './checkpoint/mnist.ckpt'    # checkpoint file path
if os.path.exists(checkpoint_save_path + '.index'): # an .index file means weights were saved before
    print('---- Loading the model ----')
    model.load_weights(checkpoint_save_path)

保存模型:

1
2
3
4
5
6
7
8
9
# BUG FIX: the original never closed the ModelCheckpoint(...) call -- SyntaxError.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
                filepath=checkpoint_save_path,  # where checkpoints are written
                save_weights_only=True,         # save only the weights, not the whole model
                save_best_only=True)            # keep only the best result

# Checkpoints are written during training; metrics are recorded in `history`.
history = model.fit(x_train, y_train, batch_size=32, epochs=5,
                    validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])

4.4 提取可训练参数

返回模型中可训练的参数 model.trainable_variables

设置print输出格式:np.set_printoptions(threshold=超过多少省略显示) np.inf 表示无限大

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
print(model.trainable_variables)    # dump the trainable parameters to stdout

# FIX: write the parameters with a context manager so the file is closed
# even if a write raises.
with open('./weights.txt', 'w') as file:
    for v in model.trainable_variables:
        file.write(str(v.name) + '\n')
        file.write(str(v.shape) + '\n')
        file.write(str(v.numpy()) + '\n')

4.5 acc/loss 可视化

用于查看训练效果。history 中记录了训练集的 loss 和 sparse_categorical_accuracy;测试集上的 val_loss 和 val_sparse_categorical_accuracy

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
# Pull the per-epoch curves recorded by model.fit.
metrics = history.history
acc = metrics['sparse_categorical_accuracy']
val_acc = metrics['val_sparse_categorical_accuracy']
loss = metrics['loss']
val_loss = metrics['val_loss']

# Left panel: accuracy curves; right panel: loss curves.
panels = [
    (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'Training and Validation Accuracy'),
    (2, loss, 'Training Loss', val_loss, 'Validation Loss',
     'Training and Validation Loss'),
]
for pos, train_series, train_label, val_series, val_label, title in panels:
    plt.subplot(1, 2, pos)
    plt.plot(train_series, label=train_label)
    plt.plot(val_series, label=val_label)
    plt.title(title)
    plt.legend()
plt.show()

4.6 调用模型

返回前向传播计算结果:predict(输入特征,batch_size=整数)

三步:复现模型,加载参数,预测

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from PIL import Image
import numpy as np
import tensorflow as tf

model_save_path = './checkpoint/mnist.ckpt'

# Rebuild the exact architecture that was trained (the checkpoint holds weights only).
model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax')])

model.load_weights(model_save_path)     # load the trained weights

preNum = int(input("Input the number of test pictures: "))  # how many images to classify

for i in range(preNum):
    image_path = input("the path of test picture: ")    # e.g. 1.png
    img = Image.open(image_path)        # read the image

    # Preprocess so the input matches the training data format.
    # BUG FIX: original used the undefined name `Img`; also Image.ANTIALIAS
    # was removed in Pillow 10 -- LANCZOS is its long-standing equivalent.
    img = img.resize((28, 28), Image.LANCZOS)   # 28x28, same size as training images
    img_arr = np.array(img.convert('L'))        # convert to 8-bit grayscale
    img_arr = 255 - img_arr                     # invert black/white

    # Optional preprocessing: binarize into a high-contrast image (one of two methods).
    # FIX: loop variables renamed -- the originals shadowed the outer loop's `i`.
    for row in range(28):
        for col in range(28):
            if img_arr[row][col] < 200:     # threshold filters out background noise;
                img_arr[row][col] = 255     # a well-chosen threshold improves recognition
            else:
                img_arr[row][col] = 0

    img_arr = img_arr / 255.0       # normalize to [0, 1]

    x_predict = img_arr[tf.newaxis, ...]    # (28, 28) -> (1, 28, 28)
    result = model.predict(x_predict)       # forward pass through the network
    pred = tf.argmax(result, axis=1)        # index of the highest probability
    print('\n')
    tf.print(pred)