watch: TF2 - PKU 01 | NN Computation Process

创建张量

tf.constant(张量内容, dtype=数据类型(可选))

1
2
3
4
5


import tensorflow as tf
# Build a rank-1 tensor with two int64 elements and inspect it.
a = tf.constant([1,5], dtype=tf.int64)  # rank-1 tensor, 2 elements
print(a)        # tf.Tensor([1 5], shape=(2,), dtype=int64); author's note: TF 1.x would not display the element values
print(a.dtype)  # <dtype: 'int64'>
print(a.shape)  # (2,)

将numpy的数据类型转换为 Tensor 数据类型:

tf.convert_to_tensor(数据名，dtype=数据类型（可选）)

1
2
3
4
5
6


import tensorflow as tf
import numpy as np
# Convert a NumPy array into a TensorFlow tensor with an explicit dtype.
a = np.arange(0, 5)
b = tf.convert_to_tensor(a, dtype=tf.int64)
print(a)    # [0 1 2 3 4]
print(b)    # tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int64)

生成全0，全1，全指定值的张量：

1
2
3
4
5


# Tensors filled with zeros, ones, or an arbitrary constant value.
tf.zeros([2,3])     # tf.Tensor([[0. 0. 0.] [0. 0. 0.]], shape=(2,3), dtype=float32)

tf.ones(4)          # tf.Tensor([1. 1. 1. 1.], shape=(4,), dtype=float32)

tf.fill([2,2], 9)   # tf.Tensor([[9 9] [9 9]], shape=(2,2), dtype=int32)

生成正态分布的随机数（默认均值为0，标准差为1），常用于初始化参数:

tf.random.normal(维度，mean=均值，stddev=标准差)

生成截断式正态分布的随机数，分布更集中在均值附近，随机数取值在正负2个标准差之内 (mu-2sigma, mu+2sigma)，如果落在外面则重新生成：

tf.random.truncated_normal(维度， mean=均值，stddev=标准差)

1
2


# Draw 2x2 random tensors with mean 0.5 and standard deviation 1.
d = tf.random.normal([2,2], mean=0.5, stddev=1)  # plain normal distribution
e = tf.random.truncated_normal([2,2], mean=0.5, stddev=1)  # truncated variant of the same distribution

生成均匀分布随机数 [min, max) 左闭右开：

tf.random.uniform(维度，minval=最小值，maxval=最大值)

常用函数

强制 tensor 转换为指定类型 tf.cast(张量名，dtype=数据类型)
计算张量维度上元素的最小值 tf.reduce_min(张量名)
找到张量中的最大元素：tf.reduce_max(张量名)

1
2
3
4
5
6
7


# Cast a float64 tensor to int32, then take the min and max over all elements.
x1 = tf.constant([1., 2., 3.], dtype=tf.float64)
print(x1)   # tf.Tensor([1. 2. 3.], shape=(3,), dtype=float64)

x2 = tf.cast(x1, tf.int32)
print(x2)   # tf.Tensor([1 2 3], shape=(3,), dtype=int32)

print(tf.reduce_min(x2), tf.reduce_max(x2)) # tf.Tensor(1, shape=(), dtype=int32) tf.Tensor(3, shape=(), dtype=int32)

axis指定操作方向，对于二维张量，axis=0表示对第0维操作；若不指定axis，则所有元素参与计算

1
2
3
4
5


# Reductions over a 2x3 int32 tensor, with and without an explicit axis.
x = tf.constant([[1,2,3], [2,2,3]])

# The input is int32, so the mean is returned as an int32 (fractional part dropped).
print(tf.reduce_mean(x))    # mean of all elements: tf.Tensor(2, shape=(), dtype=int32)
print(tf.reduce_mean(x, axis=0))    # mean along axis 0: tf.Tensor([1 2 3], shape=(3,), dtype=int32)
print(tf.reduce_sum(x, axis=1))    # sum along axis 1: tf.Tensor([6 7], shape=(2,), dtype=int32)

tf.Variable() 将变量标记为“可训练”，被标记的变量会在反向传播中记录梯度信息。在神经网络训练中，常用该函数标记待训练参数。

w = tf.Variable(tf.random.normal([2,2], mean=0, stddev=1)), 把生成的随机数标记为可训练

常用数学运算：tf.add, tf.subtract, tf.multiply, tf.divide, tf.square, tf.pow, tf.sqrt, tf.matmul。

1
2
3
4


tf.add(张量1，张量2)
tf.subtract(张量1，张量2)
tf.multiply(张量1，张量2)
tf.divide(张量1，张量2)

只有维度相同的张量才可以做四则运算

1
2
3
4
5
6


# Element-wise arithmetic on two float32 tensors of shape (1, 3).
a = tf.ones([1, 3])
b = tf.fill([1, 3], 3.)  # fixed: the call was closed with ']' instead of ')'
print(tf.add(a, b))       # tf.Tensor([[4. 4. 4.]], shape=(1,3), dtype=float32)
print(tf.subtract(a, b))  # tf.Tensor([[-2. -2. -2.]], shape=(1,3), dtype=float32)
print(tf.multiply(a, b))  # tf.Tensor([[3. 3. 3.]], shape=(1,3), dtype=float32)
print(tf.divide(b, a))    # tf.Tensor([[3. 3. 3.]], shape=(1,3), dtype=float32)

两矩阵相乘：

1
2
3


# Matrix product of a (3, 2) tensor of ones with a (2, 3) tensor of threes.
a = tf.ones([3,2])
b = tf.fill([2,3], 3.)
print(tf.matmul(a,b))   # tf.Tensor([[6. 6. 6.] [6. 6. 6.] [6. 6. 6.]], shape=(3,3), dtype=float32)

把特征和标签配对 tf.data.Dataset.from_tensor_slices((输入特征，标签))，Numpy和Tensor格式都适用

1
2
3
4
5
6


# Pair each feature with its label and iterate over the resulting dataset.
features = tf.constant([12, 23, 10, 17])  # each number is one sample
labels = tf.constant([0, 1, 1, 0])
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
print(dataset)
# Each element is a (feature, label) tuple of scalar tensors.
for element in dataset:
    print(element)

运行结果：

1
2
3
4
5


<TensorSliceDataset shapes: ((),()), types: (tf.int32, tf.int32)>  （特征，标签）对
(<tf.Tensor: id=9, shape=(), dtype=int32, numpy=12>, <tf.Tensor: id=10, shape=(), dtype=int32, numpy=0>)
(<tf.Tensor: id=11, shape=(), dtype=int32, numpy=23>, <tf.Tensor: id=12, shape=(), dtype=int32, numpy=1>)
(<tf.Tensor: id=13, shape=(), dtype=int32, numpy=10>, <tf.Tensor: id=14, shape=(), dtype=int32, numpy=1>)
(<tf.Tensor: id=15, shape=(), dtype=int32, numpy=17>, <tf.Tensor: id=16, shape=(), dtype=int32, numpy=0>)

实现某函数对指定参数的求导运算，用 with 结构记录计算过程：

1
2
3


with tf.GradientTape() as tape:
    若干个计算过程
grad = tape.gradient(函数，对谁求导)

例如：

1
2
3
4
5


# Differentiate loss = w^2 with respect to w at w = 3.
with tf.GradientTape() as tape:
    w = tf.Variable(tf.constant(3.0))  # initial value 3; a Variable, so it can be differentiated
    loss = tf.pow(w, 2)
grad = tape.gradient(loss, w)  # d(loss)/dw = 2w = 6
print(grad)  # tf.Tensor(6.0, shape=(), dtype=float32)

在遍历时返回索引号 enumerate(iterable)

1
2
3


# enumerate yields (index, element) pairs while iterating.
seq = ['one', 'two', 'three']
for count, value in enumerate(seq):
    print(count, value)  # prints "0 one", "1 two", "2 three"

分类问题中，用独热码表示标签，tf.one_hot(待转换数据, depth=几分类) 将标签列表转换为 one-hot 形式的数据

1
2
3
4


# Encode integer class labels as one-hot rows.
classes = 3
labels = tf.constant([1,0,2])  # label values range from 0 to 2
output = tf.one_hot(labels, depth=classes)
# No sorting takes place: row i is the encoding of labels[i], in input order.
print(output)  # tf.Tensor([[0. 1. 0.] [1. 0. 0.] [0. 0. 1.]], shape=(3,3), dtype=float32)

使网络输出符合概率分布 tf.nn.softmax(x)

1
2
3


# Turn raw scores into a probability distribution with softmax.
y = tf.constant([1.01, 2.01, -0.66])
y_prob = tf.nn.softmax(y)
print(y_prob)  # tf.Tensor([0.25598174 0.69583046 0.0481878], shape=(3,), dtype=float32)

参数自更新 (自减) assign_sub()，参数要用tf.Variable定义为”可训练“

1
2
3


# In-place decrement of a variable.
w = tf.Variable(4)
w.assign_sub(1)  # w -= 1, i.e. w = w - 1
print(w)  # <tf.Variable 'Variable:0' shape=() dtype=int32, numpy=3>

返回指定维度的最大值的索引 tf.argmax(张量名，axis=操作轴)

1
2
3


# Index of the largest value along each axis of a (4, 3) array.
# Fixed: the rows were missing separating commas, which made Python treat
# `[1, 2, 3] [2,3,4]` as an indexing expression and raise TypeError.
test = np.array([[1, 2, 3], [2, 3, 4], [5, 4, 3], [8, 7, 2]])  # shape=(4,3)
print(tf.argmax(test, axis=0))  # per column: tf.Tensor([3 3 1], shape=(3,), dtype=int64)
print(tf.argmax(test, axis=1))  # per row:    tf.Tensor([2 2 0 0], shape=(4,), dtype=int64)

鸢尾花分类

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81


# Iris classification with a single dense layer trained by hand-written
# mini-batch gradient descent.
#
# Fixes relative to the original notes:
#   * `from sklearn.datasets import datasets` is not a valid import.
#   * `plt` was used but never imported.
#   * `tape.gradients` does not exist; the method is `tape.gradient`.
#   * The features are float64 while the weights are float32, which makes
#     tf.matmul fail; the features are now cast to float32.
#   * Loop variables no longer shadow `epoch`, `x_train`/`y_train` or
#     `x_test`/`y_test`.
#   * The per-epoch loss is averaged over the actual number of batches
#     instead of a hard-coded 4.

# 1. Prepare data
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn import datasets

# Read the features and the integer class labels.
iris = datasets.load_iris()
x_data = iris.data      # features
y_data = iris.target    # labels

# Shuffle. Re-seeding with the same value before each shuffle applies the same
# permutation to both arrays, so every sample keeps its own label.
np.random.seed(116)
np.random.shuffle(x_data)
np.random.seed(116)
np.random.shuffle(y_data)
tf.random.set_seed(116)

# Split into disjoint training and test sets: the last 30 samples are held out.
x_train = x_data[:-30]
y_train = y_data[:-30]
x_test = x_data[-30:]
y_test = y_data[-30:]

# The weights below are float32; cast the features so tf.matmul sees one dtype.
x_train = tf.cast(x_train, tf.float32)
x_test = tf.cast(x_test, tf.float32)

# Pair features with labels and feed them one batch at a time.
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(32)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

# Trainable parameters: 4 input features -> 3 classes.
# Author's note: a "layer" refers to the dense mesh of connections (the
# weights), not to the nodes.
w1 = tf.Variable(tf.random.truncated_normal([4, 3], stddev=0.1, seed=1))
b1 = tf.Variable(tf.random.truncated_normal([3], stddev=0.1, seed=1))

lr = 0.1
train_loss_results = []  # mean training loss of each epoch
test_acc = []            # test-set accuracy after each epoch
epoch = 500
loss_all = 0             # running sum of the batch losses within one epoch

# 2. Train: outer loop over epochs, inner loop over batches.
for epoch_idx in range(epoch):
    num_batches = 0
    for x_batch, y_batch in train_db:
        with tf.GradientTape() as tape:  # record operations for differentiation
            y = tf.matmul(x_batch, w1) + b1      # linear layer
            y = tf.nn.softmax(y)                 # scores -> probabilities
            y_ = tf.one_hot(y_batch, depth=3)    # labels -> one-hot
            loss = tf.reduce_mean(tf.square(y_ - y))  # mean squared error
        loss_all += loss.numpy()
        num_batches += 1
        grads = tape.gradient(loss, [w1, b1])  # d(loss)/d(w1), d(loss)/d(b1)
        w1.assign_sub(lr * grads[0])           # gradient-descent update
        b1.assign_sub(lr * grads[1])
    epoch_loss = loss_all / num_batches        # average over this epoch's batches
    print("Epoch {}, loss: {}".format(epoch_idx, epoch_loss))
    train_loss_results.append(epoch_loss)
    loss_all = 0

    # 3. Evaluate on the test set after every epoch.
    total_correct, total_number = 0, 0
    for x_batch, y_batch in test_db:
        y = tf.matmul(x_batch, w1) + b1
        y = tf.nn.softmax(y)
        pred = tf.argmax(y, axis=1)                # predicted class index
        pred = tf.cast(pred, dtype=y_batch.dtype)  # match the label dtype for tf.equal
        correct = tf.cast(tf.equal(pred, y_batch), dtype=tf.int32)  # bool -> 0/1
        correct = tf.reduce_sum(correct)           # correct predictions in this batch
        total_correct += int(correct)              # running count of correct predictions
        total_number += x_batch.shape[0]           # running count of evaluated samples

    acc = total_correct / total_number  # test accuracy after this epoch
    test_acc.append(acc)
    print("Test_acc:", acc)
    print("----------------")

# 4. Plot the loss curve.
plt.title('Loss Function Curve')
plt.xlabel('Epoch')
plt.ylabel("Loss")
plt.plot(train_loss_results, label="$Loss$")
plt.legend()
plt.show()

# 5. Plot the accuracy curve.
plt.title("ACC Curve")
plt.xlabel("Epoch")
plt.ylabel('Acc')
plt.plot(test_acc, label="$Accuracy$")
plt.legend()
plt.show()