本笔记不使用layer相关API,搭建一个三层的神经网络来训练MNIST数据集。
前向传播和梯度更新都使用最基础的tensorflow API来做。
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import numpy as np


def load_mnist(path=r'./mnist.npz'):
    """Load the MNIST arrays from a local ``.npz`` archive.

    Args:
        path: Location of the ``mnist.npz`` archive. Defaults to the file
            in the current working directory (mind the slash direction
            when changing it).

    Returns:
        ``(x_train, y_train), (x_test, y_test)`` read from the archive
        entries of the same names.

    Raises:
        OSError: If the archive cannot be opened.
        KeyError: If one of the four expected entries is missing.
    """
    # The context manager closes the archive even when a key lookup fails;
    # indexing already materializes each array, so they stay valid afterwards.
    with np.load(path) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']
    return (x_train, y_train), (x_test, y_test)


# Load the MNIST dataset
# X_train: [60000, 28, 28] images
# Y_train: [60000] labels
# MNIST dataset download: https://blog.csdn.net/charles_neil/article/details/107851880
# https://www.zhihu.com/question/56773355
(X_train, Y_train), (X_test, Y_test) = load_mnist()

# Convert to tensors.
# Scale the image values into the 0-1 range (assumes raw pixels are 0-255).
x = tf.convert_to_tensor(X_train, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(Y_train, dtype=tf.int32)
print(x.shape, y.shape)
# Sanity check: print the value range of the images and of the labels.
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))

# Split the dataset into batches of 128 samples.
train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
# Pull a single batch only to inspect its shapes.
train_iter = iter(train_db)
sample = next(train_iter)
print(sample[0].shape, sample[1].shape)

# Learning rate
lr = 0.1
# Three-layer network: [b, 784] => [b, 256] => [b, 128] => [b, 10]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
# Single source of truth for the trainable parameters: the same list is
# handed to tape.gradient() and walked for the update, so order always matches.
variables = [w1, b1, w2, b2, w3, b3]

for epoch in range(10):
    print("[==================Epoch ", epoch, "========================]")
    # NOTE: the loop targets rebind the module-level names x and y, so after
    # the loop they refer to the last batch rather than the full dataset.
    for step, (x, y) in enumerate(train_db):
        # Flatten each 28x28 image into a 784-element vector.
        x = tf.reshape(x, [-1, 28*28])
        # One-hot encode the labels.
        y_onehot = tf.one_hot(y, depth=10)

        with tf.GradientTape() as tape:
            # Layer 1: x@w + b: [batch, 784] @ [784, 256] + [256] => [batch, 256]
            h1 = x@w1 + b1
            h1 = tf.nn.relu(h1)
            # Layer 2: [batch, 256] => [batch, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # Output layer: [batch, 128] => [batch, 10] (no activation)
            out = h2@w3 + b3
            # MSE loss: mean over every element of (y_onehot - out)^2
            loss = tf.reduce_mean(tf.square(y_onehot - out))

        # Compute the gradient of the loss w.r.t. every parameter.
        grads = tape.gradient(loss, variables)
        # Plain gradient-descent step: p = p - lr * p_grad
        for variable, grad in zip(variables, grads):
            variable.assign_sub(lr * grad)

        if (step % 100 == 0):
            print("Batch:", step, "loss:", float(loss))
运行结果: