The difference between static_rnn and dynamic_rnn is mainly one of implementation.
- static_rnn unrolls the RNN in full, trading space for time; the GPU can struggle with this (based on my own testing).
- dynamic_rnn instead uses a for/while-style loop.
Calling static_rnn actually builds the graph of the RNN unrolled along the time axis. If you open TensorBoard you will see sequence_length rnn_cells stacked together, except that these cells share weights. sequence_length is therefore tied to the graph topology, which in turn forces every batch to use the same sequence_length.
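To make this concrete: static_rnn takes a Python list with one tensor per time step, so the step count is literally part of the graph. A minimal sketch (the cell size and step count here are my own choices, not from the tutorial):

```python
import tensorflow as tf

tf.reset_default_graph()
cell = tf.nn.rnn_cell.BasicLSTMCell(64)
# a list of 28 tensors, each [batch, features] -> 28 weight-sharing cells
# stacked in the graph, one per time step
inputs = [tf.placeholder(tf.float32, [None, 28]) for _ in range(28)]
outputs, state = tf.nn.static_rnn(cell, inputs, dtype=tf.float32)
print(len(outputs))  # 28: one output tensor per unrolled step
```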
Calling dynamic_rnn does not unroll the RNN. Instead, it uses the tf.while_loop API to build a graph that can execute a loop, out of control-flow nodes such as Enter, Switch, Merge, LoopCondition, and NextIteration (this is still a static graph, since its topology does not change during execution). In TensorBoard you will see only a single rnn_cell, surrounded by a cluster of control-flow nodes. For dynamic_rnn, sequence_length is merely the number of loop iterations and has nothing to do with the graph topology itself, so each batch can have a different sequence_length.
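The loop itself is easy to reproduce in isolation. Below is a minimal sketch of the tf.while_loop mechanism that dynamic_rnn builds on (illustrative only; the real implementation additionally threads the cell state and inputs through TensorArrays). The loop count is fed at run time, so one and the same graph handles different lengths:

```python
import tensorflow as tf

tf.reset_default_graph()
seq_len = tf.placeholder(tf.int32, [])  # loop count, supplied per run
i0 = tf.constant(0)
acc0 = tf.zeros([1, 4])  # stand-in for the RNN state

def cond(i, acc):
    return i < seq_len  # becomes the LoopCondition node in the graph

def body(i, acc):
    return i + 1, acc + 1.0  # one "time step"; wired up via NextIteration

_, final = tf.while_loop(cond, body, [i0, acc0])

with tf.Session() as sess:
    print(sess.run(final, feed_dict={seq_len: 5}))   # 5 iterations
    print(sess.run(final, feed_dict={seq_len: 28}))  # same graph, 28 iterations
```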
static_rnn
Imports, loading the data, defining variables
```python
import tensorflow as tf
tf.reset_default_graph()  # reset the default graph so re-running doesn't clash on variable names
import datetime  # for timestamped logging
import os        # for building the save path
from tensorflow.examples.tutorials.mnist import input_data  # MNIST dataset

mnist = input_data.read_data_sets('../', one_hot=True)

# train on 100 samples per step
batch_size = 100
# image dimensions
width = 28
height = 28
# number of LSTM hidden units
rnn_size = 256
# length of the one-hot output vector
out_size = 10
```
Declaring the variables
```python
def weight_variable(shape, w_alpha=0.01):
    initial = w_alpha * tf.random_normal(shape)
    return tf.Variable(initial)

def bias_variable(shape, b_alpha=0.1):
    initial = b_alpha * tf.random_normal(shape)
    return tf.Variable(initial)

# weights and bias of the output layer
w = weight_variable([rnn_size, out_size])
b = bias_variable([out_size])
```
Reshaping the data into the format the RNN expects
```python
# placeholder matching the image shape
X = tf.placeholder(tf.float32, [None, height, width])
# transpose with perm [1, 0, 2] swaps the first two dimensions,
# e.g. shape (1, 2, 3) becomes (2, 1, 3); here it groups the same row
# index across all images together (row 1 of x1 next to row 1 of x2, ...)
x = tf.transpose(X, [1, 0, 2])
# reshape with -1 for the inferred dimension; each image row becomes one row
x = tf.reshape(x, [-1, width])
# split defaults to axis 0; split into `height` pieces, i.e. one
# [batch_size, width] tensor per time step
x = tf.split(x, height)
```
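To see what these three transforms do, here is the same pipeline traced with NumPy (illustration only, using the shapes from this tutorial):

```python
import numpy as np

a = np.zeros((100, 28, 28))        # [batch_size, height, width]
a = a.transpose(1, 0, 2)           # -> (28, 100, 28): rows grouped across images
a = a.reshape(-1, 28)              # -> (2800, 28)
parts = np.split(a, 28)            # 28 arrays, one [batch_size, width] slice per time step
print(parts[0].shape, len(parts))  # (100, 28) 28
```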
Building the static RNN
```python
# LSTM cell
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
# static_rnn produces one output per time step; we only need the last one
outputs, status = tf.nn.static_rnn(lstm_cell, x, dtype=tf.float32)
# multiply the last output with the output-layer weights
y_conv = tf.add(tf.matmul(outputs[-1], w), b)

# minimize the loss
Y = tf.placeholder(dtype=tf.float32, shape=[None, 10])
# cross-entropy loss
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_conv, labels=Y))
optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)

# compute the accuracy
correct = tf.equal(tf.argmax(y_conv, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
```
Training the model
```python
# start a session and begin training
saver = tf.train.Saver()
session = tf.Session()
session.run(tf.global_variables_initializer())
step = 0
acc_rate = 0.90
while 1:
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    batch_x = batch_x.reshape((batch_size, height, width))
    session.run(optimizer, feed_dict={X: batch_x, Y: batch_y})
    # evaluate every 10 training steps
    if step % 10 == 0:
        batch_x_test = mnist.test.images
        batch_y_test = mnist.test.labels
        batch_x_test = batch_x_test.reshape([-1, height, width])
        acc = session.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test})
        print(datetime.datetime.now().strftime('%c'), ' step:', step, ' accuracy:', acc)
        # accuracy meets the target, save the model
        if acc >= acc_rate:
            # os.sep is the platform path separator, e.g. '/'
            model_path = os.getcwd() + os.sep + str(acc_rate) + "mnist.model"
            saver.save(session, model_path, global_step=step)
            break
    step += 1
session.close()
```
```
Wed Dec 18 10:08:45 2019  step: 0  accuracy: 0.1006
Wed Dec 18 10:08:46 2019  step: 10  accuracy: 0.1009
Wed Dec 18 10:08:46 2019  step: 20  accuracy: 0.1028
...
Wed Dec 18 10:08:57 2019  step: 190  accuracy: 0.9164
```
dynamic_rnn
Loading the data, declaring variables
```python
import tensorflow as tf
tf.reset_default_graph()
from tensorflow.examples.tutorials.mnist import input_data

# load the data
mnist = input_data.read_data_sets("../", one_hot=True)

# input images are 28x28
n_input = 28      # features per time step (one image row)
max_time = 28     # number of time steps (image rows)
lstm_size = 100   # hidden units, tunable
n_class = 10      # 10 classes
batch_size = 100  # 100 samples per step, tunable
n_batch_size = mnist.train.num_examples // batch_size  # total number of batches
```
```
Extracting ../train-images-idx3-ubyte.gz
Extracting ../train-labels-idx1-ubyte.gz
Extracting ../t10k-images-idx3-ubyte.gz
Extracting ../t10k-labels-idx1-ubyte.gz
```
Placeholders and weights
```python
# None means the first dimension can be of any length
# create the placeholders
x = tf.placeholder(tf.float32, [None, 28*28])
# ground-truth labels
y = tf.placeholder(tf.float32, [None, 10])
# initialize the weights; stddev is the standard deviation
weight = tf.Variable(tf.truncated_normal([lstm_size, n_class], stddev=0.1))
# initialize the biases
biases = tf.Variable(tf.constant(0.1, shape=[n_class]))
```
Building the dynamic RNN, loss function, and accuracy
```python
# define the RNN network
def RNN(X, weights, biases):
    # raw data is [batch_size, 28*28]
    # reshape to [batch_size, max_time, n_input]
    input_ = tf.reshape(X, [-1, max_time, n_input])
    # define the basic LSTM cell
    # lstm_cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
    # final_state[0] is the cell state
    # final_state[1] is the hidden state
    outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, input_, dtype=tf.float32)
    display(final_state)  # display() works in a Jupyter notebook; use print() in a script
    results = tf.nn.softmax(tf.matmul(final_state[1], weights) + biases)
    return results

# compute the RNN's output
prediction = RNN(x, weight, biases)
# cross-entropy loss
# (note: softmax_cross_entropy_with_logits expects raw logits, so the softmax
# inside RNN() is strictly redundant here; argmax-based accuracy is unaffected)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
# optimize with AdamOptimizer
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
# record whether each prediction is correct
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))
# compute the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
```
```
LSTMStateTuple(c=
```
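final_state is an LSTMStateTuple(c, h), so final_state[1] is the hidden state h. When every sequence runs the full max_time steps, h equals the output at the last time step, which a quick standalone check confirms (my own sketch, not part of the tutorial):

```python
import numpy as np
import tensorflow as tf

tf.reset_default_graph()
inp = tf.placeholder(tf.float32, [None, 28, 28])
cell = tf.nn.rnn_cell.BasicLSTMCell(100)
outputs, final_state = tf.nn.dynamic_rnn(cell, inp, dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.rand(4, 28, 28).astype(np.float32)
    last_out, h = sess.run([outputs[:, -1, :], final_state.h], feed_dict={inp: batch})
    print(np.allclose(last_out, h))  # True
```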
Training
```python
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(6):
        for batch in range(n_batch_size):
            # fetch the next batch
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys})
            if batch % 100 == 0:
                print(str(batch) + "/" + str(n_batch_size))
        acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels})
        print("Iter" + str(epoch) + " ,Testing Accuracy = " + str(acc))
        if acc > 0.9:
            saver.save(sess, './rnn_dynamic')
            break
```
```
0/550
100/550
200/550
300/550
400/550
500/550
Iter0 ,Testing Accuracy = 0.5903
...
Iter5 ,Testing Accuracy = 0.9103
```
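For completeness: the checkpoint saved above can be loaded back later. A minimal restore sketch, assuming the graph from the cells above has already been rebuilt in the current process:

```python
saver = tf.train.Saver()
with tf.Session() as sess:
    # restore variables from the './rnn_dynamic' checkpoint written by the training loop
    saver.restore(sess, './rnn_dynamic')
    acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels})
    print("restored accuracy:", acc)
```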