From 201105e2089bd2f4df1b7e3cdbbdd78dbb58532c Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 27 Dec 2017 13:57:14 +0800 Subject: [PATCH] Add stacked dynamic lstm model for tf. --- tensorflow/stacked_dynamic_lstm.py | 205 +++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 tensorflow/stacked_dynamic_lstm.py diff --git a/tensorflow/stacked_dynamic_lstm.py b/tensorflow/stacked_dynamic_lstm.py new file mode 100644 index 0000000..e1b3e74 --- /dev/null +++ b/tensorflow/stacked_dynamic_lstm.py @@ -0,0 +1,205 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import argparse +import time +import tensorflow as tf + +import paddle.v2 as paddle + + +def parse_args(): + parser = argparse.ArgumentParser("LSTM model benchmark.") + parser.add_argument( + '--batch_size', + type=int, + default=32, + help='The sequence number of a batch data. (default: %(default)d)') + parser.add_argument( + '--stacked_num', + type=int, + default=5, + help='Number of lstm layers to stack. (default: %(default)d)') + parser.add_argument( + '--embedding_dim', + type=int, + default=512, + help='Dimension of embedding table. (default: %(default)d)') + parser.add_argument( + '--hidden_dim', + type=int, + default=512, + help='Hidden size of lstm unit. (default: %(default)d)') + parser.add_argument( + '--pass_num', + type=int, + default=10, + help='Epoch number to train. (default: %(default)d)') + parser.add_argument( + '--learning_rate', + type=float, + default=0.0002, + help='Learning rate used to train. (default: %(default)f)') + parser.add_argument( + '--infer_only', action='store_true', help='If set, run forward only.') + args = parser.parse_args() + return args + + +def print_arguments(args): + print('----------- Configuration Arguments -----------') + for arg, value in sorted(vars(args).iteritems()): + print('%s: %s' % (arg, value)) + print('------------------------------------------------') + + +def dynamic_lstm_model(dict_size, + embedding_dim, + hidden_dim, + stacked_num, + class_num=2, + is_train=True): + word_idx = tf.placeholder(tf.int64, shape=[None, None]) + sequence_length = tf.placeholder(tf.int64, shape=[None, ]) + + embedding_weights = tf.get_variable('word_embeddings', + [dict_size, embedding_dim]) + embedding = tf.nn.embedding_lookup(embedding_weights, word_idx) + + lstm_cell = tf.nn.rnn_cell.LSTMCell( + num_units=hidden_dim, use_peepholes=False) + stacked_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * stacked_num) + + # final_state [LSTMTuple(c, h), LSTMTuple(c, h) ...] total stacked_num LSTMTuples + _, final_state = tf.nn.dynamic_rnn( + cell=stacked_cell, + inputs=embedding, + dtype=tf.float32, + sequence_length=sequence_length) + + w = tf.Variable( + tf.truncated_normal([hidden_dim, class_num]), dtype=tf.float32) + bias = tf.Variable( + tf.constant( + value=0.0, shape=[class_num], dtype=tf.float32)) + prediction = tf.matmul(final_state[-1][1], w) + bias + + if not is_train: + return (word_idx, sequence_length), tf.nn.softmax(prediction) + + label = tf.placeholder(tf.int64, shape=[None, ]) + loss = tf.nn.softmax_cross_entropy_with_logits( + labels=tf.one_hot(label, 2), logits=prediction) + avg_loss = tf.reduce_mean(loss) + + correct_count = tf.equal(tf.argmax(prediction, 1), label) + acc = tf.reduce_mean(tf.cast(correct_count, tf.float32)) + + with tf.variable_scope("reset_metrics_accuracy_scope") as scope: + g_acc = tf.metrics.accuracy(label, tf.argmax(prediction, axis=1)) + vars = tf.contrib.framework.get_variables( + scope, collection=tf.GraphKeys.LOCAL_VARIABLES) + reset_op = tf.variables_initializer(vars) + + return (word_idx, sequence_length, label), avg_loss, acc, g_acc, reset_op + + +def padding_data(data, padding_size, value): + data = data + [value] * padding_size + return data[:padding_size] + + +def train(args): + word_dict = paddle.dataset.imdb.word_dict() + dict_size = len(word_dict) + + feeding_list, avg_loss, acc, g_acc, reset_op = dynamic_lstm_model( + dict_size, args.embedding_dim, args.hidden_dim, args.stacked_num) + + adam_optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate) + train_op = adam_optimizer.minimize(avg_loss) + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=args.batch_size) + + test_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.test(word_dict), buf_size=25000), + batch_size=args.batch_size) + + def do_validation(sess): + sess.run(reset_op) + for batch_id, data in enumerate(test_reader()): + word_idx = map(lambda x: x[0], data) + sequence_length = np.array( + [len(seq) for seq in word_idx]).astype('int64') + maxlen = np.max(sequence_length) + word_idx = [padding_data(seq, maxlen, 0) for seq in word_idx] + word_idx = np.array(word_idx).astype('int64') + label = np.array(map(lambda x: x[1], data)).astype('int64') + + _, loss, fetch_acc, fetch_g_acc = sess.run( + [train_op, avg_loss, acc, g_acc], + feed_dict={ + feeding_list[0]: word_idx, + feeding_list[1]: sequence_length, + feeding_list[2]: label + }) + + return fetch_g_acc[1] + + config = tf.ConfigProto( + intra_op_parallelism_threads=1, inter_op_parallelism_threads=1) + with tf.Session(config=config) as sess: + init_g = tf.global_variables_initializer() + init_l = tf.local_variables_initializer() + sess.run(init_l) + sess.run(init_g) + + for pass_id in xrange(args.pass_num): + # clear accuracy local variable + sess.run(reset_op) + pass_start_time = time.time() + words_seen = 0 + + for batch_id, data in enumerate(train_reader()): + word_idx = map(lambda x: x[0], data) + sequence_length = np.array( + [len(seq) for seq in word_idx]).astype('int64') + words_seen += np.sum(sequence_length) + maxlen = np.max(sequence_length) + word_idx = [padding_data(seq, maxlen, 0) for seq in word_idx] + word_idx = np.array(word_idx).astype('int64') + label = np.array(map(lambda x: x[1], data)).astype('int64') + + _, loss, fetch_acc, fetch_g_acc = sess.run( + [train_op, avg_loss, acc, g_acc], + feed_dict={ + feeding_list[0]: word_idx, + feeding_list[1]: sequence_length, + feeding_list[2]: label + }) + + print("pass_id=%d, batch_id=%d, loss: %f, acc: %f, avg_acc: %f" + % (pass_id, batch_id, loss, fetch_acc, fetch_g_acc[1])) + + pass_end_time = time.time() + time_consumed = pass_end_time - pass_start_time + words_per_sec = words_seen / time_consumed + test_acc = do_validation(sess) + print("pass_id=%d, test_acc: %f, words/s: %f, sec/pass: %f" % + (pass_id, test_acc, words_per_sec, time_consumed)) + + +if __name__ == '__main__': + args = parse_args() + print_arguments(args) + + if args.infer_only: + pass + else: + train(args)