Commit

Refine the script.
pkuyym committed Jan 16, 2018
1 parent ba2ea17 commit 5b51504
Showing 1 changed file with 25 additions and 32 deletions.
57 changes: 25 additions & 32 deletions fluid/machine_translation.py
@@ -17,7 +17,7 @@
 
 parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument(
-    "--word_vector_dim",
+    "--embedding_dim",
     type=int,
     default=512,
     help="The dimension of embedding table. (default: %(default)d)")
@@ -35,15 +35,15 @@
     "--batch_size",
     type=int,
     default=16,
-    help="The sequence number of a batch data. (default: %(default)d)")
+    help="The sequence number of a mini-batch data. (default: %(default)d)")
 parser.add_argument(
     "--dict_size",
     type=int,
     default=30000,
     help="The dictionary capacity. Dictionaries of source sequence and "
     "target dictionary have same capacity. (default: %(default)d)")
 parser.add_argument(
-    "--pass_number",
+    "--pass_num",
     type=int,
     default=2,
     help="The pass number to train. (default: %(default)d)")
@@ -53,11 +53,7 @@
     default=0.0002,
     help="Learning rate used to train the model. (default: %(default)f)")
 parser.add_argument(
-    "--mode",
-    type=str,
-    default='train',
-    choices=['train', 'infer'],
-    help="Do training or inference. (default: %(default)s)")
+    "--infer_only", action='store_true', help="If set, run forward only.")
 parser.add_argument(
     "--beam_size",
     type=int,
@@ -67,12 +63,12 @@
     "--use_gpu",
     type=distutils.util.strtobool,
     default=True,
-    help="Whether use gpu. (default: %(default)d)")
+    help="Whether to use gpu. (default: %(default)d)")
 parser.add_argument(
     "--max_length",
     type=int,
     default=250,
-    help="The max length of sequence when doing generation. "
+    help="The maximum length of sequence when doing generation. "
     "(default: %(default)d)")
 
 
@@ -97,40 +93,37 @@ def linear(inputs):
     return hidden_t, cell_t
 
 
-def seq_to_seq_net(word_vector_dim,
-                   encoder_size,
-                   decoder_size,
-                   source_dict_dim,
-                   target_dict_dim,
-                   is_generating=False,
-                   beam_size=3,
-                   max_length=250):
+def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
+                   target_dict_dim, is_generating, beam_size, max_length):
     """Construct a seq2seq network."""
     feeding_list = ["source_sequence", "target_sequence", "label_sequence"]
 
-    def bi_lstm_encoder(input_seq, size):
+    def bi_lstm_encoder(input_seq, gate_size):
+        # Linear transformation part for input gate, output gate, forget gate
+        # and cell activation vectors need be done outside of dynamic_lstm.
+        # So the output size is 4 times of gate_size.
         input_forward_proj = fluid.layers.fc(input=input_seq,
-                                             size=size * 4,
+                                             size=gate_size * 4,
                                              act='tanh')
         forward, _ = fluid.layers.dynamic_lstm(
-            input=input_forward_proj, size=size * 4)
+            input=input_forward_proj, size=gate_size * 4)
         input_reversed_proj = fluid.layers.fc(input=input_seq,
-                                              size=size * 4,
+                                              size=gate_size * 4,
                                               act='tanh')
         reversed, _ = fluid.layers.dynamic_lstm(
-            input=input_reversed_proj, size=size * 4, is_reverse=True)
+            input=input_reversed_proj, size=gate_size * 4, is_reverse=True)
         return forward, reversed
 
     src_word_idx = fluid.layers.data(
         name=feeding_list[0], shape=[1], dtype='int64', lod_level=1)
 
     src_embedding = fluid.layers.embedding(
         input=src_word_idx,
-        size=[source_dict_dim, word_vector_dim],
+        size=[source_dict_dim, embedding_dim],
         dtype='float32')
 
     src_forward, src_reversed = bi_lstm_encoder(
-        input_seq=src_embedding, size=encoder_size)
+        input_seq=src_embedding, gate_size=encoder_size)
 
     encoded_vector = fluid.layers.concat(
         input=[src_forward, src_reversed], axis=1)
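
Note on the comment added in this hunk: fluid.layers.dynamic_lstm expects its input to already contain the joint linear projection for the input, forget, and output gates plus the cell candidate, which is why the fc layer in front of it outputs gate_size * 4 units. A minimal standalone sketch of that wiring, using only the layer calls that appear in the diff (the sizes 30000 and 512 are the --dict_size and --embedding_dim defaults shown above; gate_size is an illustrative value, and the import path may be paddle.v2.fluid on the PaddlePaddle release this commit targets):

    import paddle.fluid as fluid  # older releases: import paddle.v2.fluid as fluid

    gate_size = 512  # illustrative value, not taken from the diff

    word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
    emb = fluid.layers.embedding(input=word, size=[30000, 512], dtype='float32')

    # Project to 4 * gate_size so dynamic_lstm can slice out the three gate
    # activations and the cell candidate from a single fc output.
    proj = fluid.layers.fc(input=emb, size=gate_size * 4, act='tanh')
    forward, _ = fluid.layers.dynamic_lstm(input=proj, size=gate_size * 4)
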
@@ -158,7 +151,7 @@ def simple_attention(encoder_vec, encoder_proj, decoder_state):
                 input=[decoder_state_expand, encoder_proj], axis=1)
             attention_weights = fluid.layers.fc(input=concated,
                                                 size=1,
-                                                bias_attr=False)
+                                                act='tanh')
             attention_weights = fluid.layers.sequence_softmax(
                 x=attention_weights)
             weigths_reshape = fluid.layers.reshape(
@@ -202,7 +195,7 @@ def simple_attention(encoder_vec, encoder_proj, decoder_state):
 
     trg_embedding = fluid.layers.embedding(
         input=trg_word_idx,
-        size=[target_dict_dim, word_vector_dim],
+        size=[target_dict_dim, embedding_dim],
         dtype='float32')
 
     prediction = lstm_decoder_with_attention(trg_embedding, encoded_vector,
@@ -242,7 +235,7 @@ def lodtensor_to_ndarray(lod_tensor):
 
 def train():
     avg_cost, feeding_list = seq_to_seq_net(
-        args.word_vector_dim,
+        args.embedding_dim,
         args.encoder_size,
         args.decoder_size,
         args.dict_size,
@@ -290,7 +283,7 @@ def do_validation():
 
         return total_loss / count
 
-    for pass_id in xrange(args.pass_number):
+    for pass_id in xrange(args.pass_num):
         pass_start_time = time.time()
         words_seen = 0
         for batch_id, data in enumerate(train_batch_generator()):
@@ -323,7 +316,7 @@ def infer():
 
 if __name__ == '__main__':
     args = parser.parse_args()
-    if args.mode == 'train':
-        train()
-    else:
+    if args.infer_only:
         infer()
+    else:
+        train()
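
Side note on the --infer_only change (the argparse hunk earlier and the __main__ hunk above): the string-valued --mode option is replaced by a boolean switch, so a bare invocation now trains and generation only runs when the flag is passed. A small self-contained sketch of that argparse pattern (illustrative only, not part of the commit):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--infer_only", action='store_true', help="If set, run forward only.")

    # action='store_true' yields a bool defaulting to False, so the script
    # falls through to train() unless the flag is given.
    print(parser.parse_args([]).infer_only)                # False -> train()
    print(parser.parse_args(["--infer_only"]).infer_only)  # True  -> infer()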
