From 520c52733b6c967add52d0a55f5adb6ce72db945 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 20 Oct 2016 09:33:28 +0800
Subject: [PATCH 01/14] Added demo for stacked LSTM in quick_start

---
 demo/quick_start/train_config.dblstm.py | 71 +++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 demo/quick_start/train_config.dblstm.py

diff --git a/demo/quick_start/train_config.dblstm.py b/demo/quick_start/train_config.dblstm.py
new file mode 100644
index 0000000000000..93bf512c5e474
--- /dev/null
+++ b/demo/quick_start/train_config.dblstm.py
@@ -0,0 +1,71 @@
+# edit-mode: -*- python -*-
+
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+dict_file = "./data/dict.txt"
+word_dict = dict()
+with open(dict_file, 'r') as f:
+    for i, line in enumerate(f):
+        w = line.strip().split()[0]
+        word_dict[w] = i
+
+is_predict = get_config_arg('is_predict', bool, False)
+trn = 'data/train.list' if not is_predict else None
+tst = 'data/test.list' if not is_predict else 'data/pred.list'
+process = 'process' if not is_predict else 'process_predict'
+define_py_data_sources2(train_list=trn,
+                        test_list=tst,
+                        module="dataprovider_emb",
+                        obj=process,
+                        args={"dictionary": word_dict})
+
+batch_size = 128 if not is_predict else 1
+settings(
+    batch_size=batch_size,
+    learning_rate=2e-3,
+    learning_method=AdamOptimizer(),
+    regularization=L2Regularization(8e-4),
+    gradient_clipping_threshold=25
+)
+
+bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+
+data = data_layer(name="word", size=len(word_dict))
+emb = embedding_layer(input=data, size=128)
+
+hidden_0 = mixed_layer(size=128, input=[full_matrix_projection(input=emb)])
+lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
+
+input_layers = [hidden_0, lstm_0]
+for i in range(1,8):
+    fc = fc_layer(input=input_layers, size=128)
+    lstm = lstmemory(input=fc, layer_attr=ExtraAttr(drop_rate=0.1))
+    input_layers = [fc, lstm]
+
+lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
+output = fc_layer(input=lstm_last, size=2,
+                  bias_attr=bias_attr,
+                  act=SoftmaxActivation())
+
+
+if is_predict:
+    maxid = maxid_layer(output)
+    outputs([maxid, output])
+else:
+    label = data_layer(name="label", size=2)
+    cls = classification_cost(input=output, label=label)
+    outputs(cls)
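
Note: the config above stacks LSTMs by looping. At each depth the new fc_layer takes both the previous fc and the previous lstm as input, and the new lstmemory reads that fc, so every level sees the full output of the level below before the top lstm is max-pooled. A plain-Python sketch of the wiring (tuples of names stand in for Paddle layers; the names are illustrative, not Paddle APIs):

    # Sketch of the stacking pattern from the config above. Each entry is
    # (layer_name, input_names); real code calls fc_layer()/lstmemory().
    def stack_plan(depth=8):
        plan = [("hidden_0", ["emb"]), ("lstm_0", ["hidden_0"])]
        inputs = ["hidden_0", "lstm_0"]
        for i in range(1, depth):
            fc, lstm = "fc_%d" % i, "lstm_%d" % i
            plan.append((fc, list(inputs)))   # fc_layer(input=input_layers, ...)
            plan.append((lstm, [fc]))         # lstmemory(input=fc, ...)
            inputs = [fc, lstm]               # fed to the next level
        return plan

    for name, srcs in stack_plan():
        print(name, "<-", ", ".join(srcs))

Running it prints the sixteen-layer plan (hidden_0/lstm_0 plus seven fc/lstm pairs), which is exactly the graph the loop in the config builds.
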
From 1a556152fe042547f16db826a38626964d49fb3a Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 20 Oct 2016 09:47:36 +0800
Subject: [PATCH 02/14] Rename train_config.dblstm.py to trainer_config.dblstm.py

---
 .../{train_config.dblstm.py => trainer_config.dblstm.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename demo/quick_start/{train_config.dblstm.py => trainer_config.dblstm.py} (100%)

diff --git a/demo/quick_start/train_config.dblstm.py b/demo/quick_start/trainer_config.dblstm.py
similarity index 100%
rename from demo/quick_start/train_config.dblstm.py
rename to demo/quick_start/trainer_config.dblstm.py

From 4092ae33b8dd32cc6a96449c7147cd7162c177a3 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 20 Oct 2016 09:48:09 +0800
Subject: [PATCH 03/14] Create trainer_config.bidilstm.py

---
 demo/quick_start/trainer_config.bidilstm.py | 66 +++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 demo/quick_start/trainer_config.bidilstm.py

diff --git a/demo/quick_start/trainer_config.bidilstm.py b/demo/quick_start/trainer_config.bidilstm.py
new file mode 100644
index 0000000000000..c405452428eb3
--- /dev/null
+++ b/demo/quick_start/trainer_config.bidilstm.py
@@ -0,0 +1,66 @@
+# edit-mode: -*- python -*-
+
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from paddle.trainer_config_helpers import *
+
+dict_file = "./data/dict.txt"
+word_dict = dict()
+with open(dict_file, 'r') as f:
+    for i, line in enumerate(f):
+        w = line.strip().split()[0]
+        word_dict[w] = i
+
+is_predict = get_config_arg('is_predict', bool, False)
+trn = 'data/train.list' if not is_predict else None
+tst = 'data/test.list' if not is_predict else 'data/pred.list'
+process = 'process' if not is_predict else 'process_predict'
+define_py_data_sources2(train_list=trn,
+                        test_list=tst,
+                        module="dataprovider_emb",
+                        obj=process,
+                        args={"dictionary": word_dict})
+
+batch_size = 128 if not is_predict else 1
+settings(
+    batch_size=batch_size,
+    learning_rate=2e-3,
+    learning_method=AdamOptimizer(),
+    regularization=L2Regularization(8e-4),
+    gradient_clipping_threshold=25
+)
+
+
+bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+
+data = data_layer(name="word", size=len(word_dict))
+emb = embedding_layer(input=data, size=128)
+
+bi_lstm = bidirectional_lstm(input=emb, size=128)
+dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
+
+output = fc_layer(input=dropout, size=2,
+                  bias_attr=bias_attr,
+                  act=SoftmaxActivation())
+
+
+if is_predict:
+    maxid = maxid_layer(output)
+    outputs([maxid, output])
+else:
+    label = data_layer(name="label", size=2)
+    cls = classification_cost(input=output, label=label)
+    outputs(cls)
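
Note: bidirectional_lstm in the new config runs one LSTM over the sequence in each direction and concatenates the two results, so the classifier sees both left and right context (256 wide for size=128). A rough sketch of what the helper expands to, assuming the v1 convention that lstmemory reads a 4*size linear projection — an illustration of the idea, not the helper's actual implementation, which also handles naming and output selection:

    # Hand-rolled stand-in for bidirectional_lstm(input=emb, size=128).
    fwd_proj = fc_layer(input=emb, size=128 * 4, act=LinearActivation())
    fwd = lstmemory(input=fwd_proj)                 # left-to-right pass
    bwd_proj = fc_layer(input=emb, size=128 * 4, act=LinearActivation())
    bwd = lstmemory(input=bwd_proj, reverse=True)   # right-to-left pass
    bi_lstm = concat_layer(input=[fwd, bwd])        # 2 x 128 = 256 wide
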
From b7393038d0306f3ca8c3c26aa1d5d50bd93d50e5 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 20 Oct 2016 10:13:00 +0800
Subject: [PATCH 04/14] Delete trainer_config.dblstm.py

---
 demo/quick_start/trainer_config.dblstm.py | 71 -----------------------
 1 file changed, 71 deletions(-)
 delete mode 100644 demo/quick_start/trainer_config.dblstm.py

diff --git a/demo/quick_start/trainer_config.dblstm.py b/demo/quick_start/trainer_config.dblstm.py
deleted file mode 100644
index 93bf512c5e474..0000000000000
--- a/demo/quick_start/trainer_config.dblstm.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
-                        test_list=tst,
-                        module="dataprovider_emb",
-                        obj=process,
-                        args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25
-)
-
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
-
-data = data_layer(name="word", size=len(word_dict))
-emb = embedding_layer(input=data, size=128)
-
-hidden_0 = mixed_layer(size=128, input=[full_matrix_projection(input=emb)])
-lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
-
-input_layers = [hidden_0, lstm_0]
-for i in range(1,8):
-    fc = fc_layer(input=input_layers, size=128)
-    lstm = lstmemory(input=fc, layer_attr=ExtraAttr(drop_rate=0.1))
-    input_layers = [fc, lstm]
-
-lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_last, size=2,
-                  bias_attr=bias_attr,
-                  act=SoftmaxActivation())
-
-
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
From e71fd84452521c0559aea801a199c1da243e60fc Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 20 Oct 2016 10:13:17 +0800
Subject: [PATCH 05/14] Delete trainer_config.bidilstm.py

---
 demo/quick_start/trainer_config.bidilstm.py | 66 ---------------------
 1 file changed, 66 deletions(-)
 delete mode 100644 demo/quick_start/trainer_config.bidilstm.py

diff --git a/demo/quick_start/trainer_config.bidilstm.py b/demo/quick_start/trainer_config.bidilstm.py
deleted file mode 100644
index c405452428eb3..0000000000000
--- a/demo/quick_start/trainer_config.bidilstm.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
-                        test_list=tst,
-                        module="dataprovider_emb",
-                        obj=process,
-                        args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25
-)
-
-
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
-
-data = data_layer(name="word", size=len(word_dict))
-emb = embedding_layer(input=data, size=128)
-
-bi_lstm = bidirectional_lstm(input=emb, size=128)
-dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
-
-output = fc_layer(input=dropout, size=2,
-                  bias_attr=bias_attr,
-                  act=SoftmaxActivation())
-
-
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
From 3be9054bd8ea3e9ada42ab49938d08699ebba653 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 20 Oct 2016 10:30:31 +0800
Subject: [PATCH 06/14] Added zoo to LSTM quick_start examples

---
 demo/quick_start/trainer_config.lstm.py | 60 ++++++++++++++++++++-----
 1 file changed, 48 insertions(+), 12 deletions(-)

diff --git a/demo/quick_start/trainer_config.lstm.py b/demo/quick_start/trainer_config.lstm.py
index ec8a2cb00abd1..2855f563ef750 100644
--- a/demo/quick_start/trainer_config.lstm.py
+++ b/demo/quick_start/trainer_config.lstm.py
@@ -24,6 +24,7 @@
         word_dict[w] = i
 
 is_predict = get_config_arg('is_predict', bool, False)
+network_type = get_config_arg('network_type', str, 'lstm')
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
@@ -42,21 +43,56 @@
     gradient_clipping_threshold=25
 )
 
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+def lstm_architecture(bias_attr, data, emb):
+    fc = fc_layer(input=emb, size=512,
+                  act=LinearActivation(),
+                  bias_attr=bias_attr,
+                  layer_attr=ExtraAttr(drop_rate=0.1))
+    lstm = lstmemory(input=fc, act=TanhActivation(),
+                     bias_attr=bias_attr,
+                     layer_attr=ExtraAttr(drop_rate=0.25))
+    lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
+    output = fc_layer(input=lstm_last, size=2,
+                      bias_attr=bias_attr,
+                      act=SoftmaxActivation())
+    return output
+
+def dblstm_architecture(bias_attr, data, emb):
+    hidden_0 = mixed_layer(size=128, input=[full_matrix_projection(input=emb)])
+    lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
+
+    input_layers = [hidden_0, lstm_0]
+    for i in range(1,8):
+        fc = fc_layer(input=input_layers, size=128)
+        lstm = lstmemory(input=fc, layer_attr=ExtraAttr(drop_rate=0.1),
+                         reverse=(i % 2) == 1,)
+        input_layers = [fc, lstm]
+
+    lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
+    output = fc_layer(input=lstm_last, size=2,
+                      bias_attr=bias_attr,
+                      act=SoftmaxActivation())
+    return output
+
+def bidilstm_architecture(bias_attr, data, emb):
+    bi_lstm = bidirectional_lstm(input=emb, size=128)
+    dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
+    output = fc_layer(input=dropout, size=2,
+                      bias_attr=bias_attr,
+                      act=SoftmaxActivation())
+    return output
+
+
+bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+
 data = data_layer(name="word", size=len(word_dict))
 emb = embedding_layer(input=data, size=128)
-fc = fc_layer(input=emb, size=512,
-              act=LinearActivation(),
-              bias_attr=bias_attr,
-              layer_attr=ExtraAttr(drop_rate=0.1))
-lstm = lstmemory(input=fc, act=TanhActivation(),
-                 bias_attr=bias_attr,
-                 layer_attr=ExtraAttr(drop_rate=0.25))
-lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_last, size=2,
-                  bias_attr=bias_attr,
-                  act=SoftmaxActivation())
+lstm_architectures = {'lstm': lstm_architecture,
+                      'bidi-lstm': bidilstm_architecture,
+                      'db-lstm': dblstm_architecture}
+network = lstm_architectures[network_type]
+output = network(bias_attr, data, emb)
+
 if is_predict:
     maxid = maxid_layer(output)
     outputs([maxid, output])
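
Note: with this change one config file covers all three networks. get_config_arg('network_type', str, 'lstm') picks up a value passed on the command line, e.g. paddle train --config=trainer_config.lstm.py --config_args=network_type=db-lstm, and falls back to 'lstm' when the flag is absent; the lstm_architectures dict then dispatches to the matching builder. As written, a misspelled name raises a bare KeyError; a hypothetical guard (not part of the patch) would fail with a clearer message:

    # Hypothetical friendlier lookup around the dispatch dict above.
    def pick_architecture(name, architectures):
        try:
            return architectures[name]
        except KeyError:
            raise ValueError("unknown network_type %r, expected one of %s"
                             % (name, sorted(architectures)))

    # network = pick_architecture(network_type, lstm_architectures)
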
data = data_layer(name="word", size=len(word_dict)) emb = embedding_layer(input=data, size=128) -fc = fc_layer(input=emb, size=512, - act=LinearActivation(), - bias_attr=bias_attr, - layer_attr=ExtraAttr(drop_rate=0.1)) -lstm = lstmemory(input=fc, act=TanhActivation(), - bias_attr=bias_attr, - layer_attr=ExtraAttr(drop_rate=0.25)) -lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling()) -output = fc_layer(input=lstm_last, size=2, - bias_attr=bias_attr, - act=SoftmaxActivation()) +lstm_architectures = {'lstm': lstm_architecture, + 'bidi-lstm': bidilstm_architecture, + 'db-lstm': dblstm_architecture} +network = lstm_architectures[network_type] +output = network(bias_attr, data, emb) + if is_predict: maxid = maxid_layer(output) outputs([maxid, output]) From 76945c1cbdbf494c0c77251f75ad8f0133c03585 Mon Sep 17 00:00:00 2001 From: alvations Date: Thu, 20 Oct 2016 10:30:58 +0800 Subject: [PATCH 07/14] Update train.sh --- demo/quick_start/train.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh index 1f0a137c8bd59..ab6bf9ea1fec8 100755 --- a/demo/quick_start/train.sh +++ b/demo/quick_start/train.sh @@ -14,12 +14,15 @@ # limitations under the License. set -e -cfg=trainer_config.lr.py +#cfg=trainer_config.lr.py #cfg=trainer_config.emb.py #cfg=trainer_config.cnn.py -#cfg=trainer_config.lstm.py +cfg=trainer_config.lstm.py paddle train \ --config=$cfg \ + #--config_args=network_type='lstm'; \ + --config_args=network_type='bidi-lstm' \ + #--config_args=network_type='db-lstm' \ --save_dir=./output \ --trainer_count=4 \ --log_period=20 \ From 1a6d40ec2700903bc002f141d2d03f315bf9bc8c Mon Sep 17 00:00:00 2001 From: alvations Date: Thu, 20 Oct 2016 10:31:18 +0800 Subject: [PATCH 08/14] Update train.sh --- demo/quick_start/train.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh index ab6bf9ea1fec8..2a51bb5b5a7bf 100755 --- a/demo/quick_start/train.sh +++ b/demo/quick_start/train.sh @@ -14,14 +14,14 @@ # limitations under the License. set -e -#cfg=trainer_config.lr.py +cfg=trainer_config.lr.py #cfg=trainer_config.emb.py #cfg=trainer_config.cnn.py -cfg=trainer_config.lstm.py +#cfg=trainer_config.lstm.py paddle train \ --config=$cfg \ #--config_args=network_type='lstm'; \ - --config_args=network_type='bidi-lstm' \ + #--config_args=network_type='bidi-lstm' \ #--config_args=network_type='db-lstm' \ --save_dir=./output \ --trainer_count=4 \ From 1ec4beb3c512efa10f7230535493d7f3d1dbceb4 Mon Sep 17 00:00:00 2001 From: alvations Date: Thu, 20 Oct 2016 10:32:16 +0800 Subject: [PATCH 09/14] Renamed option --- demo/quick_start/train.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh index 2a51bb5b5a7bf..65b68b799a505 100755 --- a/demo/quick_start/train.sh +++ b/demo/quick_start/train.sh @@ -14,15 +14,15 @@ # limitations under the License. 
From 1ec4beb3c512efa10f7230535493d7f3d1dbceb4 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 20 Oct 2016 10:32:16 +0800
Subject: [PATCH 09/14] Renamed option

---
 demo/quick_start/train.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh
index 2a51bb5b5a7bf..65b68b799a505 100755
--- a/demo/quick_start/train.sh
+++ b/demo/quick_start/train.sh
@@ -14,15 +14,15 @@
 # limitations under the License.
 set -e
 
-cfg=trainer_config.lr.py
+#cfg=trainer_config.lr.py
 #cfg=trainer_config.emb.py
 #cfg=trainer_config.cnn.py
-#cfg=trainer_config.lstm.py
+cfg=trainer_config.lstm.py
 paddle train \
   --config=$cfg \
-  #--config_args=network_type='lstm'; \
-  #--config_args=network_type='bidi-lstm' \
-  #--config_args=network_type='db-lstm' \
+  #--config_args=lstm_network='lstm'; \
+  #--config_args=lstm_network='bidi-lstm' \
+  #--config_args=lstm_network='db-lstm' \
   --save_dir=./output \
   --trainer_count=4 \
   --log_period=20 \

From a14a41954ea6a5ce8993ef99ac2461d84db905de Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 20 Oct 2016 10:33:29 +0800
Subject: [PATCH 10/14] Update train.sh

---
 demo/quick_start/train.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh
index 65b68b799a505..7a9b2adda99d7 100755
--- a/demo/quick_start/train.sh
+++ b/demo/quick_start/train.sh
@@ -14,10 +14,10 @@
 # limitations under the License.
 set -e
 
-#cfg=trainer_config.lr.py
+cfg=trainer_config.lr.py
 #cfg=trainer_config.emb.py
 #cfg=trainer_config.cnn.py
-cfg=trainer_config.lstm.py
+#cfg=trainer_config.lstm.py
 paddle train \
   --config=$cfg \
   #--config_args=lstm_network='lstm'; \

From e41d1de145d028fb9bae57df65e5554d0c6e11d0 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 20 Oct 2016 10:34:21 +0800
Subject: [PATCH 11/14] Update trainer_config.lstm.py

---
 demo/quick_start/trainer_config.lstm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demo/quick_start/trainer_config.lstm.py b/demo/quick_start/trainer_config.lstm.py
index 2855f563ef750..bfefafde5b282 100644
--- a/demo/quick_start/trainer_config.lstm.py
+++ b/demo/quick_start/trainer_config.lstm.py
@@ -24,7 +24,7 @@
         word_dict[w] = i
 
 is_predict = get_config_arg('is_predict', bool, False)
-network_type = get_config_arg('network_type', str, 'lstm')
+network_type = get_config_arg('lstm_network', str, 'lstm')
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
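
Note: patches 09-11 rename the flag from network_type to lstm_network on both ends. This matters because the key given to --config_args must match the name passed to get_config_arg exactly; a mismatch silently yields the default 'lstm'. The call behaves roughly like this emulation (illustrative only; the real function is provided by the trainer and parses the --config_args string for you):

    # Emulates get_config_arg(name, type, default) against a raw
    # --config_args value such as "lstm_network=db-lstm".
    def get_config_arg_emulated(config_args, name, arg_type, default):
        pairs = dict(kv.split("=", 1) for kv in config_args.split(",") if kv)
        if name not in pairs:
            return default
        return arg_type(pairs[name])

    print(get_config_arg_emulated("lstm_network=db-lstm", "lstm_network", str, "lstm"))  # db-lstm
    print(get_config_arg_emulated("", "lstm_network", str, "lstm"))                      # lstm
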
From 21f1ebccf1e6fadf96471c4dd48f87356b1f1ab3 Mon Sep 17 00:00:00 2001
From: Tan
Date: Thu, 20 Oct 2016 11:15:15 +0800
Subject: [PATCH 12/14] added bidi-lstm and db-lstm for quick_start

---
 demo/quick_start/train.sh                    |  4 +-
 demo/quick_start/trainer_config.bidi-lstm.py | 62 +++++++++++++++++
 demo/quick_start/trainer_config.db-lstm.py   | 73 ++++++++++++++++++++
 demo/quick_start/trainer_config.lstm.py      | 60 ++++------------
 4 files changed, 150 insertions(+), 49 deletions(-)
 create mode 100644 demo/quick_start/trainer_config.bidi-lstm.py
 create mode 100644 demo/quick_start/trainer_config.db-lstm.py

diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh
index 7a9b2adda99d7..eea4c9c8a1523 100755
--- a/demo/quick_start/train.sh
+++ b/demo/quick_start/train.sh
@@ -14,10 +14,12 @@
 # limitations under the License.
 set -e
 
-cfg=trainer_config.lr.py
+#cfg=trainer_config.lr.py
 #cfg=trainer_config.emb.py
 #cfg=trainer_config.cnn.py
 #cfg=trainer_config.lstm.py
+#cfg=trainer_config.bidi-lstm.py
+cfg=trainer_config.db-lstm.py
 paddle train \
   --config=$cfg \
   #--config_args=lstm_network='lstm'; \

diff --git a/demo/quick_start/trainer_config.bidi-lstm.py b/demo/quick_start/trainer_config.bidi-lstm.py
new file mode 100644
index 0000000000000..3be3d37342271
--- /dev/null
+++ b/demo/quick_start/trainer_config.bidi-lstm.py
@@ -0,0 +1,62 @@
+# edit-mode: -*- python -*-
+
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+dict_file = "./data/dict.txt"
+word_dict = dict()
+with open(dict_file, 'r') as f:
+    for i, line in enumerate(f):
+        w = line.strip().split()[0]
+        word_dict[w] = i
+
+is_predict = get_config_arg('is_predict', bool, False)
+trn = 'data/train.list' if not is_predict else None
+tst = 'data/test.list' if not is_predict else 'data/pred.list'
+process = 'process' if not is_predict else 'process_predict'
+define_py_data_sources2(train_list=trn,
+                        test_list=tst,
+                        module="dataprovider_emb",
+                        obj=process,
+                        args={"dictionary": word_dict})
+
+batch_size = 128 if not is_predict else 1
+settings(
+    batch_size=batch_size,
+    learning_rate=2e-3,
+    learning_method=AdamOptimizer(),
+    regularization=L2Regularization(8e-4),
+    gradient_clipping_threshold=25
+)
+
+bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+data = data_layer(name="word", size=len(word_dict))
+emb = embedding_layer(input=data, size=128)
+
+bi_lstm = bidirectional_lstm(input=emb, size=128)
+dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
+
+output = fc_layer(input=dropout, size=2,
+                  bias_attr=bias_attr,
+                  act=SoftmaxActivation())
+
+if is_predict:
+    maxid = maxid_layer(output)
+    outputs([maxid, output])
+else:
+    label = data_layer(name="label", size=2)
+    cls = classification_cost(input=output, label=label)
+    outputs(cls)

diff --git a/demo/quick_start/trainer_config.db-lstm.py b/demo/quick_start/trainer_config.db-lstm.py
new file mode 100644
index 0000000000000..b35bdf5a61b47
--- /dev/null
+++ b/demo/quick_start/trainer_config.db-lstm.py
@@ -0,0 +1,73 @@
+# edit-mode: -*- python -*-
+
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+dict_file = "./data/dict.txt"
+word_dict = dict()
+with open(dict_file, 'r') as f:
+    for i, line in enumerate(f):
+        w = line.strip().split()[0]
+        word_dict[w] = i
+
+is_predict = get_config_arg('is_predict', bool, False)
+trn = 'data/train.list' if not is_predict else None
+tst = 'data/test.list' if not is_predict else 'data/pred.list'
+process = 'process' if not is_predict else 'process_predict'
+define_py_data_sources2(train_list=trn,
+                        test_list=tst,
+                        module="dataprovider_emb",
+                        obj=process,
+                        args={"dictionary": word_dict})
+
+batch_size = 128 if not is_predict else 1
+settings(
+    batch_size=batch_size,
+    learning_rate=2e-3,
+    learning_method=AdamOptimizer(),
+    regularization=L2Regularization(8e-4),
+    gradient_clipping_threshold=25
+)
+
+bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+
+data = data_layer(name="word", size=len(word_dict))
+emb = embedding_layer(input=data, size=128)
+
+hidden_0 = mixed_layer(size=128, input=[full_matrix_projection(input=emb)])
+lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
+
+input_layers = [hidden_0, lstm_0]
+
+for i in range(1,8):
+    fc = fc_layer(input=input_layers, size=128)
+    lstm = lstmemory(input=fc, layer_attr=ExtraAttr(drop_rate=0.1),
+                     reverse=(i % 2) == 1,)
+    input_layers = [fc, lstm]
+
+lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
+
+output = fc_layer(input=lstm_last, size=2,
+                  bias_attr=bias_attr,
+                  act=SoftmaxActivation())
+
+if is_predict:
+    maxid = maxid_layer(output)
+    outputs([maxid, output])
+else:
+    label = data_layer(name="label", size=2)
+    cls = classification_cost(input=output, label=label)
+    outputs(cls)

diff --git a/demo/quick_start/trainer_config.lstm.py b/demo/quick_start/trainer_config.lstm.py
index bfefafde5b282..ec8a2cb00abd1 100644
--- a/demo/quick_start/trainer_config.lstm.py
+++ b/demo/quick_start/trainer_config.lstm.py
@@ -24,7 +24,6 @@
         word_dict[w] = i
 
 is_predict = get_config_arg('is_predict', bool, False)
-network_type = get_config_arg('lstm_network', str, 'lstm')
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
@@ -43,56 +42,21 @@
     gradient_clipping_threshold=25
 )
 
-def lstm_architecture(bias_attr, data, emb):
-    fc = fc_layer(input=emb, size=512,
-                  act=LinearActivation(),
-                  bias_attr=bias_attr,
-                  layer_attr=ExtraAttr(drop_rate=0.1))
-    lstm = lstmemory(input=fc, act=TanhActivation(),
-                     bias_attr=bias_attr,
-                     layer_attr=ExtraAttr(drop_rate=0.25))
-    lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-    output = fc_layer(input=lstm_last, size=2,
-                      bias_attr=bias_attr,
-                      act=SoftmaxActivation())
-    return output
-
-def dblstm_architecture(bias_attr, data, emb):
-    hidden_0 = mixed_layer(size=128, input=[full_matrix_projection(input=emb)])
-    lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
-
-    input_layers = [hidden_0, lstm_0]
-    for i in range(1,8):
-        fc = fc_layer(input=input_layers, size=128)
-        lstm = lstmemory(input=fc, layer_attr=ExtraAttr(drop_rate=0.1),
-                         reverse=(i % 2) == 1,)
-        input_layers = [fc, lstm]
-
-    lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-    output = fc_layer(input=lstm_last, size=2,
-                      bias_attr=bias_attr,
-                      act=SoftmaxActivation())
-    return output
-
-def bidilstm_architecture(bias_attr, data, emb):
-    bi_lstm = bidirectional_lstm(input=emb, size=128)
-    dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
-    output = fc_layer(input=dropout, size=2,
-                      bias_attr=bias_attr,
-                      act=SoftmaxActivation())
-    return output
-
-
 bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+
 data = data_layer(name="word", size=len(word_dict))
 emb = embedding_layer(input=data, size=128)
-lstm_architectures = {'lstm': lstm_architecture,
-                      'bidi-lstm': bidilstm_architecture,
-                      'db-lstm': dblstm_architecture}
-network = lstm_architectures[network_type]
-output = network(bias_attr, data, emb)
-
+fc = fc_layer(input=emb, size=512,
+              act=LinearActivation(),
+              bias_attr=bias_attr,
+              layer_attr=ExtraAttr(drop_rate=0.1))
+lstm = lstmemory(input=fc, act=TanhActivation(),
+                 bias_attr=bias_attr,
+                 layer_attr=ExtraAttr(drop_rate=0.25))
+lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
+output = fc_layer(input=lstm_last, size=2,
+                  bias_attr=bias_attr,
+                  act=SoftmaxActivation())
 if is_predict:
     maxid = maxid_layer(output)
     outputs([maxid, output])
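
Note: compared with patch 01's stack, the db-lstm config alternates direction as it goes up: reverse=(i % 2) == 1 makes every odd layer run right-to-left, which is the usual way to build a deep bidirectional LSTM out of unidirectional lstmemory layers. The resulting direction schedule, as a sketch:

    # Direction of each LSTM in the db-lstm stack: lstm_0 runs forward,
    # then layers i = 1..7 alternate, starting with a reversed layer.
    directions = ["forward"] + [
        "backward" if (i % 2) == 1 else "forward" for i in range(1, 8)
    ]
    print(directions)
    # ['forward', 'backward', 'forward', 'backward', 'forward',
    #  'backward', 'forward', 'backward']
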
From a11f5fbc4ab5adf27248833a6a7f1cd75f3ddff9 Mon Sep 17 00:00:00 2001
From: liling
Date: Thu, 20 Oct 2016 11:18:10 +0800
Subject: [PATCH 13/14] reverted to using LR as default config

---
 demo/quick_start/train.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh
index eea4c9c8a1523..067b0657ab3d4 100755
--- a/demo/quick_start/train.sh
+++ b/demo/quick_start/train.sh
@@ -14,12 +14,12 @@
 # limitations under the License.
 set -e
 
-#cfg=trainer_config.lr.py
+cfg=trainer_config.lr.py
 #cfg=trainer_config.emb.py
 #cfg=trainer_config.cnn.py
 #cfg=trainer_config.lstm.py
 #cfg=trainer_config.bidi-lstm.py
-cfg=trainer_config.db-lstm.py
+#cfg=trainer_config.db-lstm.py
 paddle train \
   --config=$cfg \
   #--config_args=lstm_network='lstm'; \
From 0b849b3f82df0641ae1b35479599ab0fa7317845 Mon Sep 17 00:00:00 2001
From: liling
Date: Thu, 20 Oct 2016 11:20:32 +0800
Subject: [PATCH 14/14] reverted to using LR as default config

---
 demo/quick_start/train.sh | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh
index 067b0657ab3d4..ea4e32249a3d0 100755
--- a/demo/quick_start/train.sh
+++ b/demo/quick_start/train.sh
@@ -22,9 +22,6 @@ cfg=trainer_config.lr.py
 #cfg=trainer_config.db-lstm.py
 paddle train \
   --config=$cfg \
-  #--config_args=lstm_network='lstm'; \
-  #--config_args=lstm_network='bidi-lstm' \
-  #--config_args=lstm_network='db-lstm' \
   --save_dir=./output \
   --trainer_count=4 \
   --log_period=20 \
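
Note: the series ends with trainer_config.lr.py restored as the default and the commented --config_args lines deleted outright. The deletion also fixes a latent shell problem: inside a backslash-continued command, a line beginning with # is a comment, so it swallows its own trailing backslash and terminates the command early, leaving the remaining --save_dir/--trainer_count flags to run as separate, failing commands. To try the new networks, swap the active cfg= line instead, e.g. cfg=trainer_config.bidi-lstm.py.

All of the configs share the same vocabulary loader: each line of data/dict.txt contributes its first whitespace-separated token, keyed by its 0-based line number. A standalone sketch of that behavior, run against an inlined sample (the real file's exact columns may differ; only the first is used):

    # Standalone version of the word_dict loader used in every config above.
    sample_dict = """the\t419
    of\t217
    <unk>\t1"""

    word_dict = dict()
    for i, line in enumerate(sample_dict.splitlines()):
        w = line.strip().split()[0]   # keep only the first column
        word_dict[w] = i              # id = 0-based line number
    print(word_dict)                  # {'the': 0, 'of': 1, '<unk>': 2}
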