#!returnn/rnn.py
# kate: syntax python;
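# Usage (a sketch, assuming this config sits next to a `returnn` checkout,
# matching the shebang above):
#   python3 returnn/rnn.py demo-rhn-enwik8.config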
import os
import numpy
from returnn.util.basic import get_login_username
demo_name, _ = os.path.splitext(__file__)
print("Hello, experiment: %s" % demo_name)
# task
use_tensorflow = True
task = config.value("task", "train")  # `config` is injected by RETURNN when it loads this file
#device = "gpu"
batch_num_seqs = 10  # parallel sequences per batch
batch_seq_len = 200  # bytes per sequence chunk
def get_dataset(key):
  return {
    'class': 'Enwik8Corpus', 'path': '/tmp',
    'subset': key,
    'seq_len': batch_seq_len,
    'batch_num_seqs': batch_num_seqs,
    'fixed_random_seed': 1 if key == 'validation' else None
  }
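# Enwik8Corpus reads the enwik8 byte stream from 'path' (recent RETURNN
# versions download it there first if it is missing); 'subset' picks the
# standard training/validation/test split.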
train = get_dataset("training")
dev = get_dataset("validation")
# data
num_inputs = 205  # number of distinct byte values occurring in enwik8
num_outputs = {"data": {'sparse': True, "dim": num_inputs, "dtype": "uint8"}}
num_outputs["classes"] = num_outputs['data'].copy()  # next-byte targets share the alphabet
# network
# (also defined by num_inputs & num_outputs)
network = {
  "input_embed": {'class': "linear", "activation": None, "with_bias": False, "n_out": 128},
  "hidden": {
    'class': "rec", 'unit': 'rhn', 'unit_opts': {'dropout': 0.3}, "dropout": 0.1,
    'from': ['input_embed'], 'n_out': 1000,
    "initial_state": "keep_over_epoch"},
  'output': {'class': 'softmax', 'from': ['hidden'], 'loss': "ce"}
}
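# 'rhn' selects a Recurrent Highway Network cell (Zilly et al., 2017).
# "initial_state": "keep_over_epoch" carries the recurrent state over instead
# of resetting it for every 200-byte chunk, so training behaves (roughly)
# like one continuous pass over the enwik8 stream.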
debug_print_layer_output_template = True
# trainer
batching = "random"
log_batch_size = True
batch_size = 0  # no token limit; batch shape is fixed by the dataset opts above
max_seq_length = 0 # controlled via dataset
max_seqs = batch_num_seqs
num_epochs = 150
model = "/tmp/%s/returnn/%s/model" % (get_login_username(), demo_name)
cleanup_old_models = True
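# Checkpoints are written per epoch under the model path above;
# cleanup_old_models prunes older ones, keeping the best and most recent
# checkpoints (default RETURNN heuristic).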
gradient_clip = 0  # 0 disables per-value gradient clipping
#gradient_clip_global_norm = 1.0
optimizer = {"class": "adam"}
optimizer_epsilon = 1e-8
#debug_add_check_numerics_ops = True
#debug_add_check_numerics_on_output = True
stop_on_nonfinite_train_score = False
tf_log_memory_usage = True
gradient_noise = 0.0
learning_rate = 0.001
#learning_rate = 0.0008
#learning_rates = list(numpy.linspace(0.0003, learning_rate, num=10)) # warmup
#learning_rate_control = "newbob_multi_epoch"
#learning_rate_control_error_measure = "dev_score_output"
learning_rate_control_relative_error_relative_lr = True
learning_rate_control_min_num_epochs_per_new_lr = 3
use_learning_rate_control_always = True
newbob_multi_num_epochs = 3
newbob_multi_update_interval = 1
newbob_learning_rate_decay = 0.9
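# The newbob_* settings parameterize "newbob_multi_epoch" learning-rate
# control (decay the LR by 0.9 when the dev score stops improving, measured
# over 3-epoch windows); since learning_rate_control itself is commented out
# above, they presumably only take effect once that line is enabled.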
#learning_rate_file = "newbob.data"
# log
log_verbosity = 5