config.py
"""Model configuration."""
from src.layers import *
from src.layers_recurrent import *
from src.utils import _get_config_info
def build_config(args: dict, device: str, PATH: str) -> dict:
"""
Returns configuration dictionary for the model.
@param args (dict): arguments from terminal (wether to train, test, or fine-tune)
@param device (str): device to run model training and evaluation on.
@param PATH (str): path to the repository.
@returns MODEL_CONFIG (dict): dictionary with hyperparameters and layers of the model.
"""
    # Edit HERE if you want to train a model from scratch:
    training_params = {
        "--corpus": f"{PATH}/data/shakespeare.txt",
        "--to_path": f"{PATH}/models/my_model.json",
        "character_level": False,
        "n_iter": 500000,
        "n_timesteps": 196,
        "batch_size": 16,
        "learning_rate": 2e-4,
        "regularization": 2e-4,
        "dropout_prob": 0.2,
        "patience": 7,
        "evaluation_interval": 1000,
        "evaluation_n_timesteps": 1000
    }
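    # A note on the fields above (an interpretation from the names; see
    # src/utils.py for the authoritative handling): "n_timesteps" is the
    # context window fed to the model per training example, and
    # "evaluation_n_timesteps" is the length of the sample generated at
    # each evaluation step.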
    # Edit HERE if you want to fine-tune a pretrained model:
    fine_tuning_params = {
        "--corpus": f"{PATH}/data/jules_verne.txt",
        "--to_path": f"{PATH}/models/my_fine_tuned_model.json",
        "--from_path": f"{PATH}/models/my_model.json",
        "character_level": True,
        "n_iter": 100000,
        "batch_size": 16,
        "learning_rate": 5e-5,
        "regularization": 2e-4,
        "dropout_prob": 0.2,
        "patience": 5,
        "evaluation_interval": 500,
        "evaluation_n_timesteps": 600
    }
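    # Note that "n_timesteps" is not set here: when fine-tuning, the context
    # length is presumably recovered from the checkpoint at "--from_path".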
    # Edit HERE if you want to test a model (generate a sample):
    testing_params = {
        "--from_path": f"{PATH}/models/shakespeare_c_model.json",
        "--testing_corpus": f"{PATH}/data/shakespeare.txt",
        "seed": "Nemo",
        "evaluation_n_timesteps": 1000
    }
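    # "seed" appears to be the prompt string that conditions generation, and
    # "evaluation_n_timesteps" the number of tokens to sample from it.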
    # Gets the vocabulary size (number of unique tokens), context length
    # (n_timesteps), and dropout probability (p) for the requested run mode.
    vocab_size, n_timesteps, p = _get_config_info(args, training_params, fine_tuning_params, testing_params)
    # Edit HERE to build your own custom model:
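    # The default stack below is a GPT-style decoder: token and positional
    # embeddings, eight transformer blocks (512-dim, 8 attention heads each),
    # a final LayerNorm, a dense projection to vocabulary logits, and the
    # cross-entropy loss layer.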
    model_layers = [
        Embedding(vocab_size, 512, device=device),
        PositionalEmbedding(n_timesteps, 512, device=device),
        Block(512, 512, 8, n_timesteps, dropout_prob=p, device=device),
        Block(512, 512, 8, n_timesteps, dropout_prob=p, device=device),
        Block(512, 512, 8, n_timesteps, dropout_prob=p, device=device),
        Block(512, 512, 8, n_timesteps, dropout_prob=p, device=device),
        Block(512, 512, 8, n_timesteps, dropout_prob=p, device=device),
        Block(512, 512, 8, n_timesteps, dropout_prob=p, device=device),
        Block(512, 512, 8, n_timesteps, dropout_prob=p, device=device),
        Block(512, 512, 8, n_timesteps, dropout_prob=p, device=device),
        LayerNorm(512, device=device),
        TemporalDense(512, vocab_size, device=device),
        CrossEntropyLoss(device=device)
    ]
    MODEL_CONFIG = {
        "training_params": training_params,
        "fine_tuning_params": fine_tuning_params,
        "testing_params": testing_params,
        "model_layers": model_layers
    }
    return MODEL_CONFIG
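# Example usage (a minimal sketch: the "--train" flag and the device string
# are assumptions, not part of this file; adapt them to however the
# repository's entry point parses sys.argv):
if __name__ == "__main__":
    import os

    args = {"--train": True}  # hypothetical CLI flags, normally parsed from the terminal
    device = "cpu"            # or "cuda" if a GPU is available
    PATH = os.path.dirname(os.path.abspath(__file__))

    config = build_config(args, device, PATH)
    print(config["training_params"]["--corpus"])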