forked from ConnorJL/GPT2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
1.5B.json
33 lines (33 loc) · 871 Bytes
/
1.5B.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
{
"n_head": 25,
"encoder_path": "gs://openwebtext/stuff/encoder",
"n_vocab": 50257,
"embed_dropout": 0.1,
"lr": 0.00025,
"warmup_steps": 2000,
"beta1": 0.0,
"decay_exponent": 0.8,
"opt_name": "adafactor",
"decay_type": "pow",
"train_batch_size": 512,
"attn_dropout": 0.1,
"train_steps": 100000,
"eval_steps": 10,
"max_steps": 300000,
"data_path": "gs://connors-datasets/openwebtext/",
"res_dropout": 0.1,
"predict_batch_size": 1,
"eval_batch_size": 8,
"iterations": 100,
"n_embd": 1600,
"input": "openwebtext_longbiased",
"model": "GPT2",
"model_path": "gs://connors-models/1.5B",
"n_ctx": 1024,
"predict_path": "logs/predictions_1.5B.txt",
"n_layer": 48,
"precision": "float32",
"weight_decay": 0.01,
"scale_by_depth": true,
"scale_by_in": true
}