forked from ConnorJL/GPT2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
345M.json
32 lines (32 loc) · 812 Bytes
/
345M.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
{
"n_head": 16,
"encoder_path": "gs://openwebtext/stuff/encoder",
"n_vocab": 50257,
"embed_dropout": 0.1,
"lr": 0.00025,
"warmup_steps": 2000,
"weight_decay": 0.01,
"beta1": 0.9,
"beta2": 0.98,
"epsilon": 1e-9,
"opt_name": "adam",
"train_batch_size": 8,
"attn_dropout": 0.1,
"train_steps": 10000,
"eval_steps": 10,
"max_steps": 500000,
"data_path": "gs://connors-datasets/openwebtext/",
"res_dropout": 0.1,
"predict_batch_size": 8,
"eval_batch_size": 8,
"iterations": 500,
"n_embd": 1024,
"input": "openwebtext",
"model": "GPT2",
"model_path": "gs://connors-models/GPT2-345M",
"n_ctx": 1024,
"predict_path": "logs/predictions.txt",
"n_layer": 24,
"scale_by_depth": true,
"scale_by_in": true
}