diff --git a/src/gpt.py b/src/gpt.py index 9fc803c..be42c2e 100644 --- a/src/gpt.py +++ b/src/gpt.py @@ -7,11 +7,11 @@ torch.manual_seed(1995) default_context_window = 256 -default_embedding_dimension = 256 +default_embedding_dimension = 384 default_vocabulary_size = 300 -default_attention_heads_count = 4 +default_attention_heads_count = 6 default_transformer_blocks_count = 6 -default_batch_size = 32 +default_batch_size = 64 class GPT(nn.Module): diff --git a/src/train.py b/src/train.py index 94d86d6..dd23590 100644 --- a/src/train.py +++ b/src/train.py @@ -14,7 +14,7 @@ default_num_epochs = 50 -default_learning_rate = 3e-4 # 0.001 before, but then I took it from Karpathy's videos +default_learning_rate = 3e-4 # 0.001 before, but then I took it from Karpathy's videos checkpoints_directory = "checkpoints" os.makedirs(checkpoints_directory, exist_ok=True)