llama : on Metal, by default offload the full model
ggml-ci
ggerganov committed Jan 10, 2024
1 parent 3cb1c1f commit 07a1b05
Showing 1 changed file with 2 additions and 1 deletion.
--- a/llama.cpp
+++ b/llama.cpp
@@ -9069,7 +9069,8 @@ struct llama_model_params llama_model_default_params() {
     };
 
 #ifdef GGML_USE_METAL
-    result.n_gpu_layers = 1;
+    // note: we usually have plenty of VRAM, so by default offload all layers to the GPU
+    result.n_gpu_layers = 999;
 #endif
 
     return result;
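For context, a minimal usage sketch (not part of this commit; it assumes llama_load_model_from_file and llama_free_model from the llama.h C API of this period, and the model path is a placeholder): callers that do not want full offload on Metal can still override the new default before loading the model.

// Hypothetical example, not from the commit; assumes the llama.h C API
// as of early 2024. The model path is a placeholder.
#include "llama.h"

#include <stdio.h>

int main(void) {
    // On Metal builds the default n_gpu_layers is now 999, which acts as an
    // "offload all layers" sentinel (any value >= the model's layer count
    // has the same effect).
    struct llama_model_params mparams = llama_model_default_params();

    // Override the default to offload only the first 8 layers to the GPU,
    // keeping the rest on the CPU.
    mparams.n_gpu_layers = 8;

    struct llama_model * model =
        llama_load_model_from_file("models/7B/ggml-model-q4_0.gguf", mparams);
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    llama_free_model(model);
    return 0;
}

Using 999 rather than a model-specific count keeps the default cheap to construct: the loader only offloads as many layers as the model actually has, so any sufficiently large value means "offload everything".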
