From 7469f202ea8c2a8fcd7ff3ea63c22b3d7f658619 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Fri, 8 Dec 2023 18:16:14 +0800
Subject: [PATCH] use lowvram flag for offload qkv

---
 gpttype_adapter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 2924f947bc34f..0e8b4b259bbab 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -895,7 +895,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         //llama_ctx_paran_parts = -1;
         llama_ctx_params.seed = -1;
         //llama_ctx_params.f16_kv = true;
-        //llama_ctx_params.low_vram = inputs.low_vram;
+        llama_ctx_params.offload_kqv = !inputs.low_vram;
         llama_ctx_params.mul_mat_q = inputs.use_mmq;
         llama_ctx_params.logits_all = false;
         model_params.use_mmap = inputs.use_mmap;