diff --git a/llama.cpp b/llama.cpp
index 5221ab5a2dd27..ee6ec0920fc9c 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1613,14 +1613,11 @@ static bool llama_eval_internal(
     // run the computation
     ggml_build_forward_expand(&gf, cur);
 
-    bool call_ggml_graph_compute = true;
-
 #ifdef GGML_USE_METAL
     if (lctx.ctx_metal && N == 1) {
         ggml_metal_set_n_cb     (lctx.ctx_metal, n_threads);
         ggml_metal_graph_compute(lctx.ctx_metal, &gf);
         ggml_metal_get_tensor   (lctx.ctx_metal, cur);
-        call_ggml_graph_compute = false;
     } else {
         // IMPORTANT:
         // Since we don't have efficient Matrix x Matrix Metal multiplication yet, we fallback to vanilla
@@ -1637,12 +1634,12 @@ static bool llama_eval_internal(
             ggml_metal_get_tensor(lctx.ctx_metal, kv_self.k);
             ggml_metal_get_tensor(lctx.ctx_metal, kv_self.v);
         }
-    }
-#endif
 
-    if (call_ggml_graph_compute) {
         ggml_graph_compute_helper(lctx.work_buffer, &gf, n_threads);
     }
+#else
+    ggml_graph_compute_helper(lctx.work_buffer, &gf, n_threads);
+#endif
 
     if (cgraph_fname) {
         ggml_graph_export(&gf, cgraph_fname);