From a5d4b0121c8dcb14288349539e7e7920966f8aed Mon Sep 17 00:00:00 2001
From: Zhuohan Li <zhuohan123@gmail.com>
Date: Wed, 13 Mar 2024 07:06:42 +0000
Subject: [PATCH] [FIX] Simpler fix for async engine running on ray

---
 vllm/executor/ray_gpu_executor.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py
index 261fcfb7dad9b..82a2b456895e8 100644
--- a/vllm/executor/ray_gpu_executor.py
+++ b/vllm/executor/ray_gpu_executor.py
@@ -430,8 +430,7 @@ async def execute_model_async(
                 "blocks_to_swap_in": blocks_to_swap_in,
                 "blocks_to_swap_out": blocks_to_swap_out,
                 "blocks_to_copy": blocks_to_copy,
-            },
-            use_ray_compiled_dag=USE_RAY_COMPILED_DAG)
+            })
 
         # Only the driver worker returns the sampling results.
         output = all_outputs[0]