From a5d4b0121c8dcb14288349539e7e7920966f8aed Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Wed, 13 Mar 2024 07:06:42 +0000 Subject: [PATCH] [FIX] Simpler fix for async engine running on ray --- vllm/executor/ray_gpu_executor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py index 261fcfb7dad9b..82a2b456895e8 100644 --- a/vllm/executor/ray_gpu_executor.py +++ b/vllm/executor/ray_gpu_executor.py @@ -430,8 +430,7 @@ async def execute_model_async( "blocks_to_swap_in": blocks_to_swap_in, "blocks_to_swap_out": blocks_to_swap_out, "blocks_to_copy": blocks_to_copy, - }, - use_ray_compiled_dag=USE_RAY_COMPILED_DAG) + }) # Only the driver worker returns the sampling results. output = all_outputs[0]