[FIX] Simpler fix for async engine running on ray (#3371)

vllm-project · Mar 13, 2024 · eeab52a
1 parent c33afd8
commit eeab52a
Showing 1 changed file with 1 addition and 2 deletions.
diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py
@@ -430,8 +430,7 @@ async def execute_model_async(
                 "blocks_to_swap_in": blocks_to_swap_in,
                 "blocks_to_swap_out": blocks_to_swap_out,
                 "blocks_to_copy": blocks_to_copy,
-            },
-            use_ray_compiled_dag=USE_RAY_COMPILED_DAG)
+            })
 
         # Only the driver worker returns the sampling results.
         output = all_outputs[0]