diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py index 261fcfb7dad9b..82a2b456895e8 100644 --- a/vllm/executor/ray_gpu_executor.py +++ b/vllm/executor/ray_gpu_executor.py @@ -430,8 +430,7 @@ async def execute_model_async( "blocks_to_swap_in": blocks_to_swap_in, "blocks_to_swap_out": blocks_to_swap_out, "blocks_to_copy": blocks_to_copy, - }, - use_ray_compiled_dag=USE_RAY_COMPILED_DAG) + }) # Only the driver worker returns the sampling results. output = all_outputs[0]