From a2e9754514e68772c80e901f4a1d2bd0992745d3 Mon Sep 17 00:00:00 2001 From: roy Date: Mon, 26 Feb 2024 21:51:21 +0800 Subject: [PATCH] don't use cupy for eager mode --- vllm/engine/llm_engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index c1a75924c6d72..f5b2145c22d6f 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -284,7 +284,10 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", is_driver_worker=True, ) - self._run_workers("init_model", cupy_port=get_open_port()) + # don't use cupy for eager mode + self._run_workers("init_model", + cupy_port=get_open_port() + if not model_config.enforce_eager else None) self._run_workers( "load_model", max_concurrent_workers=self.parallel_config.