From d55c43c1d99462db5d3c851912aaed202c9da9ce Mon Sep 17 00:00:00 2001 From: Roy Date: Tue, 27 Feb 2024 09:33:38 +0800 Subject: [PATCH] Don't use cupy when `enforce_eager=True` (#3037) --- vllm/engine/llm_engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index c1a75924c6d72..f5b2145c22d6f 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -284,7 +284,10 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", is_driver_worker=True, ) - self._run_workers("init_model", cupy_port=get_open_port()) + # don't use cupy for eager mode + self._run_workers("init_model", + cupy_port=get_open_port() + if not model_config.enforce_eager else None) self._run_workers( "load_model", max_concurrent_workers=self.parallel_config.