From 85df8128d890c5d2c5da799336c2367f82161c27 Mon Sep 17 00:00:00 2001 From: Wen Sun <35923278+HermitSun@users.noreply.github.com> Date: Wed, 31 Jan 2024 00:17:05 +0800 Subject: [PATCH] Fix 'Actor methods cannot be called directly' when using `--engine-use-ray` (#2664) * fix: engine-useray complain * fix: typo --- vllm/engine/async_llm_engine.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 33e1abc063b8b..b9b5a6d442088 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -450,11 +450,19 @@ async def add_request( if arrival_time is None: arrival_time = time.time() - prompt_token_ids = await self.engine.encode_request_async( - request_id=request_id, - prompt=prompt, - prompt_token_ids=prompt_token_ids, - lora_request=lora_request) + + if self.engine_use_ray: + prompt_token_ids = await self.engine.encode_request_async.remote( + request_id=request_id, + prompt=prompt, + prompt_token_ids=prompt_token_ids, + lora_request=lora_request) + else: + prompt_token_ids = await self.engine.encode_request_async( + request_id=request_id, + prompt=prompt, + prompt_token_ids=prompt_token_ids, + lora_request=lora_request) stream = self._request_tracker.add_request( request_id,