From b8eaa4d655dca416b0d2c456d90c9fab8e881f28 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sat, 16 Nov 2024 23:58:03 +0800 Subject: [PATCH] fix cpu embedding runner tp Signed-off-by: Isotr0py <2037008807@qq.com> --- vllm/worker/cpu_embedding_model_runner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/worker/cpu_embedding_model_runner.py b/vllm/worker/cpu_embedding_model_runner.py index 7053075bf4d8f..d0b8fec48d74f 100644 --- a/vllm/worker/cpu_embedding_model_runner.py +++ b/vllm/worker/cpu_embedding_model_runner.py @@ -66,6 +66,10 @@ def execute_model( hidden_states = model_executable(**execute_model_kwargs) + # Only perform pooling in the driver worker. + if not self.is_driver_worker: + return [] + return [ self.model.pooler(hidden_states=hidden_states, pooling_metadata=model_input.pooling_metadata)