From 71c459f891111f428340a0606f5a88abd056e019 Mon Sep 17 00:00:00 2001
From: zifeitong
Date: Wed, 15 May 2024 07:22:09 -0700
Subject: [PATCH] [Bugfix] Properly set distributed_executor_backend in ParallelConfig (#4816)

---
 vllm/config.py           |  1 +
 vllm/engine/arg_utils.py | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index d457710ba64e8..de7cae6a7be84 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -575,6 +575,7 @@ class ParallelConfig:
             If None, will use synchronous tokenization.
         ray_workers_use_nsight: Whether to profile Ray workers with nsight, see
             https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.
+        placement_group: ray distributed model workers placement group.
         distributed_executor_backend: Backend to use for distributed model
             workers, either "ray" or "mp" (multiprocessing). If either
             pipeline_parallel_size or tensor_parallel_size is greater than 1,
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index fbe6fa4c8e34a..71f11ec6f16e6 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -576,14 +576,18 @@ def create_engine_config(self, ) -> EngineConfig:
                                    model_config.get_sliding_window(),
                                    self.enable_prefix_caching)
         parallel_config = ParallelConfig(
-            self.pipeline_parallel_size, self.tensor_parallel_size,
-            self.worker_use_ray, self.max_parallel_loading_workers,
+            self.pipeline_parallel_size,
+            self.tensor_parallel_size,
+            self.worker_use_ray,
+            self.max_parallel_loading_workers,
             self.disable_custom_all_reduce,
             TokenizerPoolConfig.create_config(
                 self.tokenizer_pool_size,
                 self.tokenizer_pool_type,
                 self.tokenizer_pool_extra_config,
-            ), self.ray_workers_use_nsight)
+            ),
+            self.ray_workers_use_nsight,
+            distributed_executor_backend=self.distributed_executor_backend)
 
         speculative_config = SpeculativeConfig.maybe_create_spec_config(
             target_model_config=model_config,
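
Note (not part of the patch): a minimal Python sketch of the failure mode the patch addresses, using a simplified stand-in class rather than the real vllm.config.ParallelConfig. Before the change, the backend value parsed from the engine arguments was never forwarded to the config, so the field silently kept its default; the fix passes it explicitly as a keyword argument.

from dataclasses import dataclass
from typing import Optional


# Simplified stand-in for ParallelConfig (assumption; the real class has more fields).
@dataclass
class ParallelConfigSketch:
    pipeline_parallel_size: int
    tensor_parallel_size: int
    worker_use_ray: bool
    distributed_executor_backend: Optional[str] = None  # falls back to None if not passed


# Before the fix: the user's choice of backend is dropped on the floor,
# and the config keeps the default None.
before = ParallelConfigSketch(1, 4, False)
assert before.distributed_executor_backend is None

# After the fix: the value is forwarded as an explicit keyword argument.
after = ParallelConfigSketch(1, 4, False, distributed_executor_backend="mp")
assert after.distributed_executor_backend == "mp"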