Skip to content

Commit

Permalink
[Fix] Avoid pickling entire LLMEngine for Ray workers (vllm-project#3207)
Browse files Browse the repository at this point in the history

Co-authored-by: Antoni Baum <[email protected]>
  • Loading branch information
njhill and Yard1 authored Mar 6, 2024
1 parent 8999ec3 commit 2efce05
Showing 1 changed file with 14 additions and 7 deletions.
21 changes: 14 additions & 7 deletions vllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,11 @@ def __init__(
if USE_RAY_COMPILED_DAG:
self.forward_dag = self._compiled_ray_dag()

def __reduce__(self):
    """Refuse pickling of the engine.

    Raising here guarantees that an LLMEngine captured by accident in a
    closure (e.g. one used to initialize Ray worker actors) fails loudly
    at serialization time instead of being shipped to remote workers.
    """
    raise RuntimeError("LLMEngine should not be pickled!")

def get_tokenizer_for_seq(self, sequence: Sequence):
    """Return the tokenizer appropriate for *sequence*.

    Delegates to the tokenizer group, which picks the LoRA-specific
    tokenizer when the sequence carries a LoRA request.
    """
    lora_request = sequence.lora_request
    return self.tokenizer.get_lora_tokenizer(lora_request)

Expand Down Expand Up @@ -280,6 +285,8 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
parallel_config = copy.deepcopy(self.parallel_config)
scheduler_config = copy.deepcopy(self.scheduler_config)
device_config = copy.deepcopy(self.device_config)
lora_config = copy.deepcopy(self.lora_config)
kv_cache_dtype = self.cache_config.cache_dtype

for rank, (worker, (node_id,
_)) in enumerate(zip(self.workers,
Expand All @@ -295,22 +302,22 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
local_rank,
rank,
distributed_init_method,
lora_config=self.lora_config,
kv_cache_dtype=self.cache_config.cache_dtype,
lora_config=lora_config,
kv_cache_dtype=kv_cache_dtype,
))

driver_rank = 0
driver_local_rank = node_workers[driver_node_id].index(driver_rank)
self.driver_worker = Worker(
model_config,
parallel_config,
scheduler_config,
device_config,
self.model_config,
self.parallel_config,
self.scheduler_config,
self.device_config,
driver_local_rank,
driver_rank,
distributed_init_method,
lora_config=self.lora_config,
kv_cache_dtype=self.cache_config.cache_dtype,
kv_cache_dtype=kv_cache_dtype,
is_driver_worker=True,
)

Expand Down

0 comments on commit 2efce05

Please sign in to comment.