
Commit 0ce0539

[Bugfix] Fix Llava inference with Tensor Parallelism. (vllm-project#3883)
Isotr0py authored Apr 7, 2024
1 parent 2f19283 commit 0ce0539
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions vllm/executor/ray_gpu_executor.py
@@ -154,6 +154,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
         scheduler_config = copy.deepcopy(self.scheduler_config)
         device_config = copy.deepcopy(self.device_config)
         lora_config = copy.deepcopy(self.lora_config)
+        vision_language_config = copy.deepcopy(self.vision_language_config)
         kv_cache_dtype = self.cache_config.cache_dtype

         # Initialize the actual workers with the Worker class.
@@ -172,6 +173,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
                     rank,
                     distributed_init_method,
                     lora_config=lora_config,
+                    vision_language_config=vision_language_config,
                     kv_cache_dtype=kv_cache_dtype,
                 ))
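
The two added lines forward the vision-language config from the driver to each Ray worker. Without them, the workers spawned for tensor parallelism initialize with vision_language_config=None, so multimodal (Llava) inference fails as soon as tensor_parallel_size > 1. Below is a minimal, self-contained sketch of the pattern, using hypothetical stand-in classes (VisionLanguageConfig, Worker, init_workers) rather than vLLM's real Worker API:

```python
# Sketch of the bug this commit fixes (hypothetical names, not vLLM's API):
# any config the driver does not explicitly pass through when creating
# tensor-parallel workers arrives as None in the worker process.
import copy
from dataclasses import dataclass
from typing import Optional


@dataclass
class VisionLanguageConfig:
    """Hypothetical stand-in for vLLM's vision-language config."""
    image_input_type: str = "pixel_values"


@dataclass
class Worker:
    """Hypothetical stand-in for the per-GPU worker."""
    rank: int
    lora_config: Optional[object] = None
    vision_language_config: Optional[VisionLanguageConfig] = None


def init_workers(world_size: int,
                 vision_language_config: Optional[VisionLanguageConfig]):
    # Deep-copy on the driver so every worker gets an independent copy,
    # mirroring the copy.deepcopy calls in _init_workers_ray above.
    vlc = copy.deepcopy(vision_language_config)
    workers = []
    for rank in range(world_size):
        # Before the fix, the keyword below was missing, so every worker
        # initialized with vision_language_config=None and Llava broke.
        workers.append(Worker(rank=rank, vision_language_config=vlc))
    return workers


if __name__ == "__main__":
    workers = init_workers(2, VisionLanguageConfig())
    assert all(w.vision_language_config is not None for w in workers)
```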
