Skip to content

Commit

Permalink
Update ray_utils.py FIX vllm-project#1058
Browse files Browse the repository at this point in the history
  • Loading branch information
premsa authored Apr 4, 2024
1 parent 819a309 commit 85ca041
Showing 1 changed file with 19 additions and 6 deletions.
25 changes: 19 additions & 6 deletions vllm/engine/ray_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,26 @@ def initialize_ray_cluster(
"Ray is not installed. Please install Ray to use distributed "
"serving.")

# Connect to a ray cluster.
# Every branch passes the same base arguments to ray.init(); optional
# resource limits are layered on top so there is a single call site.
init_kwargs = {
    "address": ray_address,
    "ignore_reinit_error": True,
}

# Fix for https://github.com/vllm-project/vllm/issues/1058
# Inside a SLURM job, cap Ray's CPU count at the job's per-task allocation.
# SLURM_CPUS_PER_TASK is only exported when --cpus-per-task was requested,
# so it can be missing (or empty) even though SLURM_JOB_ID is set; in that
# case leave num_cpus unset and let Ray pick its own default instead of
# crashing on int(None).
if "SLURM_JOB_ID" in os.environ:
    slurm_cpus_per_task = os.environ.get("SLURM_CPUS_PER_TASK")
    if slurm_cpus_per_task:
        # NOTE(review): Ray may reject num_cpus when attaching to an
        # already-running cluster -- confirm for non-None ray_address.
        init_kwargs["num_cpus"] = int(slurm_cpus_per_task)

# On HIP builds the GPU count is passed explicitly, one per worker.
if is_hip():
    init_kwargs["num_gpus"] = parallel_config.world_size

ray.init(**init_kwargs)


if parallel_config.placement_group:
# Placement group is already set.
Expand Down

0 comments on commit 85ca041

Please sign in to comment.