Skip to content

Commit

Permalink
[Bugfix] Fix CustomAllreduce nvlink topology detection (#3974)
Browse files: browse the repository at this point in the history
[Bugfix] Fix CustomAllreduce pcie nvlink topology detection (#3974) (#4159)
  • Loading branch information
agt authored Apr 18, 2024
1 parent cd2f63f commit 8f9c28f
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions vllm/distributed/device_communicators/custom_all_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,10 @@ def _is_full_nvlink(rank, world_size):
     for i in range(world_size):
         if i != rank:
             try:
-                link_state = pynvml.nvmlDeviceGetNvLinkState(handle, i)
-                if not link_state:
+                peer_handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+                p2p_status = pynvml.nvmlDeviceGetP2PStatus(
+                    handle, peer_handle, pynvml.NVML_P2P_CAPS_INDEX_NVLINK)
+                if p2p_status != pynvml.NVML_P2P_STATUS_OK:
                     return False
             except pynvml.NVMLError as error:
                 logger.info(
Expand Down

0 comments on commit 8f9c28f

Please sign in to comment.