From de0b86df2403c70084d12f04c9701916dbb476b9 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Tue, 29 Oct 2024 17:06:24 -0700
Subject: [PATCH] [core][distributed] fix custom allreduce in pytorch 2.5 (#9815)

Signed-off-by: youkaichao
---
 .../device_communicators/custom_all_reduce.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/vllm/distributed/device_communicators/custom_all_reduce.py b/vllm/distributed/device_communicators/custom_all_reduce.py
index 7de5b05a0b053..c3632aee6d11a 100644
--- a/vllm/distributed/device_communicators/custom_all_reduce.py
+++ b/vllm/distributed/device_communicators/custom_all_reduce.py
@@ -191,8 +191,20 @@ def capture(self):
 
     def _get_ipc_meta(self, inp: torch.Tensor):
         data = inp.untyped_storage()._share_cuda_()
+        handle = data[1]
+        # https://github.com/pytorch/pytorch/pull/130890 changes
+        # the binary format of the ipc handle
+        # it starts from pytorch 2.5
+        if len(handle) > 64:
+            assert len(handle) == 66
+            # only support SHAREABLE_HANDLE_VERSION = 1
+            assert int(handle[0]) == 1
+            # only support SHAREABLE_CUDA_MALLOC = 'c'
+            assert handle[1] == ord("c")
+            handle = handle[2:]
+            # TODO: support expandable segment
         shard_data = (
-            data[1],  # ipc handle to base ptr
+            handle,  # ipc handle to base ptr
             data[3],  # offset of base ptr
         )
         return self._gather_ipc_meta(shard_data)
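
For reference, the sketch below (not part of the patch) restates the handle-format check as a
standalone helper. It assumes only what the comments in the hunk describe: starting with
PyTorch 2.5, the IPC handle returned by Tensor.untyped_storage()._share_cuda_() carries a
two-byte prefix (a version byte equal to 1 and a type byte 'c' for cudaMalloc-backed memory)
in front of the raw 64-byte cudaIpcMemHandle_t. The helper name is hypothetical.

def strip_ipc_handle_header(handle: bytes) -> bytes:
    # Hypothetical helper, not part of vLLM: return the raw 64-byte CUDA IPC
    # handle for both the pre-2.5 and the 2.5+ binary formats.
    if len(handle) == 64:
        # pytorch < 2.5: the handle is already the raw cudaIpcMemHandle_t
        return handle
    # pytorch >= 2.5: two header bytes followed by the 64-byte handle
    assert len(handle) == 66, f"unexpected handle length {len(handle)}"
    # only SHAREABLE_HANDLE_VERSION = 1 is supported
    assert handle[0] == 1
    # only SHAREABLE_CUDA_MALLOC = 'c' is supported (no expandable segments)
    assert handle[1] == ord("c")
    return handle[2:]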