[core][distributed] fix custom allreduce in pytorch 2.5 (vllm-project#9815)

Signed-off-by: youkaichao <[email protected]>
Signed-off-by: NickLucche <[email protected]>
youkaichao authored and NickLucche committed Oct 31, 2024
1 parent e4817da commit 543400f
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion vllm/distributed/device_communicators/custom_all_reduce.py
@@ -191,8 +191,20 @@ def capture(self):

    def _get_ipc_meta(self, inp: torch.Tensor):
        data = inp.untyped_storage()._share_cuda_()
+       handle = data[1]
+       # https://github.com/pytorch/pytorch/pull/130890 changes
+       # the binary format of the ipc handle
+       # it starts from pytorch 2.5
+       if len(handle) > 64:
+           assert len(handle) == 66
+           # only support SHAREABLE_HANDLE_VERSION = 1
+           assert int(handle[0]) == 1
+           # only support SHAREABLE_CUDA_MALLOC = 'c'
+           assert handle[1] == ord("c")
+           handle = handle[2:]
+           # TODO: support expandable segment
        shard_data = (
-           data[1],  # ipc handle to base ptr
+           handle,  # ipc handle to base ptr
            data[3],  # offset of base ptr
        )
        return self._gather_ipc_meta(shard_data)
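For context, the handle parsing this patch introduces can be read as a small standalone helper. The sketch below is illustrative only and not part of vLLM or PyTorch; normalize_ipc_handle is a hypothetical name. It assumes the behavior described in the diff: PyTorch 2.5 (pytorch/pytorch#130890) prepends a version byte (SHAREABLE_HANDLE_VERSION = 1) and a handle-type byte ('c' for SHAREABLE_CUDA_MALLOC) to the 64-byte CUDA IPC handle returned via untyped_storage()._share_cuda_(), while earlier versions return the bare handle.

import torch

def normalize_ipc_handle(handle: bytes) -> bytes:
    # Illustrative helper (hypothetical name, not part of vLLM): return the
    # raw 64-byte cudaIpcMemHandle_t regardless of which PyTorch version
    # produced it.
    if len(handle) == 64:
        # pre-2.5 format: already the bare CUDA IPC handle
        return handle
    # 2.5+ format: 2-byte prefix followed by the 64-byte handle
    assert len(handle) == 66, f"unexpected handle length {len(handle)}"
    assert handle[0] == 1, "only SHAREABLE_HANDLE_VERSION = 1 is supported"
    assert handle[1] == ord("c"), "only SHAREABLE_CUDA_MALLOC is supported"
    return handle[2:]

if torch.cuda.is_available():
    t = torch.empty(1024, device="cuda")
    data = t.untyped_storage()._share_cuda_()  # per the patch: data[1] is the IPC handle bytes
    raw = normalize_ipc_handle(data[1])
    assert len(raw) == 64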
