From 5fb27b4950b19a3f4b51c52cf345851e7715d68a Mon Sep 17 00:00:00 2001
From: Thomas Viehmann
Date: Fri, 21 Jan 2022 08:50:42 -0800
Subject: [PATCH] Bump dlpack.h to latest version (#65047)

Summary:
Fixes https://github.com/pytorch/pytorch/issues/64995

Pull Request resolved: https://github.com/pytorch/pytorch/pull/65047

Reviewed By: VitalyFedyunin

Differential Revision: D32468916

Pulled By: mruberry

fbshipit-source-id: 3e0a17a3a264a77956ea7b795bd472c6fc79566c
(cherry picked from commit bd480b9892b9fa8a3a46fd0d7babeaf5d649a8b6)
---
 aten/src/ATen/DLConvertor.cpp                |  4 +-
 aten/src/ATen/DLConvertor.h                  |  2 +-
 aten/src/ATen/dlpack.h                       | 28 ++++++----
 aten/src/ATen/test/cuda_dlconvertor_test.cpp |  2 +-
 caffe2/python/dlpack.h                       | 57 ++++++++++++++------
 caffe2/python/pybind_state_dlpack.cc         |  2 +-
 caffe2/python/pybind_state_dlpack.h          |  8 +--
 7 files changed, 67 insertions(+), 36 deletions(-)

diff --git a/aten/src/ATen/DLConvertor.cpp b/aten/src/ATen/DLConvertor.cpp
index 93af385c543..fb3f3596e1f 100644
--- a/aten/src/ATen/DLConvertor.cpp
+++ b/aten/src/ATen/DLConvertor.cpp
@@ -81,7 +81,7 @@ DLDevice getDLDevice(const Tensor& tensor, const int64_t& device_id) {
       // while everyone else should see HIP
       ctx.device_type = DLDeviceType::kDLROCM;
 #else
-      ctx.device_type = DLDeviceType::kDLGPU;
+      ctx.device_type = DLDeviceType::kDLCUDA;
 #endif
       break;
     case DeviceType::OPENCL:
@@ -102,7 +102,7 @@ static Device getATenDevice(const DLDevice& ctx) {
       return at::Device(DeviceType::CPU);
 #ifndef USE_ROCM
     // if we are compiled under HIP, we cannot do cuda
-    case DLDeviceType::kDLGPU:
+    case DLDeviceType::kDLCUDA:
       return at::Device(DeviceType::CUDA, ctx.device_id);
 #endif
     case DLDeviceType::kDLOpenCL:
diff --git a/aten/src/ATen/DLConvertor.h b/aten/src/ATen/DLConvertor.h
index a34d4b3e7a4..2d9a90adf8e 100644
--- a/aten/src/ATen/DLConvertor.h
+++ b/aten/src/ATen/DLConvertor.h
@@ -14,6 +14,6 @@ TORCH_API ScalarType toScalarType(const DLDataType& dtype);
 TORCH_API DLManagedTensor* toDLPack(const Tensor& src);
 TORCH_API Tensor fromDLPack(const DLManagedTensor* src);
 TORCH_API DLDataType getDLDataType(const Tensor& t);
-TORCH_API DLContext getDLContext(const Tensor& tensor, const int64_t& device_id);
+TORCH_API DLDevice getDLContext(const Tensor& tensor, const int64_t& device_id);
 
 } //namespace at
diff --git a/aten/src/ATen/dlpack.h b/aten/src/ATen/dlpack.h
index bc346d9c71e..f749b326e46 100644
--- a/aten/src/ATen/dlpack.h
+++ b/aten/src/ATen/dlpack.h
@@ -13,7 +13,7 @@
 #endif
 
 /*! \brief The current version of dlpack */
-#define DLPACK_VERSION 040
+#define DLPACK_VERSION 60
 
 /*! \brief DLPACK_DLL prefix for windows */
 #ifdef _WIN32
@@ -39,12 +39,11 @@ typedef enum {
   /*! \brief CPU device */
   kDLCPU = 1,
   /*! \brief CUDA GPU device */
-  kDLGPU = 2,
+  kDLCUDA = 2,
   /*!
-   * \brief Pinned CUDA GPU device by cudaMallocHost
-   * \note kDLCPUPinned = kDLCPU | kDLGPU
+   * \brief Pinned CUDA CPU memory by cudaMallocHost
    */
-  kDLCPUPinned = 3,
+  kDLCUDAHost = 3,
   /*! \brief OpenCL devices. */
   kDLOpenCL = 4,
   /*! \brief Vulkan buffer for next generation graphics. */
@@ -55,12 +54,20 @@ typedef enum {
   kDLVPI = 9,
   /*! \brief ROCm GPUs for AMD GPUs */
   kDLROCM = 10,
+  /*!
+   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
+   */
+  kDLROCMHost = 11,
   /*!
    * \brief Reserved extension device type,
    * used for quickly test extension device
    * The semantics can differ depending on the implementation.
    */
   kDLExtDev = 12,
+  /*!
+   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
+   */
+  kDLCUDAManaged = 13,
 } DLDeviceType;
 
 /*!
@@ -69,15 +76,13 @@ typedef enum {
 typedef struct {
   /*! \brief The device type used in the device. */
   DLDeviceType device_type;
-  /*! \brief The device index */
+  /*!
+   * \brief The device index.
+   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
+   */
   int device_id;
 } DLDevice;
 
-/*!
- * \brief This is an alias for DLDevice. Notice that this will be removed in the next release.
- */
-typedef DLDevice DLContext;
-
 /*!
  * \brief The type code options DLDataType.
  */
@@ -109,6 +114,7 @@ typedef enum {
  *   - float: type_code = 2, bits = 32, lanes=1
  *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
  *   - int8: type_code = 0, bits = 8, lanes=1
+ *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
  */
 typedef struct {
   /*!
diff --git a/aten/src/ATen/test/cuda_dlconvertor_test.cpp b/aten/src/ATen/test/cuda_dlconvertor_test.cpp
index 77bb9f6433f..9a89f67ef59 100644
--- a/aten/src/ATen/test/cuda_dlconvertor_test.cpp
+++ b/aten/src/ATen/test/cuda_dlconvertor_test.cpp
@@ -47,7 +47,7 @@ TEST(TestDlconvertor, TestDlconvertorCUDAHIP) {
 #if AT_ROCM_ENABLED()
   ASSERT_TRUE(dlMTensor->dl_tensor.device.device_type == DLDeviceType::kDLROCM);
 #else
-  ASSERT_TRUE(dlMTensor->dl_tensor.device.device_type == DLDeviceType::kDLGPU);
+  ASSERT_TRUE(dlMTensor->dl_tensor.device.device_type == DLDeviceType::kDLCUDA);
 #endif
 
   Tensor b = fromDLPack(dlMTensor);
diff --git a/caffe2/python/dlpack.h b/caffe2/python/dlpack.h
index 7536cf9db22..f749b326e46 100644
--- a/caffe2/python/dlpack.h
+++ b/caffe2/python/dlpack.h
@@ -13,7 +13,7 @@
 #endif
 
 /*! \brief The current version of dlpack */
-#define DLPACK_VERSION 020
+#define DLPACK_VERSION 60
 
 /*! \brief DLPACK_DLL prefix for windows */
 #ifdef _WIN32
@@ -26,25 +26,24 @@
 #define DLPACK_DLL
 #endif
 
-#include <stddef.h>
 #include <stdint.h>
+#include <stddef.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*!
- * \brief The device type in DLContext.
+ * \brief The device type in DLDevice.
  */
 typedef enum {
   /*! \brief CPU device */
   kDLCPU = 1,
   /*! \brief CUDA GPU device */
-  kDLGPU = 2,
+  kDLCUDA = 2,
   /*!
-   * \brief Pinned CUDA GPU device by cudaMallocHost
-   * \note kDLCPUPinned = kDLCPU | kDLGPU
+   * \brief Pinned CUDA CPU memory by cudaMallocHost
    */
-  kDLCPUPinned = 3,
+  kDLCUDAHost = 3,
   /*! \brief OpenCL devices. */
   kDLOpenCL = 4,
   /*! \brief Vulkan buffer for next generation graphics. */
@@ -55,32 +54,57 @@ typedef enum {
   kDLVPI = 9,
   /*! \brief ROCm GPUs for AMD GPUs */
   kDLROCM = 10,
+  /*!
+   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
+   */
+  kDLROCMHost = 11,
   /*!
    * \brief Reserved extension device type,
    * used for quickly test extension device
    * The semantics can differ depending on the implementation.
    */
   kDLExtDev = 12,
+  /*!
+   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
+   */
+  kDLCUDAManaged = 13,
 } DLDeviceType;
 
 /*!
- * \brief A Device context for Tensor and operator.
+ * \brief A Device for Tensor and operator.
  */
 typedef struct {
   /*! \brief The device type used in the device. */
   DLDeviceType device_type;
-  /*! \brief The device index */
+  /*!
+   * \brief The device index.
+   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
+   */
   int device_id;
-} DLContext;
+} DLDevice;
 
 /*!
 * \brief The type code options DLDataType.
 */
 typedef enum {
+  /*! \brief signed integer */
   kDLInt = 0U,
+  /*! \brief unsigned integer */
   kDLUInt = 1U,
+  /*! \brief IEEE floating point */
   kDLFloat = 2U,
+  /*!
+   * \brief Opaque handle type, reserved for testing purposes.
+   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
+   */
+  kDLOpaqueHandle = 3U,
+  /*! \brief bfloat16 */
   kDLBfloat = 4U,
+  /*!
+   * \brief complex number
+   * (C/C++/Python layout: compact struct per complex number)
+   */
+  kDLComplex = 5U,
 } DLDataTypeCode;
 
 /*!
@@ -90,6 +114,7 @@ typedef enum {
  *   - float: type_code = 2, bits = 32, lanes=1
  *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
  *   - int8: type_code = 0, bits = 8, lanes=1
+ *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
  */
 typedef struct {
   /*!
@@ -130,8 +155,8 @@ typedef struct {
   *  \endcode
   */
   void* data;
-  /*! \brief The device context of the tensor */
-  DLContext ctx;
+  /*! \brief The device of the tensor */
+  DLDevice device;
   /*! \brief Number of dimensions */
   int ndim;
   /*! \brief The data type of the pointer*/
@@ -160,15 +185,15 @@ typedef struct DLManagedTensor {
   /*! \brief the context of the original host framework of DLManagedTensor in
    *   which DLManagedTensor is used in the framework. It can also be NULL.
    */
-  void* manager_ctx;
+  void * manager_ctx;
   /*! \brief Destructor signature void (*)(void*) - this should be called
    *   to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
    *   if there is no way for the caller to provide a reasonable destructor.
    *   The destructors deletes the argument self as well.
    */
-  void (*deleter)(struct DLManagedTensor* self);
+  void (*deleter)(struct DLManagedTensor * self);
 } DLManagedTensor;
 #ifdef __cplusplus
 } // DLPACK_EXTERN_C
 #endif
-#endif // DLPACK_DLPACK_H_
+#endif  // DLPACK_DLPACK_H_
diff --git a/caffe2/python/pybind_state_dlpack.cc b/caffe2/python/pybind_state_dlpack.cc
index 36070f58d8b..83b856f672a 100644
--- a/caffe2/python/pybind_state_dlpack.cc
+++ b/caffe2/python/pybind_state_dlpack.cc
@@ -8,7 +8,7 @@ namespace py = pybind11;
 const DLDeviceType* CaffeToDLDeviceType(int device_type) {
   static std::map<int, DLDeviceType> dl_device_type_map{
       {PROTO_CPU, kDLCPU},
-      {PROTO_CUDA, kDLGPU},
+      {PROTO_CUDA, kDLCUDA},
   };
   const auto it = dl_device_type_map.find(device_type);
   return it == dl_device_type_map.end() ? nullptr : &it->second;
diff --git a/caffe2/python/pybind_state_dlpack.h b/caffe2/python/pybind_state_dlpack.h
index bfb5b04922e..ab987a2e4da 100644
--- a/caffe2/python/pybind_state_dlpack.h
+++ b/caffe2/python/pybind_state_dlpack.h
@@ -28,7 +28,7 @@ class DLPackWrapper {
       : tensor(tensor), device_option(device_option) {}
 
   py::object data() {
-    DLContext tensor_context;
+    DLDevice tensor_context;
     auto device_type_ptr = CaffeToDLDeviceType(device_option.device_type());
     CAFFE_ENFORCE(
         device_type_ptr,
@@ -55,7 +55,7 @@ class DLPackWrapper {
 
     DLTensor dlTensor;
     dlTensor.data = const_cast<void*>(tensor->raw_data());
-    dlTensor.ctx = tensor_context;
+    dlTensor.device = tensor_context;
     dlTensor.ndim = tensor->dim();
     dlTensor.dtype = tensor_type;
     dlTensor.shape = const_cast<int64_t*>(&(tensor->sizes()[0]));
@@ -83,9 +83,9 @@ class DLPackWrapper {
         "Unsupported device type: ",
         device_option.device_type());
     CAFFE_ENFORCE(
-        dlTensor->ctx.device_type == *device_type_ptr,
+        dlTensor->device.device_type == *device_type_ptr,
         "DLPack tensor device type mismatch");
-    int dlpack_device_id = dlTensor->ctx.device_id;
+    int dlpack_device_id = dlTensor->device.device_id;
     CAFFE_ENFORCE_EQ(
         dlpack_device_id,
         device_option.device_id(),
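
A minimal sketch of how a downstream C++ consumer adapts to the renames this bump carries through (kDLGPU -> kDLCUDA, DLContext -> DLDevice, DLTensor::ctx -> DLTensor::device). It assumes only at::toDLPack/at::fromDLPack and the DLPack structs shown in the diffs above; the helper name consume_capsule is hypothetical and not part of this patch.

#include <ATen/ATen.h>
#include <ATen/DLConvertor.h>

// Hypothetical consumer: exports a tensor via DLPack and inspects its device.
// Pre-bump code would have read m->dl_tensor.ctx.device_type == kDLGPU; after
// this change the member is `device` and the enumerator is kDLCUDA.
void consume_capsule(const at::Tensor& t) {
  DLManagedTensor* m = at::toDLPack(t);
  const DLTensor& dl = m->dl_tensor;
  if (dl.device.device_type == kDLCUDA) {
    // dl.device.device_id is the CUDA ordinal the data lives on.
  }
  // Re-importing hands ownership back to ATen; the returned tensor shares
  // storage with `t`, and its destruction will invoke m->deleter.
  at::Tensor roundtrip = at::fromDLPack(m);
  // If the capsule were NOT re-imported, the consumer would have to release
  // it explicitly instead:
  //   if (m->deleter) m->deleter(m);
}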