From 5fb27b4950b19a3f4b51c52cf345851e7715d68a Mon Sep 17 00:00:00 2001
From: Thomas Viehmann
Date: Fri, 21 Jan 2022 08:50:42 -0800
Subject: [PATCH] Bump dlpack.h to latest version (#65047)

Summary:
Fixes https://github.com/pytorch/pytorch/issues/64995

Pull Request resolved: https://github.com/pytorch/pytorch/pull/65047

Reviewed By: VitalyFedyunin

Differential Revision: D32468916

Pulled By: mruberry

fbshipit-source-id: 3e0a17a3a264a77956ea7b795bd472c6fc79566c
(cherry picked from commit bd480b9892b9fa8a3a46fd0d7babeaf5d649a8b6)
---
 aten/src/ATen/DLConvertor.cpp                |  4 +-
 aten/src/ATen/DLConvertor.h                  |  2 +-
 aten/src/ATen/dlpack.h                       | 28 ++++++----
 aten/src/ATen/test/cuda_dlconvertor_test.cpp |  2 +-
 caffe2/python/dlpack.h                       | 57 ++++++++++++++------
 caffe2/python/pybind_state_dlpack.cc         |  2 +-
 caffe2/python/pybind_state_dlpack.h          |  8 +--
 7 files changed, 67 insertions(+), 36 deletions(-)

diff --git a/aten/src/ATen/DLConvertor.cpp b/aten/src/ATen/DLConvertor.cpp
index 93af385c543..fb3f3596e1f 100644
--- a/aten/src/ATen/DLConvertor.cpp
+++ b/aten/src/ATen/DLConvertor.cpp
@@ -81,7 +81,7 @@ DLDevice getDLDevice(const Tensor& tensor, const int64_t& device_id) {
       // while everyone else should see HIP
       ctx.device_type = DLDeviceType::kDLROCM;
 #else
-      ctx.device_type = DLDeviceType::kDLGPU;
+      ctx.device_type = DLDeviceType::kDLCUDA;
 #endif
       break;
     case DeviceType::OPENCL:
@@ -102,7 +102,7 @@ static Device getATenDevice(const DLDevice& ctx) {
       return at::Device(DeviceType::CPU);
 #ifndef USE_ROCM
     // if we are compiled under HIP, we cannot do cuda
-    case DLDeviceType::kDLGPU:
+    case DLDeviceType::kDLCUDA:
       return at::Device(DeviceType::CUDA, ctx.device_id);
 #endif
     case DLDeviceType::kDLOpenCL:
diff --git a/aten/src/ATen/DLConvertor.h b/aten/src/ATen/DLConvertor.h
index a34d4b3e7a4..2d9a90adf8e 100644
--- a/aten/src/ATen/DLConvertor.h
+++ b/aten/src/ATen/DLConvertor.h
@@ -14,6 +14,6 @@ TORCH_API ScalarType toScalarType(const DLDataType& dtype);
 TORCH_API DLManagedTensor* toDLPack(const Tensor& src);
 TORCH_API Tensor fromDLPack(const DLManagedTensor* src);
 TORCH_API DLDataType getDLDataType(const Tensor& t);
-TORCH_API DLContext getDLContext(const Tensor& tensor, const int64_t& device_id);
+TORCH_API DLDevice getDLContext(const Tensor& tensor, const int64_t& device_id);
 
 } //namespace at
diff --git a/aten/src/ATen/dlpack.h b/aten/src/ATen/dlpack.h
index bc346d9c71e..f749b326e46 100644
--- a/aten/src/ATen/dlpack.h
+++ b/aten/src/ATen/dlpack.h
@@ -13,7 +13,7 @@
 #endif
 
 /*! \brief The current version of dlpack */
-#define DLPACK_VERSION 040
+#define DLPACK_VERSION 60
 
 /*! \brief DLPACK_DLL prefix for windows */
 #ifdef _WIN32
@@ -39,12 +39,11 @@ typedef enum {
   /*! \brief CPU device */
   kDLCPU = 1,
   /*! \brief CUDA GPU device */
-  kDLGPU = 2,
+  kDLCUDA = 2,
   /*!
-   * \brief Pinned CUDA GPU device by cudaMallocHost
-   * \note kDLCPUPinned = kDLCPU | kDLGPU
+   * \brief Pinned CUDA CPU memory by cudaMallocHost
    */
-  kDLCPUPinned = 3,
+  kDLCUDAHost = 3,
   /*! \brief OpenCL devices. */
   kDLOpenCL = 4,
   /*! \brief Vulkan buffer for next generation graphics. */
@@ -55,12 +54,20 @@ typedef enum {
   kDLVPI = 9,
   /*! \brief ROCm GPUs for AMD GPUs */
   kDLROCM = 10,
+  /*!
+   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
+   */
+  kDLROCMHost = 11,
   /*!
    * \brief Reserved extension device type,
    * used for quickly test extension device
    * The semantics can differ depending on the implementation.
    */
   kDLExtDev = 12,
+  /*!
+   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
+   */
+  kDLCUDAManaged = 13,
 } DLDeviceType;
 
 /*!
@@ -69,15 +76,13 @@ typedef enum {
 typedef struct {
   /*! \brief The device type used in the device. */
   DLDeviceType device_type;
-  /*! \brief The device index */
+  /*!
+   * \brief The device index.
+   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
+   */
   int device_id;
 } DLDevice;
 
-/*!
- * \brief This is an alias for DLDevice. Notice that this will be removed in the next release.
- */
-typedef DLDevice DLContext;
-
 /*!
  * \brief The type code options DLDataType.
  */
@@ -109,6 +114,7 @@ typedef enum {
  *   - float: type_code = 2, bits = 32, lanes=1
  *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
  *   - int8: type_code = 0, bits = 8, lanes=1
+ *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
  */
 typedef struct {
   /*!
diff --git a/aten/src/ATen/test/cuda_dlconvertor_test.cpp b/aten/src/ATen/test/cuda_dlconvertor_test.cpp
index 77bb9f6433f..9a89f67ef59 100644
--- a/aten/src/ATen/test/cuda_dlconvertor_test.cpp
+++ b/aten/src/ATen/test/cuda_dlconvertor_test.cpp
@@ -47,7 +47,7 @@ TEST(TestDlconvertor, TestDlconvertorCUDAHIP) {
 #if AT_ROCM_ENABLED()
   ASSERT_TRUE(dlMTensor->dl_tensor.device.device_type == DLDeviceType::kDLROCM);
 #else
-  ASSERT_TRUE(dlMTensor->dl_tensor.device.device_type == DLDeviceType::kDLGPU);
+  ASSERT_TRUE(dlMTensor->dl_tensor.device.device_type == DLDeviceType::kDLCUDA);
 #endif
 
   Tensor b = fromDLPack(dlMTensor);
diff --git a/caffe2/python/dlpack.h b/caffe2/python/dlpack.h
index 7536cf9db22..f749b326e46 100644
--- a/caffe2/python/dlpack.h
+++ b/caffe2/python/dlpack.h
@@ -13,7 +13,7 @@
 #endif
 
 /*! \brief The current version of dlpack */
-#define DLPACK_VERSION 020
+#define DLPACK_VERSION 60
 
 /*! \brief DLPACK_DLL prefix for windows */
 #ifdef _WIN32
@@ -26,25 +26,24 @@
 #define DLPACK_DLL
 #endif
 
-#include <stddef.h>
 #include <stdint.h>
+#include <stddef.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*!
- * \brief The device type in DLContext.
+ * \brief The device type in DLDevice.
  */
 typedef enum {
   /*! \brief CPU device */
   kDLCPU = 1,
   /*! \brief CUDA GPU device */
-  kDLGPU = 2,
+  kDLCUDA = 2,
   /*!
-   * \brief Pinned CUDA GPU device by cudaMallocHost
-   * \note kDLCPUPinned = kDLCPU | kDLGPU
+   * \brief Pinned CUDA CPU memory by cudaMallocHost
    */
-  kDLCPUPinned = 3,
+  kDLCUDAHost = 3,
   /*! \brief OpenCL devices. */
   kDLOpenCL = 4,
   /*! \brief Vulkan buffer for next generation graphics. */
@@ -55,32 +54,57 @@ typedef enum {
   kDLVPI = 9,
   /*! \brief ROCm GPUs for AMD GPUs */
   kDLROCM = 10,
+  /*!
+   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
+   */
+  kDLROCMHost = 11,
   /*!
    * \brief Reserved extension device type,
    * used for quickly test extension device
    * The semantics can differ depending on the implementation.
    */
   kDLExtDev = 12,
+  /*!
+   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
+   */
+  kDLCUDAManaged = 13,
 } DLDeviceType;
 
 /*!
- * \brief A Device context for Tensor and operator.
+ * \brief A Device for Tensor and operator.
  */
 typedef struct {
   /*! \brief The device type used in the device. */
   DLDeviceType device_type;
-  /*! \brief The device index */
+  /*!
+   * \brief The device index.
+   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
+   */
   int device_id;
-} DLContext;
+} DLDevice;
 
 /*!
 * \brief The type code options DLDataType.
 */
 typedef enum {
+  /*! \brief signed integer */
   kDLInt = 0U,
+  /*! \brief unsigned integer */
   kDLUInt = 1U,
+  /*! \brief IEEE floating point */
   kDLFloat = 2U,
+  /*!
+   * \brief Opaque handle type, reserved for testing purposes.
+   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
+   */
+  kDLOpaqueHandle = 3U,
+  /*! \brief bfloat16 */
   kDLBfloat = 4U,
+  /*!
+   * \brief complex number
+   * (C/C++/Python layout: compact struct per complex number)
+   */
+  kDLComplex = 5U,
 } DLDataTypeCode;
 
 /*!
@@ -90,6 +114,7 @@ typedef enum {
  *   - float: type_code = 2, bits = 32, lanes=1
  *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
  *   - int8: type_code = 0, bits = 8, lanes=1
+ *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
  */
 typedef struct {
   /*!
@@ -130,8 +155,8 @@ typedef struct {
   *  \endcode
   */
   void* data;
-  /*! \brief The device context of the tensor */
-  DLContext ctx;
+  /*! \brief The device of the tensor */
+  DLDevice device;
   /*! \brief Number of dimensions */
   int ndim;
   /*! \brief The data type of the pointer*/
@@ -160,15 +185,15 @@ typedef struct DLManagedTensor {
   /*! \brief the context of the original host framework of DLManagedTensor in
    *   which DLManagedTensor is used in the framework. It can also be NULL.
    */
-  void* manager_ctx;
+  void * manager_ctx;
   /*! \brief Destructor signature void (*)(void*) - this should be called
    *   to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
    *   if there is no way for the caller to provide a reasonable destructor.
    *   The destructors deletes the argument self as well.
    */
-  void (*deleter)(struct DLManagedTensor* self);
+  void (*deleter)(struct DLManagedTensor * self);
 } DLManagedTensor;
 #ifdef __cplusplus
 } // DLPACK_EXTERN_C
 #endif
-#endif // DLPACK_DLPACK_H_
+#endif  // DLPACK_DLPACK_H_
diff --git a/caffe2/python/pybind_state_dlpack.cc b/caffe2/python/pybind_state_dlpack.cc
index 36070f58d8b..83b856f672a 100644
--- a/caffe2/python/pybind_state_dlpack.cc
+++ b/caffe2/python/pybind_state_dlpack.cc
@@ -8,7 +8,7 @@ namespace py = pybind11;
 const DLDeviceType* CaffeToDLDeviceType(int device_type) {
   static std::map<int, DLDeviceType> dl_device_type_map{
       {PROTO_CPU, kDLCPU},
-      {PROTO_CUDA, kDLGPU},
+      {PROTO_CUDA, kDLCUDA},
   };
   const auto it = dl_device_type_map.find(device_type);
   return it == dl_device_type_map.end() ? nullptr : &it->second;
diff --git a/caffe2/python/pybind_state_dlpack.h b/caffe2/python/pybind_state_dlpack.h
index bfb5b04922e..ab987a2e4da 100644
--- a/caffe2/python/pybind_state_dlpack.h
+++ b/caffe2/python/pybind_state_dlpack.h
@@ -28,7 +28,7 @@ class DLPackWrapper {
       : tensor(tensor), device_option(device_option) {}
 
   py::object data() {
-    DLContext tensor_context;
+    DLDevice tensor_context;
     auto device_type_ptr = CaffeToDLDeviceType(device_option.device_type());
     CAFFE_ENFORCE(
         device_type_ptr,
@@ -55,7 +55,7 @@ class DLPackWrapper {
 
     DLTensor dlTensor;
     dlTensor.data = const_cast<void*>(tensor->raw_data());
-    dlTensor.ctx = tensor_context;
+    dlTensor.device = tensor_context;
     dlTensor.ndim = tensor->dim();
     dlTensor.dtype = tensor_type;
     dlTensor.shape = const_cast<int64_t*>(&(tensor->sizes()[0]));
@@ -83,9 +83,9 @@ class DLPackWrapper {
         "Unsupported device type: ",
         device_option.device_type());
     CAFFE_ENFORCE(
-        dlTensor->ctx.device_type == *device_type_ptr,
+        dlTensor->device.device_type == *device_type_ptr,
         "DLPack tensor device type mismatch");
-    int dlpack_device_id = dlTensor->ctx.device_id;
+    int dlpack_device_id = dlTensor->device.device_id;
     CAFFE_ENFORCE_EQ(
         dlpack_device_id,
         device_option.device_id(),
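
A minimal sketch of how a downstream C++ consumer adapts to the renames this bump carries through (kDLGPU -> kDLCUDA, DLContext -> DLDevice, DLTensor::ctx -> DLTensor::device). It assumes only at::toDLPack/at::fromDLPack and the DLPack structs shown in the diffs above; the helper name consume_capsule is hypothetical and not part of this patch.

#include <ATen/ATen.h>
#include <ATen/DLConvertor.h>

// Hypothetical consumer: exports a tensor via DLPack and inspects its device.
// Pre-bump code would have read m->dl_tensor.ctx.device_type == kDLGPU; after
// this change the member is `device` and the enumerator is kDLCUDA.
void consume_capsule(const at::Tensor& t) {
  DLManagedTensor* m = at::toDLPack(t);
  const DLTensor& dl = m->dl_tensor;
  if (dl.device.device_type == kDLCUDA) {
    // dl.device.device_id is the CUDA ordinal the data lives on.
  }
  // Re-importing hands ownership back to ATen; the returned tensor shares
  // storage with `t`, and its destruction will invoke m->deleter.
  at::Tensor roundtrip = at::fromDLPack(m);
  // If the capsule were NOT re-imported, the consumer would have to release
  // it explicitly instead:
  //   if (m->deleter) m->deleter(m);
}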