From 4445bc2543f7eff69a91392a41199795c6035dfd Mon Sep 17 00:00:00 2001
From: Tim Martin <38798827+tmartin-gh@users.noreply.github.com>
Date: Tue, 17 Dec 2024 13:54:41 -0800
Subject: [PATCH] Python integration sample (#812)

* Added DLPack make_tensor

* Add a self-contained python calling MatX (calling python calling MatX)
  integration example

---------

Co-authored-by: cliffburdick
---
 examples/CMakeLists.txt                       |  10 +-
 .../python_integration_sample/CMakeLists.txt  |  68 ++++++
 .../example_matxutil.py                       |  77 ++++++
 .../python_integration_sample/matxutil.cu     | 231 ++++++++++++++++++
 .../python_integration_sample/mypythonlib.py  |  15 ++
 include/matx/core/make_tensor.h               | 133 ++++++++++
 include/matx/core/tensor.h                    |   3 +-
 test/00_tensor/BasicTensorTests.cu            |   2 +-
 8 files changed, 533 insertions(+), 6 deletions(-)
 create mode 100644 examples/python_integration_sample/CMakeLists.txt
 create mode 100644 examples/python_integration_sample/example_matxutil.py
 create mode 100644 examples/python_integration_sample/matxutil.cu
 create mode 100644 examples/python_integration_sample/mypythonlib.py

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index d56f099b..2397c509 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -20,15 +20,15 @@ set(examples
     print_styles)
 
-    
-    
+
+
 add_library(example_lib INTERFACE)
 target_include_directories(example_lib SYSTEM INTERFACE ${CUTLASS_INC} ${pybind11_INCLUDE_DIR} ${PYTHON_INCLUDE_DIRS})
 target_link_libraries(example_lib INTERFACE matx::matx) # Transitive properties
 set_property(TARGET example_lib PROPERTY ENABLE_EXPORTS 1)
-    
+
 if(eigen_DIR)
   include_directories(SYSTEM ${eigen_DIR})
   add_definitions(-DUSE_EIGEN)
@@ -64,3 +64,7 @@ endforeach()
 # Add host-compiler only example program to catch missing ifdef __CUDACC__ guards
 add_executable(test_host test_host.cpp)
 target_link_libraries(test_host matx::matx)
+
+if(MATX_EN_PYBIND11)
+  add_subdirectory(python_integration_sample)
+endif()
\ No newline at end of file
diff --git a/examples/python_integration_sample/CMakeLists.txt b/examples/python_integration_sample/CMakeLists.txt
new file mode 100644
index 00000000..44208892
--- /dev/null
+++ b/examples/python_integration_sample/CMakeLists.txt
@@ -0,0 +1,68 @@
+# This is a cmake project showing how to build a python importable library
+# using pybind11, how to pass tensors between MatX and python, and
+# how to call MatX operators from python
+
+cmake_minimum_required(VERSION 3.26)
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+  message(WARNING "CMAKE_BUILD_TYPE not defined. Defaulting to release.")
+  set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type: Debug;Release;MinSizeRel;RelWithDebInfo")
+endif()
+
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+  message(WARNING "CMAKE_CUDA_ARCHITECTURES not defined. Defaulting to 70")
+  set(CMAKE_CUDA_ARCHITECTURES 70 CACHE STRING "Select compile target CUDA Compute Capabilities")
+endif()
+
+if(NOT DEFINED MATX_FETCH_REMOTE)
+  message(WARNING "MATX_FETCH_REMOTE not defined. Defaulting to OFF, will use local MatX repo")
+  set(MATX_FETCH_REMOTE OFF CACHE BOOL "Set MatX repo fetch location")
+endif()
+
+project(SAMPLE_MATX_PYTHON LANGUAGES CUDA CXX)
+find_package(CUDAToolkit 12.2 REQUIRED)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+# Must enable pybind11 support
+set(MATX_EN_PYBIND11 ON)
+
+# Use this section if you want to configure other MatX options
+#set(MATX_EN_VISUALIZATION ON) # Uncomment to enable visualizations
+#set(MATX_EN_FILEIO ON) # Uncomment to enable file IO
+
+# Skip recursive MatX fetch
+if(MATX_BUILD_EXAMPLES)
+else()
+  if(MATX_FETCH_REMOTE)
+    include(FetchContent)
+    FetchContent_Declare(
+      MatX
+      GIT_REPOSITORY https://github.com/NVIDIA/MatX.git
+      GIT_TAG main
+    )
+  else()
+    include(FetchContent)
+    FetchContent_Declare(
+      MatX
+      SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../
+    )
+  endif()
+  FetchContent_MakeAvailable(MatX)
+endif()
+
+add_library(matxutil MODULE matxutil.cu)
+target_link_libraries(matxutil PRIVATE matx::matx CUDA::cudart)
+set_target_properties(matxutil PROPERTIES SUFFIX ".so" PREFIX "")
+
+configure_file(
+  ${CMAKE_CURRENT_SOURCE_DIR}/mypythonlib.py
+  ${CMAKE_BINARY_DIR}
+  COPYONLY
+)
+
+configure_file(
+  ${CMAKE_CURRENT_SOURCE_DIR}/example_matxutil.py
+  ${CMAKE_BINARY_DIR}
+  COPYONLY
+)
diff --git a/examples/python_integration_sample/example_matxutil.py b/examples/python_integration_sample/example_matxutil.py
new file mode 100644
index 00000000..259be1fb
--- /dev/null
+++ b/examples/python_integration_sample/example_matxutil.py
@@ -0,0 +1,77 @@
+import cupy as cp
+import sys
+
+# Add path . if we built as a stand-alone project
+sys.path.append('.')
+
+# Add path examples/python_integration_sample/ if we built as part of MatX examples
+sys.path.append('examples/python_integration_sample/')
+
+import matxutil
+
+# Demonstrate dlpack consumption invalidates it for future use
+def dlp_usage_error():
+    a = cp.empty((3,3), dtype=cp.float32)
+    dlp = a.toDlpack()
+    assert(matxutil.check_dlpack_status(dlp) == 0)
+    a2 = cp.from_dlpack(dlp) # consumes dlp; the capsule cannot be used again
+    assert(matxutil.check_dlpack_status(dlp) != 0)
+    return dlp
+
+# Demonstrate cupy array stays in scope when returning valid dlp
+def scope_okay():
+    a = cp.empty((3,3), dtype=cp.float32)
+    a[1,1] = 2
+    dlp = a.toDlpack()
+    assert(matxutil.check_dlpack_status(dlp) == 0)
+    return dlp
+
+# Do all cupy work using the "with stream" context manager
+stream = cp.cuda.stream.Stream(non_blocking=True)
+with stream:
+    print("Demonstrate dlpack consumption invalidates it for future use:")
+    dlp = dlp_usage_error()
+    assert(matxutil.check_dlpack_status(dlp) != 0)
+    print(f"  dlp capsule name is: {matxutil.get_capsule_name(dlp)}")
+    print()
+
+    print("Demonstrate cupy array stays in scope when returning valid dlpack:")
+    dlp = scope_okay()
+    assert(matxutil.check_dlpack_status(dlp) == 0)
+    print(f"  dlp capsule name is: {matxutil.get_capsule_name(dlp)}")
+    print()
+
+    print("Print info about the dlpack:")
+    matxutil.print_dlpack_info(dlp)
+    print()
+
+    print("Use MatX to print the tensor:")
+    matxutil.print_float_2D(dlp)
+    print()
+
+    print("Print current memory usage info:")
+    gpu_mempool = cp.get_default_memory_pool()
+    pinned_mempool = cp.get_default_pinned_memory_pool()
+    print(f"  GPU mempool used bytes {gpu_mempool.used_bytes()}")
+    print(f"  Pinned mempool n_free_blocks {pinned_mempool.n_free_blocks()}")
+    print()
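+    # Note: per the DLPack protocol, a consumer that takes ownership of a
+    # capsule renames it from "dltensor" to "used_dltensor"; that rename is
+    # what matxutil.check_dlpack_status() detects in the checks above and below.
+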
+    print("Demonstrate python to C++ to python to C++ calling chain (uses mypythonlib.py):")
+    # This function calls back into python and executes a from_dlpack, consuming the dlp
+    matxutil.call_python_example(dlp)
+    assert(matxutil.check_dlpack_status(dlp) != 0)
+    del dlp
+
+    print("Demonstrate adding two tensors together using MatX:")
+    a = cp.array([[1,2,3],[4,5,6],[7,8,9]], dtype=cp.float32)
+    b = cp.array([[1,2,3],[4,5,6],[7,8,9]], dtype=cp.float32)
+    c = cp.empty(b.shape, dtype=b.dtype)
+
+    c_dlp = c.toDlpack()
+    a_dlp = a.toDlpack()
+    b_dlp = b.toDlpack()
+    matxutil.add_float_2D(c_dlp, a_dlp, b_dlp, stream.ptr)
+    stream.synchronize()
+    print(f"Tensor a {a}")
+    print(f"Tensor b {b}")
+    print(f"Tensor c=a+b {c}")
diff --git a/examples/python_integration_sample/matxutil.cu b/examples/python_integration_sample/matxutil.cu
new file mode 100644
index 00000000..1e2b800b
--- /dev/null
+++ b/examples/python_integration_sample/matxutil.cu
@@ -0,0 +1,231 @@
+////////////////////////////////////////////////////////////////////////////////
+// BSD 3-Clause License
+//
+// Copyright (c) 2024, NVIDIA Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+//    list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from
+//    this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <matx.h>
+#include <pybind11/pybind11.h>
+#include <cuda_runtime.h>
+#include <cstdio>
+#include <cstring>
+
+namespace py = pybind11;
+
+const char* get_capsule_name(py::capsule capsule)
+{
+  return capsule.name();
+}
+
+typedef DLManagedTensor* PTR_DLManagedTensor;
+
+int attempt_unpack_dlpack(py::capsule dlpack_capsule, PTR_DLManagedTensor& p_dlpack)
+{
+  const char* capsule_name = dlpack_capsule.name();
+
+  if (strncmp(capsule_name,"dltensor",8) != 0)
+  {
+    fprintf(stderr,"capsule_name %s\n",capsule_name);
+    return -1;
+  }
+
+  p_dlpack = static_cast<PTR_DLManagedTensor>(dlpack_capsule.get_pointer());
+
+  if (p_dlpack == nullptr) {
+    fprintf(stderr,"p_dlpack == nullptr\n");
+    return -2;
+  }
+
+  return 0;
+}
+
+int check_dlpack_status(py::capsule dlpack_capsule)
+{
+  PTR_DLManagedTensor unused;
+  return attempt_unpack_dlpack(dlpack_capsule, unused);
+}
+
+const char* dlpack_device_type_to_string(DLDeviceType device_type)
+{
+  switch(device_type)
+  {
+    case kDLCPU: return "kDLCPU";
+    case kDLCUDA: return "kDLCUDA";
+    case kDLCUDAHost: return "kDLCUDAHost";
+    case kDLOpenCL: return "kDLOpenCL";
+    case kDLVulkan: return "kDLVulkan";
+    case kDLMetal: return "kDLMetal";
+    case kDLVPI: return "kDLVPI";
+    case kDLROCM: return "kDLROCM";
+    case kDLROCMHost: return "kDLROCMHost";
+    case kDLExtDev: return "kDLExtDev";
+    case kDLCUDAManaged: return "kDLCUDAManaged";
+    case kDLOneAPI: return "kDLOneAPI";
+    case kDLWebGPU: return "kDLWebGPU";
+    case kDLHexagon: return "kDLHexagon";
+    default: return "Unknown DLDeviceType";
+  }
+}
+
+const char* dlpack_code_to_string(uint8_t code)
+{
+  switch(code)
+  {
+    case kDLInt: return "kDLInt";
+    case kDLUInt: return "kDLUInt";
+    case kDLFloat: return "kDLFloat";
+    case kDLOpaqueHandle: return "kDLOpaqueHandle";
+    case kDLBfloat: return "kDLBfloat";
+    case kDLComplex: return "kDLComplex";
+    case kDLBool: return "kDLBool";
+    default: return "Unknown DLDataTypeCode";
+  }
+}
+
+void print_dlpack_info(py::capsule dlpack_capsule) {
+  PTR_DLManagedTensor p_tensor;
+  if (attempt_unpack_dlpack(dlpack_capsule, p_tensor))
+  {
+    fprintf(stderr,"Error: capsule not valid dlpack\n");
+    return;
+  }
+
+  printf("  data: %p\n",p_tensor->dl_tensor.data);
+  printf("  device: device_type %s, device_id %d\n",
+    dlpack_device_type_to_string(p_tensor->dl_tensor.device.device_type),
+    p_tensor->dl_tensor.device.device_id
+  );
+  printf("  ndim: %d\n",p_tensor->dl_tensor.ndim);
+  printf("  dtype: code %s, bits %u, lanes %u\n",
+    dlpack_code_to_string(p_tensor->dl_tensor.dtype.code),
+    p_tensor->dl_tensor.dtype.bits,
+    p_tensor->dl_tensor.dtype.lanes
+  );
+  printf("  shape: ");
+  for (int k=0; k<p_tensor->dl_tensor.ndim; k++)
+  {
+    printf("%ld, ",p_tensor->dl_tensor.shape[k]);
+  }
+  printf("\n");
+  printf("  strides: ");
+  for (int k=0; k<p_tensor->dl_tensor.ndim; k++)
+  {
+    printf("%ld, ",p_tensor->dl_tensor.strides[k]);
+  }
+  printf("\n");
+  printf("  byte_offset: %lu\n",p_tensor->dl_tensor.byte_offset);
+}
+
+template <typename T, int RANK>
+void print(py::capsule dlpack_capsule)
+{
+  PTR_DLManagedTensor p_tensor;
+  if (attempt_unpack_dlpack(dlpack_capsule, p_tensor))
+  {
+    fprintf(stderr,"Error: capsule not valid dlpack\n");
+    return;
+  }
+
+  matx::tensor_t<T, RANK> a;
+  matx::make_tensor(a, *p_tensor);
+  matx::print(a);
+}
+
+void call_python_example(py::capsule dlpack_capsule)
+{
+  PTR_DLManagedTensor p_tensor;
+  if (attempt_unpack_dlpack(dlpack_capsule, p_tensor))
+  {
+    fprintf(stderr,"Error: capsule not valid dlpack\n");
+    return;
+  }
+
+  matx::tensor_t<float, 2> a;
+  matx::make_tensor(a, *p_tensor);
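+
+  // make_tensor() wraps the existing DLPack allocation rather than copying it,
+  // so 'a' aliases the CuPy array's device memory; the Python side must keep
+  // that allocation alive for as long as the view is used.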
+
+  auto pb = matx::detail::MatXPybind{};
+
+  // Example use of python's print
+  pybind11::print("  Example use of python's print function from C++: ", 1, 2.0, "three");
+  pybind11::print("  The dlpack_capsule is a ", dlpack_capsule);
+
+  auto mypythonlib = pybind11::module_::import("mypythonlib");
+  mypythonlib.attr("my_func")(dlpack_capsule);
+}
+
+template <typename T, int RANK>
+void add(py::capsule capsule_c, py::capsule capsule_a, py::capsule capsule_b, int64_t stream = 0)
+{
+  PTR_DLManagedTensor p_tensor_c;
+  PTR_DLManagedTensor p_tensor_a;
+  PTR_DLManagedTensor p_tensor_b;
+
+  // TODO: these should throw a MatX error instead of printing to stderr
+  if (attempt_unpack_dlpack(capsule_c, p_tensor_c))
+  {
+    fprintf(stderr,"Error: capsule c not valid dlpack\n");
+    return;
+  }
+
+  if (attempt_unpack_dlpack(capsule_a, p_tensor_a))
+  {
+    fprintf(stderr,"Error: capsule a not valid dlpack\n");
+    return;
+  }
+
+  if (attempt_unpack_dlpack(capsule_b, p_tensor_b))
+  {
+    fprintf(stderr,"Error: capsule b not valid dlpack\n");
+    return;
+  }
+
+  matx::tensor_t<T, RANK> c;
+  matx::tensor_t<T, RANK> a;
+  matx::tensor_t<T, RANK> b;
+  matx::make_tensor(c, *p_tensor_c);
+  matx::make_tensor(a, *p_tensor_a);
+  matx::make_tensor(b, *p_tensor_b);
+
+  matx::cudaExecutor exec{reinterpret_cast<cudaStream_t>(stream)};
+  (c = a + b).run(exec);
+}
+
+PYBIND11_MODULE(matxutil, m) {
+  m.def("get_capsule_name", &get_capsule_name, "Returns PyCapsule name");
+  m.def("print_dlpack_info", &print_dlpack_info, "Print the DLPack tensor metadata");
+  m.def("check_dlpack_status", &check_dlpack_status, "Returns 0 if DLPack is valid, negative error code otherwise");
+  m.def("print_float_2D", &print<float, 2>, "Prints a float32 2D tensor");
+  m.def("call_python_example", &call_python_example, "Example C++ function that calls python code");
+  m.def("add_float_2D",
+    &add<float, 2>,
+    "Add two float32 2D tensors together",
+    py::arg("c"),
+    py::arg("a"),
+    py::arg("b"),
+    py::arg("stream") = 0);
+}
\ No newline at end of file
diff --git a/examples/python_integration_sample/mypythonlib.py b/examples/python_integration_sample/mypythonlib.py
new file mode 100644
index 00000000..a419e8de
--- /dev/null
+++ b/examples/python_integration_sample/mypythonlib.py
@@ -0,0 +1,15 @@
+import cupy as cp
+import sys
+sys.path.append('.')
+import matxutil
+
+def my_func(dlp):
+    print(f"  type(dlp) before cp.from_dlpack(): {type(dlp)}")
+    print(f"  dlp capsule name is: {matxutil.get_capsule_name(dlp)}")
+    a = cp.from_dlpack(dlp)
+    print(f"  type(dlp) after cp.from_dlpack(): {type(dlp)}")
+    print(f"  dlp capsule name is: {matxutil.get_capsule_name(dlp)}")
+    print(f"  type(cp.from_dlpack(dlp)): {type(a)}")
+    print()
+    print("Finally, print the tensor we received from MatX using python:")
+    print(a)
diff --git a/include/matx/core/make_tensor.h b/include/matx/core/make_tensor.h
index cbd83482..eda351f7 100644
--- a/include/matx/core/make_tensor.h
+++ b/include/matx/core/make_tensor.h
@@ -36,6 +36,7 @@
 #include "matx/core/nvtx.h"
 #include "matx/core/storage.h"
 #include "matx/core/tensor_desc.h"
+#include "matx/core/dlpack.h"
 
 namespace matx {
 /**
@@ -619,4 +620,136 @@ auto make_static_tensor() {
   return tensor_t{std::move(s), std::move(desc)};
 }
 
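+/**
+ * Create a tensor view from a DLPack managed tensor
+ *
+ * The tensor is created as a non-owning view of the DLPack tensor's data, so
+ * the underlying allocation must outlive the returned view.
+ *
+ * @param tensor
+ *   Tensor object to store the newly-created tensor into
+ * @param dlp_tensor
+ *   DLPack managed tensor describing the existing allocation
+ **/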
+template <typename TensorType,
+          std::enable_if_t<is_tensor_view_v<TensorType>, bool> = true>
+auto make_tensor( TensorType &tensor,
+                  const DLManagedTensor dlp_tensor) {
+  MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+
+  using T = typename TensorType::value_type;
+  const DLTensor &dt = dlp_tensor.dl_tensor;
+
+  // MatX doesn't track the memory type or device ID, so we don't need to copy it
+  MATX_ASSERT_STR_EXP(dt.ndim, TensorType::Rank(), matxInvalidDim, "DLPack rank doesn't match MatX rank!");
+
+  switch (dt.dtype.code) {
+    case kDLComplex: {
+      switch (dt.dtype.bits) {
+        case 128: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, cuda::std::complex<double>>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        case 64: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, cuda::std::complex<float>>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        case 32: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, matxFp16Complex> || std::is_same_v<T, matxBf16Complex>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        default:
+          MATX_THROW(matxInvalidSize, "Invalid complex float size from DLPack");
+      }
+      break;
+    }
+    case kDLFloat: {
+      switch (dt.dtype.bits) {
+        case 64: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, double>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        case 32: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, float>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        case 16: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, matxFp16> || std::is_same_v<T, matxBf16>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        default:
+          MATX_THROW(matxInvalidSize, "Invalid float size from DLPack");
+      }
+      break;
+    }
+    case kDLInt: {
+      switch (dt.dtype.bits) {
+        case 64: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, int64_t>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        case 32: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, int32_t>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        case 16: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, int16_t>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        case 8: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, int8_t>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        default:
+          MATX_THROW(matxInvalidSize, "Invalid signed integer size from DLPack");
+      }
+      break;
+    }
+    case kDLUInt: {
+      switch (dt.dtype.bits) {
+        case 64: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, uint64_t>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        case 32: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, uint32_t>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        case 16: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, uint16_t>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        case 8: {
+          [[maybe_unused]] constexpr bool same = std::is_same_v<T, uint8_t>;
+          MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+          break;
+        }
+        default:
+          MATX_THROW(matxInvalidSize, "Invalid unsigned integer size from DLPack");
+      }
+      break;
+    }
+    case kDLBool: {
+      [[maybe_unused]] constexpr bool same = std::is_same_v<T, bool>;
+      MATX_ASSERT_STR(same, matxInvalidType, "DLPack/MatX type mismatch");
+      break;
+    }
+    default:
+      // No corresponding MatX type for the remaining DLPack dtype codes
+      MATX_THROW(matxInvalidType, "Unsupported DLPack data type code");
+  }
+
+  index_t strides[TensorType::Rank()];
+  index_t shape[TensorType::Rank()];
+
+  for (int r = 0; r < TensorType::Rank(); r++) {
+    strides[r] = dt.strides[r];
+    shape[r] = dt.shape[r];
+  }
+
+  auto tmp = make_tensor(reinterpret_cast<T*>(dt.data), shape, strides, false);
+  tensor.Shallow(tmp);
+}
+
 } // namespace matx
diff --git a/include/matx/core/tensor.h b/include/matx/core/tensor.h
index 82b7b62b..cf4d5f22 100644
--- a/include/matx/core/tensor.h
+++ b/include/matx/core/tensor.h
@@ -1441,7 +1441,7 @@ class tensor_t : public detail::tensor_impl_t<T, RANK> {
    *
    * @returns Pointer to a new DLManagedTensor. The caller must call the
    * deleter function when finished.
    */
-  DLManagedTensor *GetDLPackTensor() const {
+  DLManagedTensor *ToDlPack() const {
     auto mt = new DLManagedTensor;
     DLTensor *t = &mt->dl_tensor;
     CUpointer_attribute attr[] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE, CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL};
@@ -1509,7 +1509,6 @@ class tensor_t : public detail::tensor_impl_t<T, RANK> {
     return mt;
   }
 
-
 private:
   Storage storage_;
 };
diff --git a/test/00_tensor/BasicTensorTests.cu b/test/00_tensor/BasicTensorTests.cu
index 759b3adb..85a5225b 100644
--- a/test/00_tensor/BasicTensorTests.cu
+++ b/test/00_tensor/BasicTensorTests.cu
@@ -541,7 +541,7 @@ TYPED_TEST(BasicTensorTestsAll, DLPack)
   using TestType = cuda::std::tuple_element_t<0, TypeParam>;
 
   auto t = make_tensor<TestType>({5,10,20});
-  auto dl = t.GetDLPackTensor();
+  auto dl = t.ToDlPack();
 
   ASSERT_EQ(dl->dl_tensor.ndim, 3);
   ASSERT_EQ(dl->dl_tensor.data, t.Data());