From f407c406b7904dad5ad1abfd39e00b44cf989e4b Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Fri, 3 Dec 2021 10:41:19 -0500 Subject: [PATCH 01/16] Replace use of custom CUDA bindings with CUDA-Python --- python/rmm/_cuda/10.1/gpu.pxi | 396 -------------------------- python/rmm/_cuda/10.2/gpu.pxi | 400 -------------------------- python/rmm/_cuda/11.x/gpu.pxi | 406 --------------------------- python/rmm/_cuda/{gpu.pyx => gpu.py} | 112 +++----- python/rmm/_cuda/stream.pxd | 2 +- python/rmm/_cuda/stream.pyx | 2 +- python/rmm/_lib/cuda_stream.pxd | 2 +- python/rmm/_lib/cuda_stream.pyx | 1 + python/rmm/_lib/cuda_stream_view.pxd | 3 +- python/rmm/_lib/device_buffer.pyx | 19 +- python/rmm/_lib/lib.pxd | 20 -- python/rmm/_lib/memory_resource.pxd | 3 - python/rmm/_lib/memory_resource.pyx | 6 +- python/setup.py | 38 --- 14 files changed, 54 insertions(+), 1356 deletions(-) delete mode 100644 python/rmm/_cuda/10.1/gpu.pxi delete mode 100644 python/rmm/_cuda/10.2/gpu.pxi delete mode 100644 python/rmm/_cuda/11.x/gpu.pxi rename python/rmm/_cuda/{gpu.pyx => gpu.py} (55%) diff --git a/python/rmm/_cuda/10.1/gpu.pxi b/python/rmm/_cuda/10.1/gpu.pxi deleted file mode 100644 index ab6569711..000000000 --- a/python/rmm/_cuda/10.1/gpu.pxi +++ /dev/null @@ -1,396 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# For CUDA 10.1 - -cdef extern from "cuda.h" nogil: - cpdef enum cudaDeviceAttr: - cudaDevAttrMaxThreadsPerBlock = 1 - cudaDevAttrMaxBlockDimX = 2 - cudaDevAttrMaxBlockDimY = 3 - cudaDevAttrMaxBlockDimZ = 4 - cudaDevAttrMaxGridDimX = 5 - cudaDevAttrMaxGridDimY = 6 - cudaDevAttrMaxGridDimZ = 7 - cudaDevAttrMaxSharedMemoryPerBlock = 8 - cudaDevAttrTotalConstantMemory = 9 - cudaDevAttrWarpSize = 10 - cudaDevAttrMaxPitch = 11 - cudaDevAttrMaxRegistersPerBlock = 12 - cudaDevAttrClockRate = 13 - cudaDevAttrTextureAlignment = 14 - cudaDevAttrGpuOverlap = 15 - cudaDevAttrMultiProcessorCount = 16 - cudaDevAttrKernelExecTimeout = 17 - cudaDevAttrIntegrated = 18 - cudaDevAttrCanMapHostMemory = 19 - cudaDevAttrComputeMode = 20 - cudaDevAttrMaxTexture1DWidth = 21 - cudaDevAttrMaxTexture2DWidth = 22 - cudaDevAttrMaxTexture2DHeight = 23 - cudaDevAttrMaxTexture3DWidth = 24 - cudaDevAttrMaxTexture3DHeight = 25 - cudaDevAttrMaxTexture3DDepth = 26 - cudaDevAttrMaxTexture2DLayeredWidth = 27 - cudaDevAttrMaxTexture2DLayeredHeight = 28 - cudaDevAttrMaxTexture2DLayeredLayers = 29 - cudaDevAttrSurfaceAlignment = 30 - cudaDevAttrConcurrentKernels = 31 - cudaDevAttrEccEnabled = 32 - cudaDevAttrPciBusId = 33 - cudaDevAttrPciDeviceId = 34 - cudaDevAttrTccDriver = 35 - cudaDevAttrMemoryClockRate = 36 - cudaDevAttrGlobalMemoryBusWidth = 37 - cudaDevAttrL2CacheSize = 38 - cudaDevAttrMaxThreadsPerMultiProcessor = 39 - cudaDevAttrAsyncEngineCount = 40 - cudaDevAttrUnifiedAddressing = 41 - cudaDevAttrMaxTexture1DLayeredWidth = 42 - cudaDevAttrMaxTexture1DLayeredLayers = 43 - cudaDevAttrMaxTexture2DGatherWidth = 45 - cudaDevAttrMaxTexture2DGatherHeight = 46 - cudaDevAttrMaxTexture3DWidthAlt = 47 - cudaDevAttrMaxTexture3DHeightAlt = 48 - cudaDevAttrMaxTexture3DDepthAlt = 49 - cudaDevAttrPciDomainId = 50 - cudaDevAttrTexturePitchAlignment = 51 - cudaDevAttrMaxTextureCubemapWidth = 52 - cudaDevAttrMaxTextureCubemapLayeredWidth = 53 - cudaDevAttrMaxTextureCubemapLayeredLayers = 54 - cudaDevAttrMaxSurface1DWidth = 55 - cudaDevAttrMaxSurface2DWidth = 56 - cudaDevAttrMaxSurface2DHeight = 57 - cudaDevAttrMaxSurface3DWidth = 58 - cudaDevAttrMaxSurface3DHeight = 59 - cudaDevAttrMaxSurface3DDepth = 60 - cudaDevAttrMaxSurface1DLayeredWidth = 61 - cudaDevAttrMaxSurface1DLayeredLayers = 62 - cudaDevAttrMaxSurface2DLayeredWidth = 63 - cudaDevAttrMaxSurface2DLayeredHeight = 64 - cudaDevAttrMaxSurface2DLayeredLayers = 65 - cudaDevAttrMaxSurfaceCubemapWidth = 66 - cudaDevAttrMaxSurfaceCubemapLayeredWidth = 67 - cudaDevAttrMaxSurfaceCubemapLayeredLayers = 68 - cudaDevAttrMaxTexture1DLinearWidth = 69 - cudaDevAttrMaxTexture2DLinearWidth = 70 - cudaDevAttrMaxTexture2DLinearHeight = 71 - cudaDevAttrMaxTexture2DLinearPitch = 72 - cudaDevAttrMaxTexture2DMipmappedWidth = 73 - cudaDevAttrMaxTexture2DMipmappedHeight = 74 - cudaDevAttrComputeCapabilityMajor = 75 - cudaDevAttrComputeCapabilityMinor = 76 - cudaDevAttrMaxTexture1DMipmappedWidth = 77 - cudaDevAttrStreamPrioritiesSupported = 78 - cudaDevAttrGlobalL1CacheSupported = 79 - cudaDevAttrLocalL1CacheSupported = 80 - cudaDevAttrMaxSharedMemoryPerMultiprocessor = 81 - cudaDevAttrMaxRegistersPerMultiprocessor = 82 - cudaDevAttrManagedMemory = 83 - cudaDevAttrIsMultiGpuBoard = 84 - cudaDevAttrMultiGpuBoardGroupID = 85 - cudaDevAttrHostNativeAtomicSupported = 86 - cudaDevAttrSingleToDoublePrecisionPerfRatio = 87 - cudaDevAttrPageableMemoryAccess = 88 - cudaDevAttrConcurrentManagedAccess = 89 - cudaDevAttrComputePreemptionSupported = 90 - cudaDevAttrCanUseHostPointerForRegisteredMem = 91 - cudaDevAttrReserved92 = 92 - cudaDevAttrReserved93 = 93 - cudaDevAttrReserved94 = 94 - cudaDevAttrCooperativeLaunch = 95 - cudaDevAttrCooperativeMultiDeviceLaunch = 96 - cudaDevAttrMaxSharedMemoryPerBlockOptin = 97 - cudaDevAttrCanFlushRemoteWrites = 98 - cudaDevAttrHostRegisterSupported = 99 - cudaDevAttrPageableMemoryAccessUsesHostPageTables = 100 - cudaDevAttrDirectManagedMemAccessFromHost = 101 - - cpdef enum cudaError: - cudaSuccess = 0 - cudaErrorInvalidValue = 1 - cudaErrorMemoryAllocation = 2 - cudaErrorInitializationError = 3 - cudaErrorCudartUnloading = 4 - cudaErrorProfilerDisabled = 5 - cudaErrorProfilerNotInitialized = 6 - cudaErrorProfilerAlreadyStarted = 7 - cudaErrorProfilerAlreadyStopped = 8 - cudaErrorInvalidConfiguration = 9 - cudaErrorInvalidPitchValue = 12 - cudaErrorInvalidSymbol = 13 - cudaErrorInvalidHostPointer = 16 - cudaErrorInvalidDevicePointer = 17 - cudaErrorInvalidTexture = 18 - cudaErrorInvalidTextureBinding = 19 - cudaErrorInvalidChannelDescriptor = 20 - cudaErrorInvalidMemcpyDirection = 21 - cudaErrorAddressOfConstant = 22 - cudaErrorTextureFetchFailed = 23 - cudaErrorTextureNotBound = 24 - cudaErrorSynchronizationError = 25 - cudaErrorInvalidFilterSetting = 26 - cudaErrorInvalidNormSetting = 27 - cudaErrorMixedDeviceExecution = 28 - cudaErrorNotYetImplemented = 31 - cudaErrorMemoryValueTooLarge = 32 - cudaErrorInsufficientDriver = 35 - cudaErrorInvalidSurface = 37 - cudaErrorDuplicateVariableName = 43 - cudaErrorDuplicateTextureName = 44 - cudaErrorDuplicateSurfaceName = 45 - cudaErrorDevicesUnavailable = 46 - cudaErrorIncompatibleDriverContext = 49 - cudaErrorMissingConfiguration = 52 - cudaErrorPriorLaunchFailure = 53 - cudaErrorLaunchMaxDepthExceeded = 65 - cudaErrorLaunchFileScopedTex = 66 - cudaErrorLaunchFileScopedSurf = 67 - cudaErrorSyncDepthExceeded = 68 - cudaErrorLaunchPendingCountExceeded = 69 - cudaErrorInvalidDeviceFunction = 98 - cudaErrorNoDevice = 100 - cudaErrorInvalidDevice = 101 - cudaErrorStartupFailure = 127 - cudaErrorInvalidKernelImage = 200 - cudaErrorDeviceUninitilialized = 201 - cudaErrorMapBufferObjectFailed = 205 - cudaErrorUnmapBufferObjectFailed = 206 - cudaErrorArrayIsMapped = 207 - cudaErrorAlreadyMapped = 208 - cudaErrorNoKernelImageForDevice = 209 - cudaErrorAlreadyAcquired = 210 - cudaErrorNotMapped = 211 - cudaErrorNotMappedAsArray = 212 - cudaErrorNotMappedAsPointer = 213 - cudaErrorECCUncorrectable = 214 - cudaErrorUnsupportedLimit = 215 - cudaErrorDeviceAlreadyInUse = 216 - cudaErrorPeerAccessUnsupported = 217 - cudaErrorInvalidPtx = 218 - cudaErrorInvalidGraphicsContext = 219 - cudaErrorNvlinkUncorrectable = 220 - cudaErrorJitCompilerNotFound = 221 - cudaErrorInvalidSource = 300 - cudaErrorFileNotFound = 301 - cudaErrorSharedObjectSymbolNotFound = 302 - cudaErrorSharedObjectInitFailed = 303 - cudaErrorOperatingSystem = 304 - cudaErrorInvalidResourceHandle = 400 - cudaErrorIllegalState = 401 - cudaErrorSymbolNotFound = 500 - cudaErrorNotReady = 600 - cudaErrorIllegalAddress = 700 - cudaErrorLaunchOutOfResources = 701 - cudaErrorLaunchTimeout = 702 - cudaErrorLaunchIncompatibleTexturing = 703 - cudaErrorPeerAccessAlreadyEnabled = 704 - cudaErrorPeerAccessNotEnabled = 705 - cudaErrorSetOnActiveProcess = 708 - cudaErrorContextIsDestroyed = 709 - cudaErrorAssert = 710 - cudaErrorTooManyPeers = 711 - cudaErrorHostMemoryAlreadyRegistered = 712 - cudaErrorHostMemoryNotRegistered = 713 - cudaErrorHardwareStackError = 714 - cudaErrorIllegalInstruction = 715 - cudaErrorMisalignedAddress = 716 - cudaErrorInvalidAddressSpace = 717 - cudaErrorInvalidPc = 718 - cudaErrorLaunchFailure = 719 - cudaErrorCooperativeLaunchTooLarge = 720 - cudaErrorNotPermitted = 800 - cudaErrorNotSupported = 801 - cudaErrorSystemNotReady = 802 - cudaErrorSystemDriverMismatch = 803 - cudaErrorCompatNotSupportedOnDevice = 804 - cudaErrorStreamCaptureUnsupported = 900 - cudaErrorStreamCaptureInvalidated = 901 - cudaErrorStreamCaptureMerge = 902 - cudaErrorStreamCaptureUnmatched = 903 - cudaErrorStreamCaptureUnjoined = 904 - cudaErrorStreamCaptureIsolation = 905 - cudaErrorStreamCaptureImplicit = 906 - cudaErrorCapturedEvent = 907 - cudaErrorStreamCaptureWrongThread = 908 - cudaErrorUnknown = 999 - cudaErrorApiFailureBase = 10000 - - ctypedef cudaError cudaError_t - - ctypedef enum CUresult: - CUDA_SUCCESS = 0 - CUDA_ERROR_INVALID_VALUE = 1 - CUDA_ERROR_OUT_OF_MEMORY = 2 - CUDA_ERROR_NOT_INITIALIZED = 3 - CUDA_ERROR_DEINITIALIZED = 4 - CUDA_ERROR_PROFILER_DISABLED = 5 - CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6 - CUDA_ERROR_PROFILER_ALREADY_STARTED = 7 - CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8 - CUDA_ERROR_NO_DEVICE = 100 - CUDA_ERROR_INVALID_DEVICE = 101 - CUDA_ERROR_INVALID_IMAGE = 200 - CUDA_ERROR_INVALID_CONTEXT = 201 - CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202 - CUDA_ERROR_MAP_FAILED = 205 - CUDA_ERROR_UNMAP_FAILED = 206 - CUDA_ERROR_ARRAY_IS_MAPPED = 207 - CUDA_ERROR_ALREADY_MAPPED = 208 - CUDA_ERROR_NO_BINARY_FOR_GPU = 209 - CUDA_ERROR_ALREADY_ACQUIRED = 210 - CUDA_ERROR_NOT_MAPPED = 211 - CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212 - CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213 - CUDA_ERROR_ECC_UNCORRECTABLE = 214 - CUDA_ERROR_UNSUPPORTED_LIMIT = 215 - CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216 - CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217 - CUDA_ERROR_INVALID_PTX = 218 - CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219 - CUDA_ERROR_NVLINK_UNCORRECTABLE = 220 - CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221 - CUDA_ERROR_INVALID_SOURCE = 300 - CUDA_ERROR_FILE_NOT_FOUND = 301 - CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302 - CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303 - CUDA_ERROR_OPERATING_SYSTEM = 304 - CUDA_ERROR_INVALID_HANDLE = 400 - CUDA_ERROR_ILLEGAL_STATE = 401 - CUDA_ERROR_NOT_FOUND = 500 - CUDA_ERROR_NOT_READY = 600 - CUDA_ERROR_ILLEGAL_ADDRESS = 700 - CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701 - CUDA_ERROR_LAUNCH_TIMEOUT = 702 - CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703 - CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704 - CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705 - CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708 - CUDA_ERROR_CONTEXT_IS_DESTROYED = 709 - CUDA_ERROR_ASSERT = 710 - CUDA_ERROR_TOO_MANY_PEERS = 711 - CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712 - CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713 - CUDA_ERROR_HARDWARE_STACK_ERROR = 714 - CUDA_ERROR_ILLEGAL_INSTRUCTION = 715 - CUDA_ERROR_MISALIGNED_ADDRESS = 716 - CUDA_ERROR_INVALID_ADDRESS_SPACE = 717 - CUDA_ERROR_INVALID_PC = 718 - CUDA_ERROR_LAUNCH_FAILED = 719 - CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720 - CUDA_ERROR_NOT_PERMITTED = 800 - CUDA_ERROR_NOT_SUPPORTED = 801 - CUDA_ERROR_SYSTEM_NOT_READY = 802 - CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803 - CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804 - CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900 - CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901 - CUDA_ERROR_STREAM_CAPTURE_MERGE = 902 - CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903 - CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904 - CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905 - CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906 - CUDA_ERROR_CAPTURED_EVENT = 907 - CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908 - CUDA_ERROR_UNKNOWN = 999 - - ctypedef struct CUuuid_st: - char bytes[16] - - ctypedef CUuuid_st cudaUUID_t - - ctypedef struct cudaDeviceProp: - int ECCEnabled - int asyncEngineCount - int canMapHostMemory - int canUseHostPointerForRegisteredMem - int clockRate - int computeMode - int computePreemptionSupported - int concurrentKernels - int concurrentManagedAccess - int cooperativeLaunch - int cooperativeMultiDeviceLaunch - int deviceOverlap - int directManagedMemAccessFromHost - int globalL1CacheSupported - int hostNativeAtomicSupported - int integrated - int isMultiGpuBoard - int kernelExecTimeoutEnabled - int l2CacheSize - int localL1CacheSupported - char luid[8] - unsigned int luidDeviceNodeMask - int major - int managedMemory - int maxGridSize[3] - int maxSurface1D - int maxSurface1DLayered[2] - int maxSurface2D[2] - int maxSurface2DLayered[3] - int maxSurface3D[3] - int maxSurfaceCubemap - int maxSurfaceCubemapLayered[2] - int maxTexture1D - int maxTexture1DLayered[2] - int maxTexture1DLinear - int maxTexture1DMipmap - int maxTexture2D[2] - int maxTexture2DGather[2] - int maxTexture2DLayered[3] - int maxTexture2DLinear[3] - int maxTexture2DMipmap[2] - int maxTexture3D[3] - int maxTexture3DAlt[3] - int maxTextureCubemap - int maxTextureCubemapLayered[2] - int maxThreadsDim[3] - int maxThreadsPerBlock - int maxThreadsPerMultiProcessor - size_t memPitch - int memoryBusWidth - int memoryClockRate - int minor - int multiGpuBoardGroupID - int multiProcessorCount - char name[256] - int pageableMemoryAccess - int pageableMemoryAccessUsesHostPageTables - int pciBusID - int pciDeviceID - int pciDomainID - int regsPerBlock - int regsPerMultiprocessor - size_t sharedMemPerBlock - size_t sharedMemPerBlockOptin - size_t sharedMemPerMultiprocessor - int singleToDoublePrecisionPerfRatio - int streamPrioritiesSupported - size_t surfaceAlignment - int tccDriver - size_t textureAlignment - size_t texturePitchAlignment - size_t totalConstMem - size_t totalGlobalMem - int unifiedAddressing - cudaUUID_t uuid - int warpSize - - CUresult cuDeviceGetName(char* name, int length, int device) - - CUresult cuGetErrorName(CUresult error, const char** pStr) - CUresult cuGetErrorString(CUresult error, const char** pStr) - -cdef extern from "cuda_runtime_api.h" nogil: - - cudaError_t cudaDriverGetVersion(int* driverVersion) - cudaError_t cudaRuntimeGetVersion(int* runtimeVersion) - cudaError_t cudaGetDeviceCount(int* count) - cudaError_t cudaGetDevice(int* device) - cudaError_t cudaDeviceGetAttribute(int* value, - cudaDeviceAttr attr, - int device) - cudaError_t cudaGetDeviceProperties(cudaDeviceProp* prop, int device) - cudaError_t cudaSetDevice(int device) - - const char* cudaGetErrorString(cudaError_t error) - const char* cudaGetErrorName(cudaError_t error) diff --git a/python/rmm/_cuda/10.2/gpu.pxi b/python/rmm/_cuda/10.2/gpu.pxi deleted file mode 100644 index a3a19b642..000000000 --- a/python/rmm/_cuda/10.2/gpu.pxi +++ /dev/null @@ -1,400 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# For CUDA 10.2 - -cdef extern from "cuda.h" nogil: - cpdef enum cudaDeviceAttr: - cudaDevAttrMaxThreadsPerBlock = 1 - cudaDevAttrMaxBlockDimX = 2 - cudaDevAttrMaxBlockDimY = 3 - cudaDevAttrMaxBlockDimZ = 4 - cudaDevAttrMaxGridDimX = 5 - cudaDevAttrMaxGridDimY = 6 - cudaDevAttrMaxGridDimZ = 7 - cudaDevAttrMaxSharedMemoryPerBlock = 8 - cudaDevAttrTotalConstantMemory = 9 - cudaDevAttrWarpSize = 10 - cudaDevAttrMaxPitch = 11 - cudaDevAttrMaxRegistersPerBlock = 12 - cudaDevAttrClockRate = 13 - cudaDevAttrTextureAlignment = 14 - cudaDevAttrGpuOverlap = 15 - cudaDevAttrMultiProcessorCount = 16 - cudaDevAttrKernelExecTimeout = 17 - cudaDevAttrIntegrated = 18 - cudaDevAttrCanMapHostMemory = 19 - cudaDevAttrComputeMode = 20 - cudaDevAttrMaxTexture1DWidth = 21 - cudaDevAttrMaxTexture2DWidth = 22 - cudaDevAttrMaxTexture2DHeight = 23 - cudaDevAttrMaxTexture3DWidth = 24 - cudaDevAttrMaxTexture3DHeight = 25 - cudaDevAttrMaxTexture3DDepth = 26 - cudaDevAttrMaxTexture2DLayeredWidth = 27 - cudaDevAttrMaxTexture2DLayeredHeight = 28 - cudaDevAttrMaxTexture2DLayeredLayers = 29 - cudaDevAttrSurfaceAlignment = 30 - cudaDevAttrConcurrentKernels = 31 - cudaDevAttrEccEnabled = 32 - cudaDevAttrPciBusId = 33 - cudaDevAttrPciDeviceId = 34 - cudaDevAttrTccDriver = 35 - cudaDevAttrMemoryClockRate = 36 - cudaDevAttrGlobalMemoryBusWidth = 37 - cudaDevAttrL2CacheSize = 38 - cudaDevAttrMaxThreadsPerMultiProcessor = 39 - cudaDevAttrAsyncEngineCount = 40 - cudaDevAttrUnifiedAddressing = 41 - cudaDevAttrMaxTexture1DLayeredWidth = 42 - cudaDevAttrMaxTexture1DLayeredLayers = 43 - cudaDevAttrMaxTexture2DGatherWidth = 45 - cudaDevAttrMaxTexture2DGatherHeight = 46 - cudaDevAttrMaxTexture3DWidthAlt = 47 - cudaDevAttrMaxTexture3DHeightAlt = 48 - cudaDevAttrMaxTexture3DDepthAlt = 49 - cudaDevAttrPciDomainId = 50 - cudaDevAttrTexturePitchAlignment = 51 - cudaDevAttrMaxTextureCubemapWidth = 52 - cudaDevAttrMaxTextureCubemapLayeredWidth = 53 - cudaDevAttrMaxTextureCubemapLayeredLayers = 54 - cudaDevAttrMaxSurface1DWidth = 55 - cudaDevAttrMaxSurface2DWidth = 56 - cudaDevAttrMaxSurface2DHeight = 57 - cudaDevAttrMaxSurface3DWidth = 58 - cudaDevAttrMaxSurface3DHeight = 59 - cudaDevAttrMaxSurface3DDepth = 60 - cudaDevAttrMaxSurface1DLayeredWidth = 61 - cudaDevAttrMaxSurface1DLayeredLayers = 62 - cudaDevAttrMaxSurface2DLayeredWidth = 63 - cudaDevAttrMaxSurface2DLayeredHeight = 64 - cudaDevAttrMaxSurface2DLayeredLayers = 65 - cudaDevAttrMaxSurfaceCubemapWidth = 66 - cudaDevAttrMaxSurfaceCubemapLayeredWidth = 67 - cudaDevAttrMaxSurfaceCubemapLayeredLayers = 68 - cudaDevAttrMaxTexture1DLinearWidth = 69 - cudaDevAttrMaxTexture2DLinearWidth = 70 - cudaDevAttrMaxTexture2DLinearHeight = 71 - cudaDevAttrMaxTexture2DLinearPitch = 72 - cudaDevAttrMaxTexture2DMipmappedWidth = 73 - cudaDevAttrMaxTexture2DMipmappedHeight = 74 - cudaDevAttrComputeCapabilityMajor = 75 - cudaDevAttrComputeCapabilityMinor = 76 - cudaDevAttrMaxTexture1DMipmappedWidth = 77 - cudaDevAttrStreamPrioritiesSupported = 78 - cudaDevAttrGlobalL1CacheSupported = 79 - cudaDevAttrLocalL1CacheSupported = 80 - cudaDevAttrMaxSharedMemoryPerMultiprocessor = 81 - cudaDevAttrMaxRegistersPerMultiprocessor = 82 - cudaDevAttrManagedMemory = 83 - cudaDevAttrIsMultiGpuBoard = 84 - cudaDevAttrMultiGpuBoardGroupID = 85 - cudaDevAttrHostNativeAtomicSupported = 86 - cudaDevAttrSingleToDoublePrecisionPerfRatio = 87 - cudaDevAttrPageableMemoryAccess = 88 - cudaDevAttrConcurrentManagedAccess = 89 - cudaDevAttrComputePreemptionSupported = 90 - cudaDevAttrCanUseHostPointerForRegisteredMem = 91 - cudaDevAttrReserved92 = 92 - cudaDevAttrReserved93 = 93 - cudaDevAttrReserved94 = 94 - cudaDevAttrCooperativeLaunch = 95 - cudaDevAttrCooperativeMultiDeviceLaunch = 96 - cudaDevAttrMaxSharedMemoryPerBlockOptin = 97 - cudaDevAttrCanFlushRemoteWrites = 98 - cudaDevAttrHostRegisterSupported = 99 - cudaDevAttrPageableMemoryAccessUsesHostPageTables = 100 - cudaDevAttrDirectManagedMemAccessFromHost = 101 - - cpdef enum cudaError: - cudaSuccess = 0 - cudaErrorInvalidValue = 1 - cudaErrorMemoryAllocation = 2 - cudaErrorInitializationError = 3 - cudaErrorCudartUnloading = 4 - cudaErrorProfilerDisabled = 5 - cudaErrorProfilerNotInitialized = 6 - cudaErrorProfilerAlreadyStarted = 7 - cudaErrorProfilerAlreadyStopped = 8 - cudaErrorInvalidConfiguration = 9 - cudaErrorInvalidPitchValue = 12 - cudaErrorInvalidSymbol = 13 - cudaErrorInvalidHostPointer = 16 - cudaErrorInvalidDevicePointer = 17 - cudaErrorInvalidTexture = 18 - cudaErrorInvalidTextureBinding = 19 - cudaErrorInvalidChannelDescriptor = 20 - cudaErrorInvalidMemcpyDirection = 21 - cudaErrorAddressOfConstant = 22 - cudaErrorTextureFetchFailed = 23 - cudaErrorTextureNotBound = 24 - cudaErrorSynchronizationError = 25 - cudaErrorInvalidFilterSetting = 26 - cudaErrorInvalidNormSetting = 27 - cudaErrorMixedDeviceExecution = 28 - cudaErrorNotYetImplemented = 31 - cudaErrorMemoryValueTooLarge = 32 - cudaErrorInsufficientDriver = 35 - cudaErrorInvalidSurface = 37 - cudaErrorDuplicateVariableName = 43 - cudaErrorDuplicateTextureName = 44 - cudaErrorDuplicateSurfaceName = 45 - cudaErrorDevicesUnavailable = 46 - cudaErrorIncompatibleDriverContext = 49 - cudaErrorMissingConfiguration = 52 - cudaErrorPriorLaunchFailure = 53 - cudaErrorLaunchMaxDepthExceeded = 65 - cudaErrorLaunchFileScopedTex = 66 - cudaErrorLaunchFileScopedSurf = 67 - cudaErrorSyncDepthExceeded = 68 - cudaErrorLaunchPendingCountExceeded = 69 - cudaErrorInvalidDeviceFunction = 98 - cudaErrorNoDevice = 100 - cudaErrorInvalidDevice = 101 - cudaErrorStartupFailure = 127 - cudaErrorInvalidKernelImage = 200 - cudaErrorDeviceUninitialized = 201 - cudaErrorMapBufferObjectFailed = 205 - cudaErrorUnmapBufferObjectFailed = 206 - cudaErrorArrayIsMapped = 207 - cudaErrorAlreadyMapped = 208 - cudaErrorNoKernelImageForDevice = 209 - cudaErrorAlreadyAcquired = 210 - cudaErrorNotMapped = 211 - cudaErrorNotMappedAsArray = 212 - cudaErrorNotMappedAsPointer = 213 - cudaErrorECCUncorrectable = 214 - cudaErrorUnsupportedLimit = 215 - cudaErrorDeviceAlreadyInUse = 216 - cudaErrorPeerAccessUnsupported = 217 - cudaErrorInvalidPtx = 218 - cudaErrorInvalidGraphicsContext = 219 - cudaErrorNvlinkUncorrectable = 220 - cudaErrorJitCompilerNotFound = 221 - cudaErrorInvalidSource = 300 - cudaErrorFileNotFound = 301 - cudaErrorSharedObjectSymbolNotFound = 302 - cudaErrorSharedObjectInitFailed = 303 - cudaErrorOperatingSystem = 304 - cudaErrorInvalidResourceHandle = 400 - cudaErrorIllegalState = 401 - cudaErrorSymbolNotFound = 500 - cudaErrorNotReady = 600 - cudaErrorIllegalAddress = 700 - cudaErrorLaunchOutOfResources = 701 - cudaErrorLaunchTimeout = 702 - cudaErrorLaunchIncompatibleTexturing = 703 - cudaErrorPeerAccessAlreadyEnabled = 704 - cudaErrorPeerAccessNotEnabled = 705 - cudaErrorSetOnActiveProcess = 708 - cudaErrorContextIsDestroyed = 709 - cudaErrorAssert = 710 - cudaErrorTooManyPeers = 711 - cudaErrorHostMemoryAlreadyRegistered = 712 - cudaErrorHostMemoryNotRegistered = 713 - cudaErrorHardwareStackError = 714 - cudaErrorIllegalInstruction = 715 - cudaErrorMisalignedAddress = 716 - cudaErrorInvalidAddressSpace = 717 - cudaErrorInvalidPc = 718 - cudaErrorLaunchFailure = 719 - cudaErrorCooperativeLaunchTooLarge = 720 - cudaErrorNotPermitted = 800 - cudaErrorNotSupported = 801 - cudaErrorSystemNotReady = 802 - cudaErrorSystemDriverMismatch = 803 - cudaErrorCompatNotSupportedOnDevice = 804 - cudaErrorStreamCaptureUnsupported = 900 - cudaErrorStreamCaptureInvalidated = 901 - cudaErrorStreamCaptureMerge = 902 - cudaErrorStreamCaptureUnmatched = 903 - cudaErrorStreamCaptureUnjoined = 904 - cudaErrorStreamCaptureIsolation = 905 - cudaErrorStreamCaptureImplicit = 906 - cudaErrorCapturedEvent = 907 - cudaErrorStreamCaptureWrongThread = 908 - cudaErrorTimeout = 909 - cudaErrorGraphExecUpdateFailure = 910 - cudaErrorUnknown = 999 - cudaErrorApiFailureBase = 10000 - - ctypedef cudaError cudaError_t - - ctypedef enum CUresult: - CUDA_SUCCESS = 0 - CUDA_ERROR_INVALID_VALUE = 1 - CUDA_ERROR_OUT_OF_MEMORY = 2 - CUDA_ERROR_NOT_INITIALIZED = 3 - CUDA_ERROR_DEINITIALIZED = 4 - CUDA_ERROR_PROFILER_DISABLED = 5 - CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6 - CUDA_ERROR_PROFILER_ALREADY_STARTED = 7 - CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8 - CUDA_ERROR_NO_DEVICE = 100 - CUDA_ERROR_INVALID_DEVICE = 101 - CUDA_ERROR_INVALID_IMAGE = 200 - CUDA_ERROR_INVALID_CONTEXT = 201 - CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202 - CUDA_ERROR_MAP_FAILED = 205 - CUDA_ERROR_UNMAP_FAILED = 206 - CUDA_ERROR_ARRAY_IS_MAPPED = 207 - CUDA_ERROR_ALREADY_MAPPED = 208 - CUDA_ERROR_NO_BINARY_FOR_GPU = 209 - CUDA_ERROR_ALREADY_ACQUIRED = 210 - CUDA_ERROR_NOT_MAPPED = 211 - CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212 - CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213 - CUDA_ERROR_ECC_UNCORRECTABLE = 214 - CUDA_ERROR_UNSUPPORTED_LIMIT = 215 - CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216 - CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217 - CUDA_ERROR_INVALID_PTX = 218 - CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219 - CUDA_ERROR_NVLINK_UNCORRECTABLE = 220 - CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221 - CUDA_ERROR_INVALID_SOURCE = 300 - CUDA_ERROR_FILE_NOT_FOUND = 301 - CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302 - CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303 - CUDA_ERROR_OPERATING_SYSTEM = 304 - CUDA_ERROR_INVALID_HANDLE = 400 - CUDA_ERROR_ILLEGAL_STATE = 401 - CUDA_ERROR_NOT_FOUND = 500 - CUDA_ERROR_NOT_READY = 600 - CUDA_ERROR_ILLEGAL_ADDRESS = 700 - CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701 - CUDA_ERROR_LAUNCH_TIMEOUT = 702 - CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703 - CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704 - CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705 - CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708 - CUDA_ERROR_CONTEXT_IS_DESTROYED = 709 - CUDA_ERROR_ASSERT = 710 - CUDA_ERROR_TOO_MANY_PEERS = 711 - CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712 - CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713 - CUDA_ERROR_HARDWARE_STACK_ERROR = 714 - CUDA_ERROR_ILLEGAL_INSTRUCTION = 715 - CUDA_ERROR_MISALIGNED_ADDRESS = 716 - CUDA_ERROR_INVALID_ADDRESS_SPACE = 717 - CUDA_ERROR_INVALID_PC = 718 - CUDA_ERROR_LAUNCH_FAILED = 719 - CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720 - CUDA_ERROR_NOT_PERMITTED = 800 - CUDA_ERROR_NOT_SUPPORTED = 801 - CUDA_ERROR_SYSTEM_NOT_READY = 802 - CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803 - CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804 - CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900 - CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901 - CUDA_ERROR_STREAM_CAPTURE_MERGE = 902 - CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903 - CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904 - CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905 - CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906 - CUDA_ERROR_CAPTURED_EVENT = 907 - CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908 - CUDA_ERROR_TIMEOUT = 909 - CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910 - CUDA_ERROR_UNKNOWN = 999 - - ctypedef struct CUuuid_st: - char bytes[16] - - ctypedef CUuuid_st cudaUUID_t - - ctypedef struct cudaDeviceProp: - int ECCEnabled - int asyncEngineCount - int canMapHostMemory - int canUseHostPointerForRegisteredMem - int clockRate - int computeMode - int computePreemptionSupported - int concurrentKernels - int concurrentManagedAccess - int cooperativeLaunch - int cooperativeMultiDeviceLaunch - int deviceOverlap - int directManagedMemAccessFromHost - int globalL1CacheSupported - int hostNativeAtomicSupported - int integrated - int isMultiGpuBoard - int kernelExecTimeoutEnabled - int l2CacheSize - int localL1CacheSupported - char luid[8] - unsigned int luidDeviceNodeMask - int major - int managedMemory - int maxGridSize[3] - int maxSurface1D - int maxSurface1DLayered[2] - int maxSurface2D[2] - int maxSurface2DLayered[3] - int maxSurface3D[3] - int maxSurfaceCubemap - int maxSurfaceCubemapLayered[2] - int maxTexture1D - int maxTexture1DLayered[2] - int maxTexture1DLinear - int maxTexture1DMipmap - int maxTexture2D[2] - int maxTexture2DGather[2] - int maxTexture2DLayered[3] - int maxTexture2DLinear[3] - int maxTexture2DMipmap[2] - int maxTexture3D[3] - int maxTexture3DAlt[3] - int maxTextureCubemap - int maxTextureCubemapLayered[2] - int maxThreadsDim[3] - int maxThreadsPerBlock - int maxThreadsPerMultiProcessor - size_t memPitch - int memoryBusWidth - int memoryClockRate - int minor - int multiGpuBoardGroupID - int multiProcessorCount - char name[256] - int pageableMemoryAccess - int pageableMemoryAccessUsesHostPageTables - int pciBusID - int pciDeviceID - int pciDomainID - int regsPerBlock - int regsPerMultiprocessor - size_t sharedMemPerBlock - size_t sharedMemPerBlockOptin - size_t sharedMemPerMultiprocessor - int singleToDoublePrecisionPerfRatio - int streamPrioritiesSupported - size_t surfaceAlignment - int tccDriver - size_t textureAlignment - size_t texturePitchAlignment - size_t totalConstMem - size_t totalGlobalMem - int unifiedAddressing - cudaUUID_t uuid - int warpSize - - CUresult cuDeviceGetName(char* name, int length, int device) - - CUresult cuGetErrorName(CUresult error, const char** pStr) - CUresult cuGetErrorString(CUresult error, const char** pStr) - -cdef extern from "cuda_runtime_api.h" nogil: - - cudaError_t cudaDriverGetVersion(int* driverVersion) - cudaError_t cudaRuntimeGetVersion(int* runtimeVersion) - cudaError_t cudaGetDeviceCount(int* count) - cudaError_t cudaGetDevice(int* device) - cudaError_t cudaDeviceGetAttribute(int* value, - cudaDeviceAttr attr, - int device) - cudaError_t cudaGetDeviceProperties(cudaDeviceProp* prop, int device) - cudaError_t cudaSetDevice(int device) - - const char* cudaGetErrorString(cudaError_t error) - const char* cudaGetErrorName(cudaError_t error) diff --git a/python/rmm/_cuda/11.x/gpu.pxi b/python/rmm/_cuda/11.x/gpu.pxi deleted file mode 100644 index 856ff04e9..000000000 --- a/python/rmm/_cuda/11.x/gpu.pxi +++ /dev/null @@ -1,406 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# For CUDA 11.0, 11.1 and 11.2 - -cdef extern from "cuda.h" nogil: - cpdef enum cudaDeviceAttr: - cudaDevAttrMaxThreadsPerBlock = 1 - cudaDevAttrMaxBlockDimX = 2 - cudaDevAttrMaxBlockDimY = 3 - cudaDevAttrMaxBlockDimZ = 4 - cudaDevAttrMaxGridDimX = 5 - cudaDevAttrMaxGridDimY = 6 - cudaDevAttrMaxGridDimZ = 7 - cudaDevAttrMaxSharedMemoryPerBlock = 8 - cudaDevAttrTotalConstantMemory = 9 - cudaDevAttrWarpSize = 10 - cudaDevAttrMaxPitch = 11 - cudaDevAttrMaxRegistersPerBlock = 12 - cudaDevAttrClockRate = 13 - cudaDevAttrTextureAlignment = 14 - cudaDevAttrGpuOverlap = 15 - cudaDevAttrMultiProcessorCount = 16 - cudaDevAttrKernelExecTimeout = 17 - cudaDevAttrIntegrated = 18 - cudaDevAttrCanMapHostMemory = 19 - cudaDevAttrComputeMode = 20 - cudaDevAttrMaxTexture1DWidth = 21 - cudaDevAttrMaxTexture2DWidth = 22 - cudaDevAttrMaxTexture2DHeight = 23 - cudaDevAttrMaxTexture3DWidth = 24 - cudaDevAttrMaxTexture3DHeight = 25 - cudaDevAttrMaxTexture3DDepth = 26 - cudaDevAttrMaxTexture2DLayeredWidth = 27 - cudaDevAttrMaxTexture2DLayeredHeight = 28 - cudaDevAttrMaxTexture2DLayeredLayers = 29 - cudaDevAttrSurfaceAlignment = 30 - cudaDevAttrConcurrentKernels = 31 - cudaDevAttrEccEnabled = 32 - cudaDevAttrPciBusId = 33 - cudaDevAttrPciDeviceId = 34 - cudaDevAttrTccDriver = 35 - cudaDevAttrMemoryClockRate = 36 - cudaDevAttrGlobalMemoryBusWidth = 37 - cudaDevAttrL2CacheSize = 38 - cudaDevAttrMaxThreadsPerMultiProcessor = 39 - cudaDevAttrAsyncEngineCount = 40 - cudaDevAttrUnifiedAddressing = 41 - cudaDevAttrMaxTexture1DLayeredWidth = 42 - cudaDevAttrMaxTexture1DLayeredLayers = 43 - cudaDevAttrMaxTexture2DGatherWidth = 45 - cudaDevAttrMaxTexture2DGatherHeight = 46 - cudaDevAttrMaxTexture3DWidthAlt = 47 - cudaDevAttrMaxTexture3DHeightAlt = 48 - cudaDevAttrMaxTexture3DDepthAlt = 49 - cudaDevAttrPciDomainId = 50 - cudaDevAttrTexturePitchAlignment = 51 - cudaDevAttrMaxTextureCubemapWidth = 52 - cudaDevAttrMaxTextureCubemapLayeredWidth = 53 - cudaDevAttrMaxTextureCubemapLayeredLayers = 54 - cudaDevAttrMaxSurface1DWidth = 55 - cudaDevAttrMaxSurface2DWidth = 56 - cudaDevAttrMaxSurface2DHeight = 57 - cudaDevAttrMaxSurface3DWidth = 58 - cudaDevAttrMaxSurface3DHeight = 59 - cudaDevAttrMaxSurface3DDepth = 60 - cudaDevAttrMaxSurface1DLayeredWidth = 61 - cudaDevAttrMaxSurface1DLayeredLayers = 62 - cudaDevAttrMaxSurface2DLayeredWidth = 63 - cudaDevAttrMaxSurface2DLayeredHeight = 64 - cudaDevAttrMaxSurface2DLayeredLayers = 65 - cudaDevAttrMaxSurfaceCubemapWidth = 66 - cudaDevAttrMaxSurfaceCubemapLayeredWidth = 67 - cudaDevAttrMaxSurfaceCubemapLayeredLayers = 68 - cudaDevAttrMaxTexture1DLinearWidth = 69 - cudaDevAttrMaxTexture2DLinearWidth = 70 - cudaDevAttrMaxTexture2DLinearHeight = 71 - cudaDevAttrMaxTexture2DLinearPitch = 72 - cudaDevAttrMaxTexture2DMipmappedWidth = 73 - cudaDevAttrMaxTexture2DMipmappedHeight = 74 - cudaDevAttrComputeCapabilityMajor = 75 - cudaDevAttrComputeCapabilityMinor = 76 - cudaDevAttrMaxTexture1DMipmappedWidth = 77 - cudaDevAttrStreamPrioritiesSupported = 78 - cudaDevAttrGlobalL1CacheSupported = 79 - cudaDevAttrLocalL1CacheSupported = 80 - cudaDevAttrMaxSharedMemoryPerMultiprocessor = 81 - cudaDevAttrMaxRegistersPerMultiprocessor = 82 - cudaDevAttrManagedMemory = 83 - cudaDevAttrIsMultiGpuBoard = 84 - cudaDevAttrMultiGpuBoardGroupID = 85 - cudaDevAttrHostNativeAtomicSupported = 86 - cudaDevAttrSingleToDoublePrecisionPerfRatio = 87 - cudaDevAttrPageableMemoryAccess = 88 - cudaDevAttrConcurrentManagedAccess = 89 - cudaDevAttrComputePreemptionSupported = 90 - cudaDevAttrCanUseHostPointerForRegisteredMem = 91 - cudaDevAttrReserved92 = 92 - cudaDevAttrReserved93 = 93 - cudaDevAttrReserved94 = 94 - cudaDevAttrCooperativeLaunch = 95 - cudaDevAttrCooperativeMultiDeviceLaunch = 96 - cudaDevAttrMaxSharedMemoryPerBlockOptin = 97 - cudaDevAttrCanFlushRemoteWrites = 98 - cudaDevAttrHostRegisterSupported = 99 - cudaDevAttrPageableMemoryAccessUsesHostPageTables = 100 - cudaDevAttrDirectManagedMemAccessFromHost = 101 - cudaDevAttrMaxBlocksPerMultiprocessor = 106 - cudaDevAttrReservedSharedMemoryPerBlock = 111 - - cpdef enum cudaError: - cudaSuccess = 0 - cudaErrorInvalidValue = 1 - cudaErrorMemoryAllocation = 2 - cudaErrorInitializationError = 3 - cudaErrorCudartUnloading = 4 - cudaErrorProfilerDisabled = 5 - cudaErrorProfilerNotInitialized = 6 - cudaErrorProfilerAlreadyStarted = 7 - cudaErrorProfilerAlreadyStopped = 8 - cudaErrorInvalidConfiguration = 9 - cudaErrorInvalidPitchValue = 12 - cudaErrorInvalidSymbol = 13 - cudaErrorInvalidHostPointer = 16 - cudaErrorInvalidDevicePointer = 17 - cudaErrorInvalidTexture = 18 - cudaErrorInvalidTextureBinding = 19 - cudaErrorInvalidChannelDescriptor = 20 - cudaErrorInvalidMemcpyDirection = 21 - cudaErrorAddressOfConstant = 22 - cudaErrorTextureFetchFailed = 23 - cudaErrorTextureNotBound = 24 - cudaErrorSynchronizationError = 25 - cudaErrorInvalidFilterSetting = 26 - cudaErrorInvalidNormSetting = 27 - cudaErrorMixedDeviceExecution = 28 - cudaErrorNotYetImplemented = 31 - cudaErrorMemoryValueTooLarge = 32 - cudaErrorInsufficientDriver = 35 - cudaErrorInvalidSurface = 37 - cudaErrorDuplicateVariableName = 43 - cudaErrorDuplicateTextureName = 44 - cudaErrorDuplicateSurfaceName = 45 - cudaErrorDevicesUnavailable = 46 - cudaErrorIncompatibleDriverContext = 49 - cudaErrorMissingConfiguration = 52 - cudaErrorPriorLaunchFailure = 53 - cudaErrorLaunchMaxDepthExceeded = 65 - cudaErrorLaunchFileScopedTex = 66 - cudaErrorLaunchFileScopedSurf = 67 - cudaErrorSyncDepthExceeded = 68 - cudaErrorLaunchPendingCountExceeded = 69 - cudaErrorInvalidDeviceFunction = 98 - cudaErrorNoDevice = 100 - cudaErrorInvalidDevice = 101 - cudaErrorStartupFailure = 127 - cudaErrorInvalidKernelImage = 200 - cudaErrorDeviceUninitialized = 201 - cudaErrorMapBufferObjectFailed = 205 - cudaErrorUnmapBufferObjectFailed = 206 - cudaErrorArrayIsMapped = 207 - cudaErrorAlreadyMapped = 208 - cudaErrorNoKernelImageForDevice = 209 - cudaErrorAlreadyAcquired = 210 - cudaErrorNotMapped = 211 - cudaErrorNotMappedAsArray = 212 - cudaErrorNotMappedAsPointer = 213 - cudaErrorECCUncorrectable = 214 - cudaErrorUnsupportedLimit = 215 - cudaErrorDeviceAlreadyInUse = 216 - cudaErrorPeerAccessUnsupported = 217 - cudaErrorInvalidPtx = 218 - cudaErrorInvalidGraphicsContext = 219 - cudaErrorNvlinkUncorrectable = 220 - cudaErrorJitCompilerNotFound = 221 - cudaErrorInvalidSource = 300 - cudaErrorFileNotFound = 301 - cudaErrorSharedObjectSymbolNotFound = 302 - cudaErrorSharedObjectInitFailed = 303 - cudaErrorOperatingSystem = 304 - cudaErrorInvalidResourceHandle = 400 - cudaErrorIllegalState = 401 - cudaErrorSymbolNotFound = 500 - cudaErrorNotReady = 600 - cudaErrorIllegalAddress = 700 - cudaErrorLaunchOutOfResources = 701 - cudaErrorLaunchTimeout = 702 - cudaErrorLaunchIncompatibleTexturing = 703 - cudaErrorPeerAccessAlreadyEnabled = 704 - cudaErrorPeerAccessNotEnabled = 705 - cudaErrorSetOnActiveProcess = 708 - cudaErrorContextIsDestroyed = 709 - cudaErrorAssert = 710 - cudaErrorTooManyPeers = 711 - cudaErrorHostMemoryAlreadyRegistered = 712 - cudaErrorHostMemoryNotRegistered = 713 - cudaErrorHardwareStackError = 714 - cudaErrorIllegalInstruction = 715 - cudaErrorMisalignedAddress = 716 - cudaErrorInvalidAddressSpace = 717 - cudaErrorInvalidPc = 718 - cudaErrorLaunchFailure = 719 - cudaErrorCooperativeLaunchTooLarge = 720 - cudaErrorNotPermitted = 800 - cudaErrorNotSupported = 801 - cudaErrorSystemNotReady = 802 - cudaErrorSystemDriverMismatch = 803 - cudaErrorCompatNotSupportedOnDevice = 804 - cudaErrorStreamCaptureUnsupported = 900 - cudaErrorStreamCaptureInvalidated = 901 - cudaErrorStreamCaptureMerge = 902 - cudaErrorStreamCaptureUnmatched = 903 - cudaErrorStreamCaptureUnjoined = 904 - cudaErrorStreamCaptureIsolation = 905 - cudaErrorStreamCaptureImplicit = 906 - cudaErrorCapturedEvent = 907 - cudaErrorStreamCaptureWrongThread = 908 - cudaErrorTimeout = 909 - cudaErrorGraphExecUpdateFailure = 910 - cudaErrorUnknown = 999 - cudaErrorApiFailureBase = 10000 - - ctypedef cudaError cudaError_t - - ctypedef enum CUresult: - CUDA_SUCCESS = 0 - CUDA_ERROR_INVALID_VALUE = 1 - CUDA_ERROR_OUT_OF_MEMORY = 2 - CUDA_ERROR_NOT_INITIALIZED = 3 - CUDA_ERROR_DEINITIALIZED = 4 - CUDA_ERROR_PROFILER_DISABLED = 5 - CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6 - CUDA_ERROR_PROFILER_ALREADY_STARTED = 7 - CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8 - CUDA_ERROR_NO_DEVICE = 100 - CUDA_ERROR_INVALID_DEVICE = 101 - CUDA_ERROR_INVALID_IMAGE = 200 - CUDA_ERROR_INVALID_CONTEXT = 201 - CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202 - CUDA_ERROR_MAP_FAILED = 205 - CUDA_ERROR_UNMAP_FAILED = 206 - CUDA_ERROR_ARRAY_IS_MAPPED = 207 - CUDA_ERROR_ALREADY_MAPPED = 208 - CUDA_ERROR_NO_BINARY_FOR_GPU = 209 - CUDA_ERROR_ALREADY_ACQUIRED = 210 - CUDA_ERROR_NOT_MAPPED = 211 - CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212 - CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213 - CUDA_ERROR_ECC_UNCORRECTABLE = 214 - CUDA_ERROR_UNSUPPORTED_LIMIT = 215 - CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216 - CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217 - CUDA_ERROR_INVALID_PTX = 218 - CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219 - CUDA_ERROR_NVLINK_UNCORRECTABLE = 220 - CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221 - CUDA_ERROR_INVALID_SOURCE = 300 - CUDA_ERROR_FILE_NOT_FOUND = 301 - CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302 - CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303 - CUDA_ERROR_OPERATING_SYSTEM = 304 - CUDA_ERROR_INVALID_HANDLE = 400 - CUDA_ERROR_ILLEGAL_STATE = 401 - CUDA_ERROR_NOT_FOUND = 500 - CUDA_ERROR_NOT_READY = 600 - CUDA_ERROR_ILLEGAL_ADDRESS = 700 - CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701 - CUDA_ERROR_LAUNCH_TIMEOUT = 702 - CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703 - CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704 - CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705 - CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708 - CUDA_ERROR_CONTEXT_IS_DESTROYED = 709 - CUDA_ERROR_ASSERT = 710 - CUDA_ERROR_TOO_MANY_PEERS = 711 - CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712 - CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713 - CUDA_ERROR_HARDWARE_STACK_ERROR = 714 - CUDA_ERROR_ILLEGAL_INSTRUCTION = 715 - CUDA_ERROR_MISALIGNED_ADDRESS = 716 - CUDA_ERROR_INVALID_ADDRESS_SPACE = 717 - CUDA_ERROR_INVALID_PC = 718 - CUDA_ERROR_LAUNCH_FAILED = 719 - CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720 - CUDA_ERROR_NOT_PERMITTED = 800 - CUDA_ERROR_NOT_SUPPORTED = 801 - CUDA_ERROR_SYSTEM_NOT_READY = 802 - CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803 - CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804 - CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900 - CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901 - CUDA_ERROR_STREAM_CAPTURE_MERGE = 902 - CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903 - CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904 - CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905 - CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906 - CUDA_ERROR_CAPTURED_EVENT = 907 - CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908 - CUDA_ERROR_TIMEOUT = 909 - CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910 - CUDA_ERROR_UNKNOWN = 999 - - ctypedef struct CUuuid_st: - char bytes[16] - - ctypedef CUuuid_st cudaUUID_t - - ctypedef struct cudaDeviceProp: - int ECCEnabled - int asyncEngineCount - int canMapHostMemory - int canUseHostPointerForRegisteredMem - int clockRate - int computeMode - int computePreemptionSupported - int concurrentKernels - int concurrentManagedAccess - int cooperativeLaunch - int cooperativeMultiDeviceLaunch - int deviceOverlap - int directManagedMemAccessFromHost - int globalL1CacheSupported - int hostNativeAtomicSupported - int integrated - int isMultiGpuBoard - int kernelExecTimeoutEnabled - int l2CacheSize - int localL1CacheSupported - char luid[8] - unsigned int luidDeviceNodeMask - int major - int managedMemory - int maxGridSize[3] - int maxSurface1D - int maxSurface1DLayered[2] - int maxSurface2D[2] - int maxSurface2DLayered[3] - int maxSurface3D[3] - int maxSurfaceCubemap - int maxSurfaceCubemapLayered[2] - int maxTexture1D - int maxTexture1DLayered[2] - int maxTexture1DLinear - int maxTexture1DMipmap - int maxTexture2D[2] - int maxTexture2DGather[2] - int maxTexture2DLayered[3] - int maxTexture2DLinear[3] - int maxTexture2DMipmap[2] - int maxTexture3D[3] - int maxTexture3DAlt[3] - int maxTextureCubemap - int maxTextureCubemapLayered[2] - int maxThreadsDim[3] - int maxThreadsPerBlock - int maxThreadsPerMultiProcessor - size_t memPitch - int memoryBusWidth - int memoryClockRate - int minor - int multiGpuBoardGroupID - int multiProcessorCount - char name[256] - int pageableMemoryAccess - int pageableMemoryAccessUsesHostPageTables - int pciBusID - int pciDeviceID - int pciDomainID - int regsPerBlock - int regsPerMultiprocessor - size_t sharedMemPerBlock - size_t sharedMemPerBlockOptin - size_t sharedMemPerMultiprocessor - int singleToDoublePrecisionPerfRatio - int streamPrioritiesSupported - size_t surfaceAlignment - int tccDriver - size_t textureAlignment - size_t texturePitchAlignment - size_t totalConstMem - size_t totalGlobalMem - int unifiedAddressing - cudaUUID_t uuid - int warpSize - int accessPolicyMaxWindowSize - int maxBlocksPerMultiProcessor - int persistingL2CacheMaxSize - size_t reservedSharedMemPerBlock - - CUresult cuDeviceGetName(char* name, int length, int device) - - CUresult cuGetErrorName(CUresult error, const char** pStr) - CUresult cuGetErrorString(CUresult error, const char** pStr) - -cdef extern from "cuda_runtime_api.h" nogil: - - cudaError_t cudaDriverGetVersion(int* driverVersion) - cudaError_t cudaRuntimeGetVersion(int* runtimeVersion) - cudaError_t cudaGetDeviceCount(int* count) - cudaError_t cudaGetDevice(int* device) - cudaError_t cudaDeviceGetAttribute(int* value, - cudaDeviceAttr attr, - int device) - cudaError_t cudaGetDeviceProperties(cudaDeviceProp* prop, int device) - cudaError_t cudaSetDevice(int device) - - const char* cudaGetErrorString(cudaError_t error) - const char* cudaGetErrorName(cudaError_t error) diff --git a/python/rmm/_cuda/gpu.pyx b/python/rmm/_cuda/gpu.py similarity index 55% rename from python/rmm/_cuda/gpu.pyx rename to python/rmm/_cuda/gpu.py index f570df811..c0a0e96c5 100644 --- a/python/rmm/_cuda/gpu.pyx +++ b/python/rmm/_cuda/gpu.py @@ -1,59 +1,36 @@ # Copyright (c) 2020, NVIDIA CORPORATION. -from rmm._cuda.gpu cimport ( - CUresult, - cudaDeviceAttr, - cudaDeviceGetAttribute, - cudaDeviceProp, - cudaDriverGetVersion, - cudaError, - cudaError_t, - cudaGetDeviceCount, - cudaGetDeviceProperties, - cudaGetErrorName, - cudaGetErrorString, - cudaRuntimeGetVersion, - cuDeviceGetName, - cuGetErrorName, - cuGetErrorString, -) - -from enum import IntEnum +from cuda import cuda, cudart class CUDARuntimeError(RuntimeError): - - def __init__(self, cudaError_t status): + def __init__(self, status: cuda.CUresult.CUDA_SUCCESS): self.status = status - cdef str name = cudaGetErrorName(status).decode() - cdef str msg = cudaGetErrorString(status).decode() + _, name = cudart.cudaGetErrorName(status) + _, msg = cudart.cudaGetErrorString(status) super(CUDARuntimeError, self).__init__( - '%s: %s' % (name, msg)) + "%s: %s" % (name.decode(), msg.decode()) + ) def __reduce__(self): return (type(self), (self.status,)) class CUDADriverError(RuntimeError): - - def __init__(self, CUresult status): + def __init__(self, status: cuda.CUresult): self.status = status - cdef const char* name_cstr - cdef CUresult name_status = cuGetErrorName(status, &name_cstr) - if name_status != 0: - raise CUDADriverError(name_status) - - cdef const char* msg_cstr - cdef CUresult msg_status = cuGetErrorString(status, &msg_cstr) - if msg_status != 0: - raise CUDADriverError(msg_status) + err, name = cuda.cuGetErrorName(status) + if err != cuda.CUresult.CUDA_SUCCESS: + raise CUDADriverError(err.value) - cdef str name = name_cstr.decode() - cdef str msg = msg_cstr.decode() + err, msg = cuda.cuGetErrorString(status) + if err != cuda.CUresult.CUDA_SUCCESS: + raise CUDADriverError(err.value) super(CUDADriverError, self).__init__( - '%s: %s' % (name, msg)) + "%s: %s" % (name.decode(), msg.decode()) + ) def __reduce__(self): return (type(self), (self.status,)) @@ -69,9 +46,8 @@ def driverGetVersion(): This function automatically raises CUDARuntimeError with error message and status code. """ - cdef int version - cdef cudaError_t status = cudaDriverGetVersion(&version) - if status != cudaError.cudaSuccess: + status, version = cudart.cudaDriverGetVersion() + if status != cudart.cudaError_t.cudaSuccess: raise CUDARuntimeError(status) return version @@ -80,14 +56,13 @@ def getDevice(): """ Get the current CUDA device """ - cdef int current_device - cdef cudaError_t status = cudaGetDevice(¤t_device) - if status != cudaError.cudaSuccess: + status, device = cudart.cudaGetDevice() + if status != cudart.cudaError_t.cudaSuccess: raise CUDARuntimeError(status) - return current_device + return device -def setDevice(int device): +def setDevice(device: int): """ Set the current CUDA device Parameters @@ -95,9 +70,8 @@ def setDevice(int device): device : int The ID of the device to set as current """ - cdef cudaError_t status = cudaSetDevice(device) - - if status != cudaError.cudaSuccess: + (status,) = cudart.cudaSetDevice(device) + if status != cudart.cudaError_t.cudaSuccess: raise CUDARuntimeError(status) @@ -110,10 +84,8 @@ def runtimeGetVersion(): This function automatically raises CUDARuntimeError with error message and status code. """ - - cdef int version - cdef cudaError_t status = cudaRuntimeGetVersion(&version) - if status != cudaError.cudaSuccess: + status, version = cudart.cudaRuntimeGetVersion() + if status != cudart.cudaError_t.cudaSuccess: raise CUDARuntimeError(status) return version @@ -126,16 +98,13 @@ def getDeviceCount(): This function automatically raises CUDARuntimeError with error message and status code. """ - - cdef int count - cdef cudaError_t status = cudaGetDeviceCount(&count) - - if status != cudaError.cudaSuccess: + status, count = cudart.cudaGetDeviceCount() + if status != cudart.cudaError_t.cudaSuccess: raise CUDARuntimeError(status) return count -def getDeviceAttribute(cudaDeviceAttr attr, int device): +def getDeviceAttribute(attr: cudart.cudaDeviceAttr, device: int): """ Returns information about the device. @@ -149,15 +118,13 @@ def getDeviceAttribute(cudaDeviceAttr attr, int device): This function automatically raises CUDARuntimeError with error message and status code. """ - - cdef int value - cdef cudaError_t status = cudaDeviceGetAttribute(&value, attr, device) - if status != cudaError.cudaSuccess: + status, value = cudart.cudaDeviceGetAttribute(attr, device) + if status != cudart.cudaError_t.cudaSuccess: raise CUDARuntimeError(status) return value -def getDeviceProperties(int device): +def getDeviceProperties(device: int): """ Returns information about the compute-device. @@ -169,15 +136,13 @@ def getDeviceProperties(int device): This function automatically raises CUDARuntimeError with error message and status code. """ - - cdef cudaDeviceProp prop - cdef cudaError_t status = cudaGetDeviceProperties(&prop, device) - if status != cudaError.cudaSuccess: + status, prop = cudart.cudaGetDeviceProperties(device) + if status != cudart.cudaError_t.cudaSuccess: raise CUDARuntimeError(status) return prop -def deviceGetName(int device): +def deviceGetName(device: int): """ Returns an identifer string for the device. @@ -190,12 +155,7 @@ def deviceGetName(int device): and status code. """ - cdef char[256] device_name - cdef CUresult status = cuDeviceGetName( - device_name, - sizeof(device_name), - device - ) - if status != 0: + status, device_name = cuda.cuDeviceGetName(256, cuda.CUdevice(device)) + if status.value != 0: raise CUDADriverError(status) return device_name.decode() diff --git a/python/rmm/_cuda/stream.pxd b/python/rmm/_cuda/stream.pxd index 0806a7310..6aa4e0b24 100644 --- a/python/rmm/_cuda/stream.pxd +++ b/python/rmm/_cuda/stream.pxd @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t from libcpp cimport bool from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.lib cimport cudaStream_t cdef class Stream: diff --git a/python/rmm/_cuda/stream.pyx b/python/rmm/_cuda/stream.pyx index 4c9890d51..4f2ce26d0 100644 --- a/python/rmm/_cuda/stream.pyx +++ b/python/rmm/_cuda/stream.pyx @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t from libcpp cimport bool @@ -21,7 +22,6 @@ from rmm._lib.cuda_stream_view cimport ( cuda_stream_per_thread, cuda_stream_view, ) -from rmm._lib.lib cimport cudaStream_t from numba import cuda diff --git a/python/rmm/_lib/cuda_stream.pxd b/python/rmm/_lib/cuda_stream.pxd index 4eae4bc76..1eed1cefb 100644 --- a/python/rmm/_lib/cuda_stream.pxd +++ b/python/rmm/_lib/cuda_stream.pxd @@ -13,11 +13,11 @@ # limitations under the License. cimport cython +from cuda.ccudart cimport cudaStream_t from libcpp cimport bool from libcpp.memory cimport unique_ptr from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.lib cimport cudaStream_t cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil: diff --git a/python/rmm/_lib/cuda_stream.pyx b/python/rmm/_lib/cuda_stream.pyx index 44ca6d75e..d93af2509 100644 --- a/python/rmm/_lib/cuda_stream.pyx +++ b/python/rmm/_lib/cuda_stream.pyx @@ -13,6 +13,7 @@ # limitations under the License. cimport cython +from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t from libcpp cimport bool diff --git a/python/rmm/_lib/cuda_stream_view.pxd b/python/rmm/_lib/cuda_stream_view.pxd index 7031a03ea..bf0d33c24 100644 --- a/python/rmm/_lib/cuda_stream_view.pxd +++ b/python/rmm/_lib/cuda_stream_view.pxd @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from cuda.ccudart cimport cudaStream_t from libcpp cimport bool -from rmm._lib.lib cimport cudaStream_t - cdef extern from "rmm/cuda_stream_view.hpp" namespace "rmm" nogil: cdef cppclass cuda_stream_view: diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index cbe0bdb33..b6a656992 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -20,20 +20,20 @@ from libc.stdint cimport uintptr_t from libcpp.memory cimport unique_ptr from libcpp.utility cimport move -from rmm._cuda.gpu cimport cudaError, cudaError_t from rmm._cuda.stream cimport Stream from rmm._cuda.stream import DEFAULT_STREAM -from rmm._lib.lib cimport ( +cimport cuda.ccudart as ccudart +from cuda.ccudart cimport ( + cudaError, + cudaError_t, cudaMemcpyAsync, - cudaMemcpyDeviceToDevice, - cudaMemcpyDeviceToHost, - cudaMemcpyHostToDevice, cudaMemcpyKind, cudaStream_t, cudaStreamSynchronize, ) + from rmm._lib.memory_resource cimport get_current_device_resource @@ -73,7 +73,6 @@ cdef class DeviceBuffer: >>> db = rmm.DeviceBuffer(size=5) """ cdef const void* c_ptr - cdef cudaError_t err with nogil: c_ptr = ptr @@ -339,7 +338,7 @@ cpdef DeviceBuffer to_device(const unsigned char[::1] b, cdef void _copy_async(const void* src, void* dst, size_t count, - cudaMemcpyKind kind, + ccudart.cudaMemcpyKind kind, cuda_stream_view stream) nogil: """ Asynchronously copy data between host and/or device pointers @@ -398,7 +397,7 @@ cpdef void copy_ptr_to_host(uintptr_t db, with nogil: _copy_async(db, &hb[0], len(hb), - cudaMemcpyDeviceToHost, stream.view()) + cudaMemcpyKind.cudaMemcpyDeviceToHost, stream.view()) if stream.c_is_default(): stream.c_synchronize() @@ -442,7 +441,7 @@ cpdef void copy_host_to_ptr(const unsigned char[::1] hb, with nogil: _copy_async(&hb[0], db, len(hb), - cudaMemcpyHostToDevice, stream.view()) + cudaMemcpyKind.cudaMemcpyHostToDevice, stream.view()) if stream.c_is_default(): stream.c_synchronize() @@ -475,4 +474,4 @@ cpdef void copy_device_to_ptr(uintptr_t d_src, with nogil: _copy_async(d_src, d_dst, count, - cudaMemcpyDeviceToDevice, stream.view()) + cudaMemcpyKind.cudaMemcpyDeviceToDevice, stream.view()) diff --git a/python/rmm/_lib/lib.pxd b/python/rmm/_lib/lib.pxd index c06d69872..e35b672e4 100644 --- a/python/rmm/_lib/lib.pxd +++ b/python/rmm/_lib/lib.pxd @@ -17,24 +17,4 @@ from libcpp cimport bool from libcpp.utility cimport pair from libcpp.vector cimport vector -from rmm._cuda.gpu cimport cudaError_t - ctypedef pair[const char*, unsigned int] caller_pair - - -cdef extern from * nogil: - - ctypedef void* cudaStream_t "cudaStream_t" - - ctypedef enum cudaMemcpyKind "cudaMemcpyKind": - cudaMemcpyHostToHost = 0 - cudaMemcpyHostToDevice = 1 - cudaMemcpyDeviceToHost = 2 - cudaMemcpyDeviceToDevice = 3 - cudaMemcpyDefault = 4 - - cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count, - cudaMemcpyKind kind) - cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count, - cudaMemcpyKind kind, cudaStream_t stream) - cudaError_t cudaStreamSynchronize(cudaStream_t stream) diff --git a/python/rmm/_lib/memory_resource.pxd b/python/rmm/_lib/memory_resource.pxd index 3a71fd500..b4e442e83 100644 --- a/python/rmm/_lib/memory_resource.pxd +++ b/python/rmm/_lib/memory_resource.pxd @@ -68,7 +68,4 @@ cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): pass -cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): - cdef object _callback - cpdef DeviceMemoryResource get_current_device_resource() diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index d7711ed8a..16723a0a3 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -23,7 +23,9 @@ from libcpp.cast cimport dynamic_cast from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr from libcpp.string cimport string -from rmm._cuda.gpu import CUDARuntimeError, cudaError, getDevice, setDevice +from cuda.cudart import cudaError_t + +from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice # NOTE: Keep extern declarations in .pyx file as much as possible to avoid @@ -705,7 +707,7 @@ cpdef void _initialize( try: original_device = getDevice() except CUDARuntimeError as e: - if e.status == cudaError.cudaErrorNoDevice: + if e.status == cudaError_t.cudaErrorNoDevice: warnings.warn(e.msg) else: raise e diff --git a/python/setup.py b/python/setup.py index 8edaf4c31..ca70709bd 100644 --- a/python/setup.py +++ b/python/setup.py @@ -1,6 +1,5 @@ # Copyright (c) 2019-2021, NVIDIA CORPORATION. -import filecmp import glob import os import re @@ -75,43 +74,6 @@ def get_cuda_version_from_header(cuda_include_dir): # use uninstalled headers in source tree rmm_include_dir = "../include" -# Preprocessor step to specify correct pxd file with -# valid symbols for specific version of CUDA. - -cwd = os.getcwd() -files_to_preprocess = ["gpu.pxd"] - -# The .pxi file is unchanged between some CUDA versions -# (e.g., 11.0 & 11.1), so we keep only a single copy -# of it -cuda_version_to_pxi_dir = { - "10.1": "10.1", - "10.2": "10.2", - "11": "11.x", -} - -for pxd_basename in files_to_preprocess: - pxi_basename = os.path.splitext(pxd_basename)[0] + ".pxi" - pxi_dir = cuda_version_to_pxi_dir.get(CUDA_VERSION) - if not pxi_dir: - # didn't get an exact match on major.minor version - see if - # we have a match on just the major version - pxi_dir = cuda_version_to_pxi_dir.get(CUDA_VERSION.split(".")[0]) - - if pxi_dir: - pxi_pathname = os.path.join(cwd, "rmm/_cuda", pxi_dir, pxi_basename,) - pxd_pathname = os.path.join(cwd, "rmm/_cuda", pxd_basename) - try: - if filecmp.cmp(pxi_pathname, pxd_pathname): - # files are the same, no need to copy - continue - except FileNotFoundError: - # pxd_pathname doesn't exist yet - pass - shutil.copyfile(pxi_pathname, pxd_pathname) - else: - raise TypeError(f"{CUDA_VERSION} is not supported.") - include_dirs = [ rmm_include_dir, os.path.dirname(sysconfig.get_path("include")), From 9f564bad048ecc953cc4b38ab65887dda6df8051 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 6 Dec 2021 16:52:37 -0500 Subject: [PATCH 02/16] Undo deletion --- python/rmm/_lib/memory_resource.pxd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/rmm/_lib/memory_resource.pxd b/python/rmm/_lib/memory_resource.pxd index b4e442e83..3a71fd500 100644 --- a/python/rmm/_lib/memory_resource.pxd +++ b/python/rmm/_lib/memory_resource.pxd @@ -68,4 +68,7 @@ cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): pass +cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): + cdef object _callback + cpdef DeviceMemoryResource get_current_device_resource() From c873b89171502153655ce4282937306c9c5aac51 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Mon, 6 Dec 2021 16:53:04 -0500 Subject: [PATCH 03/16] Update python/rmm/_cuda/gpu.py Co-authored-by: Bradley Dice --- python/rmm/_cuda/gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/rmm/_cuda/gpu.py b/python/rmm/_cuda/gpu.py index c0a0e96c5..21c97f062 100644 --- a/python/rmm/_cuda/gpu.py +++ b/python/rmm/_cuda/gpu.py @@ -156,6 +156,6 @@ def deviceGetName(device: int): """ status, device_name = cuda.cuDeviceGetName(256, cuda.CUdevice(device)) - if status.value != 0: + if status != cuda.CUresult.CUDA_SUCCESS: raise CUDADriverError(status) return device_name.decode() From 7e14c23419ad070bad828074f41367b14ec0f805 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 6 Dec 2021 16:57:04 -0500 Subject: [PATCH 04/16] Add cuda-python to recipe, envs and setup.py --- conda/environments/rmm_dev_cuda10.1.yml | 1 + conda/environments/rmm_dev_cuda10.2.yml | 1 + conda/environments/rmm_dev_cuda11.0.yml | 1 + conda/recipes/rmm/meta.yaml | 2 ++ python/setup.py | 2 +- 5 files changed, 6 insertions(+), 1 deletion(-) diff --git a/conda/environments/rmm_dev_cuda10.1.yml b/conda/environments/rmm_dev_cuda10.1.yml index caccd9541..4df013925 100644 --- a/conda/environments/rmm_dev_cuda10.1.yml +++ b/conda/environments/rmm_dev_cuda10.1.yml @@ -19,3 +19,4 @@ dependencies: - spdlog>=1.8.5,<1.9 - cython>=0.29,<0.30 - gcovr>=5.0 +- cuda-python>=11.5,<12.0 \ No newline at end of file diff --git a/conda/environments/rmm_dev_cuda10.2.yml b/conda/environments/rmm_dev_cuda10.2.yml index 348418014..ce7e70d42 100644 --- a/conda/environments/rmm_dev_cuda10.2.yml +++ b/conda/environments/rmm_dev_cuda10.2.yml @@ -19,3 +19,4 @@ dependencies: - spdlog>=1.8.5,<1.9 - cython>=0.29,<0.30 - gcovr>=5.0 +- cuda-python>=11.5,<12.0 \ No newline at end of file diff --git a/conda/environments/rmm_dev_cuda11.0.yml b/conda/environments/rmm_dev_cuda11.0.yml index 8fbe42e4b..a364e7169 100644 --- a/conda/environments/rmm_dev_cuda11.0.yml +++ b/conda/environments/rmm_dev_cuda11.0.yml @@ -19,3 +19,4 @@ dependencies: - spdlog>=1.8.5,<1.9 - cython>=0.29,<0.30 - gcovr>=5.0 +- cuda-python>=11.5,<12.0 \ No newline at end of file diff --git a/conda/recipes/rmm/meta.yaml b/conda/recipes/rmm/meta.yaml index fd32170df..a8481eab9 100644 --- a/conda/recipes/rmm/meta.yaml +++ b/conda/recipes/rmm/meta.yaml @@ -30,6 +30,7 @@ requirements: - cython >=0.29,<0.30 - spdlog>=1.8.5,<2.0.0a0 - cudatoolkit {{ cuda_version }}.* + - cuda-python {{ cuda_python_version }} run: - python - numba >=0.49 @@ -38,6 +39,7 @@ requirements: - {{ pin_compatible('cudatoolkit', max_pin='x', lower_bound='11.2') }} # cudatoolkit >=11.2,<12.0.0 {% else %} - {{ pin_compatible('cudatoolkit', upper_bound='11.2', lower_bound='11.0') }} # cudatoolkit >=11.0,<11.2 + - cuda-python {{ cuda_python_version }} {% endif %} test: diff --git a/python/setup.py b/python/setup.py index ca70709bd..50888ab3c 100644 --- a/python/setup.py +++ b/python/setup.py @@ -25,7 +25,7 @@ import versioneer -install_requires = ["numba", "cython"] +install_requires = ["numba", "cython", "cuda-python"] def get_cuda_version_from_header(cuda_include_dir): From 54659cc6e7bc02ac2e5d085e8372ef0f6fbb05ce Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Tue, 7 Dec 2021 18:29:49 -0500 Subject: [PATCH 05/16] Small fixes --- conda/environments/rmm_dev_cuda10.1.yml | 22 ---------------------- conda/environments/rmm_dev_cuda10.2.yml | 22 ---------------------- conda/recipes/rmm/meta.yaml | 2 +- 3 files changed, 1 insertion(+), 45 deletions(-) delete mode 100644 conda/environments/rmm_dev_cuda10.1.yml delete mode 100644 conda/environments/rmm_dev_cuda10.2.yml diff --git a/conda/environments/rmm_dev_cuda10.1.yml b/conda/environments/rmm_dev_cuda10.1.yml deleted file mode 100644 index 4df013925..000000000 --- a/conda/environments/rmm_dev_cuda10.1.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: rmm_dev -channels: -- rapidsai -- conda-forge -dependencies: -- clang=11.1.0 -- clang-tools=11.1.0 -- cmake>=3.20.1 -- cmake-format=0.6.11 -- flake8=3.8.3 -- black=19.10 -- isort=5.6.4 -- python>=3.7,<3.9 -- numba>=0.49 -- numpy -- cffi>=1.10.0 -- pytest -- cudatoolkit=10.1 -- spdlog>=1.8.5,<1.9 -- cython>=0.29,<0.30 -- gcovr>=5.0 -- cuda-python>=11.5,<12.0 \ No newline at end of file diff --git a/conda/environments/rmm_dev_cuda10.2.yml b/conda/environments/rmm_dev_cuda10.2.yml deleted file mode 100644 index ce7e70d42..000000000 --- a/conda/environments/rmm_dev_cuda10.2.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: rmm_dev -channels: -- rapidsai -- conda-forge -dependencies: -- clang=11.1.0 -- clang-tools=11.1.0 -- cmake>=3.20.1 -- cmake-format=0.6.11 -- flake8=3.8.3 -- black=19.10 -- isort=5.6.4 -- python>=3.7,<3.9 -- numba>=0.49 -- numpy -- cffi>=1.10.0 -- pytest -- cudatoolkit=10.2 -- spdlog>=1.8.5,<1.9 -- cython>=0.29,<0.30 -- gcovr>=5.0 -- cuda-python>=11.5,<12.0 \ No newline at end of file diff --git a/conda/recipes/rmm/meta.yaml b/conda/recipes/rmm/meta.yaml index a8481eab9..c175a6b32 100644 --- a/conda/recipes/rmm/meta.yaml +++ b/conda/recipes/rmm/meta.yaml @@ -39,8 +39,8 @@ requirements: - {{ pin_compatible('cudatoolkit', max_pin='x', lower_bound='11.2') }} # cudatoolkit >=11.2,<12.0.0 {% else %} - {{ pin_compatible('cudatoolkit', upper_bound='11.2', lower_bound='11.0') }} # cudatoolkit >=11.0,<11.2 - - cuda-python {{ cuda_python_version }} {% endif %} + - cuda-python {{ cuda_python_version }} test: commands: From b07e7f6f1eebffa826e58143a8006c83eab56208 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Wed, 8 Dec 2021 07:43:48 -0500 Subject: [PATCH 06/16] Update conda/environments/rmm_dev_cuda11.0.yml Co-authored-by: Mark Harris --- conda/environments/rmm_dev_cuda11.0.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/environments/rmm_dev_cuda11.0.yml b/conda/environments/rmm_dev_cuda11.0.yml index a364e7169..793f1e5a4 100644 --- a/conda/environments/rmm_dev_cuda11.0.yml +++ b/conda/environments/rmm_dev_cuda11.0.yml @@ -19,4 +19,4 @@ dependencies: - spdlog>=1.8.5,<1.9 - cython>=0.29,<0.30 - gcovr>=5.0 -- cuda-python>=11.5,<12.0 \ No newline at end of file +- cuda-python>=11.5,<12.0 From afc75c2cfad5e7e3a1f3409f38c627b688a4c45b Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 16 Dec 2021 07:50:05 -0500 Subject: [PATCH 07/16] Fix CUDA Python version in meta.yaml --- conda/recipes/rmm/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/rmm/meta.yaml b/conda/recipes/rmm/meta.yaml index c175a6b32..ebcdc355b 100644 --- a/conda/recipes/rmm/meta.yaml +++ b/conda/recipes/rmm/meta.yaml @@ -30,7 +30,7 @@ requirements: - cython >=0.29,<0.30 - spdlog>=1.8.5,<2.0.0a0 - cudatoolkit {{ cuda_version }}.* - - cuda-python {{ cuda_python_version }} + - cuda-python >=11.5,<12.0 run: - python - numba >=0.49 From cf03ead31eedaa5b848344f89ae8f2e225927990 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 16 Dec 2021 09:25:32 -0500 Subject: [PATCH 08/16] Missed a meta.yaml --- conda/recipes/rmm/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/rmm/meta.yaml b/conda/recipes/rmm/meta.yaml index ebcdc355b..bd4bbd701 100644 --- a/conda/recipes/rmm/meta.yaml +++ b/conda/recipes/rmm/meta.yaml @@ -40,7 +40,7 @@ requirements: {% else %} - {{ pin_compatible('cudatoolkit', upper_bound='11.2', lower_bound='11.0') }} # cudatoolkit >=11.0,<11.2 {% endif %} - - cuda-python {{ cuda_python_version }} + - cuda-python >=11.5,<12.0 test: commands: From a11bc624abe0a535d391d2e2a96f602ac4d03094 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Wed, 5 Jan 2022 13:18:02 -0500 Subject: [PATCH 09/16] Add 11.5 and 11.6 dev yml files --- ..._dev_cuda11.0.yml => rmm_dev_cuda11.5.yml} | 2 +- conda/environments/rmm_dev_cuda11.6.yml | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) rename conda/environments/{rmm_dev_cuda11.0.yml => rmm_dev_cuda11.5.yml} (94%) create mode 100644 conda/environments/rmm_dev_cuda11.6.yml diff --git a/conda/environments/rmm_dev_cuda11.0.yml b/conda/environments/rmm_dev_cuda11.5.yml similarity index 94% rename from conda/environments/rmm_dev_cuda11.0.yml rename to conda/environments/rmm_dev_cuda11.5.yml index 793f1e5a4..549aca8d5 100644 --- a/conda/environments/rmm_dev_cuda11.0.yml +++ b/conda/environments/rmm_dev_cuda11.5.yml @@ -15,7 +15,7 @@ dependencies: - numpy - cffi>=1.10.0 - pytest -- cudatoolkit=11.0 +- cudatoolkit=11.5 - spdlog>=1.8.5,<1.9 - cython>=0.29,<0.30 - gcovr>=5.0 diff --git a/conda/environments/rmm_dev_cuda11.6.yml b/conda/environments/rmm_dev_cuda11.6.yml new file mode 100644 index 000000000..fba4c6afe --- /dev/null +++ b/conda/environments/rmm_dev_cuda11.6.yml @@ -0,0 +1,22 @@ +name: rmm_dev +channels: +- rapidsai +- conda-forge +dependencies: +- clang=11.1.0 +- clang-tools=11.1.0 +- cmake>=3.20.1 +- cmake-format=0.6.11 +- flake8=3.8.3 +- black=19.10 +- isort=5.6.4 +- python>=3.7,<3.9 +- numba>=0.49 +- numpy +- cffi>=1.10.0 +- pytest +- cudatoolkit=11.6 +- spdlog>=1.8.5,<1.9 +- cython>=0.29,<0.30 +- gcovr>=5.0 +- cuda-python>=11.5,<12.0 From abd30224f68f5271256ed7511c4199c32c643e9f Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Wed, 5 Jan 2022 13:20:24 -0500 Subject: [PATCH 10/16] No cudatoolkit 11.6 --- conda/environments/rmm_dev_cuda11.6.yml | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 conda/environments/rmm_dev_cuda11.6.yml diff --git a/conda/environments/rmm_dev_cuda11.6.yml b/conda/environments/rmm_dev_cuda11.6.yml deleted file mode 100644 index fba4c6afe..000000000 --- a/conda/environments/rmm_dev_cuda11.6.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: rmm_dev -channels: -- rapidsai -- conda-forge -dependencies: -- clang=11.1.0 -- clang-tools=11.1.0 -- cmake>=3.20.1 -- cmake-format=0.6.11 -- flake8=3.8.3 -- black=19.10 -- isort=5.6.4 -- python>=3.7,<3.9 -- numba>=0.49 -- numpy -- cffi>=1.10.0 -- pytest -- cudatoolkit=11.6 -- spdlog>=1.8.5,<1.9 -- cython>=0.29,<0.30 -- gcovr>=5.0 -- cuda-python>=11.5,<12.0 From 84994cbb749f8addb6430556a76cc7ee9bd165b5 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 13 Jan 2022 07:20:15 -0500 Subject: [PATCH 11/16] Fix type of arg --- python/rmm/_cuda/gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/rmm/_cuda/gpu.py b/python/rmm/_cuda/gpu.py index 21c97f062..076bab026 100644 --- a/python/rmm/_cuda/gpu.py +++ b/python/rmm/_cuda/gpu.py @@ -4,7 +4,7 @@ class CUDARuntimeError(RuntimeError): - def __init__(self, status: cuda.CUresult.CUDA_SUCCESS): + def __init__(self, status: cudart.cudaError_t): self.status = status _, name = cudart.cudaGetErrorName(status) _, msg = cudart.cudaGetErrorString(status) From 8be6c0b41c3261ec46015b5ce74b38d7dfaf7fed Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 13 Jan 2022 07:22:40 -0500 Subject: [PATCH 12/16] Check status after calls to cudaGetErrorName/String --- python/rmm/_cuda/gpu.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/python/rmm/_cuda/gpu.py b/python/rmm/_cuda/gpu.py index 076bab026..1ebe571da 100644 --- a/python/rmm/_cuda/gpu.py +++ b/python/rmm/_cuda/gpu.py @@ -6,8 +6,15 @@ class CUDARuntimeError(RuntimeError): def __init__(self, status: cudart.cudaError_t): self.status = status - _, name = cudart.cudaGetErrorName(status) - _, msg = cudart.cudaGetErrorString(status) + + err, name = cudart.cudaGetErrorName(status) + if err != cudart.cudaError_t.cudaSuccess: + raise CUDARuntimeError(err.value) + + err, msg = cudart.cudaGetErrorString(status) + if err != cudart.cudaError_t.cudaSuccess: + raise CUDARuntimeError(err.value) + super(CUDARuntimeError, self).__init__( "%s: %s" % (name.decode(), msg.decode()) ) From 73d318114651d889d3d498e75681f2748f75f35a Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Tue, 18 Jan 2022 10:34:49 -0500 Subject: [PATCH 13/16] Update python/rmm/_cuda/gpu.py Co-authored-by: Bradley Dice --- python/rmm/_cuda/gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/rmm/_cuda/gpu.py b/python/rmm/_cuda/gpu.py index 1ebe571da..1e9b93d0e 100644 --- a/python/rmm/_cuda/gpu.py +++ b/python/rmm/_cuda/gpu.py @@ -16,7 +16,7 @@ def __init__(self, status: cudart.cudaError_t): raise CUDARuntimeError(err.value) super(CUDARuntimeError, self).__init__( - "%s: %s" % (name.decode(), msg.decode()) + f"{name.decode()}: {msg.decode()}" ) def __reduce__(self): From ee80f22162aff2e4f216fd56e9210dafec7f9e91 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Tue, 18 Jan 2022 10:34:54 -0500 Subject: [PATCH 14/16] Update python/rmm/_cuda/gpu.py Co-authored-by: Bradley Dice --- python/rmm/_cuda/gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/rmm/_cuda/gpu.py b/python/rmm/_cuda/gpu.py index 1e9b93d0e..ad4cce5dc 100644 --- a/python/rmm/_cuda/gpu.py +++ b/python/rmm/_cuda/gpu.py @@ -36,7 +36,7 @@ def __init__(self, status: cuda.CUresult): raise CUDADriverError(err.value) super(CUDADriverError, self).__init__( - "%s: %s" % (name.decode(), msg.decode()) + f"{name.decode()}: {msg.decode()}" ) def __reduce__(self): From 0ee073c722e903b03f9e5228152d5b768ce8c3ce Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Tue, 18 Jan 2022 10:48:40 -0500 Subject: [PATCH 15/16] Avoid recursion in CUDADriverError and CUDARuntimeError --- python/rmm/_cuda/gpu.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/python/rmm/_cuda/gpu.py b/python/rmm/_cuda/gpu.py index ad4cce5dc..3ae7e736d 100644 --- a/python/rmm/_cuda/gpu.py +++ b/python/rmm/_cuda/gpu.py @@ -7,13 +7,11 @@ class CUDARuntimeError(RuntimeError): def __init__(self, status: cudart.cudaError_t): self.status = status - err, name = cudart.cudaGetErrorName(status) - if err != cudart.cudaError_t.cudaSuccess: - raise CUDARuntimeError(err.value) + _, name = cudart.cudaGetErrorName(status) + _, msg = cudart.cudaGetErrorString(status) - err, msg = cudart.cudaGetErrorString(status) - if err != cudart.cudaError_t.cudaSuccess: - raise CUDARuntimeError(err.value) + if name == "unrecognized error code": + raise RuntimeError(name.decode()) super(CUDARuntimeError, self).__init__( f"{name.decode()}: {msg.decode()}" @@ -27,13 +25,11 @@ class CUDADriverError(RuntimeError): def __init__(self, status: cuda.CUresult): self.status = status - err, name = cuda.cuGetErrorName(status) - if err != cuda.CUresult.CUDA_SUCCESS: - raise CUDADriverError(err.value) + _, name = cuda.cuGetErrorName(status) + _, msg = cuda.cuGetErrorString(status) - err, msg = cuda.cuGetErrorString(status) - if err != cuda.CUresult.CUDA_SUCCESS: - raise CUDADriverError(err.value) + if msg == b"unrecognized error code": + raise RuntimeError(name.decode()) super(CUDADriverError, self).__init__( f"{name.decode()}: {msg.decode()}" From 8e4b680ab303f9b7b3726bcf859eca67ee06a854 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Tue, 18 Jan 2022 11:35:16 -0500 Subject: [PATCH 16/16] Just don't worry about it. --- python/rmm/_cuda/gpu.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/python/rmm/_cuda/gpu.py b/python/rmm/_cuda/gpu.py index 3ae7e736d..a1ce61564 100644 --- a/python/rmm/_cuda/gpu.py +++ b/python/rmm/_cuda/gpu.py @@ -10,9 +10,6 @@ def __init__(self, status: cudart.cudaError_t): _, name = cudart.cudaGetErrorName(status) _, msg = cudart.cudaGetErrorString(status) - if name == "unrecognized error code": - raise RuntimeError(name.decode()) - super(CUDARuntimeError, self).__init__( f"{name.decode()}: {msg.decode()}" ) @@ -28,9 +25,6 @@ def __init__(self, status: cuda.CUresult): _, name = cuda.cuGetErrorName(status) _, msg = cuda.cuGetErrorString(status) - if msg == b"unrecognized error code": - raise RuntimeError(name.decode()) - super(CUDADriverError, self).__init__( f"{name.decode()}: {msg.decode()}" )