From ae7e9793e70c503d2df4a764fc382a15b46a85f3 Mon Sep 17 00:00:00 2001
From: Alfred Xu <lovedreamf@gmail.com>
Date: Sun, 24 Apr 2022 09:53:05 +0800
Subject: [PATCH] JNI: throw CUDA errors more specifically (#10551)

This PR is for https://github.com/NVIDIA/spark-rapids/issues/5029  and https://github.com/NVIDIA/spark-rapids/issues/1870, which enables cuDF JNI to throw CUDA errors with specific error code.  This PR relies on #10630, which exposes the CUDA error code and distinguishes fatal CUDA errors from the others.

With this improvement, it is supposed to be easier to track CUDA errors triggered by JVM APIs.

Authors:
  - Alfred Xu (https://github.com/sperlingxx)

Approvers:
  - Jason Lowe (https://github.com/jlowe)

URL: https://github.com/rapidsai/cudf/pull/10551
---
 .../java/ai/rapids/cudf/CudaException.java    | 281 +++++++++++++++++-
 .../ai/rapids/cudf/CudaFatalException.java    |  31 ++
 java/src/main/native/include/jni_utils.hpp    |  60 +++-
 java/src/main/native/src/CudaJni.cpp          |  63 ++--
 .../test/java/ai/rapids/cudf/CudaTest.java    |  17 +-
 5 files changed, 400 insertions(+), 52 deletions(-)
 create mode 100644 java/src/main/java/ai/rapids/cudf/CudaFatalException.java

diff --git a/java/src/main/java/ai/rapids/cudf/CudaException.java b/java/src/main/java/ai/rapids/cudf/CudaException.java
index 2d862b47ef8..ff7ca308f3c 100755
--- a/java/src/main/java/ai/rapids/cudf/CudaException.java
+++ b/java/src/main/java/ai/rapids/cudf/CudaException.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,6 +15,9 @@
  */
 package ai.rapids.cudf;
 
+import java.util.HashMap;
+import java.util.Map;
+
 /**
  * Exception from the cuda language/library.  Be aware that because of how cuda does asynchronous
  * processing exceptions from cuda can be thrown by method calls that did not cause the exception
@@ -28,11 +31,283 @@
  * don't switch between threads for different parts of processing that can be retried as a chunk.
  */
 public class CudaException extends RuntimeException {
-  CudaException(String message) {
+  CudaException(String message, int errorCode) {
     super(message);
+    cudaError = CudaError.parseErrorCode(errorCode);
   }
 
-  CudaException(String message, Throwable cause) {
+  CudaException(String message, int errorCode, Throwable cause) {
     super(message, cause);
+    cudaError = CudaError.parseErrorCode(errorCode);
+  }
+
+  public final CudaError cudaError;
+
+  /**
+   * The Java mirror of cudaError, which facilities the tracking of CUDA errors in JVM.
+   */
+  public enum CudaError {
+    UnknownNativeError(-1), // native CUDA error type which Java doesn't have a representation
+    cudaErrorInvalidValue(1),
+    cudaErrorMemoryAllocation(2),
+    cudaErrorInitializationError(3),
+    cudaErrorCudartUnloading(4),
+    cudaErrorProfilerDisabled(5),
+    cudaErrorProfilerNotInitialized(6),
+    cudaErrorProfilerAlreadyStarted(7),
+    cudaErrorProfilerAlreadyStopped(8),
+    cudaErrorInvalidConfiguration(9),
+    cudaErrorInvalidPitchValue(12),
+    cudaErrorInvalidSymbol(13),
+    cudaErrorInvalidHostPointer(16),
+    cudaErrorInvalidDevicePointer(17),
+    cudaErrorInvalidTexture(18),
+    cudaErrorInvalidTextureBinding(19),
+    cudaErrorInvalidChannelDescriptor(20),
+    cudaErrorInvalidMemcpyDirection(21),
+    cudaErrorAddressOfConstant(22),
+    cudaErrorTextureFetchFailed(23),
+    cudaErrorTextureNotBound(24),
+    cudaErrorSynchronizationError(25),
+    cudaErrorInvalidFilterSetting(26),
+    cudaErrorInvalidNormSetting(27),
+    cudaErrorMixedDeviceExecution(28),
+    cudaErrorNotYetImplemented(31),
+    cudaErrorMemoryValueTooLarge(32),
+    cudaErrorStubLibrary(34),
+    cudaErrorInsufficientDriver(35),
+    cudaErrorCallRequiresNewerDriver(36),
+    cudaErrorInvalidSurface(37),
+    cudaErrorDuplicateVariableName(43),
+    cudaErrorDuplicateTextureName(44),
+    cudaErrorDuplicateSurfaceName(45),
+    cudaErrorDevicesUnavailable(46),
+    cudaErrorIncompatibleDriverContext(49),
+    cudaErrorMissingConfiguration(52),
+    cudaErrorPriorLaunchFailure(53),
+    cudaErrorLaunchMaxDepthExceeded(65),
+    cudaErrorLaunchFileScopedTex(66),
+    cudaErrorLaunchFileScopedSurf(67),
+    cudaErrorSyncDepthExceeded(68),
+    cudaErrorLaunchPendingCountExceeded(69),
+    cudaErrorInvalidDeviceFunction(98),
+    cudaErrorNoDevice(100),
+    cudaErrorInvalidDevice(101),
+    cudaErrorDeviceNotLicensed(102),
+    cudaErrorSoftwareValidityNotEstablished(103),
+    cudaErrorStartupFailure(127),
+    cudaErrorInvalidKernelImage(200),
+    cudaErrorDeviceUninitialized(201),
+    cudaErrorMapBufferObjectFailed(205),
+    cudaErrorUnmapBufferObjectFailed(206),
+    cudaErrorArrayIsMapped(207),
+    cudaErrorAlreadyMapped(208),
+    cudaErrorNoKernelImageForDevice(209),
+    cudaErrorAlreadyAcquired(210),
+    cudaErrorNotMapped(211),
+    cudaErrorNotMappedAsArray(212),
+    cudaErrorNotMappedAsPointer(213),
+    cudaErrorECCUncorrectable(214),
+    cudaErrorUnsupportedLimit(215),
+    cudaErrorDeviceAlreadyInUse(216),
+    cudaErrorPeerAccessUnsupported(217),
+    cudaErrorInvalidPtx(218),
+    cudaErrorInvalidGraphicsContext(219),
+    cudaErrorNvlinkUncorrectable(220),
+    cudaErrorJitCompilerNotFound(221),
+    cudaErrorUnsupportedPtxVersion(222),
+    cudaErrorJitCompilationDisabled(223),
+    cudaErrorUnsupportedExecAffinity(224),
+    cudaErrorInvalidSource(300),
+    cudaErrorFileNotFound(301),
+    cudaErrorSharedObjectSymbolNotFound(302),
+    cudaErrorSharedObjectInitFailed(303),
+    cudaErrorOperatingSystem(304),
+    cudaErrorInvalidResourceHandle(400),
+    cudaErrorIllegalState(401),
+    cudaErrorSymbolNotFound(500),
+    cudaErrorNotReady(600),
+    cudaErrorIllegalAddress(700),
+    cudaErrorLaunchOutOfResources(701),
+    cudaErrorLaunchTimeout(702),
+    cudaErrorLaunchIncompatibleTexturing(703),
+    cudaErrorPeerAccessAlreadyEnabled(704),
+    cudaErrorPeerAccessNotEnabled(705),
+    cudaErrorSetOnActiveProcess(708),
+    cudaErrorContextIsDestroyed(709),
+    cudaErrorAssert(710),
+    cudaErrorTooManyPeers(711),
+    cudaErrorHostMemoryAlreadyRegistered(712),
+    cudaErrorHostMemoryNotRegistered(713),
+    cudaErrorHardwareStackError(714),
+    cudaErrorIllegalInstruction(715),
+    cudaErrorMisalignedAddress(716),
+    cudaErrorInvalidAddressSpace(717),
+    cudaErrorInvalidPc(718),
+    cudaErrorLaunchFailure(719),
+    cudaErrorCooperativeLaunchTooLarge(720),
+    cudaErrorNotPermitted(800),
+    cudaErrorNotSupported(801),
+    cudaErrorSystemNotReady(802),
+    cudaErrorSystemDriverMismatch(803),
+    cudaErrorCompatNotSupportedOnDevice(804),
+    cudaErrorMpsConnectionFailed(805),
+    cudaErrorMpsRpcFailure(806),
+    cudaErrorMpsServerNotReady(807),
+    cudaErrorMpsMaxClientsReached(808),
+    cudaErrorMpsMaxConnectionsReached(809),
+    cudaErrorStreamCaptureUnsupported(900),
+    cudaErrorStreamCaptureInvalidated(901),
+    cudaErrorStreamCaptureMerge(902),
+    cudaErrorStreamCaptureUnmatched(903),
+    cudaErrorStreamCaptureUnjoined(904),
+    cudaErrorStreamCaptureIsolation(905),
+    cudaErrorStreamCaptureImplicit(906),
+    cudaErrorCapturedEvent(907),
+    cudaErrorStreamCaptureWrongThread(908),
+    cudaErrorTimeout(909),
+    cudaErrorGraphExecUpdateFailure(910),
+    cudaErrorExternalDevice(911),
+    cudaErrorUnknown(999),
+    cudaErrorApiFailureBase(10000);
+
+    final int code;
+
+    private static Map<Integer, CudaError> codeToError = new HashMap<Integer, CudaError>(){{
+      put(cudaErrorInvalidValue.code, cudaErrorInvalidValue);
+      put(cudaErrorMemoryAllocation.code, cudaErrorMemoryAllocation);
+      put(cudaErrorInitializationError.code, cudaErrorInitializationError);
+      put(cudaErrorCudartUnloading.code, cudaErrorCudartUnloading);
+      put(cudaErrorProfilerDisabled.code, cudaErrorProfilerDisabled);
+      put(cudaErrorProfilerNotInitialized.code, cudaErrorProfilerNotInitialized);
+      put(cudaErrorProfilerAlreadyStarted.code, cudaErrorProfilerAlreadyStarted);
+      put(cudaErrorProfilerAlreadyStopped.code, cudaErrorProfilerAlreadyStopped);
+      put(cudaErrorInvalidConfiguration.code, cudaErrorInvalidConfiguration);
+      put(cudaErrorInvalidPitchValue.code, cudaErrorInvalidPitchValue);
+      put(cudaErrorInvalidSymbol.code, cudaErrorInvalidSymbol);
+      put(cudaErrorInvalidHostPointer.code, cudaErrorInvalidHostPointer);
+      put(cudaErrorInvalidDevicePointer.code, cudaErrorInvalidDevicePointer);
+      put(cudaErrorInvalidTexture.code, cudaErrorInvalidTexture);
+      put(cudaErrorInvalidTextureBinding.code, cudaErrorInvalidTextureBinding);
+      put(cudaErrorInvalidChannelDescriptor.code, cudaErrorInvalidChannelDescriptor);
+      put(cudaErrorInvalidMemcpyDirection.code, cudaErrorInvalidMemcpyDirection);
+      put(cudaErrorAddressOfConstant.code, cudaErrorAddressOfConstant);
+      put(cudaErrorTextureFetchFailed.code, cudaErrorTextureFetchFailed);
+      put(cudaErrorTextureNotBound.code, cudaErrorTextureNotBound);
+      put(cudaErrorSynchronizationError.code, cudaErrorSynchronizationError);
+      put(cudaErrorInvalidFilterSetting.code, cudaErrorInvalidFilterSetting);
+      put(cudaErrorInvalidNormSetting.code, cudaErrorInvalidNormSetting);
+      put(cudaErrorMixedDeviceExecution.code, cudaErrorMixedDeviceExecution);
+      put(cudaErrorNotYetImplemented.code, cudaErrorNotYetImplemented);
+      put(cudaErrorMemoryValueTooLarge.code, cudaErrorMemoryValueTooLarge);
+      put(cudaErrorStubLibrary.code, cudaErrorStubLibrary);
+      put(cudaErrorInsufficientDriver.code, cudaErrorInsufficientDriver);
+      put(cudaErrorCallRequiresNewerDriver.code, cudaErrorCallRequiresNewerDriver);
+      put(cudaErrorInvalidSurface.code, cudaErrorInvalidSurface);
+      put(cudaErrorDuplicateVariableName.code, cudaErrorDuplicateVariableName);
+      put(cudaErrorDuplicateTextureName.code, cudaErrorDuplicateTextureName);
+      put(cudaErrorDuplicateSurfaceName.code, cudaErrorDuplicateSurfaceName);
+      put(cudaErrorDevicesUnavailable.code, cudaErrorDevicesUnavailable);
+      put(cudaErrorIncompatibleDriverContext.code, cudaErrorIncompatibleDriverContext);
+      put(cudaErrorMissingConfiguration.code, cudaErrorMissingConfiguration);
+      put(cudaErrorPriorLaunchFailure.code, cudaErrorPriorLaunchFailure);
+      put(cudaErrorLaunchMaxDepthExceeded.code, cudaErrorLaunchMaxDepthExceeded);
+      put(cudaErrorLaunchFileScopedTex.code, cudaErrorLaunchFileScopedTex);
+      put(cudaErrorLaunchFileScopedSurf.code, cudaErrorLaunchFileScopedSurf);
+      put(cudaErrorSyncDepthExceeded.code, cudaErrorSyncDepthExceeded);
+      put(cudaErrorLaunchPendingCountExceeded.code, cudaErrorLaunchPendingCountExceeded);
+      put(cudaErrorInvalidDeviceFunction.code, cudaErrorInvalidDeviceFunction);
+      put(cudaErrorNoDevice.code, cudaErrorNoDevice);
+      put(cudaErrorInvalidDevice.code, cudaErrorInvalidDevice);
+      put(cudaErrorDeviceNotLicensed.code, cudaErrorDeviceNotLicensed);
+      put(cudaErrorSoftwareValidityNotEstablished.code, cudaErrorSoftwareValidityNotEstablished);
+      put(cudaErrorStartupFailure.code, cudaErrorStartupFailure);
+      put(cudaErrorInvalidKernelImage.code, cudaErrorInvalidKernelImage);
+      put(cudaErrorDeviceUninitialized.code, cudaErrorDeviceUninitialized);
+      put(cudaErrorMapBufferObjectFailed.code, cudaErrorMapBufferObjectFailed);
+      put(cudaErrorUnmapBufferObjectFailed.code, cudaErrorUnmapBufferObjectFailed);
+      put(cudaErrorArrayIsMapped.code, cudaErrorArrayIsMapped);
+      put(cudaErrorAlreadyMapped.code, cudaErrorAlreadyMapped);
+      put(cudaErrorNoKernelImageForDevice.code, cudaErrorNoKernelImageForDevice);
+      put(cudaErrorAlreadyAcquired.code, cudaErrorAlreadyAcquired);
+      put(cudaErrorNotMapped.code, cudaErrorNotMapped);
+      put(cudaErrorNotMappedAsArray.code, cudaErrorNotMappedAsArray);
+      put(cudaErrorNotMappedAsPointer.code, cudaErrorNotMappedAsPointer);
+      put(cudaErrorECCUncorrectable.code, cudaErrorECCUncorrectable);
+      put(cudaErrorUnsupportedLimit.code, cudaErrorUnsupportedLimit);
+      put(cudaErrorDeviceAlreadyInUse.code, cudaErrorDeviceAlreadyInUse);
+      put(cudaErrorPeerAccessUnsupported.code, cudaErrorPeerAccessUnsupported);
+      put(cudaErrorInvalidPtx.code, cudaErrorInvalidPtx);
+      put(cudaErrorInvalidGraphicsContext.code, cudaErrorInvalidGraphicsContext);
+      put(cudaErrorNvlinkUncorrectable.code, cudaErrorNvlinkUncorrectable);
+      put(cudaErrorJitCompilerNotFound.code, cudaErrorJitCompilerNotFound);
+      put(cudaErrorUnsupportedPtxVersion.code, cudaErrorUnsupportedPtxVersion);
+      put(cudaErrorJitCompilationDisabled.code, cudaErrorJitCompilationDisabled);
+      put(cudaErrorUnsupportedExecAffinity.code, cudaErrorUnsupportedExecAffinity);
+      put(cudaErrorInvalidSource.code, cudaErrorInvalidSource);
+      put(cudaErrorFileNotFound.code, cudaErrorFileNotFound);
+      put(cudaErrorSharedObjectSymbolNotFound.code, cudaErrorSharedObjectSymbolNotFound);
+      put(cudaErrorSharedObjectInitFailed.code, cudaErrorSharedObjectInitFailed);
+      put(cudaErrorOperatingSystem.code, cudaErrorOperatingSystem);
+      put(cudaErrorInvalidResourceHandle.code, cudaErrorInvalidResourceHandle);
+      put(cudaErrorIllegalState.code, cudaErrorIllegalState);
+      put(cudaErrorSymbolNotFound.code, cudaErrorSymbolNotFound);
+      put(cudaErrorNotReady.code, cudaErrorNotReady);
+      put(cudaErrorIllegalAddress.code, cudaErrorIllegalAddress);
+      put(cudaErrorLaunchOutOfResources.code, cudaErrorLaunchOutOfResources);
+      put(cudaErrorLaunchTimeout.code, cudaErrorLaunchTimeout);
+      put(cudaErrorLaunchIncompatibleTexturing.code, cudaErrorLaunchIncompatibleTexturing);
+      put(cudaErrorPeerAccessAlreadyEnabled.code, cudaErrorPeerAccessAlreadyEnabled);
+      put(cudaErrorPeerAccessNotEnabled.code, cudaErrorPeerAccessNotEnabled);
+      put(cudaErrorSetOnActiveProcess.code, cudaErrorSetOnActiveProcess);
+      put(cudaErrorContextIsDestroyed.code, cudaErrorContextIsDestroyed);
+      put(cudaErrorAssert.code, cudaErrorAssert);
+      put(cudaErrorTooManyPeers.code, cudaErrorTooManyPeers);
+      put(cudaErrorHostMemoryAlreadyRegistered.code, cudaErrorHostMemoryAlreadyRegistered);
+      put(cudaErrorHostMemoryNotRegistered.code, cudaErrorHostMemoryNotRegistered);
+      put(cudaErrorHardwareStackError.code, cudaErrorHardwareStackError);
+      put(cudaErrorIllegalInstruction.code, cudaErrorIllegalInstruction);
+      put(cudaErrorMisalignedAddress.code, cudaErrorMisalignedAddress);
+      put(cudaErrorInvalidAddressSpace.code, cudaErrorInvalidAddressSpace);
+      put(cudaErrorInvalidPc.code, cudaErrorInvalidPc);
+      put(cudaErrorLaunchFailure.code, cudaErrorLaunchFailure);
+      put(cudaErrorCooperativeLaunchTooLarge.code, cudaErrorCooperativeLaunchTooLarge);
+      put(cudaErrorNotPermitted.code, cudaErrorNotPermitted);
+      put(cudaErrorNotSupported.code, cudaErrorNotSupported);
+      put(cudaErrorSystemNotReady.code, cudaErrorSystemNotReady);
+      put(cudaErrorSystemDriverMismatch.code, cudaErrorSystemDriverMismatch);
+      put(cudaErrorCompatNotSupportedOnDevice.code, cudaErrorCompatNotSupportedOnDevice);
+      put(cudaErrorMpsConnectionFailed.code, cudaErrorMpsConnectionFailed);
+      put(cudaErrorMpsRpcFailure.code, cudaErrorMpsRpcFailure);
+      put(cudaErrorMpsServerNotReady.code, cudaErrorMpsServerNotReady);
+      put(cudaErrorMpsMaxClientsReached.code, cudaErrorMpsMaxClientsReached);
+      put(cudaErrorMpsMaxConnectionsReached.code, cudaErrorMpsMaxConnectionsReached);
+      put(cudaErrorStreamCaptureUnsupported.code, cudaErrorStreamCaptureUnsupported);
+      put(cudaErrorStreamCaptureInvalidated.code, cudaErrorStreamCaptureInvalidated);
+      put(cudaErrorStreamCaptureMerge.code, cudaErrorStreamCaptureMerge);
+      put(cudaErrorStreamCaptureUnmatched.code, cudaErrorStreamCaptureUnmatched);
+      put(cudaErrorStreamCaptureUnjoined.code, cudaErrorStreamCaptureUnjoined);
+      put(cudaErrorStreamCaptureIsolation.code, cudaErrorStreamCaptureIsolation);
+      put(cudaErrorStreamCaptureImplicit.code, cudaErrorStreamCaptureImplicit);
+      put(cudaErrorCapturedEvent.code, cudaErrorCapturedEvent);
+      put(cudaErrorStreamCaptureWrongThread.code, cudaErrorStreamCaptureWrongThread);
+      put(cudaErrorTimeout.code, cudaErrorTimeout);
+      put(cudaErrorGraphExecUpdateFailure.code, cudaErrorGraphExecUpdateFailure);
+      put(cudaErrorExternalDevice.code, cudaErrorExternalDevice);
+      put(cudaErrorUnknown.code, cudaErrorUnknown);
+      put(cudaErrorApiFailureBase.code, cudaErrorApiFailureBase);
+    }};
+
+    CudaError(int errorCode) {
+      this.code = errorCode;
+    }
+
+    public static CudaError parseErrorCode(int errorCode) {
+      if (!codeToError.containsKey(errorCode)) {
+        return UnknownNativeError;
+      }
+      return codeToError.get(errorCode);
+    }
+
   }
 }
diff --git a/java/src/main/java/ai/rapids/cudf/CudaFatalException.java b/java/src/main/java/ai/rapids/cudf/CudaFatalException.java
new file mode 100644
index 00000000000..cf36726aa80
--- /dev/null
+++ b/java/src/main/java/ai/rapids/cudf/CudaFatalException.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.rapids.cudf;
+
+/**
+ * CudaFatalException is a kind of CudaException which leaves the process in an inconsistent state
+ * and any further CUDA work will return the same error.
+ * To continue using CUDA, the process must be terminated and relaunched.
+ */
+public class CudaFatalException extends CudaException {
+  CudaFatalException(String message, int errorCode) {
+    super(message, errorCode);
+  }
+
+  CudaFatalException(String message, int errorCode, Throwable cause) {
+    super(message, errorCode, cause);
+  }
+}
diff --git a/java/src/main/native/include/jni_utils.hpp b/java/src/main/native/include/jni_utils.hpp
index a45716a89b3..eca424132a5 100644
--- a/java/src/main/native/include/jni_utils.hpp
+++ b/java/src/main/native/include/jni_utils.hpp
@@ -30,6 +30,7 @@ namespace jni {
 constexpr jint MINIMUM_JNI_VERSION = JNI_VERSION_1_6;
 
 constexpr char const *CUDA_ERROR_CLASS = "ai/rapids/cudf/CudaException";
+constexpr char const *CUDA_FATAL_ERROR_CLASS = "ai/rapids/cudf/CudaFatalException";
 constexpr char const *CUDF_ERROR_CLASS = "ai/rapids/cudf/CudfException";
 constexpr char const *INDEX_OOB_CLASS = "java/lang/ArrayIndexOutOfBoundsException";
 constexpr char const *ILLEGAL_ARG_CLASS = "java/lang/IllegalArgumentException";
@@ -737,12 +738,26 @@ class native_jstringArray {
  * @brief create a cuda exception from a given cudaError_t
  */
 inline jthrowable cuda_exception(JNIEnv *const env, cudaError_t status, jthrowable cause = NULL) {
-  jclass ex_class = env->FindClass(cudf::jni::CUDA_ERROR_CLASS);
+  const char *ex_class_name;
+
+  // Calls cudaGetLastError twice. It is nearly certain that a fatal error occurred if the second
+  // call doesn't return with cudaSuccess.
+  cudaGetLastError();
+  auto const last = cudaGetLastError();
+  // Call cudaDeviceSynchronize to ensure `last` did not result from an asynchronous error.
+  // between two calls.
+  if (status == last && last == cudaDeviceSynchronize()) {
+    ex_class_name = cudf::jni::CUDA_FATAL_ERROR_CLASS;
+  } else {
+    ex_class_name = cudf::jni::CUDA_ERROR_CLASS;
+  }
+
+  jclass ex_class = env->FindClass(ex_class_name);
   if (ex_class == NULL) {
     return NULL;
   }
   jmethodID ctor_id =
-      env->GetMethodID(ex_class, "<init>", "(Ljava/lang/String;Ljava/lang/Throwable;)V");
+      env->GetMethodID(ex_class, "<init>", "(Ljava/lang/String;ILjava/lang/Throwable;)V");
   if (ctor_id == NULL) {
     return NULL;
   }
@@ -752,19 +767,20 @@ inline jthrowable cuda_exception(JNIEnv *const env, cudaError_t status, jthrowab
     return NULL;
   }
 
-  jobject ret = env->NewObject(ex_class, ctor_id, msg, cause);
+  jint err_code = static_cast<jint>(status);
+
+  jobject ret = env->NewObject(ex_class, ctor_id, msg, err_code, cause);
   return (jthrowable)ret;
 }
 
 inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) {
   if (cudaSuccess != cuda_status) {
-    // Clear the last error so it does not propagate.
-    cudaGetLastError();
     jthrowable jt = cuda_exception(env, cuda_status);
     if (jt != NULL) {
       env->Throw(jt);
-      throw jni_exception("CUDA ERROR");
     }
+    throw jni_exception(std::string("CUDA ERROR: code ") +
+                        std::to_string(static_cast<int>(cuda_status)));
   }
 }
 
@@ -790,18 +806,26 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) {
     JNI_THROW_NEW(env, class_name, message, ret_val)                                               \
   }
 
-#define JNI_CUDA_TRY(env, ret_val, call)                                                           \
+// Throw a new exception only if one is not pending then always return with the specified value
+#define JNI_CHECK_CUDA_ERROR(env, class_name, e, ret_val)                                          \
   {                                                                                                \
-    cudaError_t internal_cuda_status = (call);                                                     \
-    if (cudaSuccess != internal_cuda_status) {                                                     \
-      /* Clear the last error so it does not propagate.*/                                          \
-      cudaGetLastError();                                                                          \
-      jthrowable jt = cudf::jni::cuda_exception(env, internal_cuda_status);                        \
-      if (jt != NULL) {                                                                            \
-        env->Throw(jt);                                                                            \
-      }                                                                                            \
+    if (env->ExceptionOccurred()) {                                                                \
       return ret_val;                                                                              \
     }                                                                                              \
+    std::string n_msg = e.what() == nullptr ? "" : e.what();                                       \
+    jstring j_msg = env->NewStringUTF(n_msg.c_str());                                              \
+    jint e_code = static_cast<jint>(e.error_code());                                               \
+    jclass ex_class = env->FindClass(class_name);                                                  \
+    if (ex_class != NULL) {                                                                        \
+      jmethodID ctor_id = env->GetMethodID(ex_class, "<init>", "(Ljava/lang/String;I)V");          \
+      if (ctor_id != NULL) {                                                                       \
+        jobject cuda_error = env->NewObject(ex_class, ctor_id, j_msg, e_code);                     \
+        if (cuda_error != NULL) {                                                                  \
+          env->Throw((jthrowable)cuda_error);                                                      \
+        }                                                                                          \
+      }                                                                                            \
+    }                                                                                              \
+    return ret_val;                                                                                \
   }
 
 #define JNI_NULL_CHECK(env, obj, error_msg, ret_val)                                               \
@@ -831,6 +855,12 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) {
         std::string("Could not allocate native memory: ") + (e.what() == nullptr ? "" : e.what()); \
     JNI_CHECK_THROW_NEW(env, cudf::jni::OOM_CLASS, what.c_str(), ret_val);                         \
   }                                                                                                \
+  catch (const cudf::fatal_cuda_error &e) {                                                        \
+    JNI_CHECK_CUDA_ERROR(env, cudf::jni::CUDA_FATAL_ERROR_CLASS, e, ret_val);                      \
+  }                                                                                                \
+  catch (const cudf::cuda_error &e) {                                                              \
+    JNI_CHECK_CUDA_ERROR(env, cudf::jni::CUDA_ERROR_CLASS, e, ret_val);                            \
+  }                                                                                                \
   catch (const std::exception &e) {                                                                \
     /* If jni_exception caught then a Java exception is pending and this will not overwrite it. */ \
     JNI_CHECK_THROW_NEW(env, class_name, e.what(), ret_val);                                       \
diff --git a/java/src/main/native/src/CudaJni.cpp b/java/src/main/native/src/CudaJni.cpp
index 9862c3bface..926521c55f9 100644
--- a/java/src/main/native/src/CudaJni.cpp
+++ b/java/src/main/native/src/CudaJni.cpp
@@ -15,6 +15,7 @@
  */
 
 #include <cuda_profiler_api.h>
+#include <cudf/utilities/error.hpp>
 #include <rmm/device_buffer.hpp>
 
 #include "jni_utils.hpp"
@@ -66,7 +67,7 @@ JNIEXPORT jobject JNICALL Java_ai_rapids_cudf_Cuda_memGetInfo(JNIEnv *env, jclas
     cudf::jni::auto_set_device(env);
 
     size_t free, total;
-    JNI_CUDA_TRY(env, NULL, cudaMemGetInfo(&free, &total));
+    CUDF_CUDA_TRY(cudaMemGetInfo(&free, &total));
 
     jclass info_class = env->FindClass("Lai/rapids/cudf/CudaMemInfo;");
     if (info_class == NULL) {
@@ -90,7 +91,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_hostAllocPinned(JNIEnv *env, jc
   try {
     cudf::jni::auto_set_device(env);
     void *ret = nullptr;
-    JNI_CUDA_TRY(env, 0, cudaMallocHost(&ret, size));
+    CUDF_CUDA_TRY(cudaMallocHost(&ret, size));
     return reinterpret_cast<jlong>(ret);
   }
   CATCH_STD(env, 0);
@@ -99,7 +100,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_hostAllocPinned(JNIEnv *env, jc
 JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_freePinned(JNIEnv *env, jclass, jlong ptr) {
   try {
     cudf::jni::auto_set_device(env);
-    JNI_CUDA_TRY(env, , cudaFreeHost(reinterpret_cast<void *>(ptr)));
+    CUDF_CUDA_TRY(cudaFreeHost(reinterpret_cast<void *>(ptr)));
   }
   CATCH_STD(env, );
 }
@@ -109,8 +110,8 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_memset(JNIEnv *env, jclass, jlon
   JNI_NULL_CHECK(env, dst, "dst memory pointer is null", );
   try {
     cudf::jni::auto_set_device(env);
-    JNI_CUDA_TRY(env, , cudaMemsetAsync((void *)dst, value, count));
-    JNI_CUDA_TRY(env, , cudaStreamSynchronize(0));
+    CUDF_CUDA_TRY(cudaMemsetAsync((void *)dst, value, count));
+    CUDF_CUDA_TRY(cudaStreamSynchronize(0));
   }
   CATCH_STD(env, );
 }
@@ -120,7 +121,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_asyncMemset(JNIEnv *env, jclass,
   JNI_NULL_CHECK(env, dst, "dst memory pointer is null", );
   try {
     cudf::jni::auto_set_device(env);
-    JNI_CUDA_TRY(env, , cudaMemsetAsync((void *)dst, value, count));
+    CUDF_CUDA_TRY(cudaMemsetAsync((void *)dst, value, count));
   }
   CATCH_STD(env, );
 }
@@ -129,7 +130,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getDevice(JNIEnv *env, jclass) {
   try {
     cudf::jni::auto_set_device(env);
     jint dev;
-    JNI_CUDA_TRY(env, -2, cudaGetDevice(&dev));
+    CUDF_CUDA_TRY(cudaGetDevice(&dev));
     return dev;
   }
   CATCH_STD(env, -2);
@@ -139,7 +140,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getDeviceCount(JNIEnv *env, jcla
   try {
     cudf::jni::auto_set_device(env);
     jint count;
-    JNI_CUDA_TRY(env, -2, cudaGetDeviceCount(&count));
+    CUDF_CUDA_TRY(cudaGetDeviceCount(&count));
     return count;
   }
   CATCH_STD(env, -2);
@@ -151,7 +152,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_setDevice(JNIEnv *env, jclass, j
       cudf::jni::throw_java_exception(env, cudf::jni::CUDF_ERROR_CLASS,
                                       "Cannot change device after RMM init");
     }
-    JNI_CUDA_TRY(env, , cudaSetDevice(dev));
+    CUDF_CUDA_TRY(cudaSetDevice(dev));
   }
   CATCH_STD(env, );
 }
@@ -167,7 +168,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getDriverVersion(JNIEnv *env, jc
   try {
     cudf::jni::auto_set_device(env);
     jint driver_version;
-    JNI_CUDA_TRY(env, -2, cudaDriverGetVersion(&driver_version));
+    CUDF_CUDA_TRY(cudaDriverGetVersion(&driver_version));
     return driver_version;
   }
   CATCH_STD(env, -2);
@@ -177,7 +178,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getRuntimeVersion(JNIEnv *env, j
   try {
     cudf::jni::auto_set_device(env);
     jint runtime_version;
-    JNI_CUDA_TRY(env, -2, cudaRuntimeGetVersion(&runtime_version));
+    CUDF_CUDA_TRY(cudaRuntimeGetVersion(&runtime_version));
     return runtime_version;
   }
   CATCH_STD(env, -2);
@@ -187,9 +188,9 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getNativeComputeMode(JNIEnv *env
   try {
     cudf::jni::auto_set_device(env);
     int device;
-    JNI_CUDA_TRY(env, -2, cudaGetDevice(&device));
+    CUDF_CUDA_TRY(cudaGetDevice(&device));
     cudaDeviceProp device_prop;
-    JNI_CUDA_TRY(env, -2, cudaGetDeviceProperties(&device_prop, device));
+    CUDF_CUDA_TRY(cudaGetDeviceProperties(&device_prop, device));
     return device_prop.computeMode;
   }
   CATCH_STD(env, -2);
@@ -199,10 +200,9 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMajor(JNIEnv
   try {
     cudf::jni::auto_set_device(env);
     int device;
-    JNI_CUDA_TRY(env, -2, ::cudaGetDevice(&device));
+    CUDF_CUDA_TRY(::cudaGetDevice(&device));
     int attribute_value;
-    JNI_CUDA_TRY(
-        env, -2,
+    CUDF_CUDA_TRY(
         ::cudaDeviceGetAttribute(&attribute_value, ::cudaDevAttrComputeCapabilityMajor, device));
     return attribute_value;
   }
@@ -213,10 +213,9 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMinor(JNIEnv
   try {
     cudf::jni::auto_set_device(env);
     int device;
-    JNI_CUDA_TRY(env, -2, ::cudaGetDevice(&device));
+    CUDF_CUDA_TRY(::cudaGetDevice(&device));
     int attribute_value;
-    JNI_CUDA_TRY(
-        env, -2,
+    CUDF_CUDA_TRY(
         ::cudaDeviceGetAttribute(&attribute_value, ::cudaDevAttrComputeCapabilityMinor, device));
     return attribute_value;
   }
@@ -226,7 +225,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMinor(JNIEnv
 JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_freeZero(JNIEnv *env, jclass) {
   try {
     cudf::jni::auto_set_device(env);
-    JNI_CUDA_TRY(env, , cudaFree(0));
+    CUDF_CUDA_TRY(cudaFree(0));
   }
   CATCH_STD(env, );
 }
@@ -237,7 +236,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_createStream(JNIEnv *env, jclas
     cudf::jni::auto_set_device(env);
     cudaStream_t stream = nullptr;
     auto flags = isNonBlocking ? cudaStreamNonBlocking : cudaStreamDefault;
-    JNI_CUDA_TRY(env, 0, cudaStreamCreateWithFlags(&stream, flags));
+    CUDF_CUDA_TRY(cudaStreamCreateWithFlags(&stream, flags));
     return reinterpret_cast<jlong>(stream);
   }
   CATCH_STD(env, 0);
@@ -247,7 +246,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_destroyStream(JNIEnv *env, jclas
   try {
     cudf::jni::auto_set_device(env);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
-    JNI_CUDA_TRY(env, , cudaStreamDestroy(stream));
+    CUDF_CUDA_TRY(cudaStreamDestroy(stream));
   }
   CATCH_STD(env, );
 }
@@ -258,7 +257,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_streamWaitEvent(JNIEnv *env, jcl
     cudf::jni::auto_set_device(env);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
     auto event = reinterpret_cast<cudaEvent_t>(jevent);
-    JNI_CUDA_TRY(env, , cudaStreamWaitEvent(stream, event, 0));
+    CUDF_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0));
   }
   CATCH_STD(env, );
 }
@@ -268,7 +267,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_streamSynchronize(JNIEnv *env, j
   try {
     cudf::jni::auto_set_device(env);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
-    JNI_CUDA_TRY(env, , cudaStreamSynchronize(stream));
+    CUDF_CUDA_TRY(cudaStreamSynchronize(stream));
   }
   CATCH_STD(env, );
 }
@@ -286,7 +285,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_createEvent(JNIEnv *env, jclass
     if (blockingSync) {
       flags = flags | cudaEventBlockingSync;
     }
-    JNI_CUDA_TRY(env, 0, cudaEventCreateWithFlags(&event, flags));
+    CUDF_CUDA_TRY(cudaEventCreateWithFlags(&event, flags));
     return reinterpret_cast<jlong>(event);
   }
   CATCH_STD(env, 0);
@@ -296,7 +295,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_destroyEvent(JNIEnv *env, jclass
   try {
     cudf::jni::auto_set_device(env);
     auto event = reinterpret_cast<cudaEvent_t>(jevent);
-    JNI_CUDA_TRY(env, , cudaEventDestroy(event));
+    CUDF_CUDA_TRY(cudaEventDestroy(event));
   }
   CATCH_STD(env, );
 }
@@ -311,7 +310,7 @@ JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_Cuda_eventQuery(JNIEnv *env, jcla
     } else if (result == cudaErrorNotReady) {
       return false;
     } // else
-    JNI_CUDA_TRY(env, false, result);
+    CUDF_CUDA_TRY(result);
   }
   CATCH_STD(env, false);
   return false;
@@ -323,7 +322,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_eventRecord(JNIEnv *env, jclass,
     cudf::jni::auto_set_device(env);
     auto event = reinterpret_cast<cudaEvent_t>(jevent);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
-    JNI_CUDA_TRY(env, , cudaEventRecord(event, stream));
+    CUDF_CUDA_TRY(cudaEventRecord(event, stream));
   }
   CATCH_STD(env, );
 }
@@ -333,7 +332,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_eventSynchronize(JNIEnv *env, jc
   try {
     cudf::jni::auto_set_device(env);
     auto event = reinterpret_cast<cudaEvent_t>(jevent);
-    JNI_CUDA_TRY(env, , cudaEventSynchronize(event));
+    CUDF_CUDA_TRY(cudaEventSynchronize(event));
   }
   CATCH_STD(env, );
 }
@@ -352,8 +351,8 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_memcpyOnStream(JNIEnv *env, jcla
     auto src = reinterpret_cast<void *>(jsrc);
     auto kind = static_cast<cudaMemcpyKind>(jkind);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
-    JNI_CUDA_TRY(env, , cudaMemcpyAsync(dst, src, count, kind, stream));
-    JNI_CUDA_TRY(env, , cudaStreamSynchronize(stream));
+    CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, count, kind, stream));
+    CUDF_CUDA_TRY(cudaStreamSynchronize(stream));
   }
   CATCH_STD(env, );
 }
@@ -372,7 +371,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_asyncMemcpyOnStream(JNIEnv *env,
     auto src = reinterpret_cast<void *>(jsrc);
     auto kind = static_cast<cudaMemcpyKind>(jkind);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
-    JNI_CUDA_TRY(env, , cudaMemcpyAsync(dst, src, count, kind, stream));
+    CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, count, kind, stream));
   }
   CATCH_STD(env, );
 }
diff --git a/java/src/test/java/ai/rapids/cudf/CudaTest.java b/java/src/test/java/ai/rapids/cudf/CudaTest.java
index 8905c2edd56..1a86dbb374d 100644
--- a/java/src/test/java/ai/rapids/cudf/CudaTest.java
+++ b/java/src/test/java/ai/rapids/cudf/CudaTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@
 
 import org.junit.jupiter.api.Test;
 
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class CudaTest {
 
@@ -32,4 +32,17 @@ public void testGetCudaRuntimeInfo() {
     assertEquals(Cuda.getNativeComputeMode(), Cuda.getComputeMode().nativeId);
   }
 
+  @Test
+  public void testCudaException() {
+    assertThrows(CudaException.class, () -> {
+          try {
+            Cuda.memset(Long.MAX_VALUE, (byte) 0, 1024);
+          } catch (CudaFatalException ignored) {
+          } catch (CudaException ex) {
+            assertEquals(CudaException.CudaError.cudaErrorInvalidValue, ex.cudaError);
+            throw ex;
+          }
+        }
+    );
+  }
 }