diff --git a/java/src/main/java/ai/rapids/cudf/CudaException.java b/java/src/main/java/ai/rapids/cudf/CudaException.java
index 2d862b47ef8..ff7ca308f3c 100755
--- a/java/src/main/java/ai/rapids/cudf/CudaException.java
+++ b/java/src/main/java/ai/rapids/cudf/CudaException.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,6 +15,9 @@
  */
 package ai.rapids.cudf;
 
+import java.util.HashMap;
+import java.util.Map;
+
 /**
  * Exception from the cuda language/library.  Be aware that because of how cuda does asynchronous
  * processing exceptions from cuda can be thrown by method calls that did not cause the exception
@@ -28,11 +31,283 @@
  * don't switch between threads for different parts of processing that can be retried as a chunk.
  */
 public class CudaException extends RuntimeException {
-  CudaException(String message) {
+  CudaException(String message, int errorCode) {
     super(message);
+    cudaError = CudaError.parseErrorCode(errorCode);
   }
 
-  CudaException(String message, Throwable cause) {
+  CudaException(String message, int errorCode, Throwable cause) {
     super(message, cause);
+    cudaError = CudaError.parseErrorCode(errorCode);
+  }
+
+  public final CudaError cudaError;
+
+  /**
+   * The Java mirror of cudaError, which facilities the tracking of CUDA errors in JVM.
+   */
+  public enum CudaError {
+    UnknownNativeError(-1), // native CUDA error type which Java doesn't have a representation
+    cudaErrorInvalidValue(1),
+    cudaErrorMemoryAllocation(2),
+    cudaErrorInitializationError(3),
+    cudaErrorCudartUnloading(4),
+    cudaErrorProfilerDisabled(5),
+    cudaErrorProfilerNotInitialized(6),
+    cudaErrorProfilerAlreadyStarted(7),
+    cudaErrorProfilerAlreadyStopped(8),
+    cudaErrorInvalidConfiguration(9),
+    cudaErrorInvalidPitchValue(12),
+    cudaErrorInvalidSymbol(13),
+    cudaErrorInvalidHostPointer(16),
+    cudaErrorInvalidDevicePointer(17),
+    cudaErrorInvalidTexture(18),
+    cudaErrorInvalidTextureBinding(19),
+    cudaErrorInvalidChannelDescriptor(20),
+    cudaErrorInvalidMemcpyDirection(21),
+    cudaErrorAddressOfConstant(22),
+    cudaErrorTextureFetchFailed(23),
+    cudaErrorTextureNotBound(24),
+    cudaErrorSynchronizationError(25),
+    cudaErrorInvalidFilterSetting(26),
+    cudaErrorInvalidNormSetting(27),
+    cudaErrorMixedDeviceExecution(28),
+    cudaErrorNotYetImplemented(31),
+    cudaErrorMemoryValueTooLarge(32),
+    cudaErrorStubLibrary(34),
+    cudaErrorInsufficientDriver(35),
+    cudaErrorCallRequiresNewerDriver(36),
+    cudaErrorInvalidSurface(37),
+    cudaErrorDuplicateVariableName(43),
+    cudaErrorDuplicateTextureName(44),
+    cudaErrorDuplicateSurfaceName(45),
+    cudaErrorDevicesUnavailable(46),
+    cudaErrorIncompatibleDriverContext(49),
+    cudaErrorMissingConfiguration(52),
+    cudaErrorPriorLaunchFailure(53),
+    cudaErrorLaunchMaxDepthExceeded(65),
+    cudaErrorLaunchFileScopedTex(66),
+    cudaErrorLaunchFileScopedSurf(67),
+    cudaErrorSyncDepthExceeded(68),
+    cudaErrorLaunchPendingCountExceeded(69),
+    cudaErrorInvalidDeviceFunction(98),
+    cudaErrorNoDevice(100),
+    cudaErrorInvalidDevice(101),
+    cudaErrorDeviceNotLicensed(102),
+    cudaErrorSoftwareValidityNotEstablished(103),
+    cudaErrorStartupFailure(127),
+    cudaErrorInvalidKernelImage(200),
+    cudaErrorDeviceUninitialized(201),
+    cudaErrorMapBufferObjectFailed(205),
+    cudaErrorUnmapBufferObjectFailed(206),
+    cudaErrorArrayIsMapped(207),
+    cudaErrorAlreadyMapped(208),
+    cudaErrorNoKernelImageForDevice(209),
+    cudaErrorAlreadyAcquired(210),
+    cudaErrorNotMapped(211),
+    cudaErrorNotMappedAsArray(212),
+    cudaErrorNotMappedAsPointer(213),
+    cudaErrorECCUncorrectable(214),
+    cudaErrorUnsupportedLimit(215),
+    cudaErrorDeviceAlreadyInUse(216),
+    cudaErrorPeerAccessUnsupported(217),
+    cudaErrorInvalidPtx(218),
+    cudaErrorInvalidGraphicsContext(219),
+    cudaErrorNvlinkUncorrectable(220),
+    cudaErrorJitCompilerNotFound(221),
+    cudaErrorUnsupportedPtxVersion(222),
+    cudaErrorJitCompilationDisabled(223),
+    cudaErrorUnsupportedExecAffinity(224),
+    cudaErrorInvalidSource(300),
+    cudaErrorFileNotFound(301),
+    cudaErrorSharedObjectSymbolNotFound(302),
+    cudaErrorSharedObjectInitFailed(303),
+    cudaErrorOperatingSystem(304),
+    cudaErrorInvalidResourceHandle(400),
+    cudaErrorIllegalState(401),
+    cudaErrorSymbolNotFound(500),
+    cudaErrorNotReady(600),
+    cudaErrorIllegalAddress(700),
+    cudaErrorLaunchOutOfResources(701),
+    cudaErrorLaunchTimeout(702),
+    cudaErrorLaunchIncompatibleTexturing(703),
+    cudaErrorPeerAccessAlreadyEnabled(704),
+    cudaErrorPeerAccessNotEnabled(705),
+    cudaErrorSetOnActiveProcess(708),
+    cudaErrorContextIsDestroyed(709),
+    cudaErrorAssert(710),
+    cudaErrorTooManyPeers(711),
+    cudaErrorHostMemoryAlreadyRegistered(712),
+    cudaErrorHostMemoryNotRegistered(713),
+    cudaErrorHardwareStackError(714),
+    cudaErrorIllegalInstruction(715),
+    cudaErrorMisalignedAddress(716),
+    cudaErrorInvalidAddressSpace(717),
+    cudaErrorInvalidPc(718),
+    cudaErrorLaunchFailure(719),
+    cudaErrorCooperativeLaunchTooLarge(720),
+    cudaErrorNotPermitted(800),
+    cudaErrorNotSupported(801),
+    cudaErrorSystemNotReady(802),
+    cudaErrorSystemDriverMismatch(803),
+    cudaErrorCompatNotSupportedOnDevice(804),
+    cudaErrorMpsConnectionFailed(805),
+    cudaErrorMpsRpcFailure(806),
+    cudaErrorMpsServerNotReady(807),
+    cudaErrorMpsMaxClientsReached(808),
+    cudaErrorMpsMaxConnectionsReached(809),
+    cudaErrorStreamCaptureUnsupported(900),
+    cudaErrorStreamCaptureInvalidated(901),
+    cudaErrorStreamCaptureMerge(902),
+    cudaErrorStreamCaptureUnmatched(903),
+    cudaErrorStreamCaptureUnjoined(904),
+    cudaErrorStreamCaptureIsolation(905),
+    cudaErrorStreamCaptureImplicit(906),
+    cudaErrorCapturedEvent(907),
+    cudaErrorStreamCaptureWrongThread(908),
+    cudaErrorTimeout(909),
+    cudaErrorGraphExecUpdateFailure(910),
+    cudaErrorExternalDevice(911),
+    cudaErrorUnknown(999),
+    cudaErrorApiFailureBase(10000);
+
+    final int code;
+
+    private static Map<Integer, CudaError> codeToError = new HashMap<Integer, CudaError>(){{
+      put(cudaErrorInvalidValue.code, cudaErrorInvalidValue);
+      put(cudaErrorMemoryAllocation.code, cudaErrorMemoryAllocation);
+      put(cudaErrorInitializationError.code, cudaErrorInitializationError);
+      put(cudaErrorCudartUnloading.code, cudaErrorCudartUnloading);
+      put(cudaErrorProfilerDisabled.code, cudaErrorProfilerDisabled);
+      put(cudaErrorProfilerNotInitialized.code, cudaErrorProfilerNotInitialized);
+      put(cudaErrorProfilerAlreadyStarted.code, cudaErrorProfilerAlreadyStarted);
+      put(cudaErrorProfilerAlreadyStopped.code, cudaErrorProfilerAlreadyStopped);
+      put(cudaErrorInvalidConfiguration.code, cudaErrorInvalidConfiguration);
+      put(cudaErrorInvalidPitchValue.code, cudaErrorInvalidPitchValue);
+      put(cudaErrorInvalidSymbol.code, cudaErrorInvalidSymbol);
+      put(cudaErrorInvalidHostPointer.code, cudaErrorInvalidHostPointer);
+      put(cudaErrorInvalidDevicePointer.code, cudaErrorInvalidDevicePointer);
+      put(cudaErrorInvalidTexture.code, cudaErrorInvalidTexture);
+      put(cudaErrorInvalidTextureBinding.code, cudaErrorInvalidTextureBinding);
+      put(cudaErrorInvalidChannelDescriptor.code, cudaErrorInvalidChannelDescriptor);
+      put(cudaErrorInvalidMemcpyDirection.code, cudaErrorInvalidMemcpyDirection);
+      put(cudaErrorAddressOfConstant.code, cudaErrorAddressOfConstant);
+      put(cudaErrorTextureFetchFailed.code, cudaErrorTextureFetchFailed);
+      put(cudaErrorTextureNotBound.code, cudaErrorTextureNotBound);
+      put(cudaErrorSynchronizationError.code, cudaErrorSynchronizationError);
+      put(cudaErrorInvalidFilterSetting.code, cudaErrorInvalidFilterSetting);
+      put(cudaErrorInvalidNormSetting.code, cudaErrorInvalidNormSetting);
+      put(cudaErrorMixedDeviceExecution.code, cudaErrorMixedDeviceExecution);
+      put(cudaErrorNotYetImplemented.code, cudaErrorNotYetImplemented);
+      put(cudaErrorMemoryValueTooLarge.code, cudaErrorMemoryValueTooLarge);
+      put(cudaErrorStubLibrary.code, cudaErrorStubLibrary);
+      put(cudaErrorInsufficientDriver.code, cudaErrorInsufficientDriver);
+      put(cudaErrorCallRequiresNewerDriver.code, cudaErrorCallRequiresNewerDriver);
+      put(cudaErrorInvalidSurface.code, cudaErrorInvalidSurface);
+      put(cudaErrorDuplicateVariableName.code, cudaErrorDuplicateVariableName);
+      put(cudaErrorDuplicateTextureName.code, cudaErrorDuplicateTextureName);
+      put(cudaErrorDuplicateSurfaceName.code, cudaErrorDuplicateSurfaceName);
+      put(cudaErrorDevicesUnavailable.code, cudaErrorDevicesUnavailable);
+      put(cudaErrorIncompatibleDriverContext.code, cudaErrorIncompatibleDriverContext);
+      put(cudaErrorMissingConfiguration.code, cudaErrorMissingConfiguration);
+      put(cudaErrorPriorLaunchFailure.code, cudaErrorPriorLaunchFailure);
+      put(cudaErrorLaunchMaxDepthExceeded.code, cudaErrorLaunchMaxDepthExceeded);
+      put(cudaErrorLaunchFileScopedTex.code, cudaErrorLaunchFileScopedTex);
+      put(cudaErrorLaunchFileScopedSurf.code, cudaErrorLaunchFileScopedSurf);
+      put(cudaErrorSyncDepthExceeded.code, cudaErrorSyncDepthExceeded);
+      put(cudaErrorLaunchPendingCountExceeded.code, cudaErrorLaunchPendingCountExceeded);
+      put(cudaErrorInvalidDeviceFunction.code, cudaErrorInvalidDeviceFunction);
+      put(cudaErrorNoDevice.code, cudaErrorNoDevice);
+      put(cudaErrorInvalidDevice.code, cudaErrorInvalidDevice);
+      put(cudaErrorDeviceNotLicensed.code, cudaErrorDeviceNotLicensed);
+      put(cudaErrorSoftwareValidityNotEstablished.code, cudaErrorSoftwareValidityNotEstablished);
+      put(cudaErrorStartupFailure.code, cudaErrorStartupFailure);
+      put(cudaErrorInvalidKernelImage.code, cudaErrorInvalidKernelImage);
+      put(cudaErrorDeviceUninitialized.code, cudaErrorDeviceUninitialized);
+      put(cudaErrorMapBufferObjectFailed.code, cudaErrorMapBufferObjectFailed);
+      put(cudaErrorUnmapBufferObjectFailed.code, cudaErrorUnmapBufferObjectFailed);
+      put(cudaErrorArrayIsMapped.code, cudaErrorArrayIsMapped);
+      put(cudaErrorAlreadyMapped.code, cudaErrorAlreadyMapped);
+      put(cudaErrorNoKernelImageForDevice.code, cudaErrorNoKernelImageForDevice);
+      put(cudaErrorAlreadyAcquired.code, cudaErrorAlreadyAcquired);
+      put(cudaErrorNotMapped.code, cudaErrorNotMapped);
+      put(cudaErrorNotMappedAsArray.code, cudaErrorNotMappedAsArray);
+      put(cudaErrorNotMappedAsPointer.code, cudaErrorNotMappedAsPointer);
+      put(cudaErrorECCUncorrectable.code, cudaErrorECCUncorrectable);
+      put(cudaErrorUnsupportedLimit.code, cudaErrorUnsupportedLimit);
+      put(cudaErrorDeviceAlreadyInUse.code, cudaErrorDeviceAlreadyInUse);
+      put(cudaErrorPeerAccessUnsupported.code, cudaErrorPeerAccessUnsupported);
+      put(cudaErrorInvalidPtx.code, cudaErrorInvalidPtx);
+      put(cudaErrorInvalidGraphicsContext.code, cudaErrorInvalidGraphicsContext);
+      put(cudaErrorNvlinkUncorrectable.code, cudaErrorNvlinkUncorrectable);
+      put(cudaErrorJitCompilerNotFound.code, cudaErrorJitCompilerNotFound);
+      put(cudaErrorUnsupportedPtxVersion.code, cudaErrorUnsupportedPtxVersion);
+      put(cudaErrorJitCompilationDisabled.code, cudaErrorJitCompilationDisabled);
+      put(cudaErrorUnsupportedExecAffinity.code, cudaErrorUnsupportedExecAffinity);
+      put(cudaErrorInvalidSource.code, cudaErrorInvalidSource);
+      put(cudaErrorFileNotFound.code, cudaErrorFileNotFound);
+      put(cudaErrorSharedObjectSymbolNotFound.code, cudaErrorSharedObjectSymbolNotFound);
+      put(cudaErrorSharedObjectInitFailed.code, cudaErrorSharedObjectInitFailed);
+      put(cudaErrorOperatingSystem.code, cudaErrorOperatingSystem);
+      put(cudaErrorInvalidResourceHandle.code, cudaErrorInvalidResourceHandle);
+      put(cudaErrorIllegalState.code, cudaErrorIllegalState);
+      put(cudaErrorSymbolNotFound.code, cudaErrorSymbolNotFound);
+      put(cudaErrorNotReady.code, cudaErrorNotReady);
+      put(cudaErrorIllegalAddress.code, cudaErrorIllegalAddress);
+      put(cudaErrorLaunchOutOfResources.code, cudaErrorLaunchOutOfResources);
+      put(cudaErrorLaunchTimeout.code, cudaErrorLaunchTimeout);
+      put(cudaErrorLaunchIncompatibleTexturing.code, cudaErrorLaunchIncompatibleTexturing);
+      put(cudaErrorPeerAccessAlreadyEnabled.code, cudaErrorPeerAccessAlreadyEnabled);
+      put(cudaErrorPeerAccessNotEnabled.code, cudaErrorPeerAccessNotEnabled);
+      put(cudaErrorSetOnActiveProcess.code, cudaErrorSetOnActiveProcess);
+      put(cudaErrorContextIsDestroyed.code, cudaErrorContextIsDestroyed);
+      put(cudaErrorAssert.code, cudaErrorAssert);
+      put(cudaErrorTooManyPeers.code, cudaErrorTooManyPeers);
+      put(cudaErrorHostMemoryAlreadyRegistered.code, cudaErrorHostMemoryAlreadyRegistered);
+      put(cudaErrorHostMemoryNotRegistered.code, cudaErrorHostMemoryNotRegistered);
+      put(cudaErrorHardwareStackError.code, cudaErrorHardwareStackError);
+      put(cudaErrorIllegalInstruction.code, cudaErrorIllegalInstruction);
+      put(cudaErrorMisalignedAddress.code, cudaErrorMisalignedAddress);
+      put(cudaErrorInvalidAddressSpace.code, cudaErrorInvalidAddressSpace);
+      put(cudaErrorInvalidPc.code, cudaErrorInvalidPc);
+      put(cudaErrorLaunchFailure.code, cudaErrorLaunchFailure);
+      put(cudaErrorCooperativeLaunchTooLarge.code, cudaErrorCooperativeLaunchTooLarge);
+      put(cudaErrorNotPermitted.code, cudaErrorNotPermitted);
+      put(cudaErrorNotSupported.code, cudaErrorNotSupported);
+      put(cudaErrorSystemNotReady.code, cudaErrorSystemNotReady);
+      put(cudaErrorSystemDriverMismatch.code, cudaErrorSystemDriverMismatch);
+      put(cudaErrorCompatNotSupportedOnDevice.code, cudaErrorCompatNotSupportedOnDevice);
+      put(cudaErrorMpsConnectionFailed.code, cudaErrorMpsConnectionFailed);
+      put(cudaErrorMpsRpcFailure.code, cudaErrorMpsRpcFailure);
+      put(cudaErrorMpsServerNotReady.code, cudaErrorMpsServerNotReady);
+      put(cudaErrorMpsMaxClientsReached.code, cudaErrorMpsMaxClientsReached);
+      put(cudaErrorMpsMaxConnectionsReached.code, cudaErrorMpsMaxConnectionsReached);
+      put(cudaErrorStreamCaptureUnsupported.code, cudaErrorStreamCaptureUnsupported);
+      put(cudaErrorStreamCaptureInvalidated.code, cudaErrorStreamCaptureInvalidated);
+      put(cudaErrorStreamCaptureMerge.code, cudaErrorStreamCaptureMerge);
+      put(cudaErrorStreamCaptureUnmatched.code, cudaErrorStreamCaptureUnmatched);
+      put(cudaErrorStreamCaptureUnjoined.code, cudaErrorStreamCaptureUnjoined);
+      put(cudaErrorStreamCaptureIsolation.code, cudaErrorStreamCaptureIsolation);
+      put(cudaErrorStreamCaptureImplicit.code, cudaErrorStreamCaptureImplicit);
+      put(cudaErrorCapturedEvent.code, cudaErrorCapturedEvent);
+      put(cudaErrorStreamCaptureWrongThread.code, cudaErrorStreamCaptureWrongThread);
+      put(cudaErrorTimeout.code, cudaErrorTimeout);
+      put(cudaErrorGraphExecUpdateFailure.code, cudaErrorGraphExecUpdateFailure);
+      put(cudaErrorExternalDevice.code, cudaErrorExternalDevice);
+      put(cudaErrorUnknown.code, cudaErrorUnknown);
+      put(cudaErrorApiFailureBase.code, cudaErrorApiFailureBase);
+    }};
+
+    CudaError(int errorCode) {
+      this.code = errorCode;
+    }
+
+    public static CudaError parseErrorCode(int errorCode) {
+      if (!codeToError.containsKey(errorCode)) {
+        return UnknownNativeError;
+      }
+      return codeToError.get(errorCode);
+    }
+
   }
 }
diff --git a/java/src/main/java/ai/rapids/cudf/CudaFatalException.java b/java/src/main/java/ai/rapids/cudf/CudaFatalException.java
new file mode 100644
index 00000000000..cf36726aa80
--- /dev/null
+++ b/java/src/main/java/ai/rapids/cudf/CudaFatalException.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ai.rapids.cudf;
+
+/**
+ * CudaFatalException is a kind of CudaException which leaves the process in an inconsistent state
+ * and any further CUDA work will return the same error.
+ * To continue using CUDA, the process must be terminated and relaunched.
+ */
+public class CudaFatalException extends CudaException {
+  CudaFatalException(String message, int errorCode) {
+    super(message, errorCode);
+  }
+
+  CudaFatalException(String message, int errorCode, Throwable cause) {
+    super(message, errorCode, cause);
+  }
+}
diff --git a/java/src/main/native/include/jni_utils.hpp b/java/src/main/native/include/jni_utils.hpp
index a45716a89b3..eca424132a5 100644
--- a/java/src/main/native/include/jni_utils.hpp
+++ b/java/src/main/native/include/jni_utils.hpp
@@ -30,6 +30,7 @@ namespace jni {
 constexpr jint MINIMUM_JNI_VERSION = JNI_VERSION_1_6;
 
 constexpr char const *CUDA_ERROR_CLASS = "ai/rapids/cudf/CudaException";
+constexpr char const *CUDA_FATAL_ERROR_CLASS = "ai/rapids/cudf/CudaFatalException";
 constexpr char const *CUDF_ERROR_CLASS = "ai/rapids/cudf/CudfException";
 constexpr char const *INDEX_OOB_CLASS = "java/lang/ArrayIndexOutOfBoundsException";
 constexpr char const *ILLEGAL_ARG_CLASS = "java/lang/IllegalArgumentException";
@@ -737,12 +738,26 @@ class native_jstringArray {
  * @brief create a cuda exception from a given cudaError_t
  */
 inline jthrowable cuda_exception(JNIEnv *const env, cudaError_t status, jthrowable cause = NULL) {
-  jclass ex_class = env->FindClass(cudf::jni::CUDA_ERROR_CLASS);
+  const char *ex_class_name;
+
+  // Calls cudaGetLastError twice. It is nearly certain that a fatal error occurred if the second
+  // call doesn't return with cudaSuccess.
+  cudaGetLastError();
+  auto const last = cudaGetLastError();
+  // Call cudaDeviceSynchronize to ensure `last` did not result from an asynchronous error.
+  // between two calls.
+  if (status == last && last == cudaDeviceSynchronize()) {
+    ex_class_name = cudf::jni::CUDA_FATAL_ERROR_CLASS;
+  } else {
+    ex_class_name = cudf::jni::CUDA_ERROR_CLASS;
+  }
+
+  jclass ex_class = env->FindClass(ex_class_name);
   if (ex_class == NULL) {
     return NULL;
   }
   jmethodID ctor_id =
-      env->GetMethodID(ex_class, "<init>", "(Ljava/lang/String;Ljava/lang/Throwable;)V");
+      env->GetMethodID(ex_class, "<init>", "(Ljava/lang/String;ILjava/lang/Throwable;)V");
   if (ctor_id == NULL) {
     return NULL;
   }
@@ -752,19 +767,20 @@ inline jthrowable cuda_exception(JNIEnv *const env, cudaError_t status, jthrowab
     return NULL;
   }
 
-  jobject ret = env->NewObject(ex_class, ctor_id, msg, cause);
+  jint err_code = static_cast<jint>(status);
+
+  jobject ret = env->NewObject(ex_class, ctor_id, msg, err_code, cause);
   return (jthrowable)ret;
 }
 
 inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) {
   if (cudaSuccess != cuda_status) {
-    // Clear the last error so it does not propagate.
-    cudaGetLastError();
     jthrowable jt = cuda_exception(env, cuda_status);
     if (jt != NULL) {
       env->Throw(jt);
-      throw jni_exception("CUDA ERROR");
     }
+    throw jni_exception(std::string("CUDA ERROR: code ") +
+                        std::to_string(static_cast<int>(cuda_status)));
   }
 }
 
@@ -790,18 +806,26 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) {
     JNI_THROW_NEW(env, class_name, message, ret_val)                                               \
   }
 
-#define JNI_CUDA_TRY(env, ret_val, call)                                                           \
+// Throw a new exception only if one is not pending then always return with the specified value
+#define JNI_CHECK_CUDA_ERROR(env, class_name, e, ret_val)                                          \
   {                                                                                                \
-    cudaError_t internal_cuda_status = (call);                                                     \
-    if (cudaSuccess != internal_cuda_status) {                                                     \
-      /* Clear the last error so it does not propagate.*/                                          \
-      cudaGetLastError();                                                                          \
-      jthrowable jt = cudf::jni::cuda_exception(env, internal_cuda_status);                        \
-      if (jt != NULL) {                                                                            \
-        env->Throw(jt);                                                                            \
-      }                                                                                            \
+    if (env->ExceptionOccurred()) {                                                                \
       return ret_val;                                                                              \
     }                                                                                              \
+    std::string n_msg = e.what() == nullptr ? "" : e.what();                                       \
+    jstring j_msg = env->NewStringUTF(n_msg.c_str());                                              \
+    jint e_code = static_cast<jint>(e.error_code());                                               \
+    jclass ex_class = env->FindClass(class_name);                                                  \
+    if (ex_class != NULL) {                                                                        \
+      jmethodID ctor_id = env->GetMethodID(ex_class, "<init>", "(Ljava/lang/String;I)V");          \
+      if (ctor_id != NULL) {                                                                       \
+        jobject cuda_error = env->NewObject(ex_class, ctor_id, j_msg, e_code);                     \
+        if (cuda_error != NULL) {                                                                  \
+          env->Throw((jthrowable)cuda_error);                                                      \
+        }                                                                                          \
+      }                                                                                            \
+    }                                                                                              \
+    return ret_val;                                                                                \
   }
 
 #define JNI_NULL_CHECK(env, obj, error_msg, ret_val)                                               \
@@ -831,6 +855,12 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) {
         std::string("Could not allocate native memory: ") + (e.what() == nullptr ? "" : e.what()); \
     JNI_CHECK_THROW_NEW(env, cudf::jni::OOM_CLASS, what.c_str(), ret_val);                         \
   }                                                                                                \
+  catch (const cudf::fatal_cuda_error &e) {                                                        \
+    JNI_CHECK_CUDA_ERROR(env, cudf::jni::CUDA_FATAL_ERROR_CLASS, e, ret_val);                      \
+  }                                                                                                \
+  catch (const cudf::cuda_error &e) {                                                              \
+    JNI_CHECK_CUDA_ERROR(env, cudf::jni::CUDA_ERROR_CLASS, e, ret_val);                            \
+  }                                                                                                \
   catch (const std::exception &e) {                                                                \
     /* If jni_exception caught then a Java exception is pending and this will not overwrite it. */ \
     JNI_CHECK_THROW_NEW(env, class_name, e.what(), ret_val);                                       \
diff --git a/java/src/main/native/src/CudaJni.cpp b/java/src/main/native/src/CudaJni.cpp
index 9862c3bface..926521c55f9 100644
--- a/java/src/main/native/src/CudaJni.cpp
+++ b/java/src/main/native/src/CudaJni.cpp
@@ -15,6 +15,7 @@
  */
 
 #include <cuda_profiler_api.h>
+#include <cudf/utilities/error.hpp>
 #include <rmm/device_buffer.hpp>
 
 #include "jni_utils.hpp"
@@ -66,7 +67,7 @@ JNIEXPORT jobject JNICALL Java_ai_rapids_cudf_Cuda_memGetInfo(JNIEnv *env, jclas
     cudf::jni::auto_set_device(env);
 
     size_t free, total;
-    JNI_CUDA_TRY(env, NULL, cudaMemGetInfo(&free, &total));
+    CUDF_CUDA_TRY(cudaMemGetInfo(&free, &total));
 
     jclass info_class = env->FindClass("Lai/rapids/cudf/CudaMemInfo;");
     if (info_class == NULL) {
@@ -90,7 +91,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_hostAllocPinned(JNIEnv *env, jc
   try {
     cudf::jni::auto_set_device(env);
     void *ret = nullptr;
-    JNI_CUDA_TRY(env, 0, cudaMallocHost(&ret, size));
+    CUDF_CUDA_TRY(cudaMallocHost(&ret, size));
     return reinterpret_cast<jlong>(ret);
   }
   CATCH_STD(env, 0);
@@ -99,7 +100,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_hostAllocPinned(JNIEnv *env, jc
 JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_freePinned(JNIEnv *env, jclass, jlong ptr) {
   try {
     cudf::jni::auto_set_device(env);
-    JNI_CUDA_TRY(env, , cudaFreeHost(reinterpret_cast<void *>(ptr)));
+    CUDF_CUDA_TRY(cudaFreeHost(reinterpret_cast<void *>(ptr)));
   }
   CATCH_STD(env, );
 }
@@ -109,8 +110,8 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_memset(JNIEnv *env, jclass, jlon
   JNI_NULL_CHECK(env, dst, "dst memory pointer is null", );
   try {
     cudf::jni::auto_set_device(env);
-    JNI_CUDA_TRY(env, , cudaMemsetAsync((void *)dst, value, count));
-    JNI_CUDA_TRY(env, , cudaStreamSynchronize(0));
+    CUDF_CUDA_TRY(cudaMemsetAsync((void *)dst, value, count));
+    CUDF_CUDA_TRY(cudaStreamSynchronize(0));
   }
   CATCH_STD(env, );
 }
@@ -120,7 +121,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_asyncMemset(JNIEnv *env, jclass,
   JNI_NULL_CHECK(env, dst, "dst memory pointer is null", );
   try {
     cudf::jni::auto_set_device(env);
-    JNI_CUDA_TRY(env, , cudaMemsetAsync((void *)dst, value, count));
+    CUDF_CUDA_TRY(cudaMemsetAsync((void *)dst, value, count));
   }
   CATCH_STD(env, );
 }
@@ -129,7 +130,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getDevice(JNIEnv *env, jclass) {
   try {
     cudf::jni::auto_set_device(env);
     jint dev;
-    JNI_CUDA_TRY(env, -2, cudaGetDevice(&dev));
+    CUDF_CUDA_TRY(cudaGetDevice(&dev));
     return dev;
   }
   CATCH_STD(env, -2);
@@ -139,7 +140,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getDeviceCount(JNIEnv *env, jcla
   try {
     cudf::jni::auto_set_device(env);
     jint count;
-    JNI_CUDA_TRY(env, -2, cudaGetDeviceCount(&count));
+    CUDF_CUDA_TRY(cudaGetDeviceCount(&count));
     return count;
   }
   CATCH_STD(env, -2);
@@ -151,7 +152,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_setDevice(JNIEnv *env, jclass, j
       cudf::jni::throw_java_exception(env, cudf::jni::CUDF_ERROR_CLASS,
                                       "Cannot change device after RMM init");
     }
-    JNI_CUDA_TRY(env, , cudaSetDevice(dev));
+    CUDF_CUDA_TRY(cudaSetDevice(dev));
   }
   CATCH_STD(env, );
 }
@@ -167,7 +168,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getDriverVersion(JNIEnv *env, jc
   try {
     cudf::jni::auto_set_device(env);
     jint driver_version;
-    JNI_CUDA_TRY(env, -2, cudaDriverGetVersion(&driver_version));
+    CUDF_CUDA_TRY(cudaDriverGetVersion(&driver_version));
     return driver_version;
   }
   CATCH_STD(env, -2);
@@ -177,7 +178,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getRuntimeVersion(JNIEnv *env, j
   try {
     cudf::jni::auto_set_device(env);
     jint runtime_version;
-    JNI_CUDA_TRY(env, -2, cudaRuntimeGetVersion(&runtime_version));
+    CUDF_CUDA_TRY(cudaRuntimeGetVersion(&runtime_version));
     return runtime_version;
   }
   CATCH_STD(env, -2);
@@ -187,9 +188,9 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getNativeComputeMode(JNIEnv *env
   try {
     cudf::jni::auto_set_device(env);
     int device;
-    JNI_CUDA_TRY(env, -2, cudaGetDevice(&device));
+    CUDF_CUDA_TRY(cudaGetDevice(&device));
     cudaDeviceProp device_prop;
-    JNI_CUDA_TRY(env, -2, cudaGetDeviceProperties(&device_prop, device));
+    CUDF_CUDA_TRY(cudaGetDeviceProperties(&device_prop, device));
     return device_prop.computeMode;
   }
   CATCH_STD(env, -2);
@@ -199,10 +200,9 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMajor(JNIEnv
   try {
     cudf::jni::auto_set_device(env);
     int device;
-    JNI_CUDA_TRY(env, -2, ::cudaGetDevice(&device));
+    CUDF_CUDA_TRY(::cudaGetDevice(&device));
     int attribute_value;
-    JNI_CUDA_TRY(
-        env, -2,
+    CUDF_CUDA_TRY(
         ::cudaDeviceGetAttribute(&attribute_value, ::cudaDevAttrComputeCapabilityMajor, device));
     return attribute_value;
   }
@@ -213,10 +213,9 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMinor(JNIEnv
   try {
     cudf::jni::auto_set_device(env);
     int device;
-    JNI_CUDA_TRY(env, -2, ::cudaGetDevice(&device));
+    CUDF_CUDA_TRY(::cudaGetDevice(&device));
     int attribute_value;
-    JNI_CUDA_TRY(
-        env, -2,
+    CUDF_CUDA_TRY(
         ::cudaDeviceGetAttribute(&attribute_value, ::cudaDevAttrComputeCapabilityMinor, device));
     return attribute_value;
   }
@@ -226,7 +225,7 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMinor(JNIEnv
 JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_freeZero(JNIEnv *env, jclass) {
   try {
     cudf::jni::auto_set_device(env);
-    JNI_CUDA_TRY(env, , cudaFree(0));
+    CUDF_CUDA_TRY(cudaFree(0));
   }
   CATCH_STD(env, );
 }
@@ -237,7 +236,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_createStream(JNIEnv *env, jclas
     cudf::jni::auto_set_device(env);
     cudaStream_t stream = nullptr;
     auto flags = isNonBlocking ? cudaStreamNonBlocking : cudaStreamDefault;
-    JNI_CUDA_TRY(env, 0, cudaStreamCreateWithFlags(&stream, flags));
+    CUDF_CUDA_TRY(cudaStreamCreateWithFlags(&stream, flags));
     return reinterpret_cast<jlong>(stream);
   }
   CATCH_STD(env, 0);
@@ -247,7 +246,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_destroyStream(JNIEnv *env, jclas
   try {
     cudf::jni::auto_set_device(env);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
-    JNI_CUDA_TRY(env, , cudaStreamDestroy(stream));
+    CUDF_CUDA_TRY(cudaStreamDestroy(stream));
   }
   CATCH_STD(env, );
 }
@@ -258,7 +257,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_streamWaitEvent(JNIEnv *env, jcl
     cudf::jni::auto_set_device(env);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
     auto event = reinterpret_cast<cudaEvent_t>(jevent);
-    JNI_CUDA_TRY(env, , cudaStreamWaitEvent(stream, event, 0));
+    CUDF_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0));
   }
   CATCH_STD(env, );
 }
@@ -268,7 +267,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_streamSynchronize(JNIEnv *env, j
   try {
     cudf::jni::auto_set_device(env);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
-    JNI_CUDA_TRY(env, , cudaStreamSynchronize(stream));
+    CUDF_CUDA_TRY(cudaStreamSynchronize(stream));
   }
   CATCH_STD(env, );
 }
@@ -286,7 +285,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Cuda_createEvent(JNIEnv *env, jclass
     if (blockingSync) {
       flags = flags | cudaEventBlockingSync;
     }
-    JNI_CUDA_TRY(env, 0, cudaEventCreateWithFlags(&event, flags));
+    CUDF_CUDA_TRY(cudaEventCreateWithFlags(&event, flags));
     return reinterpret_cast<jlong>(event);
   }
   CATCH_STD(env, 0);
@@ -296,7 +295,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_destroyEvent(JNIEnv *env, jclass
   try {
     cudf::jni::auto_set_device(env);
     auto event = reinterpret_cast<cudaEvent_t>(jevent);
-    JNI_CUDA_TRY(env, , cudaEventDestroy(event));
+    CUDF_CUDA_TRY(cudaEventDestroy(event));
   }
   CATCH_STD(env, );
 }
@@ -311,7 +310,7 @@ JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_Cuda_eventQuery(JNIEnv *env, jcla
     } else if (result == cudaErrorNotReady) {
       return false;
     } // else
-    JNI_CUDA_TRY(env, false, result);
+    CUDF_CUDA_TRY(result);
   }
   CATCH_STD(env, false);
   return false;
@@ -323,7 +322,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_eventRecord(JNIEnv *env, jclass,
     cudf::jni::auto_set_device(env);
     auto event = reinterpret_cast<cudaEvent_t>(jevent);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
-    JNI_CUDA_TRY(env, , cudaEventRecord(event, stream));
+    CUDF_CUDA_TRY(cudaEventRecord(event, stream));
   }
   CATCH_STD(env, );
 }
@@ -333,7 +332,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_eventSynchronize(JNIEnv *env, jc
   try {
     cudf::jni::auto_set_device(env);
     auto event = reinterpret_cast<cudaEvent_t>(jevent);
-    JNI_CUDA_TRY(env, , cudaEventSynchronize(event));
+    CUDF_CUDA_TRY(cudaEventSynchronize(event));
   }
   CATCH_STD(env, );
 }
@@ -352,8 +351,8 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_memcpyOnStream(JNIEnv *env, jcla
     auto src = reinterpret_cast<void *>(jsrc);
     auto kind = static_cast<cudaMemcpyKind>(jkind);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
-    JNI_CUDA_TRY(env, , cudaMemcpyAsync(dst, src, count, kind, stream));
-    JNI_CUDA_TRY(env, , cudaStreamSynchronize(stream));
+    CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, count, kind, stream));
+    CUDF_CUDA_TRY(cudaStreamSynchronize(stream));
   }
   CATCH_STD(env, );
 }
@@ -372,7 +371,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_asyncMemcpyOnStream(JNIEnv *env,
     auto src = reinterpret_cast<void *>(jsrc);
     auto kind = static_cast<cudaMemcpyKind>(jkind);
     auto stream = reinterpret_cast<cudaStream_t>(jstream);
-    JNI_CUDA_TRY(env, , cudaMemcpyAsync(dst, src, count, kind, stream));
+    CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, count, kind, stream));
   }
   CATCH_STD(env, );
 }
diff --git a/java/src/test/java/ai/rapids/cudf/CudaTest.java b/java/src/test/java/ai/rapids/cudf/CudaTest.java
index 8905c2edd56..1a86dbb374d 100644
--- a/java/src/test/java/ai/rapids/cudf/CudaTest.java
+++ b/java/src/test/java/ai/rapids/cudf/CudaTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@
 
 import org.junit.jupiter.api.Test;
 
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class CudaTest {
 
@@ -32,4 +32,17 @@ public void testGetCudaRuntimeInfo() {
     assertEquals(Cuda.getNativeComputeMode(), Cuda.getComputeMode().nativeId);
   }
 
+  @Test
+  public void testCudaException() {
+    assertThrows(CudaException.class, () -> {
+          try {
+            Cuda.memset(Long.MAX_VALUE, (byte) 0, 1024);
+          } catch (CudaFatalException ignored) {
+          } catch (CudaException ex) {
+            assertEquals(CudaException.CudaError.cudaErrorInvalidValue, ex.cudaError);
+            throw ex;
+          }
+        }
+    );
+  }
 }