From 397b7c7ca23ddf85e199d5cc80aef55c811f1a20 Mon Sep 17 00:00:00 2001
From: Leo Fang
Date: Fri, 25 Oct 2024 20:25:03 +0000
Subject: [PATCH 1/7] support kernel launch with CUDA 11 driver

---
 cuda_core/cuda/core/experimental/_launcher.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_launcher.py b/cuda_core/cuda/core/experimental/_launcher.py
index 614fabcf..4951358d 100644
--- a/cuda_core/cuda/core/experimental/_launcher.py
+++ b/cuda_core/cuda/core/experimental/_launcher.py
@@ -19,6 +19,7 @@ class LaunchConfig:
     """
     """
+    # TODO: expand LaunchConfig to include other attributes
     grid: Union[tuple, int] = None
     block: Union[tuple, int] = None
     stream: Stream = None
@@ -67,24 +68,30 @@ def launch(kernel, config, *kernel_args):
     if not isinstance(kernel, Kernel):
         raise ValueError
     config = check_or_create_options(LaunchConfig, config, "launch config")
+    if config.stream is None:
+        raise CUDAError("stream cannot be None")
+    # TODO: can we ensure kernel_args is valid/safe to use here?
+    # TODO: merge with HelperKernelParams?
+    kernel_args = ParamHolder(kernel_args)
+    args_ptr = kernel_args.ptr
 
     driver_ver = handle_return(cuda.cuDriverGetVersion())
     if driver_ver >= 12000:
         drv_cfg = cuda.CUlaunchConfig()
         drv_cfg.gridDimX, drv_cfg.gridDimY, drv_cfg.gridDimZ = config.grid
         drv_cfg.blockDimX, drv_cfg.blockDimY, drv_cfg.blockDimZ = config.block
-        if config.stream is None:
-            raise CUDAError("stream cannot be None")
         drv_cfg.hStream = config.stream._handle
         drv_cfg.sharedMemBytes = config.shmem_size
-        drv_cfg.numAttrs = 0  # FIXME
-
-        # TODO: merge with HelperKernelParams?
-        kernel_args = ParamHolder(kernel_args)
-        args_ptr = kernel_args.ptr
-
+        drv_cfg.numAttrs = 0  # TODO
         handle_return(cuda.cuLaunchKernelEx(
             drv_cfg, int(kernel._handle), args_ptr, 0))
     else:
-        raise NotImplementedError("TODO")
+        # TODO: check if config has any unsupported attrs
+        handle_return(cuda.cuLaunchKernel(
+            int(kernel._handle),
+            *config.grid,
+            *config.block,
+            config.shmem_size,
+            config.stream._handle,
+            args_ptr, 0))

From b319731f05e15061133cd617732052154c624d46 Mon Sep 17 00:00:00 2001
From: Leo Fang
Date: Fri, 25 Oct 2024 14:00:42 -0700
Subject: [PATCH 2/7] fix module load for cuda-python 11.x

---
 cuda_core/cuda/core/experimental/_module.py | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index a179faf8..325c307a 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -2,22 +2,31 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
+import importlib.metadata
+
 from cuda import cuda, cudart
 from cuda.core.experimental._utils import handle_return
 
 
 _backend = {
-    "new": {
-        "file": cuda.cuLibraryLoadFromFile,
-        "data": cuda.cuLibraryLoadData,
-        "kernel": cuda.cuLibraryGetKernel,
-    },
     "old": {
         "file": cuda.cuModuleLoad,
         "data": cuda.cuModuleLoadDataEx,
         "kernel": cuda.cuModuleGetFunction,
     },
 }
+_kernel_ctypes = [cuda.CUfunction]
+
+# binding availability depends on cuda-python version
+py_major_ver = int(importlib.metadata.version("cuda-python").split(".")[0])
+if py_major_ver >= 12:
+    _backend["new"] = {
+        "file": cuda.cuLibraryLoadFromFile,
+        "data": cuda.cuLibraryLoadData,
+        "kernel": cuda.cuLibraryGetKernel,
+    }
+    _kernel_ctypes.append(cuda.CUkernel)
+_kernel_ctypes = tuple(_kernel_ctypes)
 
 
 class Kernel:
@@ -29,7 +38,7 @@ def __init__(self):
 
     @staticmethod
     def _from_obj(obj, mod):
-        assert isinstance(obj, (cuda.CUkernel, cuda.CUfunction))
+        assert isinstance(obj, _kernel_ctypes)
         assert isinstance(mod, ObjectCode)
         ker = Kernel.__new__(Kernel)
         ker._handle = obj
@@ -49,7 +58,10 @@ def __init__(self, module, code_type, jit_options=None, *,
         self._handle = None
 
         driver_ver = handle_return(cuda.cuDriverGetVersion())
-        self._loader = _backend["new"] if driver_ver >= 12000 else _backend["old"]
+        if py_major_ver >= 12 and driver_ver >= 12000:
+            self._loader = _backend["new"]
+        else:
+            self._loader = _backend["old"]
 
         if isinstance(module, str):
             if driver_ver < 12000 and jit_options is not None:
@@ -65,7 +77,7 @@ def __init__(self, module, code_type, jit_options=None, *,
                     # TODO: support library options
                     [], [], 0)
             else:
-                args = (module, len(jit_options), jit_options.keys(), jit_options.values())
+                args = (module, len(jit_options), list(jit_options.keys()), list(jit_options.values()))
             self._handle = handle_return(self._loader["data"](*args))
 
             self._code_type = code_type

From b64f3379c1c1599d506ba374374614bdc699ad30 Mon Sep 17 00:00:00 2001
From: Leo Fang
Date: Fri, 25 Oct 2024 18:17:01 -0700
Subject: [PATCH 3/7] simplify & fix module/library handling

---
 cuda_core/cuda/core/experimental/_module.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index 325c307a..a51ab24f 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -15,7 +15,6 @@
         "kernel": cuda.cuModuleGetFunction,
     },
 }
-_kernel_ctypes = [cuda.CUfunction]
 
 # binding availability depends on cuda-python version
 py_major_ver = int(importlib.metadata.version("cuda-python").split(".")[0])
@@ -25,8 +24,10 @@
         "data": cuda.cuLibraryLoadData,
         "kernel": cuda.cuLibraryGetKernel,
     }
-    _kernel_ctypes.append(cuda.CUkernel)
-_kernel_ctypes = tuple(_kernel_ctypes)
+    _kernel_ctypes = (cuda.CUfunction, cuda.CUkernel)
+else:
+    _kernel_ctypes = (cuda.CUfunction,)
+driver_ver = handle_return(cuda.cuDriverGetVersion())
 
 
 class Kernel:
@@ -45,6 +46,8 @@ def _from_obj(obj, mod):
         ker._module = mod
         return ker
 
+    # TODO: implement from_handle()
+
 
 class ObjectCode:
 
@@ -57,11 +60,8 @@ def __init__(self, module, code_type, jit_options=None, *,
             raise ValueError
 
         self._handle = None
-        driver_ver = handle_return(cuda.cuDriverGetVersion())
-        if py_major_ver >= 12 and driver_ver >= 12000:
-            self._loader = _backend["new"]
-        else:
-            self._loader = _backend["old"]
+        backend = "new" if (py_major_ver >= 12 and driver_ver >= 12000) else "old"
+        self._loader = _backend[backend]
 
         if isinstance(module, str):
             if driver_ver < 12000 and jit_options is not None:
@@ -72,11 +72,11 @@ def __init__(self, module, code_type, jit_options=None, *,
             assert isinstance(module, bytes)
             if jit_options is None:
                 jit_options = {}
-            if driver_ver >= 12000:
+            if backend == "new":
                 args = (module, list(jit_options.keys()), list(jit_options.values()), len(jit_options),
                         # TODO: support library options
                         [], [], 0)
-            else:
+            else:  # "old" backend
                 args = (module, len(jit_options), list(jit_options.keys()), list(jit_options.values()))
             self._handle = handle_return(self._loader["data"](*args))
 
@@ -95,3 +95,5 @@ def get_kernel(self, name):
             name = name.encode()
         data = handle_return(self._loader["kernel"](self._handle, name))
         return Kernel._from_obj(data, self)
+
+    # TODO: implement from_handle()

From 7587684f2d7f3a0d04636271cb5401cb3dbd3cf9 Mon Sep 17 00:00:00 2001
From: Leo Fang
Date: Fri, 25 Oct 2024 18:54:46 -0700
Subject: [PATCH 4/7] propagate py/driver ver check to launch

---
 cuda_core/cuda/core/experimental/_launcher.py |  9 ++++++---
 cuda_core/cuda/core/experimental/_module.py   | 10 ++++++----
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_launcher.py b/cuda_core/cuda/core/experimental/_launcher.py
index 4951358d..b1223365 100644
--- a/cuda_core/cuda/core/experimental/_launcher.py
+++ b/cuda_core/cuda/core/experimental/_launcher.py
@@ -76,8 +76,11 @@ def launch(kernel, config, *kernel_args):
     kernel_args = ParamHolder(kernel_args)
     args_ptr = kernel_args.ptr
 
-    driver_ver = handle_return(cuda.cuDriverGetVersion())
-    if driver_ver >= 12000:
+    # Note: CUkernel can still be launched via the old cuLaunchKernel. We check ._backend
+    # here not because of the CUfunction/CUkernel difference (which depends on whether the
+    # "old" or "new" module loading APIs are in use), but only as a proxy to check if
+    # both binding & driver versions support the "Ex" API, which is more feature rich.
+    if kernel._backend == "new":
         drv_cfg = cuda.CUlaunchConfig()
         drv_cfg.gridDimX, drv_cfg.gridDimY, drv_cfg.gridDimZ = config.grid
         drv_cfg.blockDimX, drv_cfg.blockDimY, drv_cfg.blockDimZ = config.block
@@ -86,7 +89,7 @@ def launch(kernel, config, *kernel_args):
         drv_cfg.numAttrs = 0  # TODO
         handle_return(cuda.cuLaunchKernelEx(
             drv_cfg, int(kernel._handle), args_ptr, 0))
-    else:
+    else:  # "old" backend
         # TODO: check if config has any unsupported attrs
         handle_return(cuda.cuLaunchKernel(
             int(kernel._handle),
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index a51ab24f..e5d0808f 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -32,18 +32,19 @@
 
 class Kernel:
 
-    __slots__ = ("_handle", "_module",)
+    __slots__ = ("_handle", "_module", "_backend")
 
     def __init__(self):
        raise NotImplementedError("directly constructing a Kernel instance is not supported")
 
     @staticmethod
-    def _from_obj(obj, mod):
+    def _from_obj(obj, mod, backend):
         assert isinstance(obj, _kernel_ctypes)
         assert isinstance(mod, ObjectCode)
         ker = Kernel.__new__(Kernel)
         ker._handle = obj
         ker._module = mod
+        ker._backend = backend
         return ker
 
     # TODO: implement from_handle()
@@ -52,7 +52,7 @@ def _from_obj(obj, mod):
 
 class ObjectCode:
 
-    __slots__ = ("_handle", "_code_type", "_module", "_loader", "_sym_map")
+    __slots__ = ("_handle", "_code_type", "_module", "_loader", "_loader_backend", "_sym_map")
     _supported_code_type = ("cubin", "ptx", "fatbin")
 
     def __init__(self, module, code_type, jit_options=None, *,
@@ -62,6 +63,7 @@ def __init__(self, module, code_type, jit_options=None, *,
 
         backend = "new" if (py_major_ver >= 12 and driver_ver >= 12000) else "old"
         self._loader = _backend[backend]
+        self._loader_backend = backend
 
         if isinstance(module, str):
             if driver_ver < 12000 and jit_options is not None:
@@ -94,6 +96,6 @@ def get_kernel(self, name):
         except KeyError:
             name = name.encode()
         data = handle_return(self._loader["kernel"](self._handle, name))
-        return Kernel._from_obj(data, self)
+        return Kernel._from_obj(data, self, self._loader_backend)
 
     # TODO: implement from_handle()

From 7fd8ccb85aa542be4f3d68b2dce03931eeef94d6 Mon Sep 17 00:00:00 2001
From: Leo Fang
Date: Fri, 25 Oct 2024 19:41:59 -0700
Subject: [PATCH 5/7] nit: cleaner treatment

---
 cuda_core/cuda/core/experimental/_launcher.py | 21 +++++++++++++++------
 cuda_core/cuda/core/experimental/_module.py   | 10 ++++------
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_launcher.py b/cuda_core/cuda/core/experimental/_launcher.py
index b1223365..c3af8866 100644
--- a/cuda_core/cuda/core/experimental/_launcher.py
+++ b/cuda_core/cuda/core/experimental/_launcher.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 from dataclasses import dataclass
+import importlib.metadata
 from typing import Optional, Union
 
 import numpy as np
@@ -64,6 +65,13 @@ def _cast_to_3_tuple(self, cfg):
             raise ValueError
 
 
+# binding availability depends on cuda-python version
+py_major_minor = tuple(int(v) for v in (
+    importlib.metadata.version("cuda-python").split(".")[:2]))
+driver_ver = handle_return(cuda.cuDriverGetVersion())
+use_ex = (driver_ver >= 11080) and (py_major_minor >= (11, 8))
+
+
 def launch(kernel, config, *kernel_args):
     if not isinstance(kernel, Kernel):
         raise ValueError
@@ -76,11 +84,12 @@ def launch(kernel, config, *kernel_args):
     kernel_args = ParamHolder(kernel_args)
     args_ptr = kernel_args.ptr
 
-    # Note: CUkernel can still be launched via the old cuLaunchKernel. We check ._backend
-    # here not because of the CUfunction/CUkernel difference (which depends on whether the
-    # "old" or "new" module loading APIs are in use), but only as a proxy to check if
-    # both binding & driver versions support the "Ex" API, which is more feature rich.
-    if kernel._backend == "new":
+    # Note: CUkernel can still be launched via the old cuLaunchKernel and we do not care
+    # about the CUfunction/CUkernel difference (which depends on whether the "old" or
+    # "new" module loading APIs are in use). We check both binding & driver versions here
+    # mainly to see if the "Ex" API is available and if so we use it, as it's more feature
+    # rich.
+    if use_ex:
         drv_cfg = cuda.CUlaunchConfig()
         drv_cfg.gridDimX, drv_cfg.gridDimY, drv_cfg.gridDimZ = config.grid
         drv_cfg.blockDimX, drv_cfg.blockDimY, drv_cfg.blockDimZ = config.block
@@ -89,7 +98,7 @@ def launch(kernel, config, *kernel_args):
         drv_cfg.numAttrs = 0  # TODO
         handle_return(cuda.cuLaunchKernelEx(
             drv_cfg, int(kernel._handle), args_ptr, 0))
-    else:  # "old" backend
+    else:
         # TODO: check if config has any unsupported attrs
         handle_return(cuda.cuLaunchKernel(
             int(kernel._handle),
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index e5d0808f..a51ab24f 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -32,19 +32,18 @@
 
 class Kernel:
 
-    __slots__ = ("_handle", "_module", "_backend")
+    __slots__ = ("_handle", "_module",)
 
     def __init__(self):
         raise NotImplementedError("directly constructing a Kernel instance is not supported")
 
     @staticmethod
-    def _from_obj(obj, mod, backend):
+    def _from_obj(obj, mod):
         assert isinstance(obj, _kernel_ctypes)
         assert isinstance(mod, ObjectCode)
         ker = Kernel.__new__(Kernel)
         ker._handle = obj
         ker._module = mod
-        ker._backend = backend
         return ker
 
     # TODO: implement from_handle()
@@ -52,7 +51,7 @@ def _from_obj(obj, mod):
 
 class ObjectCode:
 
-    __slots__ = ("_handle", "_code_type", "_module", "_loader", "_loader_backend", "_sym_map")
+    __slots__ = ("_handle", "_code_type", "_module", "_loader", "_sym_map")
     _supported_code_type = ("cubin", "ptx", "fatbin")
 
     def __init__(self, module, code_type, jit_options=None, *,
@@ -63,7 +62,6 @@ def __init__(self, module, code_type, jit_options=None, *,
 
         backend = "new" if (py_major_ver >= 12 and driver_ver >= 12000) else "old"
         self._loader = _backend[backend]
-        self._loader_backend = backend
 
         if isinstance(module, str):
             if driver_ver < 12000 and jit_options is not None:
@@ -96,6 +94,6 @@ def get_kernel(self, name):
         except KeyError:
             name = name.encode()
         data = handle_return(self._loader["kernel"](self._handle, name))
-        return Kernel._from_obj(data, self, self._loader_backend)
+        return Kernel._from_obj(data, self)
 
     # TODO: implement from_handle()

From b634d700141fb1c754a87e66bcaaed420d4bd887 Mon Sep 17 00:00:00 2001
From: Leo Fang
Date: Mon, 28 Oct 2024 04:00:48 +0000
Subject: [PATCH 6/7] try to defer driver loading

---
 cuda_core/cuda/core/experimental/_launcher.py | 30 ++++++++++++++++++--------
 cuda_core/cuda/core/experimental/_module.py   | 44 ++++++++++++++++++---------
 2 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_launcher.py b/cuda_core/cuda/core/experimental/_launcher.py
index c3af8866..4b9533cb 100644
--- a/cuda_core/cuda/core/experimental/_launcher.py
+++ b/cuda_core/cuda/core/experimental/_launcher.py
@@ -16,6 +16,25 @@
 from cuda.core.experimental._utils import CUDAError, check_or_create_options, handle_return
 
 
+# TODO: revisit this treatment for py313t builds
+_inited = False
+_use_ex = None
+
+
+def _lazy_init():
+    global _inited
+    if _inited:
+        return
+
+    global _use_ex
+    # binding availability depends on cuda-python version
+    _py_major_minor = tuple(int(v) for v in (
+        importlib.metadata.version("cuda-python").split(".")[:2]))
+    _driver_ver = handle_return(cuda.cuDriverGetVersion())
+    _use_ex = (_driver_ver >= 11080) and (_py_major_minor >= (11, 8))
+    _inited = True
+
+
 @dataclass
 class LaunchConfig:
     """
@@ -41,6 +60,8 @@ def __post_init__(self):
         if self.shmem_size is None:
             self.shmem_size = 0
 
+        _lazy_init()
+
     def _cast_to_3_tuple(self, cfg):
         if isinstance(cfg, int):
             if cfg < 1:
@@ -65,13 +86,6 @@ def _cast_to_3_tuple(self, cfg):
             raise ValueError
 
 
-# binding availability depends on cuda-python version
-py_major_minor = tuple(int(v) for v in (
-    importlib.metadata.version("cuda-python").split(".")[:2]))
-driver_ver = handle_return(cuda.cuDriverGetVersion())
-use_ex = (driver_ver >= 11080) and (py_major_minor >= (11, 8))
-
-
 def launch(kernel, config, *kernel_args):
     if not isinstance(kernel, Kernel):
         raise ValueError
@@ -89,7 +103,7 @@ def launch(kernel, config, *kernel_args):
     # Note: CUkernel can still be launched via the old cuLaunchKernel and we do not care
     # about the CUfunction/CUkernel difference (which depends on whether the "old" or
     # "new" module loading APIs are in use). We check both binding & driver versions here
     # mainly to see if the "Ex" API is available and if so we use it, as it's more feature
     # rich.
-    if use_ex:
+    if _use_ex:
         drv_cfg = cuda.CUlaunchConfig()
         drv_cfg.gridDimX, drv_cfg.gridDimY, drv_cfg.gridDimZ = config.grid
         drv_cfg.blockDimX, drv_cfg.blockDimY, drv_cfg.blockDimZ = config.block
diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index a51ab24f..2e1ca6e9 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -16,18 +16,33 @@
     },
 }
 
-# binding availability depends on cuda-python version
-py_major_ver = int(importlib.metadata.version("cuda-python").split(".")[0])
-if py_major_ver >= 12:
-    _backend["new"] = {
-        "file": cuda.cuLibraryLoadFromFile,
-        "data": cuda.cuLibraryLoadData,
-        "kernel": cuda.cuLibraryGetKernel,
-    }
-    _kernel_ctypes = (cuda.CUfunction, cuda.CUkernel)
-else:
-    _kernel_ctypes = (cuda.CUfunction,)
-driver_ver = handle_return(cuda.cuDriverGetVersion())
+
+# TODO: revisit this treatment for py313t builds
+_inited = False
+_py_major_ver = None
+_driver_ver = None
+_kernel_ctypes = None
+
+
+def _lazy_init():
+    global _inited
+    if _inited:
+        return
+
+    global _py_major_ver, _driver_ver, _kernel_ctypes
+    # binding availability depends on cuda-python version
+    _py_major_ver = int(importlib.metadata.version("cuda-python").split(".")[0])
+    if _py_major_ver >= 12:
+        _backend["new"] = {
+            "file": cuda.cuLibraryLoadFromFile,
+            "data": cuda.cuLibraryLoadData,
+            "kernel": cuda.cuLibraryGetKernel,
+        }
+        _kernel_ctypes = (cuda.CUfunction, cuda.CUkernel)
+    else:
+        _kernel_ctypes = (cuda.CUfunction,)
+    _driver_ver = handle_return(cuda.cuDriverGetVersion())
+    _inited = True
 
 
 class Kernel:
@@ -58,13 +73,14 @@ def __init__(self, module, code_type, jit_options=None, *,
                  symbol_mapping=None):
         if code_type not in self._supported_code_type:
             raise ValueError
+        _lazy_init()
 
         self._handle = None
-        backend = "new" if (py_major_ver >= 12 and driver_ver >= 12000) else "old"
+        backend = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
         self._loader = _backend[backend]
 
         if isinstance(module, str):
-            if driver_ver < 12000 and jit_options is not None:
+            if _driver_ver < 12000 and jit_options is not None:
                 raise ValueError
             module = module.encode()
             self._handle = handle_return(self._loader["file"](module))

From 74de685475cdc1c0d114de10552a94e9de88f53b Mon Sep 17 00:00:00 2001
From: Leo Fang
Date: Mon, 28 Oct 2024 18:07:47 -0400
Subject: [PATCH 7/7] ignore jit_options unconditionally for now when loading
 from a file

---
 cuda_core/cuda/core/experimental/_module.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index 2e1ca6e9..60d4db97 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -80,7 +80,9 @@ def __init__(self, module, code_type, jit_options=None, *,
         self._loader = _backend[backend]
 
         if isinstance(module, str):
-            if _driver_ver < 12000 and jit_options is not None:
+            # TODO: this option is only taken by the new library APIs, but we have
+            # a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
+            if jit_options is not None:
                 raise ValueError
             module = module.encode()
             self._handle = handle_return(self._loader["file"](module))
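
Reviewer note (appended; not part of the commits): below is a minimal end-to-end
sketch for exercising both code paths this series touches. It assumes the public
cuda.core.experimental namespace exports Device, LaunchConfig, Program, and launch
as in this tree; the no-op kernel and the compile options are illustrative, not
part of the patches. With cuda-python 11.x bindings (or a pre-12.0 driver) the
launch should go through cuLaunchKernel and module loading through the "old"
cuModule* backend; with 12.x bindings on an R12+ driver it should take the
cuLibrary* loader and the more feature-rich cuLaunchKernelEx.

    from cuda.core.experimental import Device, LaunchConfig, Program, launch

    # set up a device and stream; constructing LaunchConfig runs _lazy_init()
    dev = Device()
    dev.set_current()
    s = dev.create_stream()

    # a kernel with no arguments keeps the sketch free of memory management
    prog = Program('extern "C" __global__ void noop() {}', code_type="c++")
    arch = "".join(str(i) for i in dev.compute_capability)
    mod = prog.compile("cubin", options=(f"-arch=sm_{arch}",))  # -> ObjectCode
    ker = mod.get_kernel("noop")  # CUfunction or CUkernel depending on the backend

    config = LaunchConfig(grid=1, block=1, stream=s)
    launch(ker, config)  # dispatches per _use_ex to cuLaunchKernel or cuLaunchKernelEx
    s.sync()

Pinning cuda-python to 11.x in a separate test environment is the quickest way to
A/B the "old" path against the "new" one on the same machine.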