diff --git a/pkgs/applications/misc/blender/default.nix b/pkgs/applications/misc/blender/default.nix
index 24ea7287160b7..bdfd867f55675 100644
--- a/pkgs/applications/misc/blender/default.nix
+++ b/pkgs/applications/misc/blender/default.nix
@@ -52,7 +52,10 @@ stdenv.mkDerivation (finalAttrs: rec {
   nativeBuildInputs =
     [ cmake makeWrapper python310Packages.wrapPython llvmPackages.llvm.dev
     ]
-    ++ lib.optionals cudaSupport [ addOpenGLRunpath ]
+    ++ lib.optionals cudaSupport [
+      addOpenGLRunpath
+      cudaPackages.cuda_nvcc
+    ]
     ++ lib.optionals waylandSupport [ pkg-config ];
   buildInputs =
     [ boost ffmpeg gettext glew ilmbase
@@ -87,7 +90,7 @@ stdenv.mkDerivation (finalAttrs: rec {
       llvmPackages.openmp SDL Cocoa CoreGraphics ForceFeedback OpenAL OpenGL
     ])
     ++ lib.optional jackaudioSupport libjack2
-    ++ lib.optional cudaSupport cudaPackages.cudatoolkit
+    ++ lib.optionals cudaSupport [ cudaPackages.cuda_cudart ]
     ++ lib.optional colladaSupport opencollada
     ++ lib.optional spaceNavSupport libspnav;
   pythonPath = with python310Packages; [ numpy requests zstandard ];
diff --git a/pkgs/applications/science/molecular-dynamics/gromacs/default.nix b/pkgs/applications/science/molecular-dynamics/gromacs/default.nix
index e669562c9cc75..1daeeb81410a0 100644
--- a/pkgs/applications/science/molecular-dynamics/gromacs/default.nix
+++ b/pkgs/applications/science/molecular-dynamics/gromacs/default.nix
@@ -1,4 +1,14 @@
-{ lib, stdenv, fetchurl, cmake, hwloc, fftw, perl, blas, lapack, mpi, cudatoolkit
+{ lib
+, stdenv
+, fetchurl
+, cmake
+, hwloc
+, fftw
+, perl
+, blas
+, lapack
+, mpi
+, cudaPackages
 , plumed
 , singlePrec ? true
 , config
@@ -9,6 +19,8 @@
 }:
 
 let
+  inherit (cudaPackages.cudaFlags) cudaCapabilities dropDot;
+
   # Select reasonable defaults for all major platforms
   # The possible values are defined in CMakeLists.txt:
   # AUTO None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256
@@ -52,7 +64,7 @@ in stdenv.mkDerivation rec {
   nativeBuildInputs =
     [ cmake ]
     ++ lib.optional enablePlumed plumed
-    ;
+    ++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ];
 
   buildInputs = [
     fftw
@@ -61,13 +73,17 @@ in stdenv.mkDerivation rec {
     blas
     lapack
   ] ++ lib.optional enableMpi mpi
-    ++ lib.optional enableCuda cudatoolkit
-  ;
+  ++ lib.optionals enableCuda [
+    cudaPackages.cuda_cudart
+    cudaPackages.libcufft
+    cudaPackages.cuda_profiler_api
+  ];
 
   propagatedBuildInputs = lib.optional enableMpi mpi;
   propagatedUserEnvPkgs = lib.optional enableMpi mpi;
 
   cmakeFlags = [
+    (lib.cmakeBool "GMX_HWLOC" true)
     "-DGMX_SIMD:STRING=${SIMD cpuAcceleration}"
     "-DGMX_OPENMP:BOOL=TRUE"
     "-DBUILD_SHARED_LIBS=ON"
@@ -87,7 +103,13 @@ in stdenv.mkDerivation rec {
      else [
        "-DGMX_MPI:BOOL=FALSE"
      ]
-  ) ++ lib.optional enableCuda "-DGMX_GPU=CUDA";
+  ) ++ lib.optionals enableCuda [
+    "-DGMX_GPU=CUDA"
+    (lib.cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (builtins.concatStringsSep ";" (map dropDot cudaCapabilities)))
+
+    # Gromacs seems to ignore and override the normal variables, so we add this ad hoc:
+    (lib.cmakeFeature "GMX_CUDA_TARGET_COMPUTE" (builtins.concatStringsSep ";" (map dropDot cudaCapabilities)))
+  ];
 
   postInstall = ''
     moveToOutput share/cmake $dev
diff --git a/pkgs/development/compilers/cudatoolkit/extension.nix b/pkgs/development/compilers/cudatoolkit/extension.nix
index 93800a0dbc6b1..d75d288f5577e 100644
--- a/pkgs/development/compilers/cudatoolkit/extension.nix
+++ b/pkgs/development/compilers/cudatoolkit/extension.nix
@@ -54,11 +54,28 @@ final: prev: let
         {
           name = "setup-cuda-hook";
 
+          # Point NVCC at a compatible compiler
           substitutions.ccRoot = "${backendStdenv.cc}";
 
           # Required in addition to ccRoot as otherwise bin/gcc is looked up
           # when building CMakeCUDACompilerId.cu
           substitutions.ccFullPath = "${backendStdenv.cc}/bin/${backendStdenv.cc.targetPrefix}c++";
+
+          # Required by cmake's enable_language(CUDA) to build a test program
+          # When implementing cross-compilation support: this is
+          # final.pkgs.targetPackages.cudaPackages.cuda_cudart
+          # Given the multiple-outputs each CUDA redist has, we can specify the exact components we
+          # need from the package. CMake requires:
+          # - the cuda_runtime.h header, which is in the dev output
+          # - the dynamic library, which is in the lib output
+          # - the static library, which is in the static output
+          substitutions.cudartFlags = let cudart = final.cuda_cudart; in
+            builtins.concatStringsSep " " (final.lib.optionals (final ? cuda_cudart) ([
+              "-I${final.lib.getDev cudart}/include"
+              "-L${final.lib.getLib cudart}/lib"
+            ] ++ final.lib.optionals (builtins.elem "static" cudart.outputs) [
+              "-L${cudart.static}/lib"
+            ]));
         }
         ./hooks/setup-cuda-hook.sh)
     { });
diff --git a/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh
index 5ea57594211c4..0fa8883081c50 100644
--- a/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh
+++ b/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh
@@ -1,5 +1,8 @@
 # shellcheck shell=bash
 
+# Only run the hook from nativeBuildInputs
+(( "$hostOffset" == -1 && "$targetOffset" == 0)) || return 0
+
 echo Sourcing setup-cuda-hook >&2
 
 extendCUDAToolkit_ROOT() {
@@ -55,8 +58,9 @@ setupCUDAToolkitCompilers() {
 
     # CMake's enable_language(CUDA) runs a compiler test and it doesn't account for
     # CUDAToolkit_ROOT. We have to help it locate libcudart
-    if [[ -z "${nvccDontPrependCudartFlags-}" ]] ; then
-        export NVCC_APPEND_FLAGS+=" -L@cudartLib@/lib -L@cudartStatic@/lib -I@cudartInclude@/include"
+    local cudartFlags="@cudartFlags@"
+    if [[ -z "${nvccDontPrependCudartFlags-}" ]] && [[ -n "${cudartFlags:-}" ]] ; then
+        export NVCC_APPEND_FLAGS+=" $cudartFlags"
     fi
 }
 
diff --git a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
index a0ac0b0fcb1fb..71e70e8d7b704 100644
--- a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
+++ b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
@@ -51,37 +51,14 @@ in
     ]
   );
 
-  cuda_nvcc = prev.cuda_nvcc.overrideAttrs (_: {
-    # Required by cmake's enable_language(CUDA) to build a test program
-    # When implementing cross-compilation support: this is
-    # final.pkgs.targetPackages.cudaPackages.cuda_cudart
-    env = {
-      # Given the multiple-outputs each CUDA redist has, we can specify the exact components we
-      # need from the package. CMake requires:
-      # - the cuda_runtime.h header, which is in the dev output
-      # - the dynamic library, which is in the lib output
-      # - the static library, which is in the static output
-      cudartInclude = "${final.cuda_cudart.dev}";
-      cudartLib = "${final.cuda_cudart.lib}";
-      cudartStatic = "${final.cuda_cudart.static}";
-    };
-
-    # Point NVCC at a compatible compiler
-
-    # Desiredata: whenever a package (e.g. magma) adds cuda_nvcc to
-    # nativeBuildInputs (offsets `(-1, 0)`), magma should also source the
-    # setupCudaHook, i.e. we want it the hook to be propagated into the
-    # same nativeBuildInputs.
-    #
-    # Logically, cuda_nvcc should include the hook in depsHostHostPropagated,
-    # so that the final offsets for the propagated hook would be `(-1, 0) +
-    # (0, 0) = (-1, 0)`.
-    #
-    # In practice, TargetTarget appears to work:
-    # https://gist.github.com/fd80ff142cd25e64603618a3700e7f82
-    depsTargetTargetPropagated = [
+  cuda_nvcc = prev.cuda_nvcc.overrideAttrs (oldAttrs: {
+    propagatedBuildInputs = [
       final.setupCudaHook
     ];
+
+    meta = (oldAttrs.meta or { }) // {
+      mainProgram = "nvcc";
+    };
   });
 
   cuda_nvprof = prev.cuda_nvprof.overrideAttrs (oldAttrs: {
diff --git a/pkgs/development/compilers/cudatoolkit/saxpy/default.nix b/pkgs/development/compilers/cudatoolkit/saxpy/default.nix
index f347b43d1d11c..2da6da29004dc 100644
--- a/pkgs/development/compilers/cudatoolkit/saxpy/default.nix
+++ b/pkgs/development/compilers/cudatoolkit/saxpy/default.nix
@@ -1,12 +1,13 @@
 { autoAddOpenGLRunpathHook
 , backendStdenv
 , cmake
-, cuda_cccl
-, cuda_cudart
+, cuda_cccl ? null
+, cuda_cudart ? null
 , cudaFlags
-, cuda_nvcc
+, cuda_nvcc ? null
+, cudatoolkit ? null
 , lib
-, libcublas
+, libcublas ? null
 , setupCudaHook
 , stdenv
 }:
@@ -17,23 +18,24 @@ backendStdenv.mkDerivation {
 
   src = ./.;
 
-  buildInputs = [
+  buildInputs = lib.optionals (cuda_cudart != null) [
     libcublas
     cuda_cudart
     cuda_cccl
+  ] ++ lib.optionals (cuda_cudart == null) [
+    cudatoolkit
   ];
   nativeBuildInputs = [
     cmake
 
-    # NOTE: this needs to be pkgs.buildPackages.cudaPackages_XX_Y.cuda_nvcc for
-    # cross-compilation to work. This should work automatically once we move to
-    # spliced scopes. Delete this comment once that happens
-    cuda_nvcc
-
     # Alternatively, we could remove the propagated hook from cuda_nvcc and add
     # directly:
     # setupCudaHook
     autoAddOpenGLRunpathHook
+  ] ++ lib.optionals (cuda_nvcc != null) [
+    cuda_nvcc
+  ] ++ lib.optionals (cuda_nvcc == null) [
+    cudatoolkit
   ];
 
   cmakeFlags = [
diff --git a/pkgs/development/libraries/ctranslate2/default.nix b/pkgs/development/libraries/ctranslate2/default.nix
index 58906b608231d..324a11a6c7a2c 100644
--- a/pkgs/development/libraries/ctranslate2/default.nix
+++ b/pkgs/development/libraries/ctranslate2/default.nix
@@ -57,6 +57,7 @@ stdenv.mkDerivation rec {
   buildInputs = lib.optionals withMkl [
     mkl
   ] ++ lib.optionals withCUDA [
+    cudaPackages.cuda_cccl # <nv/target> required by the fp16 headers in cudart
     cudaPackages.cuda_cudart
     cudaPackages.libcublas
     cudaPackages.libcurand
diff --git a/pkgs/development/libraries/hwloc/default.nix b/pkgs/development/libraries/hwloc/default.nix
index 67048167d6bfa..626d0b7cca949 100644
--- a/pkgs/development/libraries/hwloc/default.nix
+++ b/pkgs/development/libraries/hwloc/default.nix
@@ -22,12 +22,13 @@ stdenv.mkDerivation rec {
   ];
 
   # XXX: libX11 is not directly needed, but needed as a propagated dep of Cairo.
-  nativeBuildInputs = [ pkg-config ];
+  nativeBuildInputs = [ pkg-config ]
+  ++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ];
 
   buildInputs = [ expat ncurses ]
     ++ lib.optionals x11Support [ cairo libX11 ]
     ++ lib.optionals stdenv.isLinux [ numactl ]
-    ++ lib.optional enableCuda cudaPackages.cudatoolkit;
+    ++ lib.optionals enableCuda [ cudaPackages.cuda_cudart ];
 
   # Since `libpci' appears in `hwloc.pc', it must be propagated.
   propagatedBuildInputs = lib.optional stdenv.isLinux pciutils;
diff --git a/pkgs/development/libraries/nvidia-thrust/default.nix b/pkgs/development/libraries/nvidia-thrust/default.nix
deleted file mode 100644
index f68b57f193b79..0000000000000
--- a/pkgs/development/libraries/nvidia-thrust/default.nix
+++ /dev/null
@@ -1,102 +0,0 @@
-{ lib
-, config
-, fetchFromGitHub
-, stdenv
-, cmake
-, pkg-config
-, cudaPackages ? { }
-, symlinkJoin
-, tbb
-, hostSystem ? "CPP"
-, deviceSystem ? if config.cudaSupport then "CUDA" else "OMP"
-}:
-
-# Policy for device_vector<T>
-assert builtins.elem deviceSystem [
-  "CPP" # Serial on CPU
-  "OMP" # Parallel with OpenMP
-  "TBB" # Parallel with Intel TBB
-  "CUDA" # Parallel on GPU
-];
-
-# Policy for host_vector<T>
-# Always lives on CPU, but execution can be made parallel
-assert builtins.elem hostSystem [ "CPP" "OMP" "TBB" ];
-
-let
-  pname = "nvidia-thrust";
-  version = "1.16.0";
-
-  inherit (cudaPackages) backendStdenv cudaFlags;
-  cudaCapabilities = map cudaFlags.dropDot cudaFlags.cudaCapabilities;
-
-  tbbSupport = builtins.elem "TBB" [ deviceSystem hostSystem ];
-  cudaSupport = deviceSystem == "CUDA";
-
-  # TODO: Would like to use this:
-  cudaJoined = symlinkJoin {
-    name = "cuda-packages-unsplit";
-    paths = with cudaPackages; [
-      cuda_nvcc
-      cuda_nvrtc # symbols: cudaLaunchDevice, &c; notice postBuild
-      cuda_cudart # cuda_runtime.h
-      libcublas
-    ];
-    postBuild = ''
-      ln -s $out/lib $out/lib64
-    '';
-  };
-in
-stdenv.mkDerivation {
-  inherit pname version;
-
-  src = fetchFromGitHub {
-    owner = "NVIDIA";
-    repo = "thrust";
-    rev = version;
-    fetchSubmodules = true;
-    hash = "sha256-/EyznxWKuHuvHNjq+SQg27IaRbtkjXR2zlo2YgCWmUQ=";
-  };
-
-  # NVIDIA's "compiler hacks" seem like work-arounds for legacy toolchains and
-  # cause us errors such as:
-  # > Thrust's test harness uses CMAKE_CXX_COMPILER for the CUDA host compiler.
-  # > Refusing to overwrite specified CMAKE_CUDA_HOST_COMPILER
-  # So we un-fix cmake after them:
-  postPatch = ''
-    echo > cmake/ThrustCompilerHacks.cmake
-  '';
-
-  buildInputs = lib.optionals tbbSupport [ tbb ];
-
-  nativeBuildInputs = [
-    cmake
-    pkg-config
-  ] ++ lib.optionals cudaSupport [
-    # Goes in native build inputs because thrust looks for headers
-    # in a path relative to nvcc...
-    cudaJoined
-  ];
-
-  cmakeFlags = [
-    "-DTHRUST_INCLUDE_CUB_CMAKE=${if cudaSupport then "ON" else "OFF"}"
-    "-DTHRUST_DEVICE_SYSTEM=${deviceSystem}"
-    "-DTHRUST_HOST_SYSTEM=${hostSystem}"
-    "-DTHRUST_AUTO_DETECT_COMPUTE_ARCHS=OFF"
-    "-DTHRUST_DISABLE_ARCH_BY_DEFAULT=ON"
-  ] ++ lib.optionals cudaFlags.enableForwardCompat [
-    "-DTHRUST_ENABLE_COMPUTE_FUTURE=ON"
-  ] ++ map (sm: "THRUST_ENABLE_COMPUTE_${sm}") cudaCapabilities;
-
-  passthru = {
-    inherit cudaSupport cudaPackages cudaJoined;
-  };
-
-  meta = with lib; {
-    description = "A high-level C++ parallel algorithms library that builds on top of CUDA, TBB, OpenMP, etc";
-    homepage = "https://github.com/NVIDIA/thrust";
-    license = licenses.asl20;
-    platforms = platforms.unix;
-    maintainers = with maintainers; [ SomeoneSerge ];
-  };
-}
diff --git a/pkgs/development/libraries/opencv/4.x.nix b/pkgs/development/libraries/opencv/4.x.nix
index 06360449c1ba9..4c1b13d1309e0 100644
--- a/pkgs/development/libraries/opencv/4.x.nix
+++ b/pkgs/development/libraries/opencv/4.x.nix
@@ -476,6 +476,8 @@ effectiveStdenv.mkDerivation {
   '';
 
   passthru = {
+    cudaSupport = enableCuda;
+
     tests = {
       inherit (gst_all_1) gst-plugins-bad;
     }
diff --git a/pkgs/development/libraries/openmpi/default.nix b/pkgs/development/libraries/openmpi/default.nix
index 1c4955e2c51a4..a8bd8acacd595 100644
--- a/pkgs/development/libraries/openmpi/default.nix
+++ b/pkgs/development/libraries/openmpi/default.nix
@@ -3,7 +3,7 @@
 , libpsm2, libfabric, pmix, ucx, ucc
 , config
 # Enable CUDA support
-, cudaSupport ? config.cudaSupport, cudatoolkit
+, cudaSupport ? config.cudaSupport, cudaPackages
 
 # Enable the Sun Grid Engine bindings
 , enableSGE ? false
@@ -18,12 +18,7 @@
 , fortranSupport ? true
 }:
 
-let
-  cudatoolkit_joined = symlinkJoin {
-    name = "${cudatoolkit.name}-unsplit";
-    paths = [ cudatoolkit.out cudatoolkit.lib ];
-  };
-in stdenv.mkDerivation rec {
+stdenv.mkDerivation rec {
   pname = "openmpi";
   version = "4.1.6";
 
@@ -47,12 +42,13 @@ in stdenv.mkDerivation rec {
 
   buildInputs = [ zlib ]
     ++ lib.optionals stdenv.isLinux [ libnl numactl pmix ucx ucc ]
-    ++ lib.optionals cudaSupport [ cudatoolkit ]
+    ++ lib.optionals cudaSupport [ cudaPackages.cuda_cudart ]
     ++ [ libevent hwloc ]
     ++ lib.optional (stdenv.isLinux || stdenv.isFreeBSD) rdma-core
     ++ lib.optionals fabricSupport [ libpsm2 libfabric ];
 
   nativeBuildInputs = [ perl ]
+    ++ lib.optionals cudaSupport [ cudaPackages.cuda_nvcc ]
     ++ lib.optionals fortranSupport [ gfortran ];
 
   configureFlags = lib.optional (!cudaSupport) "--disable-mca-dso"
@@ -67,7 +63,7 @@ in stdenv.mkDerivation rec {
     # TODO: add UCX support, which is recommended to use with cuda for the most robust OpenMPI build
     # https://github.com/openucx/ucx
     # https://www.open-mpi.org/faq/?category=buildcuda
-    ++ lib.optionals cudaSupport [ "--with-cuda=${cudatoolkit_joined}" "--enable-dlopen" ]
+    ++ lib.optionals cudaSupport [ "--with-cuda=${cudaPackages.cuda_cudart}" "--enable-dlopen" ]
     ++ lib.optionals fabricSupport [ "--with-psm2=${lib.getDev libpsm2}" "--with-libfabric=${lib.getDev libfabric}" ]
     ;
 
@@ -98,7 +94,8 @@ in stdenv.mkDerivation rec {
   doCheck = true;
 
   passthru = {
-    inherit cudaSupport cudatoolkit;
+    inherit cudaSupport;
+    cudatoolkit = cudaPackages.cudatoolkit; # For backward compatibility only
   };
 
   meta = with lib; {
diff --git a/pkgs/development/libraries/opensubdiv/default.nix b/pkgs/development/libraries/opensubdiv/default.nix
index b0ff4b528864b..b946d8ab718ae 100644
--- a/pkgs/development/libraries/opensubdiv/default.nix
+++ b/pkgs/development/libraries/opensubdiv/default.nix
@@ -1,9 +1,7 @@
 { config, lib, stdenv, fetchFromGitHub, cmake, pkg-config, xorg, libGLU
 , libGL, glew, ocl-icd, python3
-, cudaSupport ? config.cudaSupport, cudatoolkit
-  # For visibility mostly. The whole approach to cuda architectures and capabilities
-  # will be reworked soon.
-, cudaArch ? "compute_37"
+, cudaSupport ? config.cudaSupport
+, cudaPackages
 , openclSupport ? !cudaSupport
 , darwin
 }:
@@ -21,7 +19,11 @@ stdenv.mkDerivation rec {
 
   outputs = [ "out" "dev" ];
 
-  nativeBuildInputs = [ cmake pkg-config ];
+  nativeBuildInputs = [
+    cmake
+    pkg-config
+    cudaPackages.cuda_nvcc
+  ];
   buildInputs =
     [ libGLU libGL python3
       # FIXME: these are not actually needed, but the configure script wants them.
@@ -30,21 +32,31 @@ stdenv.mkDerivation rec {
     ]
     ++ lib.optional (openclSupport && !stdenv.isDarwin) ocl-icd
     ++ lib.optionals stdenv.isDarwin (with darwin.apple_sdk.frameworks; [OpenCL Cocoa CoreVideo IOKit AppKit AGL ])
-    ++ lib.optional cudaSupport cudatoolkit;
+    ++ lib.optional cudaSupport [
+      cudaPackages.cuda_cudart
+    ];
+
+  # It's important to set OSD_CUDA_NVCC_FLAGS,
+  # because otherwise OSD might piggyback unwanted architectures:
+  # https://github.com/PixarAnimationStudios/OpenSubdiv/blob/7d0ab5530feef693ac0a920585b5c663b80773b3/CMakeLists.txt#L602
+  preConfigure = lib.optionalString cudaSupport ''
+    cmakeFlagsArray+=(
+      -DOSD_CUDA_NVCC_FLAGS="${lib.concatStringsSep " " cudaPackages.cudaFlags.gencode}"
+    )
+  '';
 
   cmakeFlags =
     [ "-DNO_TUTORIALS=1"
       "-DNO_REGRESSION=1"
       "-DNO_EXAMPLES=1"
       "-DNO_METAL=1" # don’t have metal in apple sdk
+      (lib.cmakeBool "NO_OPENCL" (!openclSupport))
+      (lib.cmakeBool "NO_CUDA" (!cudaSupport))
     ] ++ lib.optionals (!stdenv.isDarwin) [
       "-DGLEW_INCLUDE_DIR=${glew.dev}/include"
       "-DGLEW_LIBRARY=${glew.dev}/lib"
     ] ++ lib.optionals cudaSupport [
-      "-DOSD_CUDA_NVCC_FLAGS=--gpu-architecture=${cudaArch}"
-      "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
     ] ++ lib.optionals (!openclSupport) [
-      "-DNO_OPENCL=1"
     ];
 
   preBuild = let maxBuildCores = 16; in lib.optionalString cudaSupport ''
diff --git a/pkgs/development/libraries/openvino/default.nix b/pkgs/development/libraries/openvino/default.nix
index b3809f0953641..5761f9e7bb645 100644
--- a/pkgs/development/libraries/openvino/default.nix
+++ b/pkgs/development/libraries/openvino/default.nix
@@ -3,6 +3,7 @@
 , fetchFromGitHub
 , fetchurl
 , substituteAll
+, cudaSupport ? opencv.cudaSupport or false
 
 # build
 , addOpenGLRunpath
@@ -21,6 +22,7 @@
 , protobuf
 , pugixml
 , tbb
+, cudaPackages
 }:
 
 let
@@ -68,6 +70,8 @@ stdenv.mkDerivation rec {
       setuptools
     ]))
     shellcheck
+  ] ++ lib.optionals cudaSupport [
+    cudaPackages.cuda_nvcc
   ];
 
   patches = [
@@ -133,6 +137,8 @@ stdenv.mkDerivation rec {
     protobuf
     pugixml
     tbb
+  ] ++ lib.optionals cudaSupport [
+    cudaPackages.cuda_cudart
   ];
 
   enableParallelBuilding = true;
diff --git a/pkgs/development/libraries/science/math/cutensor/generic.nix b/pkgs/development/libraries/science/math/cutensor/generic.nix
index c957fcdd99d4e..02fe13851620b 100644
--- a/pkgs/development/libraries/science/math/cutensor/generic.nix
+++ b/pkgs/development/libraries/science/math/cutensor/generic.nix
@@ -1,7 +1,11 @@
 { stdenv
 , lib
 , libPath
+, cuda_cudart
+, cudaMajorVersion
+, cuda_nvcc
 , cudatoolkit
+, libcublas
 , fetchurl
 , autoPatchelfHook
 , addOpenGLRunpath
@@ -17,7 +21,7 @@ let
 in
 
 stdenv.mkDerivation {
-  pname = "cudatoolkit-${cudatoolkit.majorVersion}-cutensor";
+  pname = "cutensor-cu${cudaMajorVersion}";
   inherit version;
 
   src = fetchurl {
@@ -32,20 +36,27 @@ stdenv.mkDerivation {
   nativeBuildInputs = [
     autoPatchelfHook
     addOpenGLRunpath
+    cuda_nvcc
   ];
 
   buildInputs = [
     stdenv.cc.cc.lib
-  ];
-
-  propagatedBuildInputs = [
-    cudatoolkit
+    cuda_cudart
+    libcublas
   ];
 
   # Set RUNPATH so that libcuda in /run/opengl-driver(-32)/lib can be found.
   # See the explanation in addOpenGLRunpath.
   installPhase = ''
     mkdir -p "$out" "$dev"
+
+    if [[ ! -d "${libPath}" ]] ; then
+      echo "Cutensor: ${libPath} does not exist, only found:" >&2
+      find "$(dirname ${libPath})"/ -maxdepth 1 >&2
+      echo "This cutensor release might not support your cudatoolkit version" >&2
+      exit 1
+    fi
+
     mv include "$dev"
     mv ${libPath} "$out/lib"
 
@@ -58,7 +69,7 @@ stdenv.mkDerivation {
   '';
 
   passthru = {
-    inherit cudatoolkit;
+    cudatoolkit = lib.warn "cutensor.passthru: cudaPackages.cudatoolkit is deprecated" cudatoolkit;
     majorVersion = lib.versions.major version;
   };
 
@@ -66,7 +77,11 @@ stdenv.mkDerivation {
     description = "cuTENSOR: A High-Performance CUDA Library For Tensor Primitives";
     homepage = "https://developer.nvidia.com/cutensor";
     sourceProvenance = with sourceTypes; [ binaryNativeCode ];
-    license = licenses.unfree;
+    license = licenses.unfreeRedistributable // {
+      shortName = "cuTENSOR EULA";
+      name = "cuTENSOR SUPPLEMENT TO SOFTWARE LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS";
+      url = "https://docs.nvidia.com/cuda/cutensor/license.html";
+    };
     platforms = [ "x86_64-linux" ];
     maintainers = with maintainers; [ obsidian-systems-maintenance ];
   };
diff --git a/pkgs/development/libraries/science/math/faiss/default.nix b/pkgs/development/libraries/science/math/faiss/default.nix
index 21e6cbf858cd5..25ac539e05f28 100644
--- a/pkgs/development/libraries/science/math/faiss/default.nix
+++ b/pkgs/development/libraries/science/math/faiss/default.nix
@@ -6,8 +6,6 @@
 , cmake
 , cudaPackages ? { }
 , cudaSupport ? config.cudaSupport
-, nvidia-thrust
-, useThrustSourceBuild ? true
 , pythonSupport ? true
 , pythonPackages
 , llvmPackages
@@ -27,8 +25,6 @@
 , runCommand
 }@inputs:
 
-assert cudaSupport -> nvidia-thrust.cudaSupport;
-
 let
   pname = "faiss";
   version = "1.7.4";
@@ -44,9 +40,6 @@ let
       cuda_cudart # cuda_runtime.h
       libcublas
       libcurand
-    ] ++ lib.optionals useThrustSourceBuild [
-      nvidia-thrust
-    ] ++ lib.optionals (!useThrustSourceBuild) [
       cuda_cccl
     ] ++ lib.optionals (cudaPackages ? cuda_profiler_api) [
       cuda_profiler_api # cuda_profiler_api.h
diff --git a/pkgs/development/libraries/science/math/tensorrt/extension.nix b/pkgs/development/libraries/science/math/tensorrt/extension.nix
index b4018c6cc284d..ffd9b672684cb 100644
--- a/pkgs/development/libraries/science/math/tensorrt/extension.nix
+++ b/pkgs/development/libraries/science/math/tensorrt/extension.nix
@@ -17,16 +17,32 @@ final: prev: let
     isSupported = fileData: elem cudaVersion fileData.supportedCudaVersions;
     # Return the first file that is supported. In practice there should only ever be one anyway.
     supportedFile = files: findFirst isSupported null files;
-    # Supported versions with versions as keys and file as value
-    supportedVersions = filterAttrs (version: file: file !=null ) (mapAttrs (version: files: supportedFile files) tensorRTVersions);
+
     # Compute versioned attribute name to be used in this package set
     computeName = version: "tensorrt_${toUnderscore version}";
+
+    # Supported versions with versions as keys and file as value
+    supportedVersions = lib.recursiveUpdate
+      {
+        tensorrt = {
+          enable = false;
+          fileVersionCuda = null;
+          fileVersionCudnn = null;
+          fullVersion = "0.0.0";
+          sha256 = null;
+          tarball = null;
+          supportedCudaVersions = [ ];
+        };
+      }
+      (mapAttrs' (version: attrs: nameValuePair (computeName version) attrs)
+        (filterAttrs (version: file: file != null) (mapAttrs (version: files: supportedFile files) tensorRTVersions)));
+
     # Add all supported builds as attributes
-    allBuilds = mapAttrs' (version: file: nameValuePair (computeName version) (buildTensorRTPackage (removeAttrs file ["fileVersionCuda"]))) supportedVersions;
+    allBuilds = mapAttrs (name: file: buildTensorRTPackage (removeAttrs file ["fileVersionCuda"])) supportedVersions;
+
     # Set the default attributes, e.g. tensorrt = tensorrt_8_4;
-    defaultBuild = { "tensorrt" = if allBuilds ? ${computeName tensorRTDefaultVersion}
-      then allBuilds.${computeName tensorRTDefaultVersion}
-      else throw "tensorrt-${tensorRTDefaultVersion} does not support your cuda version ${cudaVersion}"; };
+    defaultName = computeName tensorRTDefaultVersion;
+    defaultBuild = lib.optionalAttrs (allBuilds ? ${defaultName}) { tensorrt = allBuilds.${computeName tensorRTDefaultVersion}; };
   in {
     inherit buildTensorRTPackage;
   } // allBuilds // defaultBuild;
diff --git a/pkgs/development/libraries/science/math/tensorrt/generic.nix b/pkgs/development/libraries/science/math/tensorrt/generic.nix
index 165c6f356da89..2bcdd8e588cf0 100644
--- a/pkgs/development/libraries/science/math/tensorrt/generic.nix
+++ b/pkgs/development/libraries/science/math/tensorrt/generic.nix
@@ -8,20 +8,22 @@
 , cudnn
 }:
 
-{ fullVersion
+{ enable ? true
+, fullVersion
 , fileVersionCudnn ? null
 , tarball
 , sha256
 , supportedCudaVersions ? [ ]
 }:
 
-assert fileVersionCudnn == null || lib.assertMsg (lib.strings.versionAtLeast cudnn.version fileVersionCudnn)
+assert !enable || fileVersionCudnn == null || lib.assertMsg (lib.strings.versionAtLeast cudnn.version fileVersionCudnn)
   "This version of TensorRT requires at least cuDNN ${fileVersionCudnn} (current version is ${cudnn.version})";
 
 backendStdenv.mkDerivation rec {
   pname = "cudatoolkit-${cudatoolkit.majorVersion}-tensorrt";
   version = fullVersion;
-  src = requireFile rec {
+  src = if !enable then null else
+  requireFile rec {
     name = tarball;
     inherit sha256;
     message = ''
@@ -38,13 +40,13 @@ backendStdenv.mkDerivation rec {
 
   outputs = [ "out" "dev" ];
 
-  nativeBuildInputs = [
+  nativeBuildInputs = lib.optionals enable [
     autoPatchelfHook
     autoAddOpenGLRunpathHook
   ];
 
   # Used by autoPatchelfHook
-  buildInputs = [
+  buildInputs = lib.optionals enable [
     backendStdenv.cc.cc.lib # libstdc++
     cudatoolkit
     cudnn
@@ -75,6 +77,7 @@ backendStdenv.mkDerivation rec {
     '';
 
   passthru.stdenv = backendStdenv;
+  passthru.enable = enable;
 
   meta = with lib; {
     # Check that the cudatoolkit version satisfies our min/max constraints (both
@@ -82,7 +85,7 @@ backendStdenv.mkDerivation rec {
     # official version constraints (as recorded in default.nix). In some cases
     # you _may_ be able to smudge version constraints, just know that you're
     # embarking into unknown and unsupported territory when doing so.
-    broken = !(elem cudaVersion supportedCudaVersions);
+    broken = !enable || !(elem cudaVersion supportedCudaVersions);
     description = "TensorRT: a high-performance deep learning interface";
     homepage = "https://developer.nvidia.com/tensorrt";
     license = licenses.unfree;
diff --git a/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix b/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix
index d046c6864539d..2036c4c86253b 100644
--- a/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix
+++ b/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix
@@ -14,10 +14,15 @@
   inherit (cudaPackages) backendStdenv cudaFlags;
 
   cuda-common-redist = with cudaPackages; [
-    cuda_cudart # cuda_runtime.h
-    libcublas # cublas_v2.h
-    libcusolver # cusolverDn.h
-    libcusparse # cusparse.h
+    cuda_cudart.dev # cuda_runtime.h
+    cuda_cudart.lib
+    cuda_cccl.dev # <nv/target>
+    libcublas.dev # cublas_v2.h
+    libcublas.lib
+    libcusolver.dev # cusolverDn.h
+    libcusolver.lib
+    libcusparse.dev # cusparse.h
+    libcusparse.lib
   ];
 
   cuda-native-redist = symlinkJoin {
diff --git a/pkgs/development/libraries/ucc/default.nix b/pkgs/development/libraries/ucc/default.nix
index a92c6bea37d70..68f358b3d3deb 100644
--- a/pkgs/development/libraries/ucc/default.nix
+++ b/pkgs/development/libraries/ucc/default.nix
@@ -1,7 +1,7 @@
 { stdenv, lib, fetchFromGitHub, libtool, automake, autoconf, ucx
 , config
 , enableCuda ? config.cudaSupport
-, cudatoolkit
+, cudaPackages
 , enableAvx ? stdenv.hostPlatform.avxSupport
 , enableSse41 ? stdenv.hostPlatform.sse4_1Support
 , enableSse42 ? stdenv.hostPlatform.sse4_2Support
@@ -30,19 +30,25 @@ stdenv.mkDerivation rec {
     done
   '';
 
+  nativeBuildInputs = [ libtool automake autoconf ]
+    ++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ];
+  buildInputs = [ ucx ]
+    ++ lib.optionals enableCuda [
+      cudaPackages.cuda_cccl
+      cudaPackages.cuda_cudart
+    ];
+
+
   preConfigure = ''
     ./autogen.sh
+  '' + lib.optionalString enableCuda ''
+    configureFlagsArray+=( "--with-nvcc-gencode=${builtins.concatStringsSep " " cudaPackages.cudaFlags.gencode}" )
   '';
-
-  nativeBuildInputs = [ libtool automake autoconf ];
-  buildInputs = [ ucx ]
-    ++ lib.optional enableCuda cudatoolkit;
-
   configureFlags = [ ]
    ++ lib.optional enableSse41 "--with-sse41"
    ++ lib.optional enableSse42 "--with-sse42"
    ++ lib.optional enableAvx "--with-avx"
-   ++ lib.optional enableCuda "--with-cuda=${cudatoolkit}";
+   ++ lib.optional enableCuda "--with-cuda=${cudaPackages.cuda_cudart}";
 
   postInstall = ''
     find $out/lib/ -name "*.la" -exec rm -f \{} \;
diff --git a/pkgs/development/libraries/ucx/default.nix b/pkgs/development/libraries/ucx/default.nix
index 627cac56bb737..3b923d8efdd2a 100644
--- a/pkgs/development/libraries/ucx/default.nix
+++ b/pkgs/development/libraries/ucx/default.nix
@@ -2,18 +2,12 @@
 , rdma-core, libbfd, libiberty, perl, zlib, symlinkJoin, pkg-config
 , config
 , enableCuda ? config.cudaSupport
-, cudatoolkit
+, cudaPackages
 , enableRocm ? config.rocmSupport
 , rocmPackages
 }:
 
 let
-  # Needed for configure to find all libraries
-  cudatoolkit' = symlinkJoin {
-    inherit (cudatoolkit) name meta;
-    paths = [ cudatoolkit cudatoolkit.lib ];
-  };
-
   rocmList = with rocmPackages; [ rocm-core rocm-runtime rocm-device-libs clr ];
 
   rocm = symlinkJoin {
@@ -35,7 +29,15 @@ stdenv.mkDerivation rec {
 
   outputs = [ "out" "doc" "dev" ];
 
-  nativeBuildInputs = [ autoreconfHook doxygen pkg-config ];
+  nativeBuildInputs = [
+    autoreconfHook
+    doxygen
+    pkg-config
+  ]
+  ++ lib.optionals enableCuda [
+    cudaPackages.cuda_nvcc
+    cudaPackages.autoAddOpenGLRunpathHook
+  ];
 
   buildInputs = [
     libbfd
@@ -44,8 +46,16 @@ stdenv.mkDerivation rec {
     perl
     rdma-core
     zlib
-  ] ++ lib.optional enableCuda cudatoolkit
-  ++ lib.optionals enableRocm rocmList;
+  ] ++ lib.optionals enableCuda [
+    cudaPackages.cuda_cudart
+    cudaPackages.cuda_nvml_dev
+
+  ] ++ lib.optionals enableRocm rocmList;
+
+  LDFLAGS = lib.optionals enableCuda [
+    # Fake libnvidia-ml.so (the real one is deployed impurely)
+    "-L${cudaPackages.cuda_nvml_dev}/lib/stubs"
+  ];
 
   configureFlags = [
     "--with-rdmacm=${lib.getDev rdma-core}"
@@ -53,7 +63,7 @@ stdenv.mkDerivation rec {
     "--with-rc"
     "--with-dm"
     "--with-verbs=${lib.getDev rdma-core}"
-  ] ++ lib.optional enableCuda "--with-cuda=${cudatoolkit'}"
+  ] ++ lib.optionals enableCuda [ "--with-cuda=${cudaPackages.cuda_cudart}" ]
   ++ lib.optional enableRocm "--with-rocm=${rocm}";
 
   postInstall = ''
diff --git a/pkgs/development/python-modules/cupy/default.nix b/pkgs/development/python-modules/cupy/default.nix
index e5de149fca14a..71defbb99b985 100644
--- a/pkgs/development/python-modules/cupy/default.nix
+++ b/pkgs/development/python-modules/cupy/default.nix
@@ -11,11 +11,34 @@
 , cudaPackages
 , addOpenGLRunpath
 , pythonOlder
+, symlinkJoin
 }:
 
 let
-  inherit (cudaPackages) cudatoolkit cudnn cutensor nccl;
-in buildPythonPackage rec {
+  inherit (cudaPackages) cudnn cutensor nccl;
+  cudatoolkit-joined = symlinkJoin {
+    name = "cudatoolkit-joined-${cudaPackages.cudaVersion}";
+    paths = with cudaPackages; [
+      cuda_cccl # <nv/target>
+      cuda_cccl.dev
+      cuda_cudart
+      cuda_nvcc.dev # <crt/host_defines.h>
+      cuda_nvprof
+      cuda_nvrtc
+      cuda_nvtx
+      cuda_profiler_api
+      libcublas
+      libcufft
+      libcurand
+      libcusolver
+      libcusparse
+
+      # Missing:
+      # cusparselt
+    ];
+  };
+in
+buildPythonPackage rec {
   pname = "cupy";
   version = "12.2.0";
 
@@ -32,27 +55,32 @@ in buildPythonPackage rec {
   # very short builds and a few extremely long ones, so setting both ends up
   # working nicely in practice.
   preConfigure = ''
-    export CUDA_PATH=${cudatoolkit}
     export CUPY_NUM_BUILD_JOBS="$NIX_BUILD_CORES"
     export CUPY_NUM_NVCC_THREADS="$NIX_BUILD_CORES"
   '';
 
   nativeBuildInputs = [
+    setuptools
+    wheel
     addOpenGLRunpath
     cython
+    cudaPackages.cuda_nvcc
   ];
 
-  LDFLAGS = "-L${cudatoolkit}/lib/stubs";
-
-  propagatedBuildInputs = [
-    cudatoolkit
+  buildInputs = [
+    cudatoolkit-joined
     cudnn
     cutensor
     nccl
+  ];
+
+  NVCC = "${lib.getExe cudaPackages.cuda_nvcc}"; # FIXME: splicing/buildPackages
+  CUDA_PATH = "${cudatoolkit-joined}";
+  LDFLAGS = "-L${cudaPackages.cuda_cudart}/lib/stubs";
+
+  propagatedBuildInputs = [
     fastrlock
     numpy
-    setuptools
-    wheel
   ];
 
   nativeCheckInputs = [
diff --git a/pkgs/development/python-modules/jaxlib/bin.nix b/pkgs/development/python-modules/jaxlib/bin.nix
index d80cbc2a60183..e35b4759bd64f 100644
--- a/pkgs/development/python-modules/jaxlib/bin.nix
+++ b/pkgs/development/python-modules/jaxlib/bin.nix
@@ -29,11 +29,11 @@
 , stdenv
   # Options:
 , cudaSupport ? config.cudaSupport
-, cudaPackages ? {}
+, cudaPackagesGoogle
 }:
 
 let
-  inherit (cudaPackages) cudatoolkit cudnn;
+  inherit (cudaPackagesGoogle) cudatoolkit cudnn;
 
   version = "0.4.20";
 
@@ -210,8 +210,8 @@ buildPythonPackage {
     maintainers = with maintainers; [ samuela ];
     platforms = [ "aarch64-darwin" "x86_64-linux" "x86_64-darwin" ];
     broken =
-      !(cudaSupport -> (cudaPackages ? cudatoolkit) && lib.versionAtLeast cudatoolkit.version "11.1")
-      || !(cudaSupport -> (cudaPackages ? cudnn) && lib.versionAtLeast cudnn.version "8.2")
+      !(cudaSupport -> (cudaPackagesGoogle ? cudatoolkit) && lib.versionAtLeast cudatoolkit.version "11.1")
+      || !(cudaSupport -> (cudaPackagesGoogle ? cudnn) && lib.versionAtLeast cudnn.version "8.2")
       || !(cudaSupport -> stdenv.isLinux);
   };
 }
diff --git a/pkgs/development/python-modules/jaxlib/default.nix b/pkgs/development/python-modules/jaxlib/default.nix
index c70ab0ac2b327..a04d6973ca4be 100644
--- a/pkgs/development/python-modules/jaxlib/default.nix
+++ b/pkgs/development/python-modules/jaxlib/default.nix
@@ -44,14 +44,14 @@
 , config
   # CUDA flags:
 , cudaSupport ? config.cudaSupport
-, cudaPackages ? {}
+, cudaPackagesGoogle
 
   # MKL:
 , mklSupport ? true
 }:
 
 let
-  inherit (cudaPackages) backendStdenv cudatoolkit cudaFlags cudnn nccl;
+  inherit (cudaPackagesGoogle) backendStdenv cudatoolkit cudaFlags cudnn nccl;
 
   pname = "jaxlib";
   version = "0.4.20";
diff --git a/pkgs/development/python-modules/tensorflow/bin.nix b/pkgs/development/python-modules/tensorflow/bin.nix
index fa70e4cc4a305..1040023619262 100644
--- a/pkgs/development/python-modules/tensorflow/bin.nix
+++ b/pkgs/development/python-modules/tensorflow/bin.nix
@@ -22,7 +22,7 @@
 , tensorboard
 , config
 , cudaSupport ? config.cudaSupport
-, cudaPackages ? {}
+, cudaPackagesGoogle
 , zlib
 , python
 , keras-applications
@@ -43,7 +43,7 @@ assert ! (stdenv.isDarwin && cudaSupport);
 
 let
   packages = import ./binary-hashes.nix;
-  inherit (cudaPackages) cudatoolkit cudnn;
+  inherit (cudaPackagesGoogle) cudatoolkit cudnn;
 in buildPythonPackage {
   pname = "tensorflow" + lib.optionalString cudaSupport "-gpu";
   inherit (packages) version;
@@ -200,7 +200,7 @@ in buildPythonPackage {
   ];
 
   passthru = {
-    inherit cudaPackages;
+    cudaPackages = cudaPackagesGoogle;
   };
 
   meta = with lib; {
diff --git a/pkgs/development/python-modules/tensorflow/default.nix b/pkgs/development/python-modules/tensorflow/default.nix
index c8e292e316744..be8b26f3d0e99 100644
--- a/pkgs/development/python-modules/tensorflow/default.nix
+++ b/pkgs/development/python-modules/tensorflow/default.nix
@@ -19,8 +19,8 @@
 # https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/iRCt5m4qUz0
 , config
 , cudaSupport ? config.cudaSupport
-, cudaPackages ? { }
-, cudaCapabilities ? cudaPackages.cudaFlags.cudaCapabilities
+, cudaPackagesGoogle
+, cudaCapabilities ? cudaPackagesGoogle.cudaFlags.cudaCapabilities
 , mklSupport ? false, mkl
 , tensorboardSupport ? true
 # XLA without CUDA is broken
@@ -50,15 +50,15 @@ let
   # __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the
   # translation units, so the build fails at link time
   stdenv =
-    if cudaSupport then cudaPackages.backendStdenv
+    if cudaSupport then cudaPackagesGoogle.backendStdenv
     else if originalStdenv.isDarwin then llvmPackages_11.stdenv
     else originalStdenv;
-  inherit (cudaPackages) cudatoolkit nccl;
+  inherit (cudaPackagesGoogle) cudatoolkit nccl;
   # use compatible cuDNN (https://www.tensorflow.org/install/source#gpu)
   # cudaPackages.cudnn led to this:
   # https://github.com/tensorflow/tensorflow/issues/60398
   cudnnAttribute = "cudnn_8_6";
-  cudnn = cudaPackages.${cudnnAttribute};
+  cudnn = cudaPackagesGoogle.${cudnnAttribute};
   gentoo-patches = fetchzip {
     url = "https://dev.gentoo.org/~perfinion/patches/tensorflow-patches-2.12.0.tar.bz2";
     hash = "sha256-SCRX/5/zML7LmKEPJkcM5Tebez9vv/gmE4xhT/jyqWs=";
@@ -486,8 +486,8 @@ let
       broken =
         stdenv.isDarwin
         || !(xlaSupport -> cudaSupport)
-        || !(cudaSupport -> builtins.hasAttr cudnnAttribute cudaPackages)
-        || !(cudaSupport -> cudaPackages ? cudatoolkit);
+        || !(cudaSupport -> builtins.hasAttr cudnnAttribute cudaPackagesGoogle)
+        || !(cudaSupport -> cudaPackagesGoogle ? cudatoolkit);
     } // lib.optionalAttrs stdenv.isDarwin {
       timeout = 86400; # 24 hours
       maxSilent = 14400; # 4h, double the default of 7200s
@@ -590,7 +590,7 @@ in buildPythonPackage {
   # Regression test for #77626 removed because not more `tensorflow.contrib`.
 
   passthru = {
-    inherit cudaPackages;
+    cudaPackages = cudaPackagesGoogle;
     deps = bazel-build.deps;
     libtensorflow = bazel-build.out;
   };
diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix
index d18dd97df5b45..b648e2d7bf360 100644
--- a/pkgs/development/python-modules/torch/default.nix
+++ b/pkgs/development/python-modules/torch/default.nix
@@ -337,7 +337,8 @@ in buildPythonPackage rec {
   buildInputs = [ blas blas.provider ]
     ++ lib.optionals cudaSupport (with cudaPackages; [
       cuda_cccl.dev # <thrust/*>
-      cuda_cudart # cuda_runtime.h and libraries
+      cuda_cudart.dev # cuda_runtime.h and libraries
+      cuda_cudart.lib
       cuda_cupti.dev # For kineto
       cuda_cupti.lib # For kineto
       cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too
diff --git a/pkgs/top-level/aliases.nix b/pkgs/top-level/aliases.nix
index e655c34c7e824..daab3488385d1 100644
--- a/pkgs/top-level/aliases.nix
+++ b/pkgs/top-level/aliases.nix
@@ -644,6 +644,7 @@ mapAliases ({
   noto-fonts-cjk = noto-fonts-cjk-sans; # Added 2021-12-16
   noto-fonts-emoji = noto-fonts-color-emoji; # Added 2023-09-09
   noto-fonts-extra = noto-fonts; # Added 2023-04-08
+  nvidia-thrust = throw "nvidia-thrust has been removed because the project was deprecated; use cudaPackages.cuda_cccl";
 
   ### O ###
 
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index e32f668e0d653..1e9069ef0ead4 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -7318,6 +7318,10 @@ with pkgs;
   cudaPackages_12_2 = callPackage ./cuda-packages.nix { cudaVersion = "12.2"; };
   cudaPackages_12 = cudaPackages_12_0;
 
+  # Use the older cudaPackages for tensorflow and jax, as determined by cudnn
+  # compatibility: https://www.tensorflow.org/install/source#gpu
+  cudaPackagesGoogle = cudaPackages_11;
+
   # TODO: try upgrading once there is a cuDNN release supporting CUDA 12. No
   # such cuDNN release as of 2023-01-10.
   cudaPackages = recurseIntoAttrs cudaPackages_11;
@@ -11272,16 +11276,6 @@ with pkgs;
 
   nvfetcher = haskell.lib.compose.justStaticExecutables haskellPackages.nvfetcher;
 
-  nvidia-thrust = callPackage ../development/libraries/nvidia-thrust { };
-
-  nvidia-thrust-intel = callPackage ../development/libraries/nvidia-thrust {
-    hostSystem = "TBB";
-    deviceSystem = if config.cudaSupport then "CUDA" else "TBB";
-  };
-
-  nvidia-thrust-cuda = callPackage ../development/libraries/nvidia-thrust {
-    deviceSystem = "CUDA";
-  };
 
   miller = callPackage ../tools/text/miller { };
 
@@ -20772,6 +20766,9 @@ with pkgs;
     # catboost requires clang 12+ for build
     # after bumping the default version of llvm, check for compatibility with the cuda backend and pin it.
     inherit (llvmPackages_12) stdenv;
+
+    # https://github.com/catboost/catboost/issues/2540
+    cudaPackages = cudaPackages_11;
   };
 
   ndn-cxx = callPackage ../development/libraries/ndn-cxx { };
@@ -39440,7 +39437,6 @@ with pkgs;
     singlePrec = true;
     enableMpi = true;
     enableCuda = true;
-    cudatoolkit = cudatoolkit_11;
     fftw = fftwSinglePrec;
   });
 
@@ -39982,7 +39978,6 @@ with pkgs;
 
   faissWithCuda = faiss.override {
     cudaSupport = true;
-    nvidia-thrust = nvidia-thrust-cuda;
   };
 
   fityk = callPackage ../applications/science/misc/fityk { };
diff --git a/pkgs/top-level/cuda-packages.nix b/pkgs/top-level/cuda-packages.nix
index a2f49a98ccd53..3912422785bc4 100644
--- a/pkgs/top-level/cuda-packages.nix
+++ b/pkgs/top-level/cuda-packages.nix
@@ -24,6 +24,7 @@ let
 
     buildCuTensorPackage = final.callPackage ../development/libraries/science/math/cutensor/generic.nix;
 
+    # FIXME: Include non-x86_64 platforms
     cuTensorVersions = {
       "1.2.2.5" = {
         hash = "sha256-lU7iK4DWuC/U3s1Ct/rq2Gr3w4F2U7RYYgpmF05bibY=";
@@ -31,12 +32,24 @@ let
       "1.5.0.3" = {
         hash = "sha256-T96+lPC6OTOkIs/z3QWg73oYVSyidN0SVkBWmT9VRx0=";
       };
+      "2.0.0.7" = {
+        hash = "sha256-32M4rtGOW2rgxJUhBT0WBtKkHhh9f17M+RgK9rvE72g=";
+      };
     };
 
     inherit (final) cudaMajorMinorVersion cudaMajorVersion;
 
+    cudaToCutensor = {
+      "10" = "1.2.25";
+      "11" = "1.5.0.3";
+      "12" = "2.0.0.7";
+    };
+
+    versionNewer = lib.flip lib.versionOlder;
+    latestVersion = (builtins.head (lib.sort versionNewer (builtins.attrNames cuTensorVersions)));
+
     cutensor = buildCuTensorPackage rec {
-      version = if cudaMajorMinorVersion == "10.1" then "1.2.2.5" else "1.5.0.3";
+      version = cudaToCutensor.${cudaMajorVersion} or latestVersion;
       inherit (cuTensorVersions.${version}) hash;
       # This can go into generic.nix
       libPath = "lib/${if cudaMajorVersion == "10" then cudaMajorMinorVersion else cudaMajorVersion}";
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index ebf97327f53fc..1f38396d73c70 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -2467,7 +2467,8 @@ self: super: with self; {
 
   cufflinks = callPackage ../development/python-modules/cufflinks { };
 
-  cupy = callPackage ../development/python-modules/cupy { };
+  # cupy 12.2.0 possibly incompatible with cutensor 2.0 that comes with cudaPackages_12
+  cupy = callPackage ../development/python-modules/cupy { cudaPackages = pkgs.cudaPackages_11; };
 
   curio = callPackage ../development/python-modules/curio { };
 
@@ -13924,7 +13925,6 @@ self: super: with self; {
   callPackage ../development/python-modules/tensorflow {
     inherit (pkgs.darwin) cctools;
     inherit (pkgs.config) cudaSupport;
-    inherit (self.tensorflow-bin) cudaPackages;
     inherit (pkgs.darwin.apple_sdk.frameworks) Foundation Security;
     flatbuffers-core = pkgs.flatbuffers;
     flatbuffers-python = self.flatbuffers;
@@ -13957,7 +13957,7 @@ self: super: with self; {
 
   tensorly = callPackage ../development/python-modules/tensorly { };
 
-  tensorrt = callPackage ../development/python-modules/tensorrt { };
+  tensorrt = callPackage ../development/python-modules/tensorrt { cudaPackages = pkgs.cudaPackages_11; };
 
   tensorstore = callPackage ../development/python-modules/tensorstore { };