NixOS · ConnorBaker · Dec 15, 2023 · Nov 27, 2023 · Dec 8, 2023 · Dec 9, 2023
diff --git a/pkgs/applications/misc/blender/default.nix b/pkgs/applications/misc/blender/default.nix
@@ -52,7 +52,10 @@ stdenv.mkDerivation (finalAttrs: rec {
   nativeBuildInputs =
     [ cmake makeWrapper python310Packages.wrapPython llvmPackages.llvm.dev
     ]
-    ++ lib.optionals cudaSupport [ addOpenGLRunpath ]
+    ++ lib.optionals cudaSupport [
+      addOpenGLRunpath
+      cudaPackages.cuda_nvcc
+    ]
     ++ lib.optionals waylandSupport [ pkg-config ];
   buildInputs =
     [ boost ffmpeg gettext glew ilmbase
@@ -87,7 +90,7 @@ stdenv.mkDerivation (finalAttrs: rec {
       llvmPackages.openmp SDL Cocoa CoreGraphics ForceFeedback OpenAL OpenGL
     ])
     ++ lib.optional jackaudioSupport libjack2
-    ++ lib.optional cudaSupport cudaPackages.cudatoolkit
+    ++ lib.optionals cudaSupport [ cudaPackages.cuda_cudart ]
     ++ lib.optional colladaSupport opencollada
     ++ lib.optional spaceNavSupport libspnav;
   pythonPath = with python310Packages; [ numpy requests zstandard ];

diff --git a/pkgs/applications/science/molecular-dynamics/gromacs/default.nix b/pkgs/applications/science/molecular-dynamics/gromacs/default.nix
@@ -1,4 +1,14 @@
-{ lib, stdenv, fetchurl, cmake, hwloc, fftw, perl, blas, lapack, mpi, cudatoolkit
+{ lib
+, stdenv
+, fetchurl
+, cmake
+, hwloc
+, fftw
+, perl
+, blas
+, lapack
+, mpi
+, cudaPackages
 , singlePrec ? true
 , config
 , enableMpi ? false
@@ -7,6 +17,8 @@
 }:
 
 let
+  inherit (cudaPackages.cudaFlags) cudaCapabilities dropDot;
+
   # Select reasonable defaults for all major platforms
   # The possible values are defined in CMakeLists.txt:
   # AUTO None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256
@@ -31,7 +43,9 @@ in stdenv.mkDerivation rec {
 
   outputs = [ "out" "dev" "man" ];
 
-  nativeBuildInputs = [ cmake ];
+  nativeBuildInputs =
+    [ cmake ]
+    ++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ];
 
   buildInputs = [
     fftw
@@ -40,13 +54,17 @@ in stdenv.mkDerivation rec {
     blas
     lapack
   ] ++ lib.optional enableMpi mpi
-    ++ lib.optional enableCuda cudatoolkit
-  ;
+  ++ lib.optionals enableCuda [
+    cudaPackages.cuda_cudart
+    cudaPackages.libcufft
+    cudaPackages.cuda_profiler_api
+  ];
 
   propagatedBuildInputs = lib.optional enableMpi mpi;
   propagatedUserEnvPkgs = lib.optional enableMpi mpi;
 
   cmakeFlags = [
+    (lib.cmakeBool "GMX_HWLOC" true)
     "-DGMX_SIMD:STRING=${SIMD cpuAcceleration}"
     "-DGMX_OPENMP:BOOL=TRUE"
     "-DBUILD_SHARED_LIBS=ON"
@@ -66,7 +84,13 @@ in stdenv.mkDerivation rec {
      else [
        "-DGMX_MPI:BOOL=FALSE"
      ]
-  ) ++ lib.optional enableCuda "-DGMX_GPU=CUDA";
+  ) ++ lib.optionals enableCuda [
+    "-DGMX_GPU=CUDA"
+    (lib.cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (builtins.concatStringsSep ";" (map dropDot cudaCapabilities)))
+
+    # Gromacs seems to ignore and override the normal variables, so we add this ad hoc:
+    (lib.cmakeFeature "GMX_CUDA_TARGET_COMPUTE" (builtins.concatStringsSep ";" (map dropDot cudaCapabilities)))
+  ];
 
   postInstall = ''
     moveToOutput share/cmake $dev

diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix
@@ -88,7 +88,7 @@ backendStdenv.mkDerivation rec {
   ] ++ lib.optionals (lib.versionAtLeast version "11.8") [
     qt6Packages.wrapQtAppsHook
   ];
-  depsTargetTargetPropagated = [
+  propagatedBuildInputs = [
     setupCudaHook
   ];
   buildInputs = lib.optionals (lib.versionOlder version "11") [

diff --git a/pkgs/development/compilers/cudatoolkit/extension.nix b/pkgs/development/compilers/cudatoolkit/extension.nix
@@ -47,13 +47,16 @@ final: prev: let
         ./hooks/mark-for-cudatoolkit-root-hook.sh)
     { });
 
-  # Normally propagated by cuda_nvcc or cudatoolkit through their depsHostHostPropagated
+  # Currently propagated by cuda_nvcc or cudatoolkit, rather than used directly
   setupCudaHook = (final.callPackage
     ({ makeSetupHook, backendStdenv }:
       makeSetupHook
         {
           name = "setup-cuda-hook";
 
+          substitutions.setupCudaHook = placeholder "out";
+
+          # Point NVCC at a compatible compiler
           substitutions.ccRoot = "${backendStdenv.cc}";
 
           # Required in addition to ccRoot as otherwise bin/gcc is looked up

diff --git a/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh
@@ -1,8 +1,14 @@
 # shellcheck shell=bash
 
+# Should we mimick cc-wrapper's "hygiene"?
+[[ -z ${strictDeps-} ]] || (( "$hostOffset" < 0 )) || return 0
+
+echo "Sourcing mark-for-cudatoolkit-root-hook" >&2
+
 markForCUDAToolkit_ROOT() {
     mkdir -p "${prefix}/nix-support"
-    touch "${prefix}/nix-support/include-in-cudatoolkit-root"
+    [[ -f "${prefix}/nix-support/include-in-cudatoolkit-root" ]] && return
+    echo "$pname-$output" > "${prefix}/nix-support/include-in-cudatoolkit-root"
 }
 
 fixupOutputHooks+=(markForCUDAToolkit_ROOT)
diff --git a/pkgs/development/compilers/cudatoolkit/hooks/nvcc-setup-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/nvcc-setup-hook.sh
diff --git a/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh
@@ -1,18 +1,59 @@
 # shellcheck shell=bash
 
-echo Sourcing setup-cuda-hook >&2
+# Starting with 24.05: only run the hook from nativeBuildInputs
+# (( "$hostOffset" == -1 && "$targetOffset" == 0)) || return 0
 
-extendCUDAToolkit_ROOT() {
-    if [[ -f "$1/nix-support/include-in-cudatoolkit-root" ]] ; then
-        addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$1"
+guard=Sourcing
+reason=
 
-        if [[ -d "$1/include" ]] ; then
-            addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$1/include"
-        fi
-    fi
+[[ -n ${cudaSetupHookOnce-} ]] && guard=Skipping && reason=" because the hook has been propagated more than once"
+
+if (( "${NIX_DEBUG:-0}" >= 1 )) ; then
+    echo "$guard hostOffset=$hostOffset targetOffset=$targetOffset setupCudaHook$reason" >&2
+else
+    echo "$guard setup-cuda-hook$reason" >&2
+fi
+
+[[ "$guard" = Sourcing ]] || return 0
+
+declare -g cudaSetupHookOnce=1
+declare -Ag cudaHostPathsSeen=()
+declare -Ag cudaOutputToPath=()
+
+extendcudaHostPathsSeen() {
+    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "extendcudaHostPathsSeen $1" >&2
+
+    local markerPath="$1/nix-support/include-in-cudatoolkit-root"
+    [[ ! -f "${markerPath}" ]] && return
+    [[ -v cudaHostPathsSeen[$1] ]] && return
+
+    cudaHostPathsSeen["$1"]=1
+
+    # E.g. cuda_cudart-lib
+    local cudaOutputName
+    read -r cudaOutputName < "$markerPath"
+
+    [[ -z "$cudaOutputName" ]] && return
+
+    local oldPath="${cudaOutputToPath[$cudaOutputName]-}"
+    [[ -n "$oldPath" ]] && echo "extendcudaHostPathsSeen: warning: overwriting $cudaOutputName from $oldPath to $1" >&2
+    cudaOutputToPath["$cudaOutputName"]="$1"
 }
+addEnvHooks "$targetOffset" extendcudaHostPathsSeen
+
+setupCUDAToolkit_ROOT() {
+    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "setupCUDAToolkit_ROOT: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2
 
-addEnvHooks "$targetOffset" extendCUDAToolkit_ROOT
+    for path in "${!cudaHostPathsSeen[@]}" ; do
+        addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path"
+        if [[ -d "$path/include" ]] ; then
+            addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$path/include"
+        fi
+    done
+
+    export cmakeFlags+=" -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT"
+}
+preConfigureHooks+=(setupCUDAToolkit_ROOT)
 
 setupCUDAToolkitCompilers() {
     echo Executing setupCUDAToolkitCompilers >&2
@@ -56,13 +97,43 @@ setupCUDAToolkitCompilers() {
     # CMake's enable_language(CUDA) runs a compiler test and it doesn't account for
     # CUDAToolkit_ROOT. We have to help it locate libcudart
     if [[ -z "${nvccDontPrependCudartFlags-}" ]] ; then
-        export NVCC_APPEND_FLAGS+=" -L@cudartLib@/lib -L@cudartStatic@/lib -I@cudartInclude@/include"
+        if [[ ! -v cudaOutputToPath["cuda_cudart-out"] ]] ; then
+            echo "setupCUDAToolkitCompilers: missing cudaPackages.cuda_cudart. This may become an an error in the future" >&2
+            # exit 1
+        fi
+        for pkg in "${!cudaOutputToPath[@]}" ; do
+            [[ ! "$pkg" = cuda_cudart* ]] && continue
+
+            local path="${cudaOutputToPath[$pkg]}"
+            if [[ -d "$path/include" ]] ; then
+                export NVCC_PREPEND_FLAGS+=" -I$path/include"
+            fi
+            if [[ -d "$path/lib" ]] ; then
+                export NVCC_PREPEND_FLAGS+=" -L$path/lib"
+            fi
+        done
     fi
 }
+preConfigureHooks+=(setupCUDAToolkitCompilers)
 
-setupCMakeCUDAToolkit_ROOT() {
-    export cmakeFlags+=" -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT"
-}
+propagateCudaLibraries() {
+    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "propagateCudaLibraries: cudaPropagateToOutput=$cudaPropagateToOutput cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2
+
+    [[ -z "${cudaPropagateToOutput-}" ]] && return
+
+    mkdir -p "${!cudaPropagateToOutput}/nix-support"
+    # One'd expect this should be propagated-bulid-build-deps, but that doesn't seem to work
+    echo "@setupCudaHook@" >> "${!cudaPropagateToOutput}/nix-support/propagated-native-build-inputs"
 
-postHooks+=(setupCUDAToolkitCompilers)
-preConfigureHooks+=(setupCMakeCUDAToolkit_ROOT)
+    local propagatedBuildInputs=( "${!cudaHostPathsSeen[@]}" )
+    for output in $(getAllOutputNames) ; do
+        if [[ ! "$output" = "$cudaPropagateToOutput" ]] ; then
+            propagatedBuildInputs+=( "${!output}" )
+        fi
+        break
+    done
+
+    # One'd expect this should be propagated-host-host-deps, but that doesn't seem to work
+    printWords "${propagatedBuildInputs[@]}" >> "${!cudaPropagateToOutput}/nix-support/propagated-build-inputs"
+}
+postFixupHooks+=(propagateCudaLibraries)
diff --git a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
@@ -51,37 +51,14 @@ in
     ]
   );
 
-  cuda_nvcc = prev.cuda_nvcc.overrideAttrs (_: {
-    # Required by cmake's enable_language(CUDA) to build a test program
-    # When implementing cross-compilation support: this is
-    # final.pkgs.targetPackages.cudaPackages.cuda_cudart
-    env = {
-      # Given the multiple-outputs each CUDA redist has, we can specify the exact components we
-      # need from the package. CMake requires:
-      # - the cuda_runtime.h header, which is in the dev output
-      # - the dynamic library, which is in the lib output
-      # - the static library, which is in the static output
-      cudartInclude = "${final.cuda_cudart.dev}";
-      cudartLib = "${final.cuda_cudart.lib}";
-      cudartStatic = "${final.cuda_cudart.static}";
-    };
-
-    # Point NVCC at a compatible compiler
-
-    # Desiredata: whenever a package (e.g. magma) adds cuda_nvcc to
-    # nativeBuildInputs (offsets `(-1, 0)`), magma should also source the
-    # setupCudaHook, i.e. we want it the hook to be propagated into the
-    # same nativeBuildInputs.
-    #
-    # Logically, cuda_nvcc should include the hook in depsHostHostPropagated,
-    # so that the final offsets for the propagated hook would be `(-1, 0) +
-    # (0, 0) = (-1, 0)`.
-    #
-    # In practice, TargetTarget appears to work:
-    # https://gist.github.com/fd80ff142cd25e64603618a3700e7f82
-    depsTargetTargetPropagated = [
+  cuda_nvcc = prev.cuda_nvcc.overrideAttrs (oldAttrs: {
+    propagatedBuildInputs = [
       final.setupCudaHook
     ];
+
+    meta = (oldAttrs.meta or { }) // {
+      mainProgram = "nvcc";
+    };
   });
 
   cuda_nvprof = prev.cuda_nvprof.overrideAttrs (oldAttrs: {

diff --git a/pkgs/development/compilers/cudatoolkit/saxpy/default.nix b/pkgs/development/compilers/cudatoolkit/saxpy/default.nix
@@ -1,12 +1,13 @@
 { autoAddOpenGLRunpathHook
 , backendStdenv
 , cmake
-, cuda_cccl
-, cuda_cudart
+, cuda_cccl ? null
+, cuda_cudart ? null
 , cudaFlags
-, cuda_nvcc
+, cuda_nvcc ? null
+, cudatoolkit ? null
 , lib
-, libcublas
+, libcublas ? null
 , setupCudaHook
 , stdenv
 }:
@@ -17,23 +18,24 @@ backendStdenv.mkDerivation {
 
   src = ./.;
 
-  buildInputs = [
+  buildInputs = lib.optionals (cuda_cudart != null) [
     libcublas
     cuda_cudart
     cuda_cccl
+  ] ++ lib.optionals (cuda_cudart == null) [
+    cudatoolkit
   ];
   nativeBuildInputs = [
     cmake
 
-    # NOTE: this needs to be pkgs.buildPackages.cudaPackages_XX_Y.cuda_nvcc for
-    # cross-compilation to work. This should work automatically once we move to
-    # spliced scopes. Delete this comment once that happens
-    cuda_nvcc
-
     # Alternatively, we could remove the propagated hook from cuda_nvcc and add
     # directly:
     # setupCudaHook
     autoAddOpenGLRunpathHook
+  ] ++ lib.optionals (cuda_nvcc != null) [
+    cuda_nvcc
+  ] ++ lib.optionals (cuda_nvcc == null) [
+    cudatoolkit
   ];
 
   cmakeFlags = [

diff --git a/pkgs/development/libraries/cctag/default.nix b/pkgs/development/libraries/cctag/default.nix
@@ -49,7 +49,7 @@ stdenv.mkDerivation rec {
   buildInputs = [
     boost179
     eigen
-    opencv
+    opencv.cxxdev
   ];
 
   # Tests are broken on Darwin (linking issue)

diff --git a/pkgs/development/libraries/ctranslate2/default.nix b/pkgs/development/libraries/ctranslate2/default.nix
@@ -57,6 +57,7 @@ stdenv.mkDerivation rec {
   buildInputs = lib.optionals withMkl [
     mkl
   ] ++ lib.optionals withCUDA [
+    cudaPackages.cuda_cccl # <nv/target> required by the fp16 headers in cudart
     cudaPackages.cuda_cudart
     cudaPackages.libcublas
     cudaPackages.libcurand

diff --git a/pkgs/development/libraries/hwloc/default.nix b/pkgs/development/libraries/hwloc/default.nix
@@ -22,12 +22,13 @@ stdenv.mkDerivation rec {
   ];
 
   # XXX: libX11 is not directly needed, but needed as a propagated dep of Cairo.
-  nativeBuildInputs = [ pkg-config ];
+  nativeBuildInputs = [ pkg-config ]
+  ++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ];
 
   buildInputs = [ expat ncurses ]
     ++ lib.optionals x11Support [ cairo libX11 ]
     ++ lib.optionals stdenv.isLinux [ numactl ]
-    ++ lib.optional enableCuda cudaPackages.cudatoolkit;
+    ++ lib.optionals enableCuda [ cudaPackages.cuda_cudart ];
 
   # Since `libpci' appears in `hwloc.pc', it must be propagated.
   propagatedBuildInputs = lib.optional stdenv.isLinux pciutils;

diff --git a/pkgs/development/libraries/nvidia-optical-flow-sdk/default.nix b/pkgs/development/libraries/nvidia-optical-flow-sdk/default.nix
@@ -18,6 +18,11 @@ stdenv.mkDerivation {
     cp -R * $out/include
   '';
 
+  postFixup = ''
+    mkdir -p $out/nix-support
+    echo $pname >> "$out/nix-support/include-in-cudatoolkit-root"
+  '';
+
   meta = with lib; {
     description = "Nvidia optical flow headers for computing the relative motion of pixels between images";
     homepage = "https://developer.nvidia.com/opticalflow-sdk";