diff --git a/pkgs/applications/misc/blender/default.nix b/pkgs/applications/misc/blender/default.nix index 24ea7287160b7..bdfd867f55675 100644 --- a/pkgs/applications/misc/blender/default.nix +++ b/pkgs/applications/misc/blender/default.nix @@ -52,7 +52,10 @@ stdenv.mkDerivation (finalAttrs: rec { nativeBuildInputs = [ cmake makeWrapper python310Packages.wrapPython llvmPackages.llvm.dev ] - ++ lib.optionals cudaSupport [ addOpenGLRunpath ] + ++ lib.optionals cudaSupport [ + addOpenGLRunpath + cudaPackages.cuda_nvcc + ] ++ lib.optionals waylandSupport [ pkg-config ]; buildInputs = [ boost ffmpeg gettext glew ilmbase @@ -87,7 +90,7 @@ stdenv.mkDerivation (finalAttrs: rec { llvmPackages.openmp SDL Cocoa CoreGraphics ForceFeedback OpenAL OpenGL ]) ++ lib.optional jackaudioSupport libjack2 - ++ lib.optional cudaSupport cudaPackages.cudatoolkit + ++ lib.optionals cudaSupport [ cudaPackages.cuda_cudart ] ++ lib.optional colladaSupport opencollada ++ lib.optional spaceNavSupport libspnav; pythonPath = with python310Packages; [ numpy requests zstandard ]; diff --git a/pkgs/applications/science/molecular-dynamics/gromacs/default.nix b/pkgs/applications/science/molecular-dynamics/gromacs/default.nix index 2ca47d812bbfe..429376b72d912 100644 --- a/pkgs/applications/science/molecular-dynamics/gromacs/default.nix +++ b/pkgs/applications/science/molecular-dynamics/gromacs/default.nix @@ -1,4 +1,14 @@ -{ lib, stdenv, fetchurl, cmake, hwloc, fftw, perl, blas, lapack, mpi, cudatoolkit +{ lib +, stdenv +, fetchurl +, cmake +, hwloc +, fftw +, perl +, blas +, lapack +, mpi +, cudaPackages , singlePrec ? true , config , enableMpi ? 
false @@ -7,6 +17,8 @@ }: let + inherit (cudaPackages.cudaFlags) cudaCapabilities dropDot; + # Select reasonable defaults for all major platforms # The possible values are defined in CMakeLists.txt: # AUTO None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 @@ -31,7 +43,9 @@ in stdenv.mkDerivation rec { outputs = [ "out" "dev" "man" ]; - nativeBuildInputs = [ cmake ]; + nativeBuildInputs = + [ cmake ] + ++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ]; buildInputs = [ fftw @@ -40,13 +54,17 @@ in stdenv.mkDerivation rec { blas lapack ] ++ lib.optional enableMpi mpi - ++ lib.optional enableCuda cudatoolkit - ; + ++ lib.optionals enableCuda [ + cudaPackages.cuda_cudart + cudaPackages.libcufft + cudaPackages.cuda_profiler_api + ]; propagatedBuildInputs = lib.optional enableMpi mpi; propagatedUserEnvPkgs = lib.optional enableMpi mpi; cmakeFlags = [ + (lib.cmakeBool "GMX_HWLOC" true) "-DGMX_SIMD:STRING=${SIMD cpuAcceleration}" "-DGMX_OPENMP:BOOL=TRUE" "-DBUILD_SHARED_LIBS=ON" @@ -66,7 +84,13 @@ in stdenv.mkDerivation rec { else [ "-DGMX_MPI:BOOL=FALSE" ] - ) ++ lib.optional enableCuda "-DGMX_GPU=CUDA"; + ) ++ lib.optionals enableCuda [ + "-DGMX_GPU=CUDA" + (lib.cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (builtins.concatStringsSep ";" (map dropDot cudaCapabilities))) + + # Gromacs seems to ignore and override the normal variables, so we add this ad hoc: + (lib.cmakeFeature "GMX_CUDA_TARGET_COMPUTE" (builtins.concatStringsSep ";" (map dropDot cudaCapabilities))) + ]; postInstall = '' moveToOutput share/cmake $dev diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix index 681549fa62dbe..0725fd56faf62 100644 --- a/pkgs/development/compilers/cudatoolkit/common.nix +++ b/pkgs/development/compilers/cudatoolkit/common.nix @@ -88,7 +88,7 @@ backendStdenv.mkDerivation rec { ] ++ lib.optionals (lib.versionAtLeast version "11.8") [ qt6Packages.wrapQtAppsHook ]; - depsTargetTargetPropagated = [ + propagatedBuildInputs = [ 
setupCudaHook ]; buildInputs = lib.optionals (lib.versionOlder version "11") [ diff --git a/pkgs/development/compilers/cudatoolkit/extension.nix b/pkgs/development/compilers/cudatoolkit/extension.nix index 93800a0dbc6b1..016675fa07015 100644 --- a/pkgs/development/compilers/cudatoolkit/extension.nix +++ b/pkgs/development/compilers/cudatoolkit/extension.nix @@ -47,13 +47,16 @@ final: prev: let ./hooks/mark-for-cudatoolkit-root-hook.sh) { }); - # Normally propagated by cuda_nvcc or cudatoolkit through their depsHostHostPropagated + # Currently propagated by cuda_nvcc or cudatoolkit, rather than used directly setupCudaHook = (final.callPackage ({ makeSetupHook, backendStdenv }: makeSetupHook { name = "setup-cuda-hook"; + substitutions.setupCudaHook = placeholder "out"; + + # Point NVCC at a compatible compiler substitutions.ccRoot = "${backendStdenv.cc}"; # Required in addition to ccRoot as otherwise bin/gcc is looked up diff --git a/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh index 5c18760a3a2b0..ba04c2e0806af 100644 --- a/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh +++ b/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh @@ -1,8 +1,14 @@ # shellcheck shell=bash +# Should we mimic cc-wrapper's "hygiene"? 
+[[ -z ${strictDeps-} ]] || (( "$hostOffset" < 0 )) || return 0 + +echo "Sourcing mark-for-cudatoolkit-root-hook" >&2 + markForCUDAToolkit_ROOT() { mkdir -p "${prefix}/nix-support" - touch "${prefix}/nix-support/include-in-cudatoolkit-root" + [[ -f "${prefix}/nix-support/include-in-cudatoolkit-root" ]] && return + echo "$pname-$output" > "${prefix}/nix-support/include-in-cudatoolkit-root" } fixupOutputHooks+=(markForCUDAToolkit_ROOT) diff --git a/pkgs/development/compilers/cudatoolkit/hooks/nvcc-setup-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/nvcc-setup-hook.sh deleted file mode 100644 index e75a84a9550e7..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/hooks/nvcc-setup-hook.sh +++ /dev/null @@ -1,5 +0,0 @@ -# shellcheck shell=bash - -# CMake's enable_language(CUDA) runs a compiler test and it doesn't account for -# CUDAToolkit_ROOT. We have to help it locate libcudart -export NVCC_APPEND_FLAGS+=" -L@cudartLib@/lib -L@cudartStatic@/lib -I@cudartInclude@/include" diff --git a/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh index 5ea57594211c4..4f1009adfc02e 100644 --- a/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh +++ b/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh @@ -1,18 +1,59 @@ # shellcheck shell=bash -echo Sourcing setup-cuda-hook >&2 +# Starting with 24.05: only run the hook from nativeBuildInputs +# (( "$hostOffset" == -1 && "$targetOffset" == 0)) || return 0 -extendCUDAToolkit_ROOT() { - if [[ -f "$1/nix-support/include-in-cudatoolkit-root" ]] ; then - addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$1" +guard=Sourcing +reason= - if [[ -d "$1/include" ]] ; then - addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$1/include" - fi - fi +[[ -n ${cudaSetupHookOnce-} ]] && guard=Skipping && reason=" because the hook has been propagated more than once" + +if (( "${NIX_DEBUG:-0}" >= 1 )) ; then + echo 
"$guard hostOffset=$hostOffset targetOffset=$targetOffset setupCudaHook$reason" >&2 +else + echo "$guard setup-cuda-hook$reason" >&2 +fi + +[[ "$guard" = Sourcing ]] || return 0 + +declare -g cudaSetupHookOnce=1 +declare -Ag cudaHostPathsSeen=() +declare -Ag cudaOutputToPath=() + +extendcudaHostPathsSeen() { + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "extendcudaHostPathsSeen $1" >&2 + + local markerPath="$1/nix-support/include-in-cudatoolkit-root" + [[ ! -f "${markerPath}" ]] && return + [[ -v cudaHostPathsSeen[$1] ]] && return + + cudaHostPathsSeen["$1"]=1 + + # E.g. cuda_cudart-lib + local cudaOutputName + read -r cudaOutputName < "$markerPath" + + [[ -z "$cudaOutputName" ]] && return + + local oldPath="${cudaOutputToPath[$cudaOutputName]-}" + [[ -n "$oldPath" ]] && echo "extendcudaHostPathsSeen: warning: overwriting $cudaOutputName from $oldPath to $1" >&2 + cudaOutputToPath["$cudaOutputName"]="$1" } +addEnvHooks "$targetOffset" extendcudaHostPathsSeen + +setupCUDAToolkit_ROOT() { + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "setupCUDAToolkit_ROOT: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 -addEnvHooks "$targetOffset" extendCUDAToolkit_ROOT + for path in "${!cudaHostPathsSeen[@]}" ; do + addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path" + if [[ -d "$path/include" ]] ; then + addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$path/include" + fi + done + + export cmakeFlags+=" -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT" +} +preConfigureHooks+=(setupCUDAToolkit_ROOT) setupCUDAToolkitCompilers() { echo Executing setupCUDAToolkitCompilers >&2 @@ -56,13 +97,43 @@ setupCUDAToolkitCompilers() { # CMake's enable_language(CUDA) runs a compiler test and it doesn't account for # CUDAToolkit_ROOT. We have to help it locate libcudart if [[ -z "${nvccDontPrependCudartFlags-}" ]] ; then - export NVCC_APPEND_FLAGS+=" -L@cudartLib@/lib -L@cudartStatic@/lib -I@cudartInclude@/include" + if [[ ! 
-v cudaOutputToPath["cuda_cudart-out"] ]] ; then + echo "setupCUDAToolkitCompilers: missing cudaPackages.cuda_cudart. This may become an an error in the future" >&2 + # exit 1 + fi + for pkg in "${!cudaOutputToPath[@]}" ; do + [[ ! "$pkg" = cuda_cudart* ]] && continue + + local path="${cudaOutputToPath[$pkg]}" + if [[ -d "$path/include" ]] ; then + export NVCC_PREPEND_FLAGS+=" -I$path/include" + fi + if [[ -d "$path/lib" ]] ; then + export NVCC_PREPEND_FLAGS+=" -L$path/lib" + fi + done fi } +preConfigureHooks+=(setupCUDAToolkitCompilers) -setupCMakeCUDAToolkit_ROOT() { - export cmakeFlags+=" -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT" -} +propagateCudaLibraries() { + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "propagateCudaLibraries: cudaPropagateToOutput=$cudaPropagateToOutput cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 + + [[ -z "${cudaPropagateToOutput-}" ]] && return + + mkdir -p "${!cudaPropagateToOutput}/nix-support" + # One'd expect this should be propagated-build-build-deps, but that doesn't seem to work + echo "@setupCudaHook@" >> "${!cudaPropagateToOutput}/nix-support/propagated-native-build-inputs" -postHooks+=(setupCUDAToolkitCompilers) -preConfigureHooks+=(setupCMakeCUDAToolkit_ROOT) + local propagatedBuildInputs=( "${!cudaHostPathsSeen[@]}" ) + for output in $(getAllOutputNames) ; do + if [[ ! 
"$output" = "$cudaPropagateToOutput" ]] ; then + propagatedBuildInputs+=( "${!output}" ) + fi + break + done + + # One'd expect this should be propagated-host-host-deps, but that doesn't seem to work + printWords "${propagatedBuildInputs[@]}" >> "${!cudaPropagateToOutput}/nix-support/propagated-build-inputs" +} +postFixupHooks+=(propagateCudaLibraries) diff --git a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix index a0ac0b0fcb1fb..71e70e8d7b704 100644 --- a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix +++ b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix @@ -51,37 +51,14 @@ in ] ); - cuda_nvcc = prev.cuda_nvcc.overrideAttrs (_: { - # Required by cmake's enable_language(CUDA) to build a test program - # When implementing cross-compilation support: this is - # final.pkgs.targetPackages.cudaPackages.cuda_cudart - env = { - # Given the multiple-outputs each CUDA redist has, we can specify the exact components we - # need from the package. CMake requires: - # - the cuda_runtime.h header, which is in the dev output - # - the dynamic library, which is in the lib output - # - the static library, which is in the static output - cudartInclude = "${final.cuda_cudart.dev}"; - cudartLib = "${final.cuda_cudart.lib}"; - cudartStatic = "${final.cuda_cudart.static}"; - }; - - # Point NVCC at a compatible compiler - - # Desiredata: whenever a package (e.g. magma) adds cuda_nvcc to - # nativeBuildInputs (offsets `(-1, 0)`), magma should also source the - # setupCudaHook, i.e. we want it the hook to be propagated into the - # same nativeBuildInputs. - # - # Logically, cuda_nvcc should include the hook in depsHostHostPropagated, - # so that the final offsets for the propagated hook would be `(-1, 0) + - # (0, 0) = (-1, 0)`. 
- # - # In practice, TargetTarget appears to work: - # https://gist.github.com/fd80ff142cd25e64603618a3700e7f82 - depsTargetTargetPropagated = [ + cuda_nvcc = prev.cuda_nvcc.overrideAttrs (oldAttrs: { + propagatedBuildInputs = [ final.setupCudaHook ]; + + meta = (oldAttrs.meta or { }) // { + mainProgram = "nvcc"; + }; }); cuda_nvprof = prev.cuda_nvprof.overrideAttrs (oldAttrs: { diff --git a/pkgs/development/compilers/cudatoolkit/saxpy/default.nix b/pkgs/development/compilers/cudatoolkit/saxpy/default.nix index f347b43d1d11c..2da6da29004dc 100644 --- a/pkgs/development/compilers/cudatoolkit/saxpy/default.nix +++ b/pkgs/development/compilers/cudatoolkit/saxpy/default.nix @@ -1,12 +1,13 @@ { autoAddOpenGLRunpathHook , backendStdenv , cmake -, cuda_cccl -, cuda_cudart +, cuda_cccl ? null +, cuda_cudart ? null , cudaFlags -, cuda_nvcc +, cuda_nvcc ? null +, cudatoolkit ? null , lib -, libcublas +, libcublas ? null , setupCudaHook , stdenv }: @@ -17,23 +18,24 @@ backendStdenv.mkDerivation { src = ./.; - buildInputs = [ + buildInputs = lib.optionals (cuda_cudart != null) [ libcublas cuda_cudart cuda_cccl + ] ++ lib.optionals (cuda_cudart == null) [ + cudatoolkit ]; nativeBuildInputs = [ cmake - # NOTE: this needs to be pkgs.buildPackages.cudaPackages_XX_Y.cuda_nvcc for - # cross-compilation to work. This should work automatically once we move to - # spliced scopes. 
Delete this comment once that happens - cuda_nvcc - # Alternatively, we could remove the propagated hook from cuda_nvcc and add # directly: # setupCudaHook autoAddOpenGLRunpathHook + ] ++ lib.optionals (cuda_nvcc != null) [ + cuda_nvcc + ] ++ lib.optionals (cuda_nvcc == null) [ + cudatoolkit ]; cmakeFlags = [ diff --git a/pkgs/development/libraries/cctag/default.nix b/pkgs/development/libraries/cctag/default.nix index 2c1a5f9ae7863..238821b6af914 100644 --- a/pkgs/development/libraries/cctag/default.nix +++ b/pkgs/development/libraries/cctag/default.nix @@ -49,7 +49,7 @@ stdenv.mkDerivation rec { buildInputs = [ boost179 eigen - opencv + opencv.cxxdev ]; # Tests are broken on Darwin (linking issue) diff --git a/pkgs/development/libraries/ctranslate2/default.nix b/pkgs/development/libraries/ctranslate2/default.nix index 722672d3a46b6..fa812432bd7ce 100644 --- a/pkgs/development/libraries/ctranslate2/default.nix +++ b/pkgs/development/libraries/ctranslate2/default.nix @@ -57,6 +57,7 @@ stdenv.mkDerivation rec { buildInputs = lib.optionals withMkl [ mkl ] ++ lib.optionals withCUDA [ + cudaPackages.cuda_cccl # required by the fp16 headers in cudart cudaPackages.cuda_cudart cudaPackages.libcublas cudaPackages.libcurand diff --git a/pkgs/development/libraries/hwloc/default.nix b/pkgs/development/libraries/hwloc/default.nix index 67048167d6bfa..626d0b7cca949 100644 --- a/pkgs/development/libraries/hwloc/default.nix +++ b/pkgs/development/libraries/hwloc/default.nix @@ -22,12 +22,13 @@ stdenv.mkDerivation rec { ]; # XXX: libX11 is not directly needed, but needed as a propagated dep of Cairo. 
- nativeBuildInputs = [ pkg-config ]; + nativeBuildInputs = [ pkg-config ] + ++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ]; buildInputs = [ expat ncurses ] ++ lib.optionals x11Support [ cairo libX11 ] ++ lib.optionals stdenv.isLinux [ numactl ] - ++ lib.optional enableCuda cudaPackages.cudatoolkit; + ++ lib.optionals enableCuda [ cudaPackages.cuda_cudart ]; # Since `libpci' appears in `hwloc.pc', it must be propagated. propagatedBuildInputs = lib.optional stdenv.isLinux pciutils; diff --git a/pkgs/development/libraries/nvidia-optical-flow-sdk/default.nix b/pkgs/development/libraries/nvidia-optical-flow-sdk/default.nix index a82fa9068c66c..813821bfb71c2 100644 --- a/pkgs/development/libraries/nvidia-optical-flow-sdk/default.nix +++ b/pkgs/development/libraries/nvidia-optical-flow-sdk/default.nix @@ -1,4 +1,4 @@ -{ lib, stdenv, fetchFromGitHub }: +{ lib, stdenv, fetchFromGitHub, cudaPackages }: stdenv.mkDerivation { pname = "nvidia-optical-flow-sdk"; @@ -18,6 +18,13 @@ stdenv.mkDerivation { cp -R * $out/include ''; + # Makes setupCudaHook propagate nvidia-optical-flow-sdk together with cuda + # packages. Currently used by opencv4.cxxdev, hopefully can be removed in the + # future + nativeBuildInputs = [ + cudaPackages.markForCudatoolkitRootHook + ]; + meta = with lib; { description = "Nvidia optical flow headers for computing the relative motion of pixels between images"; homepage = "https://developer.nvidia.com/opticalflow-sdk"; diff --git a/pkgs/development/libraries/nvidia-thrust/default.nix b/pkgs/development/libraries/nvidia-thrust/default.nix deleted file mode 100644 index f68b57f193b79..0000000000000 --- a/pkgs/development/libraries/nvidia-thrust/default.nix +++ /dev/null @@ -1,102 +0,0 @@ -{ lib -, config -, fetchFromGitHub -, stdenv -, cmake -, pkg-config -, cudaPackages ? { } -, symlinkJoin -, tbb -, hostSystem ? "CPP" -, deviceSystem ? 
if config.cudaSupport then "CUDA" else "OMP" -}: - -# Policy for device_vector -assert builtins.elem deviceSystem [ - "CPP" # Serial on CPU - "OMP" # Parallel with OpenMP - "TBB" # Parallel with Intel TBB - "CUDA" # Parallel on GPU -]; - -# Policy for host_vector -# Always lives on CPU, but execution can be made parallel -assert builtins.elem hostSystem [ "CPP" "OMP" "TBB" ]; - -let - pname = "nvidia-thrust"; - version = "1.16.0"; - - inherit (cudaPackages) backendStdenv cudaFlags; - cudaCapabilities = map cudaFlags.dropDot cudaFlags.cudaCapabilities; - - tbbSupport = builtins.elem "TBB" [ deviceSystem hostSystem ]; - cudaSupport = deviceSystem == "CUDA"; - - # TODO: Would like to use this: - cudaJoined = symlinkJoin { - name = "cuda-packages-unsplit"; - paths = with cudaPackages; [ - cuda_nvcc - cuda_nvrtc # symbols: cudaLaunchDevice, &c; notice postBuild - cuda_cudart # cuda_runtime.h - libcublas - ]; - postBuild = '' - ln -s $out/lib $out/lib64 - ''; - }; -in -stdenv.mkDerivation { - inherit pname version; - - src = fetchFromGitHub { - owner = "NVIDIA"; - repo = "thrust"; - rev = version; - fetchSubmodules = true; - hash = "sha256-/EyznxWKuHuvHNjq+SQg27IaRbtkjXR2zlo2YgCWmUQ="; - }; - - # NVIDIA's "compiler hacks" seem like work-arounds for legacy toolchains and - # cause us errors such as: - # > Thrust's test harness uses CMAKE_CXX_COMPILER for the CUDA host compiler. - # > Refusing to overwrite specified CMAKE_CUDA_HOST_COMPILER - # So we un-fix cmake after them: - postPatch = '' - echo > cmake/ThrustCompilerHacks.cmake - ''; - - buildInputs = lib.optionals tbbSupport [ tbb ]; - - nativeBuildInputs = [ - cmake - pkg-config - ] ++ lib.optionals cudaSupport [ - # Goes in native build inputs because thrust looks for headers - # in a path relative to nvcc... 
- cudaJoined - ]; - - cmakeFlags = [ - "-DTHRUST_INCLUDE_CUB_CMAKE=${if cudaSupport then "ON" else "OFF"}" - "-DTHRUST_DEVICE_SYSTEM=${deviceSystem}" - "-DTHRUST_HOST_SYSTEM=${hostSystem}" - "-DTHRUST_AUTO_DETECT_COMPUTE_ARCHS=OFF" - "-DTHRUST_DISABLE_ARCH_BY_DEFAULT=ON" - ] ++ lib.optionals cudaFlags.enableForwardCompat [ - "-DTHRUST_ENABLE_COMPUTE_FUTURE=ON" - ] ++ map (sm: "THRUST_ENABLE_COMPUTE_${sm}") cudaCapabilities; - - passthru = { - inherit cudaSupport cudaPackages cudaJoined; - }; - - meta = with lib; { - description = "A high-level C++ parallel algorithms library that builds on top of CUDA, TBB, OpenMP, etc"; - homepage = "https://github.com/NVIDIA/thrust"; - license = licenses.asl20; - platforms = platforms.unix; - maintainers = with maintainers; [ SomeoneSerge ]; - }; -} diff --git a/pkgs/development/libraries/opencv/4.x.nix b/pkgs/development/libraries/opencv/4.x.nix index 06360449c1ba9..023e56940b75c 100644 --- a/pkgs/development/libraries/opencv/4.x.nix +++ b/pkgs/development/libraries/opencv/4.x.nix @@ -247,8 +247,10 @@ effectiveStdenv.mkDerivation { outputs = [ "out" + "cxxdev" "package_tests" ]; + cudaPropagateToOutput = "cxxdev"; postUnpack = lib.optionalString buildContrib '' cp --no-preserve=mode -r "${contribSrc}/modules" "$NIX_BUILD_TOP/source/opencv_contrib" @@ -328,20 +330,32 @@ effectiveStdenv.mkDerivation { bzip2 AVFoundation Cocoa VideoDecodeAcceleration CoreMedia MediaToolbox Accelerate ] ++ lib.optionals enableDocs [ doxygen graphviz-nox ] - ++ lib.optionals enableCuda (with cudaPackages; [ - cuda_cudart - cuda_cccl # - libnpp # npp.h + ++ lib.optionals enableCuda (with cudaPackages; [ + cuda_cudart.lib + cuda_cudart.dev + cuda_cccl.dev # + libnpp.dev # npp.h + libnpp.lib + libnpp.static + nvidia-optical-flow-sdk ] ++ lib.optionals enableCublas [ - libcublas # cublas_v2.h + # May start using the default $out instead once + # https://github.com/NixOS/nixpkgs/issues/271792 + # has been addressed + libcublas.static + libcublas.lib + 
libcublas.dev # cublas_v2.h ] ++ lib.optionals enableCudnn [ - cudnn # cudnn.h + cudnn.dev # cudnn.h + cudnn.lib + cudnn.static ] ++ lib.optionals enableCufft [ - libcufft # cufft.h - ]); + libcufft.dev # cufft.h + libcufft.lib + libcufft.static + ]); - propagatedBuildInputs = lib.optional enablePython pythonPackages.numpy - ++ lib.optionals enableCuda [ nvidia-optical-flow-sdk ]; + propagatedBuildInputs = lib.optionals enablePython [ pythonPackages.numpy ]; nativeBuildInputs = [ cmake pkg-config unzip ] ++ lib.optionals enablePython [ @@ -458,6 +472,7 @@ effectiveStdenv.mkDerivation { postInstall = '' sed -i "s|{exec_prefix}/$out|{exec_prefix}|;s|{prefix}/$out|{prefix}|" \ "$out/lib/pkgconfig/opencv4.pc" + mkdir $cxxdev '' # install python distribution information, so other packages can `import opencv` + lib.optionalString enablePython '' @@ -476,6 +491,8 @@ effectiveStdenv.mkDerivation { ''; passthru = { + cudaSupport = enableCuda; + tests = { inherit (gst_all_1) gst-plugins-bad; } diff --git a/pkgs/development/libraries/openmpi/default.nix b/pkgs/development/libraries/openmpi/default.nix index 1c4955e2c51a4..a8bd8acacd595 100644 --- a/pkgs/development/libraries/openmpi/default.nix +++ b/pkgs/development/libraries/openmpi/default.nix @@ -3,7 +3,7 @@ , libpsm2, libfabric, pmix, ucx, ucc , config # Enable CUDA support -, cudaSupport ? config.cudaSupport, cudatoolkit +, cudaSupport ? config.cudaSupport, cudaPackages # Enable the Sun Grid Engine bindings , enableSGE ? false @@ -18,12 +18,7 @@ , fortranSupport ? 
true }: -let - cudatoolkit_joined = symlinkJoin { - name = "${cudatoolkit.name}-unsplit"; - paths = [ cudatoolkit.out cudatoolkit.lib ]; - }; -in stdenv.mkDerivation rec { +stdenv.mkDerivation rec { pname = "openmpi"; version = "4.1.6"; @@ -47,12 +42,13 @@ in stdenv.mkDerivation rec { buildInputs = [ zlib ] ++ lib.optionals stdenv.isLinux [ libnl numactl pmix ucx ucc ] - ++ lib.optionals cudaSupport [ cudatoolkit ] + ++ lib.optionals cudaSupport [ cudaPackages.cuda_cudart ] ++ [ libevent hwloc ] ++ lib.optional (stdenv.isLinux || stdenv.isFreeBSD) rdma-core ++ lib.optionals fabricSupport [ libpsm2 libfabric ]; nativeBuildInputs = [ perl ] + ++ lib.optionals cudaSupport [ cudaPackages.cuda_nvcc ] ++ lib.optionals fortranSupport [ gfortran ]; configureFlags = lib.optional (!cudaSupport) "--disable-mca-dso" @@ -67,7 +63,7 @@ in stdenv.mkDerivation rec { # TODO: add UCX support, which is recommended to use with cuda for the most robust OpenMPI build # https://github.com/openucx/ucx # https://www.open-mpi.org/faq/?category=buildcuda - ++ lib.optionals cudaSupport [ "--with-cuda=${cudatoolkit_joined}" "--enable-dlopen" ] + ++ lib.optionals cudaSupport [ "--with-cuda=${cudaPackages.cuda_cudart}" "--enable-dlopen" ] ++ lib.optionals fabricSupport [ "--with-psm2=${lib.getDev libpsm2}" "--with-libfabric=${lib.getDev libfabric}" ] ; @@ -98,7 +94,8 @@ in stdenv.mkDerivation rec { doCheck = true; passthru = { - inherit cudaSupport cudatoolkit; + inherit cudaSupport; + cudatoolkit = cudaPackages.cudatoolkit; # For backward compatibility only }; meta = with lib; { diff --git a/pkgs/development/libraries/opensubdiv/default.nix b/pkgs/development/libraries/opensubdiv/default.nix index b0ff4b528864b..9c485949a5c09 100644 --- a/pkgs/development/libraries/opensubdiv/default.nix +++ b/pkgs/development/libraries/opensubdiv/default.nix @@ -1,9 +1,7 @@ { config, lib, stdenv, fetchFromGitHub, cmake, pkg-config, xorg, libGLU , libGL, glew, ocl-icd, python3 -, cudaSupport ? 
config.cudaSupport, cudatoolkit - # For visibility mostly. The whole approach to cuda architectures and capabilities - # will be reworked soon. -, cudaArch ? "compute_37" +, cudaSupport ? config.cudaSupport +, cudaPackages , openclSupport ? !cudaSupport , darwin }: @@ -21,7 +19,12 @@ stdenv.mkDerivation rec { outputs = [ "out" "dev" ]; - nativeBuildInputs = [ cmake pkg-config ]; + nativeBuildInputs = [ + cmake + pkg-config + ] ++ lib.optional cudaSupport [ + cudaPackages.cuda_nvcc + ]; buildInputs = [ libGLU libGL python3 # FIXME: these are not actually needed, but the configure script wants them. @@ -30,21 +33,31 @@ stdenv.mkDerivation rec { ] ++ lib.optional (openclSupport && !stdenv.isDarwin) ocl-icd ++ lib.optionals stdenv.isDarwin (with darwin.apple_sdk.frameworks; [OpenCL Cocoa CoreVideo IOKit AppKit AGL ]) - ++ lib.optional cudaSupport cudatoolkit; + ++ lib.optional cudaSupport [ + cudaPackages.cuda_cudart + ]; + + # It's important to set OSD_CUDA_NVCC_FLAGS, + # because otherwise OSD might piggyback unwanted architectures: + # https://github.com/PixarAnimationStudios/OpenSubdiv/blob/7d0ab5530feef693ac0a920585b5c663b80773b3/CMakeLists.txt#L602 + preConfigure = lib.optionalString cudaSupport '' + cmakeFlagsArray+=( + -DOSD_CUDA_NVCC_FLAGS="${lib.concatStringsSep " " cudaPackages.cudaFlags.gencode}" + ) + ''; cmakeFlags = [ "-DNO_TUTORIALS=1" "-DNO_REGRESSION=1" "-DNO_EXAMPLES=1" "-DNO_METAL=1" # don’t have metal in apple sdk + (lib.cmakeBool "NO_OPENCL" (!openclSupport)) + (lib.cmakeBool "NO_CUDA" (!cudaSupport)) ] ++ lib.optionals (!stdenv.isDarwin) [ "-DGLEW_INCLUDE_DIR=${glew.dev}/include" "-DGLEW_LIBRARY=${glew.dev}/lib" ] ++ lib.optionals cudaSupport [ - "-DOSD_CUDA_NVCC_FLAGS=--gpu-architecture=${cudaArch}" - "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc" ] ++ lib.optionals (!openclSupport) [ - "-DNO_OPENCL=1" ]; preBuild = let maxBuildCores = 16; in lib.optionalString cudaSupport '' diff --git a/pkgs/development/libraries/openvino/default.nix 
b/pkgs/development/libraries/openvino/default.nix index b3809f0953641..26fac012948d4 100644 --- a/pkgs/development/libraries/openvino/default.nix +++ b/pkgs/development/libraries/openvino/default.nix @@ -3,6 +3,7 @@ , fetchFromGitHub , fetchurl , substituteAll +, cudaSupport ? opencv.cudaSupport or false # build , addOpenGLRunpath @@ -17,10 +18,12 @@ # runtime , libusb1 , libxml2 +, ocl-icd , opencv , protobuf , pugixml , tbb +, cudaPackages }: let @@ -68,6 +71,8 @@ stdenv.mkDerivation rec { setuptools ])) shellcheck + ] ++ lib.optionals cudaSupport [ + cudaPackages.cuda_nvcc ]; patches = [ @@ -118,6 +123,7 @@ stdenv.mkDerivation rec { "-DENABLE_CPPLINT:BOOL=OFF" "-DBUILD_TESTING:BOOL=OFF" "-DENABLE_SAMPLES:BOOL=OFF" + (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true) ]; env.NIX_CFLAGS_COMPILE = lib.optionalString stdenv.isAarch64 "-Wno-narrowing"; @@ -129,10 +135,13 @@ stdenv.mkDerivation rec { buildInputs = [ libusb1 libxml2 - opencv + ocl-icd + opencv.cxxdev protobuf pugixml tbb + ] ++ lib.optionals cudaSupport [ + cudaPackages.cuda_cudart ]; enableParallelBuilding = true; diff --git a/pkgs/development/libraries/science/math/cutensor/generic.nix b/pkgs/development/libraries/science/math/cutensor/generic.nix index c957fcdd99d4e..02fe13851620b 100644 --- a/pkgs/development/libraries/science/math/cutensor/generic.nix +++ b/pkgs/development/libraries/science/math/cutensor/generic.nix @@ -1,7 +1,11 @@ { stdenv , lib , libPath +, cuda_cudart +, cudaMajorVersion +, cuda_nvcc , cudatoolkit +, libcublas , fetchurl , autoPatchelfHook , addOpenGLRunpath @@ -17,7 +21,7 @@ let in stdenv.mkDerivation { - pname = "cudatoolkit-${cudatoolkit.majorVersion}-cutensor"; + pname = "cutensor-cu${cudaMajorVersion}"; inherit version; src = fetchurl { @@ -32,20 +36,27 @@ stdenv.mkDerivation { nativeBuildInputs = [ autoPatchelfHook addOpenGLRunpath + cuda_nvcc ]; buildInputs = [ stdenv.cc.cc.lib - ]; - - propagatedBuildInputs = [ - cudatoolkit + cuda_cudart + libcublas ]; # Set RUNPATH so 
that libcuda in /run/opengl-driver(-32)/lib can be found. # See the explanation in addOpenGLRunpath. installPhase = '' mkdir -p "$out" "$dev" + + if [[ ! -d "${libPath}" ]] ; then + echo "Cutensor: ${libPath} does not exist, only found:" >&2 + find "$(dirname ${libPath})"/ -maxdepth 1 >&2 + echo "This cutensor release might not support your cudatoolkit version" >&2 + exit 1 + fi + mv include "$dev" mv ${libPath} "$out/lib" @@ -58,7 +69,7 @@ stdenv.mkDerivation { ''; passthru = { - inherit cudatoolkit; + cudatoolkit = lib.warn "cutensor.passthru: cudaPackages.cudatoolkit is deprecated" cudatoolkit; majorVersion = lib.versions.major version; }; @@ -66,7 +77,11 @@ stdenv.mkDerivation { description = "cuTENSOR: A High-Performance CUDA Library For Tensor Primitives"; homepage = "https://developer.nvidia.com/cutensor"; sourceProvenance = with sourceTypes; [ binaryNativeCode ]; - license = licenses.unfree; + license = licenses.unfreeRedistributable // { + shortName = "cuTENSOR EULA"; + name = "cuTENSOR SUPPLEMENT TO SOFTWARE LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS"; + url = "https://docs.nvidia.com/cuda/cutensor/license.html"; + }; platforms = [ "x86_64-linux" ]; maintainers = with maintainers; [ obsidian-systems-maintenance ]; }; diff --git a/pkgs/development/libraries/science/math/faiss/default.nix b/pkgs/development/libraries/science/math/faiss/default.nix index 21e6cbf858cd5..25ac539e05f28 100644 --- a/pkgs/development/libraries/science/math/faiss/default.nix +++ b/pkgs/development/libraries/science/math/faiss/default.nix @@ -6,8 +6,6 @@ , cmake , cudaPackages ? { } , cudaSupport ? config.cudaSupport -, nvidia-thrust -, useThrustSourceBuild ? true , pythonSupport ? 
true , pythonPackages , llvmPackages @@ -27,8 +25,6 @@ , runCommand }@inputs: -assert cudaSupport -> nvidia-thrust.cudaSupport; - let pname = "faiss"; version = "1.7.4"; @@ -44,9 +40,6 @@ let cuda_cudart # cuda_runtime.h libcublas libcurand - ] ++ lib.optionals useThrustSourceBuild [ - nvidia-thrust - ] ++ lib.optionals (!useThrustSourceBuild) [ cuda_cccl ] ++ lib.optionals (cudaPackages ? cuda_profiler_api) [ cuda_profiler_api # cuda_profiler_api.h diff --git a/pkgs/development/libraries/science/math/suitesparse/default.nix b/pkgs/development/libraries/science/math/suitesparse/default.nix index dd2eb9478f7f6..abc2ff9a37012 100644 --- a/pkgs/development/libraries/science/math/suitesparse/default.nix +++ b/pkgs/development/libraries/science/math/suitesparse/default.nix @@ -8,7 +8,7 @@ , mpfr , config , enableCuda ? config.cudaSupport -, cudatoolkit +, cudaPackages }: stdenv.mkDerivation rec { @@ -25,7 +25,11 @@ stdenv.mkDerivation rec { }; nativeBuildInputs = [ - ] ++ lib.optional stdenv.isDarwin fixDarwinDylibNames; + ] ++ lib.optionals stdenv.isDarwin [ + fixDarwinDylibNames + ] ++ lib.optionals enableCuda [ + cudaPackages.cuda_nvcc + ]; # Use compatible indexing for lapack and blas used buildInputs = assert (blas.isILP64 == lapack.isILP64); [ @@ -34,7 +38,12 @@ stdenv.mkDerivation rec { gfortran.cc.lib gmp mpfr - ] ++ lib.optional enableCuda cudatoolkit; + ] ++ lib.optionals enableCuda [ + cudaPackages.cuda_cudart.dev + cudaPackages.cuda_cudart.lib + cudaPackages.libcublas.dev + cudaPackages.libcublas.lib + ]; preConfigure = '' # Mongoose and GraphBLAS are packaged separately @@ -49,9 +58,9 @@ stdenv.mkDerivation rec { ] ++ lib.optionals blas.isILP64 [ "CFLAGS=-DBLAS64" ] ++ lib.optionals enableCuda [ - "CUDA_PATH=${cudatoolkit}" - "CUDART_LIB=${cudatoolkit.lib}/lib/libcudart.so" - "CUBLAS_LIB=${cudatoolkit}/lib/libcublas.so" + "CUDA_PATH=${cudaPackages.cuda_nvcc}" + "CUDART_LIB=${cudaPackages.cuda_cudart.lib}/lib/libcudart.so" + 
"CUBLAS_LIB=${cudaPackages.libcublas.lib}/lib/libcublas.so" ] ++ lib.optionals stdenv.isDarwin [ # Unless these are set, the build will attempt to use `Accelerate` on darwin, see: # https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/v5.13.0/SuiteSparse_config/SuiteSparse_config.mk#L368 diff --git a/pkgs/development/libraries/science/math/tensorrt/extension.nix b/pkgs/development/libraries/science/math/tensorrt/extension.nix index c6596dbaacde1..5ffa3910f1e5c 100644 --- a/pkgs/development/libraries/science/math/tensorrt/extension.nix +++ b/pkgs/development/libraries/science/math/tensorrt/extension.nix @@ -17,16 +17,32 @@ final: prev: let isSupported = fileData: elem cudaVersion fileData.supportedCudaVersions; # Return the first file that is supported. In practice there should only ever be one anyway. supportedFile = files: findFirst isSupported null files; - # Supported versions with versions as keys and file as value - supportedVersions = filterAttrs (version: file: file !=null ) (mapAttrs (version: files: supportedFile files) tensorRTVersions); + # Compute versioned attribute name to be used in this package set computeName = version: "tensorrt_${toUnderscore version}"; + + # Supported versions with versions as keys and file as value + supportedVersions = lib.recursiveUpdate + { + tensorrt = { + enable = false; + fileVersionCuda = null; + fileVersionCudnn = null; + fullVersion = "0.0.0"; + sha256 = null; + tarball = null; + supportedCudaVersions = [ ]; + }; + } + (mapAttrs' (version: attrs: nameValuePair (computeName version) attrs) + (filterAttrs (version: file: file != null) (mapAttrs (version: files: supportedFile files) tensorRTVersions))); + # Add all supported builds as attributes - allBuilds = mapAttrs' (version: file: nameValuePair (computeName version) (buildTensorRTPackage (removeAttrs file ["fileVersionCuda"]))) supportedVersions; + allBuilds = mapAttrs (name: file: buildTensorRTPackage (removeAttrs file ["fileVersionCuda"])) supportedVersions; + # 
Set the default attributes, e.g. tensorrt = tensorrt_8_4; - defaultBuild = { "tensorrt" = if allBuilds ? ${computeName tensorRTDefaultVersion} - then allBuilds.${computeName tensorRTDefaultVersion} - else throw "tensorrt-${tensorRTDefaultVersion} does not support your cuda version ${cudaVersion}"; }; + defaultName = computeName tensorRTDefaultVersion; + defaultBuild = lib.optionalAttrs (allBuilds ? ${defaultName}) { tensorrt = allBuilds.${computeName tensorRTDefaultVersion}; }; in { inherit buildTensorRTPackage; } // allBuilds // defaultBuild; diff --git a/pkgs/development/libraries/science/math/tensorrt/generic.nix b/pkgs/development/libraries/science/math/tensorrt/generic.nix index 165c6f356da89..2bcdd8e588cf0 100644 --- a/pkgs/development/libraries/science/math/tensorrt/generic.nix +++ b/pkgs/development/libraries/science/math/tensorrt/generic.nix @@ -8,20 +8,22 @@ , cudnn }: -{ fullVersion +{ enable ? true +, fullVersion , fileVersionCudnn ? null , tarball , sha256 , supportedCudaVersions ? 
[ ] }: -assert fileVersionCudnn == null || lib.assertMsg (lib.strings.versionAtLeast cudnn.version fileVersionCudnn) +assert !enable || fileVersionCudnn == null || lib.assertMsg (lib.strings.versionAtLeast cudnn.version fileVersionCudnn) "This version of TensorRT requires at least cuDNN ${fileVersionCudnn} (current version is ${cudnn.version})"; backendStdenv.mkDerivation rec { pname = "cudatoolkit-${cudatoolkit.majorVersion}-tensorrt"; version = fullVersion; - src = requireFile rec { + src = if !enable then null else + requireFile rec { name = tarball; inherit sha256; message = '' @@ -38,13 +40,13 @@ backendStdenv.mkDerivation rec { outputs = [ "out" "dev" ]; - nativeBuildInputs = [ + nativeBuildInputs = lib.optionals enable [ autoPatchelfHook autoAddOpenGLRunpathHook ]; # Used by autoPatchelfHook - buildInputs = [ + buildInputs = lib.optionals enable [ backendStdenv.cc.cc.lib # libstdc++ cudatoolkit cudnn @@ -75,6 +77,7 @@ backendStdenv.mkDerivation rec { ''; passthru.stdenv = backendStdenv; + passthru.enable = enable; meta = with lib; { # Check that the cudatoolkit version satisfies our min/max constraints (both @@ -82,7 +85,7 @@ backendStdenv.mkDerivation rec { # official version constraints (as recorded in default.nix). In some cases # you _may_ be able to smudge version constraints, just know that you're # embarking into unknown and unsupported territory when doing so. 
- broken = !(elem cudaVersion supportedCudaVersions); + broken = !enable || !(elem cudaVersion supportedCudaVersions); description = "TensorRT: a high-performance deep learning interface"; homepage = "https://developer.nvidia.com/tensorrt"; license = licenses.unfree; diff --git a/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix b/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix index d046c6864539d..2036c4c86253b 100644 --- a/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix +++ b/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix @@ -14,10 +14,15 @@ inherit (cudaPackages) backendStdenv cudaFlags; cuda-common-redist = with cudaPackages; [ - cuda_cudart # cuda_runtime.h - libcublas # cublas_v2.h - libcusolver # cusolverDn.h - libcusparse # cusparse.h + cuda_cudart.dev # cuda_runtime.h + cuda_cudart.lib + cuda_cccl.dev # + libcublas.dev # cublas_v2.h + libcublas.lib + libcusolver.dev # cusolverDn.h + libcusolver.lib + libcusparse.dev # cusparse.h + libcusparse.lib ]; cuda-native-redist = symlinkJoin { diff --git a/pkgs/development/libraries/ucc/default.nix b/pkgs/development/libraries/ucc/default.nix index a92c6bea37d70..68f358b3d3deb 100644 --- a/pkgs/development/libraries/ucc/default.nix +++ b/pkgs/development/libraries/ucc/default.nix @@ -1,7 +1,7 @@ { stdenv, lib, fetchFromGitHub, libtool, automake, autoconf, ucx , config , enableCuda ? config.cudaSupport -, cudatoolkit +, cudaPackages , enableAvx ? stdenv.hostPlatform.avxSupport , enableSse41 ? stdenv.hostPlatform.sse4_1Support , enableSse42 ? 
stdenv.hostPlatform.sse4_2Support @@ -30,19 +30,25 @@ stdenv.mkDerivation rec { done ''; + nativeBuildInputs = [ libtool automake autoconf ] + ++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ]; + buildInputs = [ ucx ] + ++ lib.optionals enableCuda [ + cudaPackages.cuda_cccl + cudaPackages.cuda_cudart + ]; + + preConfigure = '' ./autogen.sh + '' + lib.optionalString enableCuda '' + configureFlagsArray+=( "--with-nvcc-gencode=${builtins.concatStringsSep " " cudaPackages.cudaFlags.gencode}" ) ''; - - nativeBuildInputs = [ libtool automake autoconf ]; - buildInputs = [ ucx ] - ++ lib.optional enableCuda cudatoolkit; - configureFlags = [ ] ++ lib.optional enableSse41 "--with-sse41" ++ lib.optional enableSse42 "--with-sse42" ++ lib.optional enableAvx "--with-avx" - ++ lib.optional enableCuda "--with-cuda=${cudatoolkit}"; + ++ lib.optional enableCuda "--with-cuda=${cudaPackages.cuda_cudart}"; postInstall = '' find $out/lib/ -name "*.la" -exec rm -f \{} \; diff --git a/pkgs/development/libraries/ucx/default.nix b/pkgs/development/libraries/ucx/default.nix index 627cac56bb737..3b923d8efdd2a 100644 --- a/pkgs/development/libraries/ucx/default.nix +++ b/pkgs/development/libraries/ucx/default.nix @@ -2,18 +2,12 @@ , rdma-core, libbfd, libiberty, perl, zlib, symlinkJoin, pkg-config , config , enableCuda ? config.cudaSupport -, cudatoolkit +, cudaPackages , enableRocm ? 
config.rocmSupport , rocmPackages }: let - # Needed for configure to find all libraries - cudatoolkit' = symlinkJoin { - inherit (cudatoolkit) name meta; - paths = [ cudatoolkit cudatoolkit.lib ]; - }; - rocmList = with rocmPackages; [ rocm-core rocm-runtime rocm-device-libs clr ]; rocm = symlinkJoin { @@ -35,7 +29,15 @@ stdenv.mkDerivation rec { outputs = [ "out" "doc" "dev" ]; - nativeBuildInputs = [ autoreconfHook doxygen pkg-config ]; + nativeBuildInputs = [ + autoreconfHook + doxygen + pkg-config + ] + ++ lib.optionals enableCuda [ + cudaPackages.cuda_nvcc + cudaPackages.autoAddOpenGLRunpathHook + ]; buildInputs = [ libbfd @@ -44,8 +46,16 @@ stdenv.mkDerivation rec { perl rdma-core zlib - ] ++ lib.optional enableCuda cudatoolkit - ++ lib.optionals enableRocm rocmList; + ] ++ lib.optionals enableCuda [ + cudaPackages.cuda_cudart + cudaPackages.cuda_nvml_dev + + ] ++ lib.optionals enableRocm rocmList; + + LDFLAGS = lib.optionals enableCuda [ + # Fake libnvidia-ml.so (the real one is deployed impurely) + "-L${cudaPackages.cuda_nvml_dev}/lib/stubs" + ]; configureFlags = [ "--with-rdmacm=${lib.getDev rdma-core}" @@ -53,7 +63,7 @@ stdenv.mkDerivation rec { "--with-rc" "--with-dm" "--with-verbs=${lib.getDev rdma-core}" - ] ++ lib.optional enableCuda "--with-cuda=${cudatoolkit'}" + ] ++ lib.optionals enableCuda [ "--with-cuda=${cudaPackages.cuda_cudart}" ] ++ lib.optional enableRocm "--with-rocm=${rocm}"; postInstall = '' diff --git a/pkgs/development/python-modules/cupy/default.nix b/pkgs/development/python-modules/cupy/default.nix index e5de149fca14a..71defbb99b985 100644 --- a/pkgs/development/python-modules/cupy/default.nix +++ b/pkgs/development/python-modules/cupy/default.nix @@ -11,11 +11,34 @@ , cudaPackages , addOpenGLRunpath , pythonOlder +, symlinkJoin }: let - inherit (cudaPackages) cudatoolkit cudnn cutensor nccl; -in buildPythonPackage rec { + inherit (cudaPackages) cudnn cutensor nccl; + cudatoolkit-joined = symlinkJoin { + name = 
"cudatoolkit-joined-${cudaPackages.cudaVersion}"; + paths = with cudaPackages; [ + cuda_cccl # + cuda_cccl.dev + cuda_cudart + cuda_nvcc.dev # + cuda_nvprof + cuda_nvrtc + cuda_nvtx + cuda_profiler_api + libcublas + libcufft + libcurand + libcusolver + libcusparse + + # Missing: + # cusparselt + ]; + }; +in +buildPythonPackage rec { pname = "cupy"; version = "12.2.0"; @@ -32,27 +55,32 @@ in buildPythonPackage rec { # very short builds and a few extremely long ones, so setting both ends up # working nicely in practice. preConfigure = '' - export CUDA_PATH=${cudatoolkit} export CUPY_NUM_BUILD_JOBS="$NIX_BUILD_CORES" export CUPY_NUM_NVCC_THREADS="$NIX_BUILD_CORES" ''; nativeBuildInputs = [ + setuptools + wheel addOpenGLRunpath cython + cudaPackages.cuda_nvcc ]; - LDFLAGS = "-L${cudatoolkit}/lib/stubs"; - - propagatedBuildInputs = [ - cudatoolkit + buildInputs = [ + cudatoolkit-joined cudnn cutensor nccl + ]; + + NVCC = "${lib.getExe cudaPackages.cuda_nvcc}"; # FIXME: splicing/buildPackages + CUDA_PATH = "${cudatoolkit-joined}"; + LDFLAGS = "-L${cudaPackages.cuda_cudart}/lib/stubs"; + + propagatedBuildInputs = [ fastrlock numpy - setuptools - wheel ]; nativeCheckInputs = [ diff --git a/pkgs/development/python-modules/jaxlib/bin.nix b/pkgs/development/python-modules/jaxlib/bin.nix index d80cbc2a60183..e35b4759bd64f 100644 --- a/pkgs/development/python-modules/jaxlib/bin.nix +++ b/pkgs/development/python-modules/jaxlib/bin.nix @@ -29,11 +29,11 @@ , stdenv # Options: , cudaSupport ? config.cudaSupport -, cudaPackages ? {} +, cudaPackagesGoogle }: let - inherit (cudaPackages) cudatoolkit cudnn; + inherit (cudaPackagesGoogle) cudatoolkit cudnn; version = "0.4.20"; @@ -210,8 +210,8 @@ buildPythonPackage { maintainers = with maintainers; [ samuela ]; platforms = [ "aarch64-darwin" "x86_64-linux" "x86_64-darwin" ]; broken = - !(cudaSupport -> (cudaPackages ? cudatoolkit) && lib.versionAtLeast cudatoolkit.version "11.1") - || !(cudaSupport -> (cudaPackages ? 
cudnn) && lib.versionAtLeast cudnn.version "8.2") + !(cudaSupport -> (cudaPackagesGoogle ? cudatoolkit) && lib.versionAtLeast cudatoolkit.version "11.1") + || !(cudaSupport -> (cudaPackagesGoogle ? cudnn) && lib.versionAtLeast cudnn.version "8.2") || !(cudaSupport -> stdenv.isLinux); }; } diff --git a/pkgs/development/python-modules/jaxlib/default.nix b/pkgs/development/python-modules/jaxlib/default.nix index c70ab0ac2b327..a04d6973ca4be 100644 --- a/pkgs/development/python-modules/jaxlib/default.nix +++ b/pkgs/development/python-modules/jaxlib/default.nix @@ -44,14 +44,14 @@ , config # CUDA flags: , cudaSupport ? config.cudaSupport -, cudaPackages ? {} +, cudaPackagesGoogle # MKL: , mklSupport ? true }: let - inherit (cudaPackages) backendStdenv cudatoolkit cudaFlags cudnn nccl; + inherit (cudaPackagesGoogle) backendStdenv cudatoolkit cudaFlags cudnn nccl; pname = "jaxlib"; version = "0.4.20"; diff --git a/pkgs/development/python-modules/tensorflow/bin.nix b/pkgs/development/python-modules/tensorflow/bin.nix index dae6816a906c3..ac5bb7edf1a54 100644 --- a/pkgs/development/python-modules/tensorflow/bin.nix +++ b/pkgs/development/python-modules/tensorflow/bin.nix @@ -22,7 +22,7 @@ , tensorboard , config , cudaSupport ? config.cudaSupport -, cudaPackages ? {} +, cudaPackagesGoogle , zlib , python , keras-applications @@ -43,7 +43,7 @@ assert ! 
(stdenv.isDarwin && cudaSupport); let packages = import ./binary-hashes.nix; - inherit (cudaPackages) cudatoolkit cudnn; + inherit (cudaPackagesGoogle) cudatoolkit cudnn; in buildPythonPackage { pname = "tensorflow" + lib.optionalString cudaSupport "-gpu"; inherit (packages) version; @@ -198,7 +198,7 @@ in buildPythonPackage { ]; passthru = { - inherit cudaPackages; + cudaPackages = cudaPackagesGoogle; }; meta = with lib; { diff --git a/pkgs/development/python-modules/tensorflow/default.nix b/pkgs/development/python-modules/tensorflow/default.nix index c8e292e316744..be8b26f3d0e99 100644 --- a/pkgs/development/python-modules/tensorflow/default.nix +++ b/pkgs/development/python-modules/tensorflow/default.nix @@ -19,8 +19,8 @@ # https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/iRCt5m4qUz0 , config , cudaSupport ? config.cudaSupport -, cudaPackages ? { } -, cudaCapabilities ? cudaPackages.cudaFlags.cudaCapabilities +, cudaPackagesGoogle +, cudaCapabilities ? cudaPackagesGoogle.cudaFlags.cudaCapabilities , mklSupport ? false, mkl , tensorboardSupport ? 
true # XLA without CUDA is broken @@ -50,15 +50,15 @@ let # __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the # translation units, so the build fails at link time stdenv = - if cudaSupport then cudaPackages.backendStdenv + if cudaSupport then cudaPackagesGoogle.backendStdenv else if originalStdenv.isDarwin then llvmPackages_11.stdenv else originalStdenv; - inherit (cudaPackages) cudatoolkit nccl; + inherit (cudaPackagesGoogle) cudatoolkit nccl; # use compatible cuDNN (https://www.tensorflow.org/install/source#gpu) # cudaPackages.cudnn led to this: # https://github.com/tensorflow/tensorflow/issues/60398 cudnnAttribute = "cudnn_8_6"; - cudnn = cudaPackages.${cudnnAttribute}; + cudnn = cudaPackagesGoogle.${cudnnAttribute}; gentoo-patches = fetchzip { url = "https://dev.gentoo.org/~perfinion/patches/tensorflow-patches-2.12.0.tar.bz2"; hash = "sha256-SCRX/5/zML7LmKEPJkcM5Tebez9vv/gmE4xhT/jyqWs="; @@ -486,8 +486,8 @@ let broken = stdenv.isDarwin || !(xlaSupport -> cudaSupport) - || !(cudaSupport -> builtins.hasAttr cudnnAttribute cudaPackages) - || !(cudaSupport -> cudaPackages ? cudatoolkit); + || !(cudaSupport -> builtins.hasAttr cudnnAttribute cudaPackagesGoogle) + || !(cudaSupport -> cudaPackagesGoogle ? cudatoolkit); } // lib.optionalAttrs stdenv.isDarwin { timeout = 86400; # 24 hours maxSilent = 14400; # 4h, double the default of 7200s @@ -590,7 +590,7 @@ in buildPythonPackage { # Regression test for #77626 removed because not more `tensorflow.contrib`. 
passthru = { - inherit cudaPackages; + cudaPackages = cudaPackagesGoogle; deps = bazel-build.deps; libtensorflow = bazel-build.out; }; diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index 5523a87b6b5e8..b930f08aec73d 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -133,7 +133,9 @@ in buildPythonPackage rec { "out" # output standard python package "dev" # output libtorch headers "lib" # output libtorch libraries + "cxxdev" # propagated deps for the cmake consumers of torch ]; + cudaPropagateToOutput = "cxxdev"; src = fetchFromGitHub { owner = "pytorch"; @@ -334,7 +336,9 @@ in buildPythonPackage rec { buildInputs = [ blas blas.provider ] ++ lib.optionals cudaSupport (with cudaPackages; [ cuda_cccl.dev # - cuda_cudart # cuda_runtime.h and libraries + cuda_cudart.dev # cuda_runtime.h and libraries + cuda_cudart.lib + cuda_cudart.static cuda_cupti.dev # For kineto cuda_cupti.lib # For kineto cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too @@ -367,7 +371,10 @@ in buildPythonPackage rec { ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ] ++ lib.optionals (cudaSupport || rocmSupport) [ effectiveMagma ] ++ lib.optionals stdenv.isLinux [ numactl ] - ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ]; + ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ] + ++ lib.optionals tritonSupport [ openai-triton ] + ++ lib.optionals MPISupport [ mpi ] + ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]; propagatedBuildInputs = [ cffi @@ -387,8 +394,10 @@ in buildPythonPackage rec { # torch/csrc requires `pybind11` at runtime pybind11 + ] ++ lib.optionals tritonSupport [ openai-triton ]; + + propagatedCxxBuildInputs = [ ] - ++ lib.optionals tritonSupport [ openai-triton ] ++ lib.optionals MPISupport [ mpi ] ++ lib.optionals rocmSupport [ 
rocmtoolkit_joined ]; @@ -449,7 +458,10 @@ in buildPythonPackage rec { --replace "/build/source/torch/include" "$dev/include" ''; - postFixup = lib.optionalString stdenv.isDarwin '' + postFixup = '' + mkdir -p "$cxxdev/nix-support" + printWords "''${propagatedCxxBuildInputs[@]}" >> "$cxxdev/nix-support/propagated-build-inputs" + '' + lib.optionalString stdenv.isDarwin '' for f in $(ls $lib/lib/*.dylib); do install_name_tool -id $lib/lib/$(basename $f) $f || true done diff --git a/pkgs/development/python-modules/torchaudio/default.nix b/pkgs/development/python-modules/torchaudio/default.nix index 207d2a6fade27..4d689d0b39064 100644 --- a/pkgs/development/python-modules/torchaudio/default.nix +++ b/pkgs/development/python-modules/torchaudio/default.nix @@ -44,17 +44,7 @@ buildPythonPackage rec { ]; buildInputs = [ pybind11 - ] ++ lib.optionals cudaSupport [ - cudaPackages.libcurand.dev - cudaPackages.libcurand.lib - cudaPackages.cuda_cudart # cuda_runtime.h and libraries - cudaPackages.cuda_cccl.dev # - cudaPackages.cuda_nvtx.dev - cudaPackages.cuda_nvtx.lib # -llibNVToolsExt - cudaPackages.libcublas.dev - cudaPackages.libcublas.lib - cudaPackages.libcufft.dev - cudaPackages.libcufft.lib + torch.cxxdev ]; propagatedBuildInputs = [ torch diff --git a/pkgs/development/python-modules/torchvision/default.nix b/pkgs/development/python-modules/torchvision/default.nix index 401e415e2812a..46a933835f0cf 100644 --- a/pkgs/development/python-modules/torchvision/default.nix +++ b/pkgs/development/python-modules/torchvision/default.nix @@ -17,28 +17,6 @@ let inherit (torch) cudaCapabilities cudaPackages cudaSupport; inherit (cudaPackages) backendStdenv cudaVersion; - # NOTE: torchvision doesn't use cudnn; torch does! - # For this reason it is not included. 
- cuda-common-redist = with cudaPackages; [ - cuda_cccl # - libcublas # cublas_v2.h - libcusolver # cusolverDn.h - libcusparse # cusparse.h - ]; - - cuda-native-redist = symlinkJoin { - name = "cuda-native-redist-${cudaVersion}"; - paths = with cudaPackages; [ - cuda_cudart # cuda_runtime.h - cuda_nvcc - ] ++ cuda-common-redist; - }; - - cuda-redist = symlinkJoin { - name = "cuda-redist-${cudaVersion}"; - paths = cuda-common-redist; - }; - pname = "torchvision"; version = "0.15.2"; in @@ -52,9 +30,15 @@ buildPythonPackage { hash = "sha256-KNbOgd6PCINZqZ24c/Ev+ODux3ik5iUlzem9uUfQArM="; }; - nativeBuildInputs = [ libpng ninja which ] ++ lib.optionals cudaSupport [ cuda-native-redist ]; + nativeBuildInputs = [ + libpng + ninja + which + ] ++ lib.optionals cudaSupport [ + cudaPackages.cuda_nvcc + ]; - buildInputs = [ libjpeg_turbo libpng ] ++ lib.optionals cudaSupport [ cuda-redist ]; + buildInputs = [ libjpeg_turbo libpng torch.cxxdev ]; propagatedBuildInputs = [ numpy pillow torch scipy ]; diff --git a/pkgs/top-level/aliases.nix b/pkgs/top-level/aliases.nix index 5d1ae513ce4ca..340850200686b 100644 --- a/pkgs/top-level/aliases.nix +++ b/pkgs/top-level/aliases.nix @@ -644,6 +644,7 @@ mapAliases ({ noto-fonts-cjk = noto-fonts-cjk-sans; # Added 2021-12-16 noto-fonts-emoji = noto-fonts-color-emoji; # Added 2023-09-09 noto-fonts-extra = noto-fonts; # Added 2023-04-08 + nvidia-thrust = throw "nvidia-thrust has been removed because the project was deprecated; use cudaPackages.cuda_cccl"; ### O ### diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index d908c937e50f3..aff9967296248 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -7324,6 +7324,10 @@ with pkgs; cudaPackages_12_2 = callPackage ./cuda-packages.nix { cudaVersion = "12.2"; }; cudaPackages_12 = cudaPackages_12_0; + # Use the older cudaPackages for tensorflow and jax, as determined by cudnn + # compatibility: 
https://www.tensorflow.org/install/source#gpu + cudaPackagesGoogle = cudaPackages_11; + # TODO: try upgrading once there is a cuDNN release supporting CUDA 12. No # such cuDNN release as of 2023-01-10. cudaPackages = recurseIntoAttrs cudaPackages_11; @@ -11278,16 +11282,6 @@ with pkgs; nvfetcher = haskell.lib.compose.justStaticExecutables haskellPackages.nvfetcher; - nvidia-thrust = callPackage ../development/libraries/nvidia-thrust { }; - - nvidia-thrust-intel = callPackage ../development/libraries/nvidia-thrust { - hostSystem = "TBB"; - deviceSystem = if config.cudaSupport then "CUDA" else "TBB"; - }; - - nvidia-thrust-cuda = callPackage ../development/libraries/nvidia-thrust { - deviceSystem = "CUDA"; - }; miller = callPackage ../tools/text/miller { }; @@ -20802,6 +20796,9 @@ with pkgs; # catboost requires clang 12+ for build # after bumping the default version of llvm, check for compatibility with the cuda backend and pin it. inherit (llvmPackages_12) stdenv; + + # https://github.com/catboost/catboost/issues/2540 + cudaPackages = cudaPackages_11; }; ndn-cxx = callPackage ../development/libraries/ndn-cxx { }; @@ -39479,7 +39476,6 @@ with pkgs; singlePrec = true; enableMpi = true; enableCuda = true; - cudatoolkit = cudatoolkit_11; fftw = fftwSinglePrec; }); @@ -40019,7 +40015,6 @@ with pkgs; faissWithCuda = faiss.override { cudaSupport = true; - nvidia-thrust = nvidia-thrust-cuda; }; fityk = callPackage ../applications/science/misc/fityk { }; diff --git a/pkgs/top-level/cuda-packages.nix b/pkgs/top-level/cuda-packages.nix index a2f49a98ccd53..3912422785bc4 100644 --- a/pkgs/top-level/cuda-packages.nix +++ b/pkgs/top-level/cuda-packages.nix @@ -24,6 +24,7 @@ let buildCuTensorPackage = final.callPackage ../development/libraries/science/math/cutensor/generic.nix; + # FIXME: Include non-x86_64 platforms cuTensorVersions = { "1.2.2.5" = { hash = "sha256-lU7iK4DWuC/U3s1Ct/rq2Gr3w4F2U7RYYgpmF05bibY="; @@ -31,12 +32,24 @@ let "1.5.0.3" = { hash = 
"sha256-T96+lPC6OTOkIs/z3QWg73oYVSyidN0SVkBWmT9VRx0="; }; + "2.0.0.7" = { + hash = "sha256-32M4rtGOW2rgxJUhBT0WBtKkHhh9f17M+RgK9rvE72g="; + }; }; inherit (final) cudaMajorMinorVersion cudaMajorVersion; + cudaToCutensor = { + "10" = "1.2.2.5"; + "11" = "1.5.0.3"; + "12" = "2.0.0.7"; + }; + + versionNewer = lib.flip lib.versionOlder; + latestVersion = (builtins.head (lib.sort versionNewer (builtins.attrNames cuTensorVersions))); + cutensor = buildCuTensorPackage rec { - version = if cudaMajorMinorVersion == "10.1" then "1.2.2.5" else "1.5.0.3"; + version = cudaToCutensor.${cudaMajorVersion} or latestVersion; inherit (cuTensorVersions.${version}) hash; # This can go into generic.nix libPath = "lib/${if cudaMajorVersion == "10" then cudaMajorMinorVersion else cudaMajorVersion}"; diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index 2cc25bf403520..326f59a8a08a1 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -2447,7 +2447,8 @@ self: super: with self; { cufflinks = callPackage ../development/python-modules/cufflinks { }; - cupy = callPackage ../development/python-modules/cupy { }; + # cupy 12.2.0 possibly incompatible with cutensor 2.0 that comes with cudaPackages_12 + cupy = callPackage ../development/python-modules/cupy { cudaPackages = pkgs.cudaPackages_11; }; curio = callPackage ../development/python-modules/curio { }; @@ -13828,7 +13829,6 @@ self: super: with self; { callPackage ../development/python-modules/tensorflow { inherit (pkgs.darwin) cctools; inherit (pkgs.config) cudaSupport; - inherit (self.tensorflow-bin) cudaPackages; inherit (pkgs.darwin.apple_sdk.frameworks) Foundation Security; flatbuffers-core = pkgs.flatbuffers; flatbuffers-python = self.flatbuffers; @@ -13861,7 +13861,7 @@ self: super: with self; { tensorly = callPackage ../development/python-modules/tensorly { }; - tensorrt = callPackage ../development/python-modules/tensorrt { }; + tensorrt = callPackage 
../development/python-modules/tensorrt { cudaPackages = pkgs.cudaPackages_11; }; tensorstore = callPackage ../development/python-modules/tensorstore { };