diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix b/nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix
index 360a832e28cbe..6a04fa0ff7502 100644
--- a/nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix
+++ b/nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix
@@ -13,11 +13,14 @@
     inherit hostPath containerPath;
     options = mountOptions;
   };
-  jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +";
-  allJqMounts = lib.concatMap
-    (mount:
-      ["${lib.getExe jq} '${jqAddMountExpression} ${builtins.toJSON (mkMount mount)}'"])
-    mounts;
+  mountToCommand = mount:
+    "additionalMount \"${mount.hostPath}\" \"${mount.containerPath}\" '${builtins.toJSON mount.mountOptions}'";
+  mountsToCommands = mounts:
+    if (builtins.length mounts) == 0 then
+      "cat"
+    else
+      (lib.strings.concatMapStringsSep " | \\\n"
+        mountToCommand mounts);
 in
 writeScriptBin "nvidia-cdi-generator"
 ''
@@ -32,6 +35,18 @@ function cdiGenerate {
     --nvidia-ctk-path ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"}
 }
 
-cdiGenerate | \
-  ${lib.concatStringsSep " | " allJqMounts} > $RUNTIME_DIRECTORY/nvidia-container-toolkit.json
+function additionalMount {
+  local hostPath="$1"
+  local containerPath="$2"
+  local mountOptions="$3"
+  if [ -e "$hostPath" ]; then
+    ${lib.getExe jq} ".containerEdits.mounts[.containerEdits.mounts | length] = { \"hostPath\": \"$hostPath\", \"containerPath\": \"$containerPath\", \"options\": $mountOptions }"
+  else
+    echo "Mount $hostPath ignored: could not find path in the host machine" >&2
+    cat
+  fi
+}
+
+cdiGenerate |
+  ${mountsToCommands mounts} > $RUNTIME_DIRECTORY/nvidia-container-toolkit.json
 ''
diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix b/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix
index 5c0fed35c4c97..a188c16141e1a 100644
--- a/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix
+++ b/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix
@@ -71,6 +71,8 @@
           /usr/local/nvidia/lib64.
         '';
       };
+
+      package = lib.mkPackageOption pkgs "nvidia-container-toolkit" { };
     };
   };
 
@@ -129,6 +131,7 @@
       let
        script = pkgs.callPackage ./cdi-generate.nix {
          inherit (config.hardware.nvidia-container-toolkit) mounts;
+         nvidia-container-toolkit = config.hardware.nvidia-container-toolkit.package;
          nvidia-driver = config.hardware.nvidia.package;
          deviceNameStrategy = config.hardware.nvidia-container-toolkit.device-name-strategy;
        };
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index 0cd52cbec9a36..ee004f3c9825e 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -705,6 +705,7 @@ in {
   ntfy-sh = handleTest ./ntfy-sh.nix {};
   ntfy-sh-migration = handleTest ./ntfy-sh-migration.nix {};
   ntpd-rs = handleTest ./ntpd-rs.nix {};
+  nvidia-container-toolkit = runTest ./nvidia-container-toolkit.nix;
   nvmetcfg = handleTest ./nvmetcfg.nix {};
   nzbget = handleTest ./nzbget.nix {};
   nzbhydra2 = handleTest ./nzbhydra2.nix {};
diff --git a/nixos/tests/nvidia-container-toolkit.nix b/nixos/tests/nvidia-container-toolkit.nix
new file mode 100644
index 0000000000000..b22b989c0814f
--- /dev/null
+++ b/nixos/tests/nvidia-container-toolkit.nix
@@ -0,0 +1,149 @@
+{ pkgs, lib, ... }:
+let
+  testCDIScript = pkgs.writeShellScriptBin "test-cdi" ''
+    die() {
+      echo "$1"
+      exit 1
+    }
+
+    check_file_referential_integrity() {
+      echo "checking $1 referential integrity"
+      ( ${pkgs.glibc.bin}/bin/ldd "$1" | ${lib.getExe pkgs.gnugrep} "not found" &> /dev/null ) && return 1
+      return 0
+    }
+
+    check_directory_referential_integrity() {
+      ${lib.getExe pkgs.findutils} "$1" -type f -print0 | while read -d $'\0' file; do
+        if [[ $(${lib.getExe pkgs.file} "$file" | ${lib.getExe pkgs.gnugrep} ELF) ]]; then
+          check_file_referential_integrity "$file" || exit 1
+        else
+          echo "skipping $file: not an ELF file"
+        fi
+      done
+    }
+
+    check_directory_referential_integrity "/usr/bin" || exit 1
+    check_directory_referential_integrity "${pkgs.addDriverRunpath.driverLink}" || exit 1
+    check_directory_referential_integrity "/usr/local/nvidia" || exit 1
+  '';
+  testContainerImage = pkgs.dockerTools.buildImage {
+    name = "cdi-test";
+    tag = "latest";
+    config = {
+      Cmd = [ (lib.getExe testCDIScript) ];
+    };
+    copyToRoot = with pkgs.dockerTools; [
+      usrBinEnv
+      binSh
+    ];
+  };
+  emptyCDISpec = ''
+    {
+      "cdiVersion": "0.5.0",
+      "kind": "nvidia.com/gpu",
+      "devices": [
+        {
+          "name": "all",
+          "containerEdits": {
+            "deviceNodes": [
+              {
+                "path": "/dev/urandom"
+              }
+            ],
+            "hooks": [],
+            "mounts": []
+          }
+        }
+      ],
+      "containerEdits": {
+        "deviceNodes": [],
+        "hooks": [],
+        "mounts": []
+      }
+    }
+  '';
+  nvidia-container-toolkit = {
+    enable = true;
+    package = pkgs.stdenv.mkDerivation {
+      pname = "nvidia-ctk-dummy";
+      version = "1.0.0";
+      dontUnpack = true;
+      dontBuild = true;
+
+      inherit emptyCDISpec;
+      passAsFile = [ "emptyCDISpec" ];
+
+      installPhase = ''
+        mkdir -p $out/bin $out/share/nvidia-container-toolkit
+        cp "$emptyCDISpecPath" "$out/share/nvidia-container-toolkit/spec.json"
+        echo -n "$emptyCDISpec" > "$out/bin/nvidia-ctk";
+        cat << EOF > "$out/bin/nvidia-ctk"
+        #!${pkgs.runtimeShell}
+        cat "$out/share/nvidia-container-toolkit/spec.json"
+        EOF
+        chmod +x $out/bin/nvidia-ctk
+      '';
+      meta.mainProgram = "nvidia-ctk";
+    };
+  };
+in
+{
+  name = "nvidia-container-toolkit";
+  meta = with lib.maintainers; {
+    maintainers = [ ereslibre ];
+  };
+  defaults =
+    { config, ... }:
+    {
+      environment.systemPackages = with pkgs; [ jq ];
+      virtualisation.diskSize = lib.mkDefault 10240;
+      virtualisation.containers.enable = lib.mkDefault true;
+      hardware = {
+        inherit nvidia-container-toolkit;
+        nvidia = {
+          open = true;
+          package = config.boot.kernelPackages.nvidiaPackages.stable.open;
+        };
+        graphics.enable = lib.mkDefault true;
+      };
+    };
+  nodes = {
+    no-gpus = {
+      virtualisation.containers.enable = false;
+      hardware.graphics.enable = false;
+    };
+    one-gpu =
+      { pkgs, ... }:
+      {
+        environment.systemPackages = with pkgs; [ podman ];
+        hardware.graphics.enable = true;
+      };
+
+    one-gpu-invalid-host-paths = {
+      hardware.nvidia-container-toolkit.mounts = [
+        {
+          hostPath = "/non-existent-path";
+          containerPath = "/some/path";
+        }
+      ];
+    };
+  };
+  testScript = ''
+    start_all()
+
+    with subtest("Generate an empty CDI spec for a machine with no Nvidia GPUs"):
+      no_gpus.wait_for_unit("nvidia-container-toolkit-cdi-generator.service")
+      no_gpus.succeed("cat /var/run/cdi/nvidia-container-toolkit.json | jq")
+
+    with subtest("Podman loads the generated CDI spec for a machine with an Nvidia GPU"):
+      one_gpu.wait_for_unit("nvidia-container-toolkit-cdi-generator.service")
+      one_gpu.succeed("cat /var/run/cdi/nvidia-container-toolkit.json | jq")
+      one_gpu.succeed("podman load < ${testContainerImage}")
+      print(one_gpu.succeed("podman run --pull=never --device=nvidia.com/gpu=all -v /run/opengl-driver:/run/opengl-driver:ro cdi-test:latest"))
+
+    # Issue: https://github.com/NixOS/nixpkgs/issues/319201
+    with subtest("The generated CDI spec skips specified non-existent paths on the host"):
+      one_gpu_invalid_host_paths.wait_for_unit("nvidia-container-toolkit-cdi-generator.service")
+      one_gpu_invalid_host_paths.fail("grep 'non-existent-path' /var/run/cdi/nvidia-container-toolkit.json")
+  '';
+}