CDI: Add mount-nvidia-binaries and mount-nvidia-docker-1-directories options #290979

Merged
4 changes: 1 addition & 3 deletions nixos/doc/manual/release-notes/rl-2405.section.md
@@ -42,9 +42,7 @@ Use `services.pipewire.extraConfig` or `services.pipewire.configPackages` for Pi

- The default dbus implementation has transitioned to dbus-broker from the classic dbus daemon for better performance and reliability. Users can revert to the classic dbus daemon by setting `services.dbus.implementation = "dbus";`. For detailed deviations, refer to [dbus-broker's deviations page](https://github.com/bus1/dbus-broker/wiki/Deviations).

- A new option `virtualisation.containers.cdi` was added. It contains `static` and `dynamic` attributes (corresponding to `/etc/cdi` and `/run/cdi` respectively) to configure the Container Device Interface (CDI).

- `virtualisation.docker.enableNvidia` and `virtualisation.podman.enableNvidia` options are deprecated. `virtualisation.containers.cdi.dynamic.nvidia.enable` should be used instead. This option will expose GPUs on containers with the `--device` CLI option. This is supported by Docker 25, Podman 3.2.0 and Singularity 4. Any container runtime that supports the CDI specification will take advantage of this feature.
- `virtualisation.docker.enableNvidia` and `virtualisation.podman.enableNvidia` options are deprecated. `hardware.nvidia-container-toolkit.enable` should be used instead. This option will expose GPUs on containers with the `--device` CLI option. This is supported by Docker 25, Podman 3.2.0 and Singularity 4. Any container runtime that supports the CDI specification will take advantage of this feature.

- A new option `system.etc.overlay.enable` was added. If enabled, `/etc` is
mounted via an overlayfs instead of being created by a custom perl script.
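As an illustration (not part of this diff), the deprecation note above translates into a configuration along these lines. The CDI device name `nvidia.com/gpu=all` is the identifier conventionally produced by nvidia-container-toolkit; it is assumed here rather than stated in this PR.

```nix
{
  # Replaces the deprecated virtualisation.docker.enableNvidia /
  # virtualisation.podman.enableNvidia options.
  hardware.nvidia-container-toolkit.enable = true;

  # A CDI-capable runtime is still required
  # (Docker >= 25, Podman >= 3.2.0, Singularity 4, ...).
  virtualisation.docker.enable = true;
}
```

A container can then request the GPUs with something like `docker run --rm --device nvidia.com/gpu=all <image> nvidia-smi`.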
2 changes: 1 addition & 1 deletion nixos/modules/module-list.nix
@@ -557,7 +557,7 @@
./services/hardware/kanata.nix
./services/hardware/lcd.nix
./services/hardware/lirc.nix
./services/hardware/nvidia-container-toolkit-cdi-generator
./services/hardware/nvidia-container-toolkit
./services/hardware/monado.nix
./services/hardware/nvidia-optimus.nix
./services/hardware/openrgb.nix

This file was deleted.

This file was deleted.

35 changes: 35 additions & 0 deletions nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix
@@ -0,0 +1,35 @@
{
glibc,
jq,
lib,
mounts,
nvidia-container-toolkit,
nvidia-driver,
runtimeShell,
writeScriptBin,
}: let
mkMount = {hostPath, containerPath, mountOptions}: {
inherit hostPath containerPath;
options = mountOptions;
};
jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +";
allJqMounts = lib.concatMap
(mount:
["${lib.getExe jq} '${jqAddMountExpression} ${builtins.toJSON (mkMount mount)}'"])
mounts;
in
writeScriptBin "nvidia-cdi-generator"
''
#! ${runtimeShell}

function cdiGenerate {
${lib.getExe' nvidia-container-toolkit "nvidia-ctk"} cdi generate \
--format json \
--ldconfig-path ${lib.getExe' glibc "ldconfig"} \
--library-search-path ${lib.getLib nvidia-driver}/lib \
--nvidia-ctk-path ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"}
}

cdiGenerate | \
${lib.concatStringsSep " | " allJqMounts} > $RUNTIME_DIRECTORY/nvidia-container-toolkit.json
''
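To make the jq pipeline above concrete: each entry in `mounts` is turned into a JSON object by `mkMount` and appended to `.containerEdits.mounts` of the spec that `nvidia-ctk cdi generate` emits. A sketch of one such entry follows; the host path is an assumption, mirroring `pkgs.addDriverRunpath.driverLink`, which typically points at `/run/opengl-driver`.

```nix
# Hypothetical evaluation of the helper defined above:
mkMount {
  hostPath = "/run/opengl-driver";
  containerPath = "/run/opengl-driver";
  mountOptions = [ "ro" "nosuid" "nodev" "bind" ];
}
# => { hostPath = "/run/opengl-driver";
#      containerPath = "/run/opengl-driver";
#      options = [ "ro" "nosuid" "nodev" "bind" ]; }
# builtins.toJSON of that value is what the jq expression splices into
# .containerEdits.mounts of the generated CDI document.
```

Since the systemd unit below sets `RuntimeDirectory = "cdi"`, `$RUNTIME_DIRECTORY` resolves to `/run/cdi`, so the generated spec ends up at `/run/cdi/nvidia-container-toolkit.json`.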
121 changes: 121 additions & 0 deletions nixos/modules/services/hardware/nvidia-container-toolkit/default.nix
@@ -0,0 +1,121 @@
{ config, lib, pkgs, ... }:

{
imports = [
(lib.mkRenamedOptionModule
[ "virtualisation" "containers" "cdi" "dynamic" "nvidia" "enable" ]
[ "hardware" "nvidia-container-toolkit" "enable" ])
];

options = let
mountType = {
options = {
hostPath = lib.mkOption {
type = lib.types.str;
description = "Host path.";
};
containerPath = lib.mkOption {
type = lib.types.str;
description = "Container path.";
};
mountOptions = lib.mkOption {
default = [ "ro" "nosuid" "nodev" "bind" ];
type = lib.types.listOf lib.types.str;
description = "Mount options.";
};
};
};
in {

hardware.nvidia-container-toolkit = {
enable = lib.mkOption {
default = false;
type = lib.types.bool;
description = ''
Enable dynamic CDI configuration for NVidia devices by running
nvidia-container-toolkit on boot.
'';
};

mounts = lib.mkOption {
type = lib.types.listOf (lib.types.submodule mountType);
default = [];
description = "Mounts to be added to every container under the Nvidia CDI profile.";
};

mount-nvidia-executables = lib.mkOption {
default = true;
type = lib.types.bool;
description = ''
Mount executables nvidia-smi, nvidia-cuda-mps-control, nvidia-cuda-mps-server,
nvidia-debugdump, nvidia-powerd and nvidia-ctk on containers.
'';
};

mount-nvidia-docker-1-directories = lib.mkOption {
default = true;
type = lib.types.bool;
description = ''
Mount nvidia-docker-1 directories on containers: /usr/local/nvidia/lib and
/usr/local/nvidia/lib64.
'';
};
};

};

config = {

hardware.nvidia-container-toolkit.mounts = let
nvidia-driver = config.hardware.nvidia.package;
in (lib.mkMerge [
[{ hostPath = pkgs.addDriverRunpath.driverLink;
containerPath = pkgs.addDriverRunpath.driverLink; }
{ hostPath = "${lib.getLib pkgs.glibc}/lib";
containerPath = "${lib.getLib pkgs.glibc}/lib"; }
{ hostPath = "${lib.getLib pkgs.glibc}/lib64";
containerPath = "${lib.getLib pkgs.glibc}/lib64"; }]
(lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-executables
[{ hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control";
containerPath = "/usr/bin/nvidia-cuda-mps-control"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server";
containerPath = "/usr/bin/nvidia-cuda-mps-server"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-debugdump";
containerPath = "/usr/bin/nvidia-debugdump"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-powerd";
containerPath = "/usr/bin/nvidia-powerd"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-smi";
containerPath = "/usr/bin/nvidia-smi"; }])
# nvidia-docker 1.0 uses /usr/local/nvidia/lib{,64}
# e.g.
# - https://gitlab.com/nvidia/container-images/cuda/-/blob/e3ff10eab3a1424fe394899df0e0f8ca5a410f0f/dist/12.3.1/ubi9/base/Dockerfile#L44
# - https://github.com/NVIDIA/nvidia-docker/blob/01d2c9436620d7dde4672e414698afe6da4a282f/src/nvidia/volumes.go#L104-L173
(lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-docker-1-directories
[{ hostPath = "${lib.getLib nvidia-driver}/lib";
containerPath = "/usr/local/nvidia/lib"; }
{ hostPath = "${lib.getLib nvidia-driver}/lib";
containerPath = "/usr/local/nvidia/lib64"; }])
]);

systemd.services.nvidia-container-toolkit-cdi-generator = lib.mkIf config.hardware.nvidia-container-toolkit.enable {
description = "Container Device Interface (CDI) for Nvidia generator";
wantedBy = [ "multi-user.target" ];
after = [ "systemd-udev-settle.service" ];
serviceConfig = {
RuntimeDirectory = "cdi";
RemainAfterExit = true;
ExecStart =
let
script = pkgs.callPackage ./cdi-generate.nix {
inherit (config.hardware.nvidia-container-toolkit) mounts;
nvidia-driver = config.hardware.nvidia.package;
};
in
lib.getExe script;
Type = "oneshot";
};
};

};

}
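For reference (again not part of the diff), the options defined above could be combined as follows; the extra mount path is purely hypothetical.

```nix
{
  hardware.nvidia-container-toolkit = {
    enable = true;

    # Skip the nvidia-docker 1.0 compatibility mounts when nothing inside the
    # containers expects /usr/local/nvidia/lib{,64}:
    mount-nvidia-docker-1-directories = false;

    # Extra mounts are added to every container using the generated CDI profile:
    mounts = [
      {
        hostPath = "/etc/example-vendor";      # hypothetical host path
        containerPath = "/etc/example-vendor"; # mountOptions keeps its default
      }
    ];
  };
}
```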
49 changes: 2 additions & 47 deletions nixos/modules/virtualisation/containers.nix
@@ -28,43 +28,6 @@ in
description = "Enable the OCI seccomp BPF hook";
};

cdi = {
dynamic.nvidia.enable = mkOption {
type = types.bool;
default = false;
description = ''
Enable dynamic CDI configuration for NVidia devices by running nvidia-container-toolkit on boot.
'';
};

static = mkOption {
type = types.attrs;
default = { };
description = ''
Declarative CDI specification. Each key of the attribute set
will be mapped to a file in /etc/cdi. It is required for every
key to be provided in JSON format.
'';
example = {
some-vendor = builtins.fromJSON ''
{
"cdiVersion": "0.5.0",
"kind": "some-vendor.com/foo",
"devices": [],
"containerEdits": []
}
'';

some-other-vendor = {
cdiVersion = "0.5.0";
kind = "some-other-vendor.com/bar";
devices = [];
containerEdits = [];
};
};
};
};

containersConf.settings = mkOption {
type = toml.type;
default = { };
@@ -150,8 +113,6 @@ in

config = lib.mkIf cfg.enable {

hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkIf cfg.cdi.dynamic.nvidia.enable true;

virtualisation.containers.containersConf.cniPlugins = [ pkgs.cni-plugins ];

virtualisation.containers.containersConf.settings = {
@@ -163,13 +124,7 @@
};
};

environment.etc = let
cdiStaticConfigurationFiles = (lib.attrsets.mapAttrs'
(name: value:
lib.attrsets.nameValuePair "cdi/${name}.json"
{ text = builtins.toJSON value; })
cfg.cdi.static);
in {
environment.etc = {
"containers/containers.conf".source =
toml.generate "containers.conf" cfg.containersConf.settings;

@@ -183,7 +138,7 @@ in
"containers/policy.json".source =
if cfg.policy != { } then pkgs.writeText "policy.json" (builtins.toJSON cfg.policy)
else "${pkgs.skopeo.policy}/default-policy.json";
} // cdiStaticConfigurationFiles;
};

};

Expand Down