From e9babb74413c46fa89a4e61e738121a6d53a86c9 Mon Sep 17 00:00:00 2001
From: Danny Breyfogle
Date: Sun, 14 Apr 2024 22:10:57 -0700
Subject: [PATCH] Update CUDA docs to use k3s suggested method (#1430)

---
 docs/usage/advanced/cuda.md                   | 42 +++++++-------
 docs/usage/advanced/cuda/Dockerfile           | 47 +++++----------
 docs/usage/advanced/cuda/build.sh             | 12 ++--
 docs/usage/advanced/cuda/config.toml.tmpl     | 55 -------------------
 docs/usage/advanced/cuda/cuda-vector-add.yaml |  1 +
 .../cuda/device-plugin-daemonset.yaml         | 44 +++++++------
 6 files changed, 69 insertions(+), 132 deletions(-)
 delete mode 100644 docs/usage/advanced/cuda/config.toml.tmpl

diff --git a/docs/usage/advanced/cuda.md b/docs/usage/advanced/cuda.md
index b89087f10..be2d0ba48 100644
--- a/docs/usage/advanced/cuda.md
+++ b/docs/usage/advanced/cuda.md
@@ -25,24 +25,12 @@ To get around this we need to build the image with a supported base image.
 This Dockerfile is based on the [K3s Dockerfile](https://github.com/rancher/k3s/blob/master/package/Dockerfile)
 The following changes are applied:
 
-1. Change the base images to nvidia/cuda:11.2.0-base-ubuntu18.04 so the NVIDIA Container Runtime can be installed. The version of `cuda:xx.x.x` must match the one you're planning to use.
-2. Add a custom containerd `config.toml` template to add the NVIDIA Container Runtime. This replaces the default `runc` runtime
-3. Add a manifest for the NVIDIA driver plugin for Kubernetes
-
-### Configure containerd
-
-We need to configure containerd to use the NVIDIA Container Runtime. We need to customize the config.toml that is used at startup. K3s provides a way to do this using a [config.toml.tmpl](cuda/config.toml.tmpl) file. More information can be found on the [K3s site](https://rancher.com/docs/k3s/latest/en/advanced/#configuring-containerd).
-
-```go
-{%
-   include-markdown "./cuda/config.toml.tmpl"
-   comments=false
-%}
-```
+1. Change the base image to nvidia/cuda:12.4.1-base-ubuntu22.04 so the NVIDIA Container Toolkit can be installed. The version of `cuda:xx.x.x` must match the one you're planning to use.
+2. Add a manifest for the NVIDIA device plugin for Kubernetes, with an added RuntimeClass definition. See the [k3s documentation](https://docs.k3s.io/advanced#nvidia-container-runtime-support).
 
 ### The NVIDIA device plugin
 
-To enable NVIDIA GPU support on Kubernetes you also need to install the [NVIDIA device plugin](https://github.com/NVIDIA/k8s-device-plugin). The device plugin is a deamonset and allows you to automatically:
+To enable NVIDIA GPU support on Kubernetes you also need to install the [NVIDIA device plugin](https://github.com/NVIDIA/k8s-device-plugin). The device plugin is a daemonset and allows you to automatically:
 
 * Expose the number of GPUs on each nodes of your cluster
 * Keep track of the health of your GPUs
@@ -55,6 +43,22 @@ To enable NVIDIA GPU support on Kubernetes you also need to install the [NVIDIA
 %}
 ```
 
+Two modifications have been made to the original NVIDIA daemonset:
+
+1. Added a RuntimeClass definition at the top of the manifest.
+
+   ```yaml
+   apiVersion: node.k8s.io/v1
+   kind: RuntimeClass
+   metadata:
+     name: nvidia
+   handler: nvidia
+   ```
+
+2. Added `runtimeClassName: nvidia` to the Pod spec.
+
+Note: you must explicitly add `runtimeClassName: nvidia` to all your Pod specs to use the GPU. See the [k3s documentation](https://docs.k3s.io/advanced#nvidia-container-runtime-support).
+
 ### Build the K3s image
 
 To build the custom image we need to build K3s because we need the generated output.
@@ -62,12 +66,11 @@ To build the custom image we need to build K3s because we need the generated out
 Put the following files in a directory:
 
 * [Dockerfile](cuda/Dockerfile)
-* [config.toml.tmpl](cuda/config.toml.tmpl)
 * [device-plugin-daemonset.yaml](cuda/device-plugin-daemonset.yaml)
 * [build.sh](cuda/build.sh)
 * [cuda-vector-add.yaml](cuda/cuda-vector-add.yaml)
 
-The `build.sh` script is configured using exports & defaults to `v1.21.2+k3s1`. Please set at least the `IMAGE_REGISTRY` variable! The script performs the following steps builds the custom K3s image including the nvidia drivers.
+The `build.sh` script is configured using exports and defaults to `v1.28.8+k3s1`. Please set at least the `IMAGE_REGISTRY` variable! The script builds the custom K3s image, including the NVIDIA drivers.
 
 [build.sh](cuda/build.sh):
 
@@ -108,10 +111,6 @@ Done
 
 If the `cuda-vector-add` pod is stuck in `Pending` state, probably the device-driver daemonset didn't get deployed correctly from the auto-deploy manifests. In that case, you can apply it manually via `#!bash kubectl apply -f device-plugin-daemonset.yaml`.
 
-## Known issues
-
-* This approach does not work on WSL2 yet. The NVIDIA driver plugin and container runtime rely on the NVIDIA Management Library (NVML) which is not yet supported. See the [CUDA on WSL User Guide](https://docs.nvidia.com/cuda/wsl-user-guide/index.html#known-limitations).
-
 ## Acknowledgements
 
 Most of the information in this article was obtained from various sources:
@@ -126,3 +125,4 @@ Most of the information in this article was obtained from various sources:
 * [@markrexwinkel](https://github.com/markrexwinkel)
 * [@vainkop](https://github.com/vainkop)
 * [@iwilltry42](https://github.com/iwilltry42)
+* [@dbreyfogle](https://github.com/dbreyfogle)
diff --git a/docs/usage/advanced/cuda/Dockerfile b/docs/usage/advanced/cuda/Dockerfile
index d17e8da0b..728be226c 100644
--- a/docs/usage/advanced/cuda/Dockerfile
+++ b/docs/usage/advanced/cuda/Dockerfile
@@ -1,39 +1,24 @@
-ARG K3S_TAG="v1.21.2-k3s1"
-FROM rancher/k3s:$K3S_TAG as k3s
-
-FROM nvidia/cuda:11.2.0-base-ubuntu18.04
-
-ARG NVIDIA_CONTAINER_RUNTIME_VERSION
-ENV NVIDIA_CONTAINER_RUNTIME_VERSION=$NVIDIA_CONTAINER_RUNTIME_VERSION
-
-RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
-
-RUN apt-get update && \
-    apt-get -y install gnupg2 curl
-
-# Install NVIDIA Container Runtime
-RUN curl -s -L https://nvidia.github.io/nvidia-container-runtime/gpgkey | apt-key add -
+# syntax=docker/dockerfile:1.7-labs
+# The labs frontend is required for the COPY --exclude flag used below
+ARG K3S_TAG="v1.28.8-k3s1"
+ARG CUDA_TAG="12.4.1-base-ubuntu22.04"
 
-RUN curl -s -L https://nvidia.github.io/nvidia-container-runtime/ubuntu18.04/nvidia-container-runtime.list | tee /etc/apt/sources.list.d/nvidia-container-runtime.list
-
-RUN apt-get update && \
-    apt-get -y install nvidia-container-runtime=${NVIDIA_CONTAINER_RUNTIME_VERSION}
-
-COPY --from=k3s / /
-
-RUN mkdir -p /etc && \
-    echo 'hosts: files dns' > /etc/nsswitch.conf
-
-RUN chmod 1777 /tmp
+FROM rancher/k3s:$K3S_TAG as k3s
+FROM nvcr.io/nvidia/cuda:$CUDA_TAG
 
-# Provide custom containerd configuration to configure the nvidia-container-runtime
-RUN mkdir -p /var/lib/rancher/k3s/agent/etc/containerd/
+# Install the NVIDIA container toolkit
+RUN apt-get update && apt-get install -y curl \
+    && curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
+    && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
+        sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
+        tee /etc/apt/sources.list.d/nvidia-container-toolkit.list \
+    && apt-get update && apt-get install -y nvidia-container-toolkit \
+    && nvidia-ctk runtime configure --runtime=containerd
 
-COPY config.toml.tmpl /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl
+COPY --from=k3s --exclude=/bin / /
+COPY --from=k3s /bin /bin
 
 # Deploy the nvidia driver plugin on startup
-RUN mkdir -p /var/lib/rancher/k3s/server/manifests
-
 COPY device-plugin-daemonset.yaml /var/lib/rancher/k3s/server/manifests/nvidia-device-plugin-daemonset.yaml
 
 VOLUME /var/lib/kubelet
diff --git a/docs/usage/advanced/cuda/build.sh b/docs/usage/advanced/cuda/build.sh
index 562601dc3..afbc475b7 100755
--- a/docs/usage/advanced/cuda/build.sh
+++ b/docs/usage/advanced/cuda/build.sh
@@ -2,20 +2,18 @@
 
 set -euxo pipefail
 
-K3S_TAG=${K3S_TAG:="v1.21.2-k3s1"} # replace + with -, if needed
+K3S_TAG=${K3S_TAG:="v1.28.8-k3s1"} # replace + with -, if needed
+CUDA_TAG=${CUDA_TAG:="12.4.1-base-ubuntu22.04"}
 IMAGE_REGISTRY=${IMAGE_REGISTRY:="MY_REGISTRY"}
 IMAGE_REPOSITORY=${IMAGE_REPOSITORY:="rancher/k3s"}
-IMAGE_TAG="$K3S_TAG-cuda"
+IMAGE_TAG="$K3S_TAG-cuda-$CUDA_TAG"
 IMAGE=${IMAGE:="$IMAGE_REGISTRY/$IMAGE_REPOSITORY:$IMAGE_TAG"}
 
-NVIDIA_CONTAINER_RUNTIME_VERSION=${NVIDIA_CONTAINER_RUNTIME_VERSION:="3.5.0-1"}
-
 echo "IMAGE=$IMAGE"
 
-# due to some unknown reason, copying symlinks fails with buildkit enabled
-DOCKER_BUILDKIT=0 docker build \
+docker build \
   --build-arg K3S_TAG=$K3S_TAG \
-  --build-arg NVIDIA_CONTAINER_RUNTIME_VERSION=$NVIDIA_CONTAINER_RUNTIME_VERSION \
+  --build-arg CUDA_TAG=$CUDA_TAG \
   -t $IMAGE .
 docker push $IMAGE
 echo "Done!"
\ No newline at end of file
diff --git a/docs/usage/advanced/cuda/config.toml.tmpl b/docs/usage/advanced/cuda/config.toml.tmpl
deleted file mode 100644
index 4d5c7fa4c..000000000
--- a/docs/usage/advanced/cuda/config.toml.tmpl
+++ /dev/null
@@ -1,55 +0,0 @@
-[plugins.opt]
-  path = "{{ .NodeConfig.Containerd.Opt }}"
-
-[plugins.cri]
-  stream_server_address = "127.0.0.1"
-  stream_server_port = "10010"
-
-{{- if .IsRunningInUserNS }}
-  disable_cgroup = true
-  disable_apparmor = true
-  restrict_oom_score_adj = true
-{{end}}
-
-{{- if .NodeConfig.AgentConfig.PauseImage }}
-  sandbox_image = "{{ .NodeConfig.AgentConfig.PauseImage }}"
-{{end}}
-
-{{- if not .NodeConfig.NoFlannel }}
-[plugins.cri.cni]
-  bin_dir = "{{ .NodeConfig.AgentConfig.CNIBinDir }}"
-  conf_dir = "{{ .NodeConfig.AgentConfig.CNIConfDir }}"
-{{end}}
-
-[plugins.cri.containerd.runtimes.runc]
-  # ---- changed from 'io.containerd.runc.v2' for GPU support
-  runtime_type = "io.containerd.runtime.v1.linux"
-
-# ---- added for GPU support
-[plugins.linux]
-  runtime = "nvidia-container-runtime"
-
-{{ if .PrivateRegistryConfig }}
-{{ if .PrivateRegistryConfig.Mirrors }}
-[plugins.cri.registry.mirrors]{{end}}
-{{range $k, $v := .PrivateRegistryConfig.Mirrors }}
-[plugins.cri.registry.mirrors."{{$k}}"]
-  endpoint = [{{range $i, $j := $v.Endpoints}}{{if $i}}, {{end}}{{printf "%q" .}}{{end}}]
-{{end}}
-
-{{range $k, $v := .PrivateRegistryConfig.Configs }}
-{{ if $v.Auth }}
-[plugins.cri.registry.configs."{{$k}}".auth]
-  {{ if $v.Auth.Username }}username = "{{ $v.Auth.Username }}"{{end}}
-  {{ if $v.Auth.Password }}password = "{{ $v.Auth.Password }}"{{end}}
-  {{ if $v.Auth.Auth }}auth = "{{ $v.Auth.Auth }}"{{end}}
-  {{ if $v.Auth.IdentityToken }}identitytoken = "{{ $v.Auth.IdentityToken }}"{{end}}
-{{end}}
-{{ if $v.TLS }}
-[plugins.cri.registry.configs."{{$k}}".tls]
-  {{ if $v.TLS.CAFile }}ca_file = "{{ $v.TLS.CAFile }}"{{end}}
-  {{ if $v.TLS.CertFile }}cert_file = "{{ $v.TLS.CertFile }}"{{end}}
-  {{ if $v.TLS.KeyFile }}key_file = "{{ $v.TLS.KeyFile }}"{{end}}
-{{end}}
-{{end}}
-{{end}}
\ No newline at end of file
diff --git a/docs/usage/advanced/cuda/cuda-vector-add.yaml b/docs/usage/advanced/cuda/cuda-vector-add.yaml
index e22849b40..5b7e5b66b 100644
--- a/docs/usage/advanced/cuda/cuda-vector-add.yaml
+++ b/docs/usage/advanced/cuda/cuda-vector-add.yaml
@@ -3,6 +3,7 @@ kind: Pod
 metadata:
   name: cuda-vector-add
 spec:
+  runtimeClassName: nvidia # Explicitly request the runtime
   restartPolicy: OnFailure
   containers:
     - name: cuda-vector-add
diff --git a/docs/usage/advanced/cuda/device-plugin-daemonset.yaml b/docs/usage/advanced/cuda/device-plugin-daemonset.yaml
index 6bb521a32..a52bb06d2 100644
--- a/docs/usage/advanced/cuda/device-plugin-daemonset.yaml
+++ b/docs/usage/advanced/cuda/device-plugin-daemonset.yaml
@@ -1,3 +1,9 @@
+apiVersion: node.k8s.io/v1
+kind: RuntimeClass
+metadata:
+  name: nvidia
+handler: nvidia
+---
 apiVersion: apps/v1
 kind: DaemonSet
 metadata:
@@ -7,35 +13,37 @@ spec:
   selector:
     matchLabels:
       name: nvidia-device-plugin-ds
+  updateStrategy:
+    type: RollingUpdate
   template:
     metadata:
-      # Mark this pod as a critical add-on; when enabled, the critical add-on scheduler
-      # reserves resources for critical add-on pods so that they can be rescheduled after
-      # a failure. This annotation works in tandem with the toleration below.
-      annotations:
-        scheduler.alpha.kubernetes.io/critical-pod: ""
       labels:
         name: nvidia-device-plugin-ds
     spec:
+      runtimeClassName: nvidia # Explicitly request the runtime
      tolerations:
-      # Allow this pod to be rescheduled while the node is in "critical add-ons only" mode.
-      # This, along with the annotation above marks this pod as a critical add-on.
-      - key: CriticalAddonsOnly
+      - key: nvidia.com/gpu
         operator: Exists
+        effect: NoSchedule
+      # Mark this pod as a critical add-on; when enabled, the critical add-on
+      # scheduler reserves resources for critical add-on pods so that they can
+      # be rescheduled after a failure.
+      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
+      priorityClassName: "system-node-critical"
       containers:
-      - env:
-        - name: DP_DISABLE_HEALTHCHECKS
-          value: xids
-        image: nvidia/k8s-device-plugin:1.11
+      - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0-rc.2
         name: nvidia-device-plugin-ctr
+        env:
+        - name: FAIL_ON_INIT_ERROR
+          value: "false"
         securityContext:
-          allowPrivilegeEscalation: true
+          allowPrivilegeEscalation: false
           capabilities:
             drop: ["ALL"]
         volumeMounts:
-        - name: device-plugin
-          mountPath: /var/lib/kubelet/device-plugins
-      volumes:
         - name: device-plugin
-        hostPath:
-          path: /var/lib/kubelet/device-plugins
\ No newline at end of file
+          mountPath: /var/lib/kubelet/device-plugins
+      volumes:
+        - name: device-plugin
+          hostPath:
+            path: /var/lib/kubelet/device-plugins
\ No newline at end of file
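
For anyone who wants to try the updated flow end to end, the commands below are a quick sketch rather than part of the patch: `registry.example.com` is a placeholder for a registry you can push to, and everything else comes from the files changed above.

```bash
# Sketch only (not part of the patch). Build and push the CUDA-enabled K3s
# image, then create a cluster from it. The registry below is a placeholder.
IMAGE_REGISTRY=registry.example.com ./build.sh

# build.sh derives the image tag from K3S_TAG and CUDA_TAG:
IMAGE=registry.example.com/rancher/k3s:v1.28.8-k3s1-cuda-12.4.1-base-ubuntu22.04

# Pass the host GPUs through to the k3d node container.
k3d cluster create gputest --image=$IMAGE --gpus=1

# The test pod carries runtimeClassName: nvidia after this patch.
kubectl apply -f cuda-vector-add.yaml
kubectl logs pod/cuda-vector-add # expect "Test PASSED"
```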
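The `runtimeClassName` requirement called out in the updated cuda.md can also be sanity-checked with a throwaway pod. A minimal sketch, assuming the device plugin daemonset above is running: the pod name is arbitrary, and `nvidia.com/gpu` is the resource name the NVIDIA device plugin advertises.

```bash
# Sketch only (not part of the patch): run nvidia-smi via the "nvidia"
# RuntimeClass added by device-plugin-daemonset.yaml.
kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: gpu-smoke-test # arbitrary name
spec:
  runtimeClassName: nvidia # required; k3s does not default to the NVIDIA runtime
  restartPolicy: OnFailure
  containers:
    - name: smoke
      image: nvcr.io/nvidia/cuda:12.4.1-base-ubuntu22.04
      command: ["nvidia-smi"]
      resources:
        limits:
          nvidia.com/gpu: 1 # resource exposed by the device plugin
EOF
kubectl logs pod/gpu-smoke-test # once the pod has run
```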