Skip to content

Commit

Permalink
Merge pull request #7571 from zalando-incubator/kube-1.30-to-alpha
Browse files Browse the repository at this point in the history
kube-1.30 to alpha
  • Loading branch information
RomanZavodskikh authored May 27, 2024
2 parents bd33306 + 9ccd2ea commit 08fbcb2
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 1 deletion.
4 changes: 4 additions & 0 deletions cluster/config-defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,10 @@ flannel_memory: "100Mi"
nvidia_device_plugin_cpu: "10m"
nvidia_device_plugin_memory: "50Mi"

# nvidia dcgm exporter: CPU and memory for the dcgm-exporter container.
# Each value is used for both the resource request and the resource limit
# in cluster/manifests/nvidia/nvidia-gpu-device-plugin.yaml.
nvidia_dcgm_exporter_cpu: "10m"
nvidia_dcgm_exporter_memory: "200Mi"

# static egress controller settings
static_egress_controller_enabled: "true"

Expand Down
30 changes: 30 additions & 0 deletions cluster/manifests/nvidia/nvidia-gpu-device-plugin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ spec:
component: nvidia-gpu-device-plugin
annotations:
logging/destination: "{{.Cluster.ConfigItems.log_destination_infra}}"
prometheus.io/path: /metrics
prometheus.io/port: "9400"
prometheus.io/scrape: "true"
spec:
serviceAccountName: nvidia
tolerations:
Expand All @@ -48,6 +51,9 @@ spec:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins
- name: pod-gpu-resources
hostPath:
path: /opt/podruntime/kubelet/pod-resources
containers:
- name: nvidia-gpu-device-plugin
image: container-registry.zalando.net/teapot/nvidia-gpu-device-plugin:v0.14.5-master-10
Expand All @@ -68,3 +74,27 @@ spec:
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
# dcgm-exporter container: listens on port 9400, the same port advertised by
# the pod's prometheus.io/port annotation.
- name: dcgm-exporter
image: container-registry.zalando.net/teapot/nvidia-dcgm-exporter:v3.3.6-3.4.2-ubuntu22.04-master-11
args:
# NOTE(review): --kubernetes presumably enables mapping GPU metrics to pods
# via the mounted pod-resources directory; confirm against the exporter docs.
- --kubernetes
- --address=:9400
resources:
# Requests and limits read the same config items, so they are always equal.
requests:
cpu: "{{ .Cluster.ConfigItems.nvidia_dcgm_exporter_cpu }}"
memory: "{{ .Cluster.ConfigItems.nvidia_dcgm_exporter_memory }}"
limits:
cpu: "{{ .Cluster.ConfigItems.nvidia_dcgm_exporter_cpu }}"
memory: "{{ .Cluster.ConfigItems.nvidia_dcgm_exporter_memory }}"
ports:
- name: metrics
containerPort: 9400
securityContext:
# NOTE(review): runs as root (UID 0) with the SYS_ADMIN capability added.
# Presumably required by DCGM to collect GPU metrics; confirm it is needed
# and keep the grant as narrow as possible.
runAsNonRoot: false
runAsUser: 0
capabilities:
add: ["SYS_ADMIN"]
volumeMounts:
# Read-only mount of the pod-gpu-resources hostPath volume
# (/opt/podruntime/kubelet/pod-resources on the host).
- name: pod-gpu-resources
mountPath: /var/lib/kubelet/pod-resources
readOnly: true
2 changes: 1 addition & 1 deletion cluster/manifests/skipper/deployment.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{{ $internal_version := "v0.21.91-921" }}
{{ $canary_internal_version := "v0.21.91-921" }}
{{ $canary_internal_version := "v0.21.99-931" }}

{{/* Optional canary arguments separated by "[cf724afc]" to allow whitespaces, e.g. "-foo=has a whitespace[cf724afc]-baz=qux" */}}
{{ $canary_args := "" }}
Expand Down

0 comments on commit 08fbcb2

Please sign in to comment.