From 920dc0ef055711b96684aece7546f53875e4d98c Mon Sep 17 00:00:00 2001 From: "Lubomir I. Ivanov" Date: Mon, 31 May 2021 19:56:44 +0300 Subject: [PATCH] kinder: pin the CR cgroup driver to "systemd" Pin the driver to "systemd" in: - the Docker / containerd config files during image alter - KubeletConfiguration that kubeadm deploys --- kinder/pkg/build/alter/alter.go | 5 ++++ kinder/pkg/cri/nodes/alterhelper.go | 11 ++++++++ .../pkg/cri/nodes/containerd/alterhelper.go | 12 +++++++++ kinder/pkg/cri/nodes/docker/alterhelper.go | 26 +++++++++++++++++++ kinder/pkg/kubeadm/config.go | 6 +++++ 5 files changed, 60 insertions(+) diff --git a/kinder/pkg/build/alter/alter.go b/kinder/pkg/build/alter/alter.go index 97278043..70af120b 100644 --- a/kinder/pkg/build/alter/alter.go +++ b/kinder/pkg/build/alter/alter.go @@ -332,6 +332,11 @@ func (c *Context) alterImage(bitsInstallers []bits.Installer, bc *bits.BuildCont } } + log.Info("Setup CRI ...") + if err := alterHelper.SetupCRI(bc); err != nil { + return errors.Wrapf(err, "image build Failed! Failed to setup %s", runtime) + } + log.Info("Start CRI ...") if err := alterHelper.StartCRI(bc); err != nil { return errors.Wrapf(err, "image build Failed! Failed to start %s", runtime) diff --git a/kinder/pkg/cri/nodes/alterhelper.go b/kinder/pkg/cri/nodes/alterhelper.go index b94da8da..b70e1f16 100644 --- a/kinder/pkg/cri/nodes/alterhelper.go +++ b/kinder/pkg/cri/nodes/alterhelper.go @@ -61,6 +61,17 @@ func (h *AlterHelper) StartCRI(bc *bits.BuildContext) error { return errors.Errorf("unknown cri: %s", h.cri) } +// SetupCRI sets up the container runtime. 
+func (h *AlterHelper) SetupCRI(bc *bits.BuildContext) error { + switch h.cri { + case status.ContainerdRuntime: + return containerd.SetupRuntime(bc) + case status.DockerRuntime: + return docker.SetupRuntime(bc) + } + return errors.Errorf("unknown cri: %s", h.cri) +} + // PreLoadInitImages preload images required by kubeadm-init into the selected container runtime that exists inside a kind(er) node func (h *AlterHelper) PreLoadInitImages(bc *bits.BuildContext, srcFolder string) error { switch h.cri { diff --git a/kinder/pkg/cri/nodes/containerd/alterhelper.go b/kinder/pkg/cri/nodes/containerd/alterhelper.go index d4e2bdb5..e05b4e8f 100644 --- a/kinder/pkg/cri/nodes/containerd/alterhelper.go +++ b/kinder/pkg/cri/nodes/containerd/alterhelper.go @@ -46,6 +46,18 @@ func GetAlterContainerArgs() ([]string, []string) { return runArgs, runCommands } +// SetupRuntime sets up the runtime +func SetupRuntime(bc *bits.BuildContext) error { + // Append the containerd settings for a systemd cgroup driver at the end of the config.toml. + // This assumes runc is used as the underlying runtime. 
+ if err := bc.RunInContainer("bash", "-c", + "printf '[plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes.runc.options]\nsystemdCgroup = true\n' >> /etc/containerd/config.toml", + ); err != nil { + return errors.Wrap(err, "could not append to /etc/containerd/config.toml") + } + return nil +} + // StartRuntime starts the runtime func StartRuntime(bc *bits.BuildContext) error { log.Info("starting containerd") diff --git a/kinder/pkg/cri/nodes/docker/alterhelper.go b/kinder/pkg/cri/nodes/docker/alterhelper.go index 3513353c..ab8b3512 100644 --- a/kinder/pkg/cri/nodes/docker/alterhelper.go +++ b/kinder/pkg/cri/nodes/docker/alterhelper.go @@ -37,6 +37,32 @@ func GetAlterContainerArgs() ([]string, []string) { return runArgs, []string{} } +// SetupRuntime sets up the runtime +func SetupRuntime(bc *bits.BuildContext) error { + // Rewrite the Docker daemon config to include: + // - the "cri-containerd: true", which is something that already exists in kindest/base:v20190403-1ebf15f + // - the cgroup driver setting (systemd) + if err := bc.RunInContainer("bash", "-c", + "printf '{\"cri-containerd\": true, \"exec-opts\": [\"native.cgroupdriver=systemd\"]}\n' > /etc/docker/daemon.json", + ); err != nil { + return errors.Wrap(err, "could not overwrite /etc/docker/daemon.json") + } + // Workaround from https://github.com/kubernetes/kubernetes/issues/43704#issuecomment-289484654 + // Using the systemd driver in our rather old base image results in errors around the kubepods.slice. + // Write the flags --cgroups-per-qos=false --enforce-node-allocatable="" in the KUBELET_EXTRA_ARGS file. + // + // It's not possible to pass these via the KubeletConfiguration because the validation / defaulting is bogus. + // Empty slice gets defaulted to "pods" and non-empty slice fails for 'cgroupsPerQOS: false' - + // i.e. it is not possible to pass 'none' or '[]' for the 'enforceNodeAllocatable' config field. 
+ // https://github.com/kubernetes/kubernetes/blob/ea0764452222146c47ec826977f49d7001b0ea8c/pkg/kubelet/apis/config/validation/validation.go#L53-L54 + if err := bc.RunInContainer("bash", "-c", + "printf 'KUBELET_EXTRA_ARGS=\"--cgroups-per-qos=false --enforce-node-allocatable=\"\"\"' > /etc/default/kubelet", + ); err != nil { + return errors.Wrap(err, "could not write /etc/default/kubelet") + } + return nil +} + // StartRuntime starts the runtime func StartRuntime(bc *bits.BuildContext) error { log.Info("starting dockerd") diff --git a/kinder/pkg/kubeadm/config.go b/kinder/pkg/kubeadm/config.go index 475d689e..8537dcc9 100644 --- a/kinder/pkg/kubeadm/config.go +++ b/kinder/pkg/kubeadm/config.go @@ -206,6 +206,9 @@ evictionHard: nodefs.available: "0%" nodefs.inodesFree: "0%" imagefs.available: "0%" +# pin the cgroup driver to systemd. +# this assumes that the CR on the node image is configured accordingly. +cgroupDriver: "systemd" --- # no-op entry that exists solely so it can be patched apiVersion: kubeproxy.config.k8s.io/v1alpha1 @@ -302,6 +305,9 @@ evictionHard: nodefs.available: "0%" nodefs.inodesFree: "0%" imagefs.available: "0%" +# pin the cgroup driver to systemd. +# this assumes that the CR on the node image is configured accordingly. +cgroupDriver: "systemd" --- # no-op entry that exists solely so it can be patched apiVersion: kubeproxy.config.k8s.io/v1alpha1