Skip to content

Commit

Permalink
[Backport][v0.7] Patch cgroup drivers (switch to systemd) (#2200)
Browse files Browse the repository at this point in the history
* Backported patching cgroup drivers
  • Loading branch information
plirglo authored Apr 13, 2021
1 parent 45a63e0 commit c8d6059
Show file tree
Hide file tree
Showing 12 changed files with 232 additions and 69 deletions.
1 change: 1 addition & 0 deletions CHANGELOG-0.7.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

- [#2164](https://github.com/epiphany-platform/epiphany/issues/2164) - Replace Bintray repository
- [#1888](https://github.com/epiphany-platform/epiphany/issues/1888) - epicli upgrade of cluster created by Epiphany v0.5 may fail
- [#1908](https://github.com/epiphany-platform/epiphany/issues/1908) - Research why Epiphany nodes hang when memory is overcommitted

### Added

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
---
docker_logging:
log_opts:
max_file_size: 10m # The maximum size of the log before it is rolled. A positive integer plus a modifier representing the unit of measure (k, m, or g)
max_files: 2 # The maximum number of log files that can be present
docker_daemon_defaults:
exec-opts:
- native.cgroupdriver=systemd
insecure-registries:
- "{{ image_registry_address }}"
log-driver: json-file
log-opts:
# The maximum size of the log before it is rolled. A positive integer plus a modifier representing the unit of measure (k, m, or g).
max-size: 10m
# The maximum number of log files that can be present.
max-file: "2" # must be a string

docker_version:
Debian: "5:19.03.14*" # * is needed to match a version such as '5:19.03.14~3-0~ubuntu-bionic'
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,32 +1,19 @@
---
- name: Ensure directory exists
file:
path: /etc/docker
state: directory
- include_tasks: update-daemon-config.yml

- name: Copy configuration file (daemon.json)
template:
src: daemon.json.j2
dest: /etc/docker/daemon.json
mode: 0644
notify: Reload Docker

- name: Reload Docker before verification # to apply new configuration
meta: flush_handlers

- name: Start Docker
- name: Enable and ensure Docker is started
systemd:
name: docker
state: started
enabled: yes
enabled: true

# Get log driver for verification
- name: Get Docker logging driver
shell: docker info | grep -i 'Logging Driver'
register: docker_log_driver
changed_when: false

- name: Verify logging driver # 'json-file' is needed for K8s metadata in Filebeat and log rotation
- name: Verify logging driver # 'json-file' is needed for K8s metadata in Filebeat and log rotation
assert:
that: "'json-file' in docker_log_driver.stdout"
fail_msg: "Unexpected logging driver, docker_log_driver.stdout: '{{ docker_log_driver.stdout }}'"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
---
# Probe for an existing daemon.json. The optional attribute, checksum and MIME
# lookups are switched off; the follow-up read task only checks "stat.exists".
- name: Stat /etc/docker/daemon.json
  register: stat_etc_docker_daemon_json
  stat:
    get_mime: false
    get_checksum: false
    get_attributes: false
    path: /etc/docker/daemon.json

# Fetch the current daemon.json (slurp returns it base64-encoded in "content").
# Skipped when the preceding stat reports that the file does not exist.
- name: Read /etc/docker/daemon.json
  when: stat_etc_docker_daemon_json.stat.exists
  register: slurp_etc_docker_daemon_json
  slurp:
    path: /etc/docker/daemon.json

# Compute the desired daemon.json document and decide which Docker service
# action (none, reload or restart) is required to apply it. The result is
# published as the "etc_docker_daemon_json" fact.
- name: Process /etc/docker/daemon.json
  vars:
    # Document currently on disk, or {} when nothing was read. Defaults must not
    # be mixed in here, otherwise a difference could never be detected.
    _input: >-
      {{ (slurp_etc_docker_daemon_json.content | b64decode | from_json)
      if slurp_etc_docker_daemon_json.content is defined else
      {} }}
    # This role runs directly in both "apply" and "upgrade" runs.
    # - "upgrade": keep whatever is found on the target machine (it wins over the
    #   defaults) and leave "exec-opts" alone; that key is corrected later by a
    #   separate procedure inside the "upgrade" role.
    # - "apply": simply overwrite the whole document with the defaults.
    _output: >-
      {{ (docker_daemon_defaults | dict2items
      | rejectattr('key', '==', 'exec-opts')
      | list
      | items2dict
      | combine(_input, recursive=true))
      if is_upgrade_run else
      docker_daemon_defaults }}
    _changed: >-
      {{ _output != _input }}
    # A full restart would be overkill for changes such as "insecure-registries",
    # so every change that does not require a restart is applied with a reload.
    _reload: >-
      {{ _changed and (not _restart) }}
    # A reload is not sufficient for changes to "exec-opts"; those need a restart.
    _restart: >-
      {{ _changed and (_input['exec-opts'] | default([]) != _output['exec-opts'] | default([])) }}
  set_fact:
    etc_docker_daemon_json:
      restart: "{{ _restart }}"
      reload: "{{ _reload }}"
      changed: "{{ _changed }}"
      output: "{{ _output }}"
# Persist the computed document and apply it to the Docker service. The whole
# block is skipped when the computed document equals what was found on disk.
- name: Write config and reload/restart Docker
  when: etc_docker_daemon_json.changed
  block:
    - name: Ensure directory /etc/docker/ exists
      file:
        state: directory
        path: /etc/docker/
        mode: u=rwx,go=rx
        group: root
        owner: root

    # NOTE: A "template" task used to do this job, but it turned out not to be
    # idempotent enough and triggered unnecessary Docker restarts.
    - name: Write /etc/docker/daemon.json
      copy:
        content: |
          {{ etc_docker_daemon_json.output | to_nice_json(indent=2) }}
        dest: /etc/docker/daemon.json
        mode: u=rw,go=r
        group: root
        owner: root

    # Exactly one of the two tasks below runs: "reload" and "restart" are
    # computed as mutually exclusive flags by the processing task.
    - name: Reload Docker
      when: etc_docker_daemon_json.reload
      systemd:
        state: reloaded
        name: docker

    - name: Restart Docker
      when: etc_docker_daemon_json.restart
      systemd:
        state: restarted
        name: docker

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,49 @@
# These tasks are run from master and worker roles

- name: Ensure kubelet drop-in directory exists
become: true
file:
path: /etc/systemd/system/kubelet.service.d
state: directory
owner: root
group: root
mode: u=rwx,go=rx

- name: Copy kubelet configuration file (11-cgroup.conf)
template:
src: 11-cgroup.conf.j2
dest: /etc/systemd/system/kubelet.service.d/11-cgroup.conf
src: 11-cgroup.conf.j2
owner: root
group: root
mode: u=rw,g=r,o=
register: kubelet_cgroup_conf

# Requires kubeadm init/join
- name: Load configuration from '/var/lib/kubelet/config.yaml'
slurp:
src: /var/lib/kubelet/config.yaml
register: kubelet_config_yaml
- name: Update /var/lib/kubelet/config.yaml
when: kubelet_custom_config
block:
- name: Load configuration from /var/lib/kubelet/config.yaml
slurp:
src: /var/lib/kubelet/config.yaml
register: kubelet_config_yaml

- name: Apply configuration to '/var/lib/kubelet/config.yaml'
template:
src: config.yaml.j2
dest: /var/lib/kubelet/config.yaml
backup: true
register: apply_kubelet_custom_config
when: kubelet_custom_config
- name: Apply configuration to '/var/lib/kubelet/config.yaml'
template:
dest: /var/lib/kubelet/config.yaml
src: config.yaml.j2
owner: root
group: root
mode: u=rw,g=r,o=
backup: true
register: apply_kubelet_custom_config

- name: Restart kubelet service
systemd:
name: kubelet
state: restarted
daemon_reload: "{{ kubelet_cgroup_conf.changed }}"
when: kubelet_cgroup_conf.changed or apply_kubelet_custom_config.changed

- name: Enable kubelet service
systemd:
name: kubelet
enabled: yes
when: kubelet_cgroup_conf.changed or (kubelet_custom_config and apply_kubelet_custom_config.changed)

- name: Start kubelet service
- name: Enable and start kubelet service
systemd:
name: kubelet
state: started
enabled: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
---
# A standalone version of this procedure can be found in tools/development/k8s/memory/patch_cgroup_driver/.
# It has been reported that Epiphany becomes unstable under high resource utilization; this patch appears to fix these problems.

# K8s documentation (https://kubernetes.io/docs/setup/production-environment/container-runtimes/#cgroup-drivers) states:
# > A single cgroup manager simplifies the view of what resources are being allocated and will by default have a more consistent view of the available and in-use resources.
# > When there are two cgroup managers on a system, you end up with two views of those resources.
# > In the field, people have reported cases where nodes that are configured to use cgroupfs for the kubelet and Docker,
# > but systemd for the rest of the processes, become unstable under resource pressure.

# Fetch the kubelet flags file written by kubeadm (base64-encoded by slurp).
- name: k8s/cgroups | Read /var/lib/kubelet/kubeadm-flags.env
  register: slurp_var_lib_kubelet_kubeadm_flags_env
  slurp:
    path: /var/lib/kubelet/kubeadm-flags.env

# Derive the patched kubelet flags: the same text with the cgroupfs driver flag
# swapped for the systemd one, plus a marker telling whether anything changed.
- name: k8s/cgroups | Process /var/lib/kubelet/kubeadm-flags.env
  vars:
    _input: >-
      {{ slurp_var_lib_kubelet_kubeadm_flags_env.content | b64decode }}
    _output: >-
      {{ _input.replace('--cgroup-driver=cgroupfs', '--cgroup-driver=systemd') }}
  set_fact:
    var_lib_kubelet_kubeadm_flags_env:
      changed: "{{ _output != _input }}"
      output: "{{ _output }}"
# Fetch the current Docker daemon configuration (base64-encoded by slurp).
- name: k8s/cgroups | Read /etc/docker/daemon.json
  register: slurp_etc_docker_daemon_json
  slurp:
    path: /etc/docker/daemon.json

# Build the patched daemon.json: every existing key is kept, only "exec-opts"
# is adjusted so that the systemd cgroup driver replaces the cgroupfs one.
- name: k8s/cgroups | Process /etc/docker/daemon.json
  vars:
    # Parsed document currently on disk.
    _input: >-
      {{ slurp_etc_docker_daemon_json.content | b64decode | from_json }}
    # Current "exec-opts" list; empty when the key is missing.
    _exec_opts: >-
      {{ _input['exec-opts'] | default([]) }}
    # Drop the cgroupfs entry (if any) and make sure the systemd entry is present.
    _update:
      exec-opts: >-
        {{ _exec_opts | difference(['native.cgroupdriver=cgroupfs']) | union(['native.cgroupdriver=systemd']) }}
    _output: >-
      {{ _input | combine(_update, recursive=true) }}
  set_fact:
    etc_docker_daemon_json:
      # Only a difference in "exec-opts" counts as a change here.
      changed: "{{ _output['exec-opts'] != _exec_opts }}"
      output: "{{ _output }}"
# Rewrite both configuration files and restart the services, but only when at
# least one of them actually needs the cgroupfs -> systemd switch.
- name: k8s/cgroups | Perform cgroup driver patching (switch to systemd)
  when: var_lib_kubelet_kubeadm_flags_env.changed or etc_docker_daemon_json.changed
  block:
    # At this point we assume that currently processed node has been drained already.

    # No "mode" is passed to the two copy tasks below. "mode: preserve" takes the
    # permissions from a *source* file, and with inline "content" there is no
    # such file to take them from. Both destinations already exist (they were
    # read earlier in this file), and without "mode" the copy module keeps the
    # permissions of an existing destination, which is the intended behaviour.
    - name: k8s/cgroups | Write /var/lib/kubelet/kubeadm-flags.env
      copy:
        dest: /var/lib/kubelet/kubeadm-flags.env
        content: |
          {{ var_lib_kubelet_kubeadm_flags_env.output }}
        owner: root
        group: root

    - name: k8s/cgroups | Write /etc/docker/daemon.json
      copy:
        dest: /etc/docker/daemon.json
        content: |
          {{ etc_docker_daemon_json.output | to_nice_json(indent=2) }}
        owner: root
        group: root

    # Callers that restart the services themselves right afterwards pass
    # "_requires_restart: false"; when the variable is not set, restart here.
    - name: k8s/cgroups | Restart kubelet and docker
      include_tasks: utils/restart-kubelet-and-docker.yml
      when:
        - (_requires_restart is undefined) or _requires_restart
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,15 @@
- name: k8s/master0 | Upgrade Docker # this may restart Docker daemon
include_tasks: docker.yml

- name: k8s/master0 | Reload kubelet and docker
include_tasks: utils/reload-kubelet-and-docker.yml
# This is considered a bugfix for existing clusters created prior to Epiphany v0.10.x and after v0.5.x.
- name: k8s/master0 | Replace cgroupfs driver with systemd driver
include_tasks: patch-cgroup-driver.yml
vars: { _requires_restart: false } # it will be properly restarted anyways
when:
- upgrade_to_final_version

- name: k8s/master0 | Restart kubelet and docker
include_tasks: utils/restart-kubelet-and-docker.yml

- name: k8s/master0 | Uncordon master - mark master as schedulable
include_tasks: utils/uncordon.yml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,15 @@
- name: k8s/masterN | Upgrade Docker # this may restart Docker daemon
include_tasks: docker.yml

- name: k8s/masterN | Reload kubelet and docker
include_tasks: utils/reload-kubelet-and-docker.yml
# This is considered a bugfix for existing clusters created prior to Epiphany v0.10.x and after v0.5.x.
- name: k8s/masterN | Replace cgroupfs driver with systemd driver
include_tasks: patch-cgroup-driver.yml
vars: { _requires_restart: false } # it will be properly restarted anyways
when:
- upgrade_to_final_version

- name: k8s/masterN | Restart kubelet and docker
include_tasks: utils/restart-kubelet-and-docker.yml

- name: k8s/masterN | Wait for cluster's readiness
include_tasks: utils/wait.yml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,16 @@
- name: k8s/node | Drain node in preparation for maintenance
include_tasks: utils/drain.yml

- name: k8s/node | Upgrade Docker # this may restart Docker daemon
- name: k8s/node | Upgrade Docker # this may restart Docker daemon
include_tasks: docker.yml

# This is considered a bugfix for existing clusters created prior to Epiphany v0.10.x and after v0.5.x.
- name: k8s/node | Replace cgroupfs driver with systemd driver
include_tasks: patch-cgroup-driver.yml
vars: { _requires_restart: true }
when:
- upgrade_to_final_version

- name: Upgrade packages and perform upgrade
block:
- name: k8s/node | Install kubeadm
Expand Down

0 comments on commit c8d6059

Please sign in to comment.