From 78828f180431ac640b69134340a6838fdac1786b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Zeidler?= Date: Fri, 3 Dec 2021 10:59:01 +0100 Subject: [PATCH 01/22] Switch to containerd (#2769) * Add containerd * Switch to containerd if upgrade * Add changelog entry --- ansible/playbooks/kubernetes_master.yml | 2 +- ansible/playbooks/kubernetes_node.yml | 2 +- .../roles/containerd/files/containerd.conf | 2 + .../roles/containerd/handlers/main.yml | 9 ++ .../containerd/tasks/configure-containerd.yml | 20 +++ .../tasks/configure-prerequisites.yml | 16 +++ .../playbooks/roles/containerd/tasks/main.yml | 22 +++ .../roles/containerd/tasks/remove-docker.yml | 29 ++++ .../roles/containerd/templates/config.toml.j2 | 129 ++++++++++++++++++ .../roles/kubernetes_common/tasks/main.yml | 2 +- .../roles/upgrade/tasks/image-registry.yml | 39 ------ .../tasks/kubernetes/patch-cgroup-driver.yml | 39 +----- .../tasks/kubernetes/upgrade-master0.yml | 13 +- .../upgrade/tasks/kubernetes/upgrade-node.yml | 3 - ...let-and-docker.yml => restart-kubelet.yml} | 11 -- .../tasks/kubernetes/verify-upgrade.yml | 2 + ansible/playbooks/upgrade.yml | 5 +- 17 files changed, 246 insertions(+), 99 deletions(-) create mode 100644 ansible/playbooks/roles/containerd/files/containerd.conf create mode 100644 ansible/playbooks/roles/containerd/handlers/main.yml create mode 100644 ansible/playbooks/roles/containerd/tasks/configure-containerd.yml create mode 100644 ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml create mode 100644 ansible/playbooks/roles/containerd/tasks/main.yml create mode 100644 ansible/playbooks/roles/containerd/tasks/remove-docker.yml create mode 100644 ansible/playbooks/roles/containerd/templates/config.toml.j2 delete mode 100644 ansible/playbooks/roles/upgrade/tasks/image-registry.yml rename ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/{restart-kubelet-and-docker.yml => restart-kubelet.yml} (58%) diff --git a/ansible/playbooks/kubernetes_master.yml 
b/ansible/playbooks/kubernetes_master.yml index b4df086f6b..5dcfc49be6 100644 --- a/ansible/playbooks/kubernetes_master.yml +++ b/ansible/playbooks/kubernetes_master.yml @@ -8,7 +8,7 @@ become_method: sudo pre_tasks: - import_role: - name: docker + name: containerd tasks_from: main - import_role: name: kubernetes_common diff --git a/ansible/playbooks/kubernetes_node.yml b/ansible/playbooks/kubernetes_node.yml index 2c5623714a..a5f3fb831e 100644 --- a/ansible/playbooks/kubernetes_node.yml +++ b/ansible/playbooks/kubernetes_node.yml @@ -20,7 +20,7 @@ become_method: sudo pre_tasks: - import_role: - name: docker + name: containerd tasks_from: main - import_role: name: kubernetes_common diff --git a/ansible/playbooks/roles/containerd/files/containerd.conf b/ansible/playbooks/roles/containerd/files/containerd.conf new file mode 100644 index 0000000000..43dd5433bc --- /dev/null +++ b/ansible/playbooks/roles/containerd/files/containerd.conf @@ -0,0 +1,2 @@ +overlay +br_netfilter diff --git a/ansible/playbooks/roles/containerd/handlers/main.yml b/ansible/playbooks/roles/containerd/handlers/main.yml new file mode 100644 index 0000000000..1c18e8ef6c --- /dev/null +++ b/ansible/playbooks/roles/containerd/handlers/main.yml @@ -0,0 +1,9 @@ +- name: Restart Containerd + systemd: + name: containerd + state: restarted + +- name: Restart kubelet + systemd: + name: kubelet + state: restarted diff --git a/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml b/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml new file mode 100644 index 0000000000..860fe7eeb3 --- /dev/null +++ b/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml @@ -0,0 +1,20 @@ +--- +- name: Create Containerd dir + file: + path: /etc/containerd + state: directory + owner: root + group: root + mode: u=rwx,go=rx + +- name: Provide Containerd config + template: + src: config.toml.j2 + dest: /etc/containerd/config.toml + mode: u=rw,go= + owner: root + group: root + notify: + - 
Restart Containerd + + diff --git a/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml b/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml new file mode 100644 index 0000000000..84b4c54ea9 --- /dev/null +++ b/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml @@ -0,0 +1,16 @@ +--- +- name: Provide containerd.conf file + become: true + copy: + src: containerd.conf + dest: /etc/modules-load.d/containerd.conf + owner: root + group: root + mode: u=rw,go= + +- name: Load modules + command: "modprobe {{ item }}" + become: true + with_items: + - overlay + - br_netfilter diff --git a/ansible/playbooks/roles/containerd/tasks/main.yml b/ansible/playbooks/roles/containerd/tasks/main.yml new file mode 100644 index 0000000000..8b4c6d8695 --- /dev/null +++ b/ansible/playbooks/roles/containerd/tasks/main.yml @@ -0,0 +1,22 @@ +--- +- include_tasks: remove-docker.yml + when: is_upgrade_run + +- name: Install Containerd package + package: + name: containerd.io + state: present + module_defaults: + yum: { lock_timeout: "{{ yum_lock_timeout }}" } + +- include_tasks: configure-prerequisites.yml +- include_tasks: configure-containerd.yml + +- name: Append Containerd to kubelet config + replace: + path: /var/lib/kubelet/kubeadm-flags.env + regexp: '^(?!.*--container-runtime-endpoint)(.*)"$' + replace: '\1 --container-runtime=remote --container-runtime-endpoint=/run/containerd/containerd.sock"' + notify: + - Restart kubelet + when: is_upgrade_run diff --git a/ansible/playbooks/roles/containerd/tasks/remove-docker.yml b/ansible/playbooks/roles/containerd/tasks/remove-docker.yml new file mode 100644 index 0000000000..4c46b80eb4 --- /dev/null +++ b/ansible/playbooks/roles/containerd/tasks/remove-docker.yml @@ -0,0 +1,28 @@ +--- +- name: Populate service facts + service_facts: + +- name: Stop Docker daemon + systemd: + name: docker + state: stopped + enabled: no + when: "'docker.service' in ansible_facts.services" + +- name: Remove Docker packages + package: + name: "{{ 
_packages }}" + state: absent + vars: + _packages: + - docker-ce-cli + - docker-ce-rootless-extras + - docker-ce + +- name: Remove Docker sockets leftovers + file: + path: "{{ item }}" + state: absent + loop: + - /var/run/docker.sock + - /var/run/dockershim.sock diff --git a/ansible/playbooks/roles/containerd/templates/config.toml.j2 b/ansible/playbooks/roles/containerd/templates/config.toml.j2 new file mode 100644 index 0000000000..bc7d986bf5 --- /dev/null +++ b/ansible/playbooks/roles/containerd/templates/config.toml.j2 @@ -0,0 +1,130 @@ +version = 2 +root = "/var/lib/containerd" +state = "/run/containerd" +plugin_dir = "" +disabled_plugins = [] +required_plugins = [] +oom_score = 0 + +[grpc] + address = "/run/containerd/containerd.sock" + tcp_address = "" + tcp_tls_cert = "" + tcp_tls_key = "" + uid = 0 + gid = 0 + max_recv_message_size = 16777216 + max_send_message_size = 16777216 + +[ttrpc] + address = "" + uid = 0 + gid = 0 + +[debug] + address = "" + uid = 0 + gid = 0 + level = "" + +[metrics] + address = "" + grpc_histogram = false + +[cgroup] + path = "" + +[timeouts] + "io.containerd.timeout.shim.cleanup" = "5s" + "io.containerd.timeout.shim.load" = "5s" + "io.containerd.timeout.shim.shutdown" = "3s" + "io.containerd.timeout.task.state" = "2s" + +[plugins] + [plugins."io.containerd.gc.v1.scheduler"] + pause_threshold = 0.02 + deletion_threshold = 0 + mutation_threshold = 100 + schedule_delay = "0s" + startup_delay = "100ms" + [plugins."io.containerd.grpc.v1.cri"] + disable_tcp_service = true + stream_server_address = "127.0.0.1" + stream_server_port = "0" + stream_idle_timeout = "4h0m0s" + enable_selinux = false + selinux_category_range = 1024 + sandbox_image = "k8s.gcr.io/pause:3.2" + stats_collect_period = 10 + systemd_cgroup = false + enable_tls_streaming = false + max_container_log_line_size = 16384 + disable_cgroup = false + disable_apparmor = false + restrict_oom_score_adj = false + max_concurrent_downloads = 3 + disable_proc_mount = 
false + unset_seccomp_profile = "" + tolerate_missing_hugetlb_controller = true + disable_hugetlb_controller = true + ignore_image_defined_volumes = false + [plugins."io.containerd.grpc.v1.cri".containerd] + snapshotter = "overlayfs" + default_runtime_name = "runc" + no_pivot = false + disable_snapshot_annotations = true + discard_unpacked_layers = false + [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime] + runtime_type = "" + runtime_engine = "" + runtime_root = "" + privileged_without_host_devices = false + base_runtime_spec = "" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + runtime_engine = "" + runtime_root = "" + privileged_without_host_devices = false + base_runtime_spec = "" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + SystemdCgroup = true + [plugins."io.containerd.grpc.v1.cri".cni] + bin_dir = "/opt/cni/bin" + conf_dir = "/etc/cni/net.d" + max_conf_num = 1 + conf_template = "" + [plugins."io.containerd.grpc.v1.cri".registry] + [plugins."io.containerd.grpc.v1.cri".registry.mirrors] + [plugins."io.containerd.grpc.v1.cri".registry.mirrors."{{ image_registry_address }}"] + endpoint = ["http://{{ image_registry_address }}"] + [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"] + endpoint = ["https://registry-1.docker.io"] + [plugins."io.containerd.grpc.v1.cri".image_decryption] + key_model = "" + [plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming] + tls_cert_file = "" + tls_key_file = "" + [plugins."io.containerd.internal.v1.opt"] + path = "/opt/containerd" + [plugins."io.containerd.internal.v1.restart"] + interval = "10s" + [plugins."io.containerd.metadata.v1.bolt"] + content_sharing_policy = "shared" + [plugins."io.containerd.monitor.v1.cgroups"] + no_prometheus = false + [plugins."io.containerd.runtime.v1.linux"] + shim = "containerd-shim" + runtime = "runc" + runtime_root = "" + no_shim = 
false + shim_debug = false + [plugins."io.containerd.runtime.v2.task"] + platforms = ["linux/amd64"] + [plugins."io.containerd.service.v1.diff-service"] + default = ["walking"] + [plugins."io.containerd.snapshotter.v1.devmapper"] + root_path = "" + pool_name = "" + base_image_size = "" + async_remove = false diff --git a/ansible/playbooks/roles/kubernetes_common/tasks/main.yml b/ansible/playbooks/roles/kubernetes_common/tasks/main.yml index 344b2ea946..72595e53fa 100644 --- a/ansible/playbooks/roles/kubernetes_common/tasks/main.yml +++ b/ansible/playbooks/roles/kubernetes_common/tasks/main.yml @@ -20,7 +20,7 @@ state: present value: "1" reload: yes - when: sysctl_bridge_nf_call_iptables.rc == 0 + when: sysctl_bridge_nf_call_iptables.rc != 0 with_items: - net.bridge.bridge-nf-call-iptables - net.bridge.bridge-nf-call-ip6tables diff --git a/ansible/playbooks/roles/upgrade/tasks/image-registry.yml b/ansible/playbooks/roles/upgrade/tasks/image-registry.yml deleted file mode 100644 index c43ba58244..0000000000 --- a/ansible/playbooks/roles/upgrade/tasks/image-registry.yml +++ /dev/null @@ -1,39 +0,0 @@ ---- - -- name: image-registry | Include get-registries.yml from docker role # this sets result - include_role: - name: docker - tasks_from: get-registries - -- name: Reconfigure Docker for pulling images from local registry - block: - - name: image-registry | Drain node in preparation for Docker reconfiguration - include_tasks: kubernetes/utils/drain.yml - when: - - groups.kubernetes_node is defined - - inventory_hostname in groups.kubernetes_node - - - name: image-registry | Wait for cluster's readiness - include_tasks: kubernetes/utils/wait.yml - when: - - groups.kubernetes_node is defined - - inventory_hostname in groups.kubernetes_node - - - name: image-registry | Reconfigure Docker if necessary # this restarts Docker daemon - include_role: - name: docker - tasks_from: configure-docker - - - name: Include wait-for-kube-apiserver.yml - include_tasks: 
kubernetes/utils/wait-for-kube-apiserver.yml - when: - - inventory_hostname in groups.kubernetes_master - - - name: image-registry | Uncordon node - mark node as schedulable - include_tasks: kubernetes/utils/uncordon.yml - when: - - groups.kubernetes_node is defined - - inventory_hostname in groups.kubernetes_node - - when: - - not image_registry_address in result.stdout diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/patch-cgroup-driver.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/patch-cgroup-driver.yml index 1a86d895d3..9ad5c88165 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/patch-cgroup-driver.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/patch-cgroup-driver.yml @@ -25,32 +25,8 @@ _output_str: >- {{ _input | combine(_cgroup_driver) | to_nice_yaml(indent=2) }} -- name: k8s/cgroups | Read /etc/docker/daemon.json - slurp: - path: /etc/docker/daemon.json - register: slurp_etc_docker_daemon_json - -- name: k8s/cgroups | Process /etc/docker/daemon.json - set_fact: - etc_docker_daemon_json: - output: "{{ _output }}" - changed: "{{ _output['exec-opts'] != _exec_opts }}" - vars: - _input: >- - {{ slurp_etc_docker_daemon_json.content | b64decode | from_json }} - - _exec_opts: >- - {{ _input['exec-opts'] | default([]) }} - - _update: - exec-opts: >- - {{ _exec_opts | difference(['native.cgroupdriver=cgroupfs']) | union(['native.cgroupdriver=systemd']) }} - - _output: >- - {{ _input | combine(_update, recursive=true) }} - - name: k8s/cgroups | Perform cgroup driver patching (switch to systemd) - when: var_lib_kubelet_config_yaml.changed or etc_docker_daemon_json.changed + when: var_lib_kubelet_config_yaml.changed block: # At this point we assume that currently processed node has been drained already. 
@@ -63,16 +39,7 @@ group: root mode: preserve - - name: k8s/cgroups | Write /etc/docker/daemon.json - copy: - dest: /etc/docker/daemon.json - content: | - {{ etc_docker_daemon_json.output | to_nice_json(indent=2) }} - owner: root - group: root - mode: preserve - - - name: k8s/cgroups | Restart kubelet and docker - include_tasks: utils/restart-kubelet-and-docker.yml + - name: k8s/cgroups | Restart kubelet + include_tasks: utils/restart-kubelet.yml when: - (_requires_restart is undefined) or _requires_restart diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml index 4fef7acc15..6cc4b631e8 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml @@ -41,6 +41,12 @@ - name: k8s/master0 | Include set-cluster-version.yml include_tasks: set-cluster-version.yml # sets cluster_version + - name: k8s/master0 | Add k8s annotation for containerd + delegate_to: "{{ groups.kubernetes_master[0] }}" + run_once: true + command: >- + kubectl annotate node {{ inventory_hostname }} --overwrite kubeadm.alpha.kubernetes.io/cri-socket=unix:///run/containerd/containerd.sock + # Note: Usage of the --config flag for reconfiguring the cluster during upgrade is not recommended since v1.16 - name: k8s/master0 | Upgrade K8s cluster to v{{ version }} command: >- @@ -81,9 +87,6 @@ - name: k8s/master0 | Backup kubeadm-config.yml include_tasks: backup-kubeadm-config.yml -- name: k8s/master0 | Upgrade Docker # this may restart Docker daemon - include_tasks: docker.yml - - name: k8s/master0 | Patch kubelet ConfigMap with systemd cgroup driver include_tasks: patch-kubelet-cm.yml @@ -93,8 +96,8 @@ when: - upgrade_to_final_version -- name: k8s/master0 | Restart kubelet and docker - include_tasks: utils/restart-kubelet-and-docker.yml +- name: k8s/master0 | Restart kubelet + include_tasks: 
utils/restart-kubelet.yml - name: k8s/master0 | Uncordon master - mark master as schedulable include_tasks: utils/uncordon.yml diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-node.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-node.yml index 6e8e38589a..3c491b95f5 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-node.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-node.yml @@ -2,9 +2,6 @@ - name: k8s/node | Drain node in preparation for maintenance include_tasks: utils/drain.yml -- name: k8s/node | Upgrade Docker # this may restart Docker daemon - include_tasks: docker.yml - - name: k8s/node | Replace cgroupfs driver with systemd driver include_tasks: patch-cgroup-driver.yml vars: { _requires_restart: true } diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet-and-docker.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet.yml similarity index 58% rename from ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet-and-docker.yml rename to ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet.yml index f5c7731f84..cd1c222d20 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet-and-docker.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet.yml @@ -4,21 +4,10 @@ state: stopped name: kubelet -- name: k8s/utils | Stop Docker - systemd: - state: stopped - name: docker - - name: k8s/utils | Reload daemon systemd: daemon_reload: true -- name: k8s/utils | Start Docker - systemd: - name: docker - state: started - enabled: true - - name: k8s/utils | Start Kubelet systemd: name: kubelet diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/verify-upgrade.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/verify-upgrade.yml index 02dfad56b0..2455749f82 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/verify-upgrade.yml +++ 
b/ansible/playbooks/roles/upgrade/tasks/kubernetes/verify-upgrade.yml @@ -64,3 +64,5 @@ check_mode: true register: cgroup_driver failed_when: cgroup_driver.changed + when: + - upgrade_to_final_version diff --git a/ansible/playbooks/upgrade.yml b/ansible/playbooks/upgrade.yml index 1b8fb40e20..1922d9e926 100644 --- a/ansible/playbooks/upgrade.yml +++ b/ansible/playbooks/upgrade.yml @@ -35,14 +35,15 @@ environment: KUBECONFIG: "{{ kubeconfig.remote }}" + - hosts: kubernetes_master:kubernetes_node serial: 1 become: true become_method: sudo tasks: - import_role: - name: upgrade - tasks_from: image-registry + name: containerd + tasks_from: main when: "'kubernetes' in upgrade_components or upgrade_components|length == 0" environment: KUBECONFIG: "{{ kubeconfig.remote }}" From 0108ddced5cbccf1476a527031fd87ff51c581df Mon Sep 17 00:00:00 2001 From: rafzei Date: Thu, 13 Jan 2022 10:10:46 +0100 Subject: [PATCH 02/22] Spec test update --- .../kubernetes_master_spec.rb | 20 +++++++++-------- .../kubernetes_node/kubernetes_node_spec.rb | 22 ++++++------------- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb index b4e9393b3e..cce0ca4313 100644 --- a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb +++ b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb @@ -207,18 +207,20 @@ end end -describe 'Check the docker cgroup and logging driver' do - describe file('/etc/docker/daemon.json') do +describe 'Check the containerd' do + describe command('crictl --runtime-endpoint unix:///run/containerd/containerd.sock version') do let(:disable_sudo) { false } - its(:content_as_json) { should include('exec-opts' => include('native.cgroupdriver=systemd')) } - its(:content_as_json) { should include('log-driver' => 'json-file') } - its(:content_as_json) { should_not include('exec-opts' => include('native.cgroupdriver=cgroupfs')) } + 
its(:stdout) { should include('RuntimeName: containerd') } end - describe command('docker info | grep -i driver') do + describe command("kubectl get nodes -o jsonpath='{.items[].status.nodeInfo.containerRuntimeVersion}'") do + its(:stdout) { should include('containerd://1.4.12') } + end +end + +describe 'Check the OCI-spec' do + describe command('crictl --runtime-endpoint unix:///run/containerd/containerd.sock info') do let(:disable_sudo) { false } - its(:stdout) { should match(/Cgroup Driver: systemd/) } - its(:stdout) { should match(/Logging Driver: json-file/) } - its(:exit_status) { should eq 0 } + its(:content_as_yaml) { should include('defaultRuntimeName' => 'runc') } end end diff --git a/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb b/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb index cf708c8156..3e80e995e6 100644 --- a/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb +++ b/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb @@ -1,25 +1,17 @@ require 'spec_helper' -describe 'Check the kubelet cgroup driver' do - describe file('/var/lib/kubelet/config.yaml') do +describe 'Check the containerd' do + describe command('crictl --runtime-endpoint unix:///run/containerd/containerd.sock version') do let(:disable_sudo) { false } - its(:content_as_yaml) { should include('cgroupDriver' => 'systemd') } - its(:content_as_yaml) { should_not include('cgroupDriver' => 'cgroupfs') } + its(:stdout) { should include('RuntimeName: containerd') } end end -describe 'Check the docker cgroup and logging driver' do - describe file('/etc/docker/daemon.json') do - let(:disable_sudo) { false } - its(:content_as_json) { should include('exec-opts' => include('native.cgroupdriver=systemd')) } - its(:content_as_json) { should include('log-driver' => 'json-file') } - its(:content_as_json) { should_not include('exec-opts' => include('native.cgroupdriver=cgroupfs')) } - end - describe command('docker info | grep -i driver') do +describe 'Check the kubelet 
cgroup driver' do + describe file('/var/lib/kubelet/config.yaml') do let(:disable_sudo) { false } - its(:stdout) { should match(/Cgroup Driver: systemd/) } - its(:stdout) { should match(/Logging Driver: json-file/) } - its(:exit_status) { should eq 0 } + its(:content_as_yaml) { should include('cgroupDriver' => 'systemd') } + its(:content_as_yaml) { should_not include('cgroupDriver' => 'cgroupfs') } end end From 2011f31d077df58f4f1bdd575e9ddf55d144bcd5 Mon Sep 17 00:00:00 2001 From: rafzei Date: Thu, 13 Jan 2022 15:53:28 +0100 Subject: [PATCH 03/22] Rebase CHANGELOG-2.0.md --- .../roles/containerd/tasks/configure-containerd.yml | 2 -- .../roles/upgrade/tasks/kubernetes/upgrade-masterN.yml | 7 ++----- ansible/playbooks/upgrade.yml | 1 - docs/changelogs/CHANGELOG-2.0.md | 1 + 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml b/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml index 860fe7eeb3..a4e032fe67 100644 --- a/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml +++ b/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml @@ -16,5 +16,3 @@ group: root notify: - Restart Containerd - - diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml index 850ba8427d..099a6b8626 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml @@ -32,17 +32,14 @@ - name: k8s/masterN | Backup kubeadm-config.yml include_tasks: backup-kubeadm-config.yml -- name: k8s/masterN | Upgrade Docker # this may restart Docker daemon - include_tasks: docker.yml - - name: k8s/masterN | Replace cgroupfs driver with systemd driver include_tasks: patch-cgroup-driver.yml vars: { _requires_restart: false } # it will be properly restarted anyways when: - upgrade_to_final_version -- name: 
k8s/masterN | Restart kubelet and docker - include_tasks: utils/restart-kubelet-and-docker.yml +- name: k8s/masterN | Restart kubelet + include_tasks: utils/restart-kubelet.yml - name: k8s/masterN | Wait for cluster's readiness include_tasks: utils/wait.yml diff --git a/ansible/playbooks/upgrade.yml b/ansible/playbooks/upgrade.yml index 1922d9e926..6639327668 100644 --- a/ansible/playbooks/upgrade.yml +++ b/ansible/playbooks/upgrade.yml @@ -35,7 +35,6 @@ environment: KUBECONFIG: "{{ kubeconfig.remote }}" - - hosts: kubernetes_master:kubernetes_node serial: 1 become: true diff --git a/docs/changelogs/CHANGELOG-2.0.md b/docs/changelogs/CHANGELOG-2.0.md index b285d5c91e..fe48476299 100644 --- a/docs/changelogs/CHANGELOG-2.0.md +++ b/docs/changelogs/CHANGELOG-2.0.md @@ -9,6 +9,7 @@ - [#2812](https://github.com/epiphany-platform/epiphany/issues/2812) - Extend K8s config validation - [#2950](https://github.com/epiphany-platform/epiphany/issues/2950) - CLI refactor to make it more consistant - [#2844](https://github.com/epiphany-platform/epiphany/issues/2844) - Refactor K8s upgrade task in order to simplify its flow +- [#2716](https://github.com/epiphany-platform/epiphany/issues/2716) - Change container runtime to containerd ### Fixed From 1fc72dc2bf6378e8002bd87ee5287f06601662f4 Mon Sep 17 00:00:00 2001 From: rafzei Date: Tue, 18 Jan 2022 14:48:46 +0100 Subject: [PATCH 04/22] Adjust filebeat.yml after docker removal --- .../filebeat/tasks/configure-filebeat.yml | 21 ------------------- .../playbooks/roles/filebeat/tasks/main.yml | 10 +++++++-- .../roles/filebeat/templates/filebeat.yml.j2 | 19 +++-------------- 3 files changed, 11 insertions(+), 39 deletions(-) diff --git a/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml b/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml index 47f622cca5..f70cb976ef 100644 --- a/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml +++ b/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml @@ 
-25,26 +25,6 @@ mode: u=rw,go= register: modify_filebeat_yml -- name: Set Filebeat to be started after Docker - when: (groups['kubernetes_master'] is defined and inventory_hostname in groups['kubernetes_master']) - or (groups['kubernetes_node'] is defined and inventory_hostname in groups['kubernetes_node']) - block: - - name: Create directory (filebeat.service.d) - file: - path: /etc/systemd/system/filebeat.service.d - state: directory - - - name: Copy drop-in configuration file (extra-dependencies.conf) - template: - dest: /etc/systemd/system/filebeat.service.d/extra-dependencies.conf - src: extra-dependencies.conf.j2 - register: modify_filebeat_unit_dependencies - - - name: Run systemctl daemon-reload - systemd: - daemon_reload: true - when: modify_filebeat_unit_dependencies.changed - - name: Start/restart and enable filebeat service when: groups.logging[0] is defined block: @@ -58,7 +38,6 @@ name: filebeat state: restarted when: modify_filebeat_yml.changed - or modify_filebeat_unit_dependencies.changed or enable_module.changed or install_filebeat_package.changed diff --git a/ansible/playbooks/roles/filebeat/tasks/main.yml b/ansible/playbooks/roles/filebeat/tasks/main.yml index 4cdfe32550..73124a9a21 100644 --- a/ansible/playbooks/roles/filebeat/tasks/main.yml +++ b/ansible/playbooks/roles/filebeat/tasks/main.yml @@ -8,13 +8,19 @@ name: opendistro_for_logging_vars when: groups.logging is defined -- name: Include installation tasks for Filebeat as DaemonSet for "k8s as cloud service" - include_tasks: install-filebeat-as-daemonset.yml +- name: Filebeat as DaemonSet when: - k8s_as_cloud_service is defined - k8s_as_cloud_service - groups.logging is defined - groups.logging | length > 0 + block: + - name: Include installation tasks for Filebeat as DaemonSet for "k8s as cloud service" + include_tasks: install-filebeat-as-daemonset.yml + + - name: Set fact filebeat_as_daemonset + set_fact: + filebeat_as_daemonset: true - name: Include auditd configuration tasks 
include_tasks: configure-auditd.yml diff --git a/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 b/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 index f59e8bdfdd..a4dd7ff572 100644 --- a/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 +++ b/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 @@ -113,7 +113,7 @@ filebeat.inputs: - type: container enabled: true paths: - - /var/lib/docker/containers/*/*.log + - /var/log/containers/*.log {% if specification.container_input.multiline is defined %} multiline: {% for k, v in specification.container_input.multiline.items() %} @@ -122,21 +122,8 @@ filebeat.inputs: {% endif %} processors: - - add_docker_metadata: - labels.dedot: false - - rename: - fields: - - from: container.labels.io.kubernetes.container.name - to: kubernetes.container.name - - from: container.labels.io.kubernetes.pod.name - to: kubernetes.pod.name - - from: container.labels.io.kubernetes.pod.namespace - to: kubernetes.namespace - ignore_missing: true - fail_on_error: true - - drop_fields: - fields: - - container # Drop all fields added by 'add_docker_metadata' that were not renamed + - add_kubernetes_metadata: + in_cluster: {{ 'true' if filebeat_as_daemonset is defined else 'false' }} {% endif %} # ============================== Filebeat modules ============================== From dbc44b57d53a34419557922965ec6b16f0782a0e Mon Sep 17 00:00:00 2001 From: rafzei Date: Wed, 19 Jan 2022 00:03:52 +0100 Subject: [PATCH 05/22] Adjust K8s cert renewal + fix in spec test --- tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb index cce0ca4313..6759ee3ca4 100644 --- a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb +++ b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb @@ -220,7 +220,7 @@ describe 'Check 
the OCI-spec' do describe command('crictl --runtime-endpoint unix:///run/containerd/containerd.sock info') do let(:disable_sudo) { false } - its(:content_as_yaml) { should include('defaultRuntimeName' => 'runc') } + its(:stdout) { should contain('\"defaultRuntimeName\": \"runc\"') } end end @@ -230,8 +230,8 @@ its(:content_as_yaml) { should include('rotateCertificates' => true) } end describe command("kubectl describe cm $(kubectl get cm -n kube-system \ - | awk '/kubelet-config/{print $1}') -n kube-system | grep -i rotateCertificates") do - its(:stdout) { should match(/rotateCertificates: true/) } + | awk '/kubelet-config/{print $1}') -n kube-system") do + its(:stdout) { should contain('rotateCertificates: true') } its(:exit_status) { should eq 0 } end end From 9f435dee88c313a9481e32b003eabcc236c4b5d1 Mon Sep 17 00:00:00 2001 From: rafzei Date: Mon, 24 Jan 2022 11:57:23 +0100 Subject: [PATCH 06/22] Change filebeat custom chart values and spec test, fix backup k8s --- .../roles/backup/tasks/kubernetes.yml | 8 +++-- .../templates/custom-chart-values.yml.j2 | 32 ++++--------------- .../templates/extra-dependencies.conf.j2 | 4 --- .../kubernetes_promote/handlers/main.yml | 21 ++++++------ tests/spec/spec/filebeat/filebeat_spec.rb | 10 ------ 5 files changed, 20 insertions(+), 55 deletions(-) delete mode 100644 ansible/playbooks/roles/filebeat/templates/extra-dependencies.conf.j2 diff --git a/ansible/playbooks/roles/backup/tasks/kubernetes.yml b/ansible/playbooks/roles/backup/tasks/kubernetes.yml index 1a0062dae7..1d27b640f8 100644 --- a/ansible/playbooks/roles/backup/tasks/kubernetes.yml +++ b/ansible/playbooks/roles/backup/tasks/kubernetes.yml @@ -49,11 +49,13 @@ - name: Save etcd snapshot shell: | - docker run \ - -v "{{ backup_temp_dir.path }}/:/backup/" \ - --network host \ + ctr --namespace k8s.io \ + run \ + --mount type=bind,src={{ backup_temp_dir.path }}/,dst=/backup/,options=rbind:rw \ + --net-host \ --env ETCDCTL_API=3 \ --rm "{{ etcd_image_name.stdout | 
trim }}" \ + etcd \ etcdctl \ --endpoints https://127.0.0.1:2379 \ --cacert /backup/pki/etcd/ca.crt \ diff --git a/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 b/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 index 37bd8447a4..b27afdeadd 100644 --- a/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 +++ b/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 @@ -50,39 +50,21 @@ filebeatConfig: {# -------------------------- Kubernetes input -------------------------- #} {% if (k8s_as_cloud_service is defined and k8s_as_cloud_service) or ('kubernetes_master' in groups or 'kubernetes_node' in groups) %} - - type: docker + - type: container enabled: true - containers.ids: "*" + paths: + - /var/log/containers/*.log -{% if specification.docker_input.multiline is defined %} +{% if specification.container_input.multiline is defined %} multiline: -{% for k, v in specification.docker_input.multiline.items() %} +{% for k, v in specification.container_input.multiline.items() %} {{ k }}: {{ v }} {% endfor %} {% endif %} processors: - - add_docker_metadata: - - rename: - fields: - - from: docker.container.labels.io.kubernetes.container.name - to: kubernetes.container.name - - from: container.labels.io_kubernetes_container_name - to: kubernetes.container.name - - - from: docker.container.labels.io.kubernetes.pod.name - to: kubernetes.pod.name - - from: container.labels.io_kubernetes_pod_name - to: kubernetes.pod.name - - - from: docker.container.labels.io.kubernetes.pod.namespace - to: kubernetes.namespace - - from: container.labels.io_kubernetes_pod_namespace - to: kubernetes.namespace - ignore_missing: true - fail_on_error: true - - drop_fields: - fields: - - docker + - add_kubernetes_metadata: + in_cluster: {{ 'true' if filebeat_as_daemonset is defined else 'false' }} + {% endif %} {# -------------------------- Filebeat modules -------------------------- #} diff --git 
a/ansible/playbooks/roles/filebeat/templates/extra-dependencies.conf.j2 b/ansible/playbooks/roles/filebeat/templates/extra-dependencies.conf.j2 deleted file mode 100644 index d171edae51..0000000000 --- a/ansible/playbooks/roles/filebeat/templates/extra-dependencies.conf.j2 +++ /dev/null @@ -1,4 +0,0 @@ -# {{ ansible_managed }} - -[Unit] -After=docker.service \ No newline at end of file diff --git a/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml b/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml index eda1a4eeb0..38459f7514 100644 --- a/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml +++ b/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml @@ -1,19 +1,17 @@ --- - name: Restart controller-manager shell: | - docker ps \ - --filter 'name=kube-controller-manager_kube-controller-manager' \ - --format '{{ "{{.ID}}" }}' \ - | xargs --no-run-if-empty docker kill + crictl ps \ + --name=\kube-controller-manager' -q \ + | xargs --no-run-if-empty crictl stop --timeout=0 args: executable: /bin/bash - name: Restart scheduler shell: | - docker ps \ - --filter 'name=kube-scheduler_kube-scheduler' \ - --format '{{ "{{.ID}}" }}' \ - | xargs --no-run-if-empty docker kill + crictl ps \ + --name='kube-scheduler' -q \ + | xargs --no-run-if-empty crictl stop --timeout=0 args: executable: /bin/bash @@ -24,10 +22,9 @@ - name: Restart kube-proxy shell: | - docker ps \ - --filter 'name=kube-proxy_kube-proxy' \ - --format '{{ "{{.ID}}" }}' \ - | xargs --no-run-if-empty docker kill + crictl ps \ + --name='kube-proxy' -q \ + | xargs --no-run-if-empty crictl stop --timeout=0 args: executable: /bin/bash diff --git a/tests/spec/spec/filebeat/filebeat_spec.rb b/tests/spec/spec/filebeat/filebeat_spec.rb index 54432233ca..985662fcd7 100644 --- a/tests/spec/spec/filebeat/filebeat_spec.rb +++ b/tests/spec/spec/filebeat/filebeat_spec.rb @@ -44,16 +44,6 @@ end end -if hostInGroups?("kubernetes_master") || hostInGroups?("kubernetes_node") - describe 'Check 
extra configuration for master/worker roles - setting Filebeat to be started after Docker' do - describe file("/etc/systemd/system/filebeat.service.d/extra-dependencies.conf") do - it { should exist } - it { should be_a_file } - its(:content) { should match /After=docker\.service/ } - end - end -end - if es_logstash_user_is_active listInventoryHosts("logging").each do |val| describe 'Check the connection to the Elasticsearch hosts' do From a213003a7ee407be31fbbdee1490972a6be939d7 Mon Sep 17 00:00:00 2001 From: rafzei Date: Mon, 24 Jan 2022 12:44:03 +0100 Subject: [PATCH 07/22] Doc update --- docs/architecture/logical-view.md | 2 +- docs/home/howto/MAINTENANCE.md | 90 +++++++++++++++++-------------- 2 files changed, 51 insertions(+), 41 deletions(-) diff --git a/docs/architecture/logical-view.md b/docs/architecture/logical-view.md index d82c707d16..47d9acde34 100644 --- a/docs/architecture/logical-view.md +++ b/docs/architecture/logical-view.md @@ -49,7 +49,7 @@ Source | Purpose /var/log/secure | Logs from authentication and authorization /var/log/syslog | System logs and events /var/log/zookeeper/version-2/* | Zookeeper's logs -Docker containers | Kubernetes components that run in a container +Containers | Kubernetes components that run in a container `Filebeat`, unlike `Grafana`, pushes data to database (`Elasticsearch`) instead of pulling them. [Read more](https://www.elastic.co/products/beats/filebeat) about `Filebeat`. diff --git a/docs/home/howto/MAINTENANCE.md b/docs/home/howto/MAINTENANCE.md index e9497faa6c..e8a9d28d0f 100644 --- a/docs/home/howto/MAINTENANCE.md +++ b/docs/home/howto/MAINTENANCE.md @@ -4,33 +4,18 @@ This part of the documentations covers the topic how to check if each component is working properly. 
-#### - Docker - -To verify that Docker services are up and running you can first check the status of the Docker service with the -following command: - -```shell -systemctl status docker -``` - -Additionally, you can check also if the command: - -```shell -docker info -``` - -doesn't return any error. You can also find there useful information about your Docker configuration. - #### - Kubernetes -First to check if everything is working fine we need to check verify status of Kubernetes kubelet service with the +First to check if everything is working fine you need to check verify status of Kubernetes kubelet service with the command: ```shell systemctl status kubelet ``` -We can also check state of Kubernetes nodes using the command: +##### - kubectl + +You can also check state of Kubernetes nodes using the `kubectl` command: ```shell root@primary01:~# kubectl get nodes --kubeconfig=/etc/kubernetes/admin.conf @@ -40,7 +25,7 @@ node01 Ready 23h node02 Ready 23h vx.xx.x ``` -We can get additional information about Kubernetes components: +You can get additional information about Kubernetes components: ```shell root@primary01:~# kubectl cluster-info --kubeconfig=/etc/kubernetes/admin.conf @@ -48,14 +33,39 @@ Kubernetes control plane is running at https://primary01:6443 CoreDNS is running at https://primary01:6443/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy ``` -We can also check status of pods in all namespaces using the command: +You can also check status of pods in all namespaces using the command: ```shell kubectl get pods -A --kubeconfig=/etc/kubernetes/admin.conf ``` For more detailed information please refer -to [official documentation](https://kubernetes.io/docs/reference/kubectl/overview/) +to [the official documentation](https://kubernetes.io/docs/reference/kubectl/overview/). 
+ +##### - crictl + +You can also check state of Kubernetes components using the `crictl` command: + +List all pods: + +```shell +crictl pods +``` + +List all images: + +```shell +crictl images +``` + +List all containers: + +```shell +crictl ps -a +``` + +The crictl tool provides the possibility to run a sandbox container which may be useful for debugging purposes. +For more information please refer to [the official documentation](https://kubernetes.io/docs/tasks/debug-application-cluster/crictl). #### - Keycloak @@ -67,24 +77,24 @@ kubectl get pods --kubeconfig=/etc/kubernetes/admin.conf --namespace=keycloak_se #### - HAProxy -To check status of HAProxy we can use the command: +To check status of HAProxy you can use the command: ```shell systemctl status haproxy ``` -Additionally, we can check if the application is listening on ports defined in the file haproxy.cfg running netstat +Additionally, you can check if the application is listening on ports defined in the file haproxy.cfg running netstat command. 
#### - Prometheus -To check status of Prometheus we can use the command: +To check status of Prometheus you can use the command: ```shell systemctl status prometheus ``` -We can also check if Prometheus service is listening at the port 9090: +You can also check if Prometheus service is listening at the port 9090: ```shell netstat -antup | grep 9090 @@ -92,13 +102,13 @@ netstat -antup | grep 9090 #### - Grafana -To check status of Grafana we can use the command: +To check status of Grafana you can use the command: ```shell systemctl status grafana-server ``` -We can also check if Grafana service is listening at the port 3000: +You can also check if Grafana service is listening at the port 3000: ```shell netstat -antup | grep 3000 @@ -106,7 +116,7 @@ netstat -antup | grep 3000 #### - Prometheus Node Exporter -To check status of Node Exporter we can use the command: +To check status of Node Exporter you can use the command: ```shell status prometheus-node-exporter @@ -114,41 +124,41 @@ status prometheus-node-exporter #### - Elasticsearch -To check status of Elasticsearch we can use the command: +To check status of Elasticsearch you can use the command: ```shell systemct status elasticsearch ``` -We can check if service is listening on 9200 (API communication port): +You can check if service is listening on 9200 (API communication port): ```shell netstat -antup | grep 9200 ``` -We can also check if service is listening on 9300 (nodes communication port): +You can also check if service is listening on 9300 (nodes communication port): ```shell netstat -antup | grep 9300 ``` -We can also check status of Elasticsearch cluster: +You can also check status of Elasticsearch cluster: ```shell :9200/_cluster/health ``` -We can do this using curl or any other equivalent tool. +You can do this using curl or any other equivalent tool. 
#### - Kibana -To check status of Kibana we can use the command: +To check status of Kibana you can use the command: ```shell systemctl status kibana ``` -We can also check if Kibana service is listening at the port 5601: +You can also check if Kibana service is listening at the port 5601: ```shell netstat -antup | grep 5601 @@ -156,7 +166,7 @@ netstat -antup | grep 5601 #### - Filebeat -To check status of Filebeat we can use the command: +To check status of Filebeat you can use the command: ```shell systemctl status filebeat @@ -164,7 +174,7 @@ systemctl status filebeat #### - PostgreSQL -To check status of PostgreSQL we can use commands: +To check status of PostgreSQL you can use commands: - on Ubuntu: @@ -181,13 +191,13 @@ systemctl status postgresql-10 where postgresql-10 is only an example, because the number differs from version to version. Please refer to your version number in case of using this command. -We can also check if PostgreSQL service is listening at the port 5432: +You can also check if PostgreSQL service is listening at the port 5432: ```shell netstat -antup | grep 5432 ``` -We can also use the pg_isready command, to get information if the PostgreSQL server is running and accepting connections +You can also use the pg_isready command, to get information if the PostgreSQL server is running and accepting connections with command: - on Ubuntu: From a7e727971eacc08e4d320f662c69fdf5de5b03b5 Mon Sep 17 00:00:00 2001 From: rafzei Date: Mon, 24 Jan 2022 16:45:15 +0100 Subject: [PATCH 08/22] Add extra condition for docker removal --- ansible/playbooks/roles/containerd/tasks/main.yml | 14 ++++++++++---- .../roles/kubernetes_promote/handlers/main.yml | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/ansible/playbooks/roles/containerd/tasks/main.yml b/ansible/playbooks/roles/containerd/tasks/main.yml index 8b4c6d8695..6661735847 100644 --- a/ansible/playbooks/roles/containerd/tasks/main.yml +++ 
b/ansible/playbooks/roles/containerd/tasks/main.yml @@ -1,6 +1,9 @@ --- -- include_tasks: remove-docker.yml - when: is_upgrade_run +- name: Remove Docker + include_tasks: remove-docker.yml + when: + - is_upgrade_run + - inventory_hostname not in groups.image_registry - name: Install Containerd package package: @@ -9,8 +12,11 @@ module_defaults: yum: { lock_timeout: "{{ yum_lock_timeout }}" } -- include_tasks: configure-prerequisites.yml -- include_tasks: configure-containerd.yml +- name: Configure prerequisites + include_tasks: configure-prerequisites.yml + +- name: Configure Containerd + include_tasks: configure-containerd.yml - name: Append Containerd to kubelet config replace: diff --git a/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml b/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml index 38459f7514..91177d484a 100644 --- a/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml +++ b/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml @@ -2,7 +2,7 @@ - name: Restart controller-manager shell: | crictl ps \ - --name=\kube-controller-manager' -q \ + --name='kube-controller-manager' -q \ | xargs --no-run-if-empty crictl stop --timeout=0 args: executable: /bin/bash From b0909f240071e559d28d290e7c81833234e2d8ce Mon Sep 17 00:00:00 2001 From: rafzei Date: Tue, 18 Jan 2022 14:48:46 +0100 Subject: [PATCH 09/22] Adjust filebeat.yml after docker removal --- docs/changelogs/CHANGELOG-2.0.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/changelogs/CHANGELOG-2.0.md b/docs/changelogs/CHANGELOG-2.0.md index fe48476299..2f45f85a58 100644 --- a/docs/changelogs/CHANGELOG-2.0.md +++ b/docs/changelogs/CHANGELOG-2.0.md @@ -10,7 +10,8 @@ - [#2950](https://github.com/epiphany-platform/epiphany/issues/2950) - CLI refactor to make it more consistant - [#2844](https://github.com/epiphany-platform/epiphany/issues/2844) - Refactor K8s upgrade task in order to simplify its flow - 
[#2716](https://github.com/epiphany-platform/epiphany/issues/2716) - Change container runtime to containerd - +- [#2701](https://github.com/epiphany-platform/epiphany/issues/2701) - Epicli prepare - generate files in separate directory +- [#2716](https://github.com/epiphany-platform/epiphany/issues/2716) - Change container runtime to containerd ### Fixed - [#2653](https://github.com/epiphany-platform/epiphany/issues/2653) - Epicli is failing in air-gapped infra mode From 8bfe30e71586ecb12e228f6c88a82dbc8f2f1894 Mon Sep 17 00:00:00 2001 From: rafzei Date: Tue, 1 Feb 2022 22:27:04 +0100 Subject: [PATCH 10/22] Changes after review --- .../tasks/configure-prerequisites.yml | 6 +- .../playbooks/roles/containerd/tasks/main.yml | 55 ++++++++++++++++--- .../roles/containerd/templates/config.toml.j2 | 1 + .../tasks/remove-docker.yml | 2 + .../filebeat/tasks/configure-filebeat.yml | 26 +++++++++ .../templates/custom-chart-values.yml.j2 | 9 ++- .../roles/filebeat/templates/filebeat.yml.j2 | 6 ++ .../roles/kubernetes_common/tasks/main.yml | 5 +- .../tasks/kubernetes/upgrade-master0.yml | 2 - docs/changelogs/CHANGELOG-2.0.md | 4 +- docs/home/howto/MAINTENANCE.md | 11 ++-- 11 files changed, 101 insertions(+), 26 deletions(-) rename ansible/playbooks/roles/{containerd => docker}/tasks/remove-docker.yml (88%) diff --git a/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml b/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml index 84b4c54ea9..ba418e1b32 100644 --- a/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml +++ b/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml @@ -1,6 +1,5 @@ --- - name: Provide containerd.conf file - become: true copy: src: containerd.conf dest: /etc/modules-load.d/containerd.conf @@ -9,8 +8,7 @@ mode: u=rw,go= - name: Load modules - command: "modprobe {{ item }}" - become: true - with_items: + command: modprobe {{ item }} + loop: - overlay - br_netfilter diff --git 
a/ansible/playbooks/roles/containerd/tasks/main.yml b/ansible/playbooks/roles/containerd/tasks/main.yml index 6661735847..bf35fe08e3 100644 --- a/ansible/playbooks/roles/containerd/tasks/main.yml +++ b/ansible/playbooks/roles/containerd/tasks/main.yml @@ -1,6 +1,17 @@ --- - name: Remove Docker - include_tasks: remove-docker.yml + block: + - name: Stop Kubelet before Docker removal + systemd: + name: kubelet + state: stopped + notify: + - Restart kubelet + + - name: Remove Docker + include_role: + name: docker + tasks_from: remove-docker.yml when: - is_upgrade_run - inventory_hostname not in groups.image_registry @@ -18,11 +29,39 @@ - name: Configure Containerd include_tasks: configure-containerd.yml -- name: Append Containerd to kubelet config - replace: - path: /var/lib/kubelet/kubeadm-flags.env - regexp: '(\")$' - replace: ' --container-runtime=remote --container-runtime-endpoint=/run/containerd/containerd.sock"' - notify: - - Restart kubelet +- name: Reconfigure kubelet args when: is_upgrade_run + block: + - name: Get kubeadm-flags.env file content + slurp: + src: /var/lib/kubelet/kubeadm-flags.env + register: kubelet_kubeadm_args + + - name: Set kubelet_kubeadmn_args_content + set_fact: + kubelet_kubeadmn_args_content: "{{ kubelet_kubeadm_args.content | b64decode }}" + + - name: Modify container-runtime + replace: + path: /var/lib/kubelet/kubeadm-flags.env + regexp: '{{ item.regexp }}' + replace: '{{ item.replace }}' + backup: yes + loop: + - { regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(--container-runtime=[a-zA-Z0-9_]+)(.*)', replace: '\1\2--container-runtime=remote\4' } + - { regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(--container-runtime-endpoint=.*\.sock)(.*)', replace: '\1\2--container-runtime-endpoint=/run/containerd/containerd.sock\4' } + when: kubelet_kubeadmn_args_content.find('--container-runtime') != -1 + + - name: Append container-runtime to kubelet config + replace: + path: /var/lib/kubelet/kubeadm-flags.env + regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(\")$' 
+ replace: '\1\2 --container-runtime=remote"' + when: kubelet_kubeadmn_args_content.find('--container-runtime') == -1 + + - name: Append container-runtime-endpoint to kubelet config + replace: + path: /var/lib/kubelet/kubeadm-flags.env + regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(\")$' + replace: '\1\2 --container-runtime-endpoint=/run/containerd/containerd.sock"' + when: kubelet_kubeadmn_args_content.find('--container-runtime-endpoint') == -1 diff --git a/ansible/playbooks/roles/containerd/templates/config.toml.j2 b/ansible/playbooks/roles/containerd/templates/config.toml.j2 index bc7d986bf5..1434ec685d 100644 --- a/ansible/playbooks/roles/containerd/templates/config.toml.j2 +++ b/ansible/playbooks/roles/containerd/templates/config.toml.j2 @@ -1,3 +1,4 @@ +# {{ ansible_managed }} version = 2 root = "/var/lib/containerd" state = "/run/containerd" diff --git a/ansible/playbooks/roles/containerd/tasks/remove-docker.yml b/ansible/playbooks/roles/docker/tasks/remove-docker.yml similarity index 88% rename from ansible/playbooks/roles/containerd/tasks/remove-docker.yml rename to ansible/playbooks/roles/docker/tasks/remove-docker.yml index 4c46b80eb4..0a6f952112 100644 --- a/ansible/playbooks/roles/containerd/tasks/remove-docker.yml +++ b/ansible/playbooks/roles/docker/tasks/remove-docker.yml @@ -1,6 +1,8 @@ +# Included in containerd role --- - name: Populate service facts service_facts: + when: ansible_facts.services is undefined - name: Stop Docker daemon systemd: diff --git a/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml b/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml index f70cb976ef..2be8f37ee8 100644 --- a/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml +++ b/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml @@ -25,6 +25,32 @@ mode: u=rw,go= register: modify_filebeat_yml +- name: Append new field definition + blockinfile: + path: /etc/filebeat/fields.yml + backup: true + block: |2 + - key: containerd + title: 
"Containerd" + description: > + Reading data from containerd log filepath. + short_config: true + fields: + - name: containerd + type: group + description: > + Contains extra fields for containerd logs. + fields: + - name: container.pod.name + type: text + format: string + - name: container.uuid + type: text + format: string + - name: container.namespace + type: text + format: string + - name: Start/restart and enable filebeat service when: groups.logging[0] is defined block: diff --git a/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 b/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 index b27afdeadd..52b3f2c8e3 100644 --- a/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 +++ b/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 @@ -52,6 +52,9 @@ filebeatConfig: {% if (k8s_as_cloud_service is defined and k8s_as_cloud_service) or ('kubernetes_master' in groups or 'kubernetes_node' in groups) %} - type: container enabled: true + format: cri + paths: + - /var/log/containers/*.log {% if specification.container_input.multiline is defined %} multiline: {% for k, v in specification.container_input.multiline.items() %} @@ -62,7 +65,11 @@ filebeatConfig: processors: - add_kubernetes_metadata: in_cluster: {{ 'true' if filebeat_as_daemonset is defined else 'false' }} - + - dissect: + tokenizer: "/var/log/containers/%{container.pod.name}_%{container.namespace}_%{container.uuid}.log" + field: "log.file.path" + target_prefix: "" + overwrite_keys: true {% endif %} {# -------------------------- Filebeat modules -------------------------- #} diff --git a/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 b/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 index a4dd7ff572..4b2586d5b2 100644 --- a/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 +++ b/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 @@ -112,6 +112,7 @@ filebeat.inputs: - type: container enabled: 
true + format: cri paths: - /var/log/containers/*.log {% if specification.container_input.multiline is defined %} @@ -124,6 +125,11 @@ filebeat.inputs: processors: - add_kubernetes_metadata: in_cluster: {{ 'true' if filebeat_as_daemonset is defined else 'false' }} + - dissect: + tokenizer: "/var/log/containers/%{container.pod.name}_%{container.namespace}_%{container.uuid}.log" + field: "log.file.path" + target_prefix: "" + overwrite_keys: true {% endif %} # ============================== Filebeat modules ============================== diff --git a/ansible/playbooks/roles/kubernetes_common/tasks/main.yml b/ansible/playbooks/roles/kubernetes_common/tasks/main.yml index 72595e53fa..5a7656c2f7 100644 --- a/ansible/playbooks/roles/kubernetes_common/tasks/main.yml +++ b/ansible/playbooks/roles/kubernetes_common/tasks/main.yml @@ -8,6 +8,7 @@ state: present reload: yes +# Check needed only for RHEL - name: Check if bridge-nf-call-iptables key exists command: "sysctl net.bridge.bridge-nf-call-iptables" failed_when: false @@ -20,8 +21,8 @@ state: present value: "1" reload: yes - when: sysctl_bridge_nf_call_iptables.rc != 0 - with_items: + when: sysctl_bridge_nf_call_iptables.rc == 0 + loop: - net.bridge.bridge-nf-call-iptables - net.bridge.bridge-nf-call-ip6tables diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml index 6cc4b631e8..2c32b4768b 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml @@ -42,8 +42,6 @@ include_tasks: set-cluster-version.yml # sets cluster_version - name: k8s/master0 | Add k8s annotation for containerd - delegate_to: "{{ groups.kubernetes_master[0] }}" - run_once: true command: >- kubectl annotate node {{ inventory_hostname }} --overwrite kubeadm.alpha.kubernetes.io/cri-socket=unix:///run/containerd/containerd.sock diff --git 
a/docs/changelogs/CHANGELOG-2.0.md b/docs/changelogs/CHANGELOG-2.0.md index 2f45f85a58..6a8ed04530 100644 --- a/docs/changelogs/CHANGELOG-2.0.md +++ b/docs/changelogs/CHANGELOG-2.0.md @@ -10,8 +10,7 @@ - [#2950](https://github.com/epiphany-platform/epiphany/issues/2950) - CLI refactor to make it more consistant - [#2844](https://github.com/epiphany-platform/epiphany/issues/2844) - Refactor K8s upgrade task in order to simplify its flow - [#2716](https://github.com/epiphany-platform/epiphany/issues/2716) - Change container runtime to containerd -- [#2701](https://github.com/epiphany-platform/epiphany/issues/2701) - Epicli prepare - generate files in separate directory -- [#2716](https://github.com/epiphany-platform/epiphany/issues/2716) - Change container runtime to containerd + ### Fixed - [#2653](https://github.com/epiphany-platform/epiphany/issues/2653) - Epicli is failing in air-gapped infra mode @@ -48,7 +47,6 @@ ### Deprecated - ### Breaking changes - Upgrade of Terraform components in issue [#2825](https://github.com/epiphany-platform/epiphany/issues/2825) and [#2853](https://github.com/epiphany-platform/epiphany/issues/2853) will make running re-apply with infrastructure break on existing 1.x clusters. The advice is to deploy a new cluster and migrate data. 
If needed a manual upgrade path is described [here.](../home/howto/UPGRADE.md#terraform-upgrade-from-epiphany-1.x-to-2.x) diff --git a/docs/home/howto/MAINTENANCE.md b/docs/home/howto/MAINTENANCE.md index e8a9d28d0f..949e56c57a 100644 --- a/docs/home/howto/MAINTENANCE.md +++ b/docs/home/howto/MAINTENANCE.md @@ -6,8 +6,7 @@ This part of the documentations covers the topic how to check if each component #### - Kubernetes -First to check if everything is working fine you need to check verify status of Kubernetes kubelet service with the -command: +Verify status of Kubernetes kubelet service with the command: ```shell systemctl status kubelet @@ -15,7 +14,7 @@ systemctl status kubelet ##### - kubectl -You can also check state of Kubernetes nodes using the `kubectl` command: +Check state of Kubernetes nodes using the `kubectl` command: ```shell root@primary01:~# kubectl get nodes --kubeconfig=/etc/kubernetes/admin.conf @@ -25,7 +24,7 @@ node01 Ready 23h node02 Ready 23h vx.xx.x ``` -You can get additional information about Kubernetes components: +Get additional information about Kubernetes components: ```shell root@primary01:~# kubectl cluster-info --kubeconfig=/etc/kubernetes/admin.conf @@ -33,7 +32,7 @@ Kubernetes control plane is running at https://primary01:6443 CoreDNS is running at https://primary01:6443/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy ``` -You can also check status of pods in all namespaces using the command: +Check status of pods in all namespaces using the command: ```shell kubectl get pods -A --kubeconfig=/etc/kubernetes/admin.conf @@ -65,7 +64,7 @@ crictl ps -a ``` The crictl tool provides the possibility to run a sandbox container which may be useful for debugging purposes. -For more information please refer to [the official documentation](https://kubernetes.io/docs/tasks/debug-application-cluster/crictl). +For more information, refer to [the official documentation](https://kubernetes.io/docs/tasks/debug-application-cluster/crictl). 
#### - Keycloak From 6b671e1db7ac905d8a3327a82724ef47a35a3277 Mon Sep 17 00:00:00 2001 From: rafzei Date: Wed, 2 Feb 2022 11:51:25 +0100 Subject: [PATCH 11/22] Doc update --- docs/home/howto/MAINTENANCE.md | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/home/howto/MAINTENANCE.md b/docs/home/howto/MAINTENANCE.md index 949e56c57a..52cc3de205 100644 --- a/docs/home/howto/MAINTENANCE.md +++ b/docs/home/howto/MAINTENANCE.md @@ -43,7 +43,7 @@ to [the official documentation](https://kubernetes.io/docs/reference/kubectl/ove ##### - crictl -You can also check state of Kubernetes components using the `crictl` command: +Check state of Kubernetes components using the `crictl` command: List all pods: @@ -76,7 +76,7 @@ kubectl get pods --kubeconfig=/etc/kubernetes/admin.conf --namespace=keycloak_se #### - HAProxy -To check status of HAProxy you can use the command: +To check status of HAProxy, use the command: ```shell systemctl status haproxy @@ -87,13 +87,13 @@ command. 
#### - Prometheus -To check status of Prometheus you can use the command: +To check status of Prometheus, use the command: ```shell systemctl status prometheus ``` -You can also check if Prometheus service is listening at the port 9090: +Check if Prometheus service is listening at the port 9090: ```shell netstat -antup | grep 9090 @@ -101,13 +101,13 @@ netstat -antup | grep 9090 #### - Grafana -To check status of Grafana you can use the command: +To check status of Grafana, use the command: ```shell systemctl status grafana-server ``` -You can also check if Grafana service is listening at the port 3000: +Check if Grafana service is listening at the port 3000: ```shell netstat -antup | grep 3000 @@ -115,7 +115,7 @@ netstat -antup | grep 3000 #### - Prometheus Node Exporter -To check status of Node Exporter you can use the command: +To check status of Node Exporter, use the command: ```shell status prometheus-node-exporter @@ -123,25 +123,25 @@ status prometheus-node-exporter #### - Elasticsearch -To check status of Elasticsearch you can use the command: +To check status of Elasticsearch, use the command: ```shell systemct status elasticsearch ``` -You can check if service is listening on 9200 (API communication port): +Check if service is listening on 9200 (API communication port): ```shell netstat -antup | grep 9200 ``` -You can also check if service is listening on 9300 (nodes communication port): +Check if service is listening on 9300 (nodes communication port): ```shell netstat -antup | grep 9300 ``` -You can also check status of Elasticsearch cluster: +Check status of Elasticsearch cluster: ```shell :9200/_cluster/health @@ -151,13 +151,13 @@ You can do this using curl or any other equivalent tool. 
#### - Kibana -To check status of Kibana you can use the command: +To check status of Kibana, use the command: ```shell systemctl status kibana ``` -You can also check if Kibana service is listening at the port 5601: +Check if Kibana service is listening at the port 5601: ```shell netstat -antup | grep 5601 @@ -165,7 +165,7 @@ netstat -antup | grep 5601 #### - Filebeat -To check status of Filebeat you can use the command: +To check status of Filebeat, use the command: ```shell systemctl status filebeat @@ -173,7 +173,7 @@ systemctl status filebeat #### - PostgreSQL -To check status of PostgreSQL you can use commands: +To check status of PostgreSQL, use commands: - on Ubuntu: @@ -190,13 +190,13 @@ systemctl status postgresql-10 where postgresql-10 is only an example, because the number differs from version to version. Please refer to your version number in case of using this command. -You can also check if PostgreSQL service is listening at the port 5432: +Check if PostgreSQL service is listening at the port 5432: ```shell netstat -antup | grep 5432 ``` -You can also use the pg_isready command, to get information if the PostgreSQL server is running and accepting connections +Use the pg_isready command, to get information if the PostgreSQL server is running and accepting connections with command: - on Ubuntu: From fc8a804989e240009011352a7cfc9e461823a68e Mon Sep 17 00:00:00 2001 From: rafzei Date: Wed, 2 Feb 2022 21:49:32 +0100 Subject: [PATCH 12/22] Hardcode containerd version, update components.md --- .../download-requirements/centos-7/requirements.aarch64.txt | 2 +- .../download-requirements/centos-7/requirements.x86_64.txt | 2 +- .../download-requirements/redhat-7/requirements.x86_64.txt | 2 +- .../download-requirements/ubuntu-20.04/requirements.x86_64.txt | 2 +- docs/home/COMPONENTS.md | 1 + 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.aarch64.txt 
b/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.aarch64.txt index 7ecc3fe7f3..c75b511532 100644 --- a/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.aarch64.txt +++ b/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.aarch64.txt @@ -26,7 +26,7 @@ bash-completion ca-certificates cifs-utils conntrack-tools # for kubelet -containerd.io +containerd.io-1.4.12 container-selinux #cri-tools-1.13.0 curl diff --git a/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.x86_64.txt b/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.x86_64.txt index fe4ac78f84..fa77cfea97 100644 --- a/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.x86_64.txt +++ b/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.x86_64.txt @@ -26,7 +26,7 @@ bash-completion ca-certificates cifs-utils conntrack-tools # for kubelet -containerd.io +containerd.io-1.4.12 container-selinux cri-tools-1.13.0 curl diff --git a/ansible/playbooks/roles/repository/files/download-requirements/redhat-7/requirements.x86_64.txt b/ansible/playbooks/roles/repository/files/download-requirements/redhat-7/requirements.x86_64.txt index 86698c69fa..0d76c63999 100644 --- a/ansible/playbooks/roles/repository/files/download-requirements/redhat-7/requirements.x86_64.txt +++ b/ansible/playbooks/roles/repository/files/download-requirements/redhat-7/requirements.x86_64.txt @@ -24,7 +24,7 @@ bash-completion ca-certificates cifs-utils conntrack-tools # for kubelet -containerd.io +containerd.io-1.4.12 container-selinux cri-tools-1.13.0 curl diff --git a/ansible/playbooks/roles/repository/files/download-requirements/ubuntu-20.04/requirements.x86_64.txt b/ansible/playbooks/roles/repository/files/download-requirements/ubuntu-20.04/requirements.x86_64.txt index a37ec22874..6de70d5e4f 100644 --- 
a/ansible/playbooks/roles/repository/files/download-requirements/ubuntu-20.04/requirements.x86_64.txt +++ b/ansible/playbooks/roles/repository/files/download-requirements/ubuntu-20.04/requirements.x86_64.txt @@ -8,7 +8,7 @@ auditd bash-completion ca-certificates cifs-utils -containerd.io +containerd.io 1.4.12 cri-tools 1.13.0 curl docker-ce 5:20.10.8 diff --git a/docs/home/COMPONENTS.md b/docs/home/COMPONENTS.md index 8511441380..5e77ef75f2 100644 --- a/docs/home/COMPONENTS.md +++ b/docs/home/COMPONENTS.md @@ -9,6 +9,7 @@ Note that versions are default versions and can be changed in certain cases thro | Kubernetes | 1.22.4 | https://github.com/kubernetes/kubernetes | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | | Kubernetes Dashboard | 2.3.1 | https://github.com/kubernetes/dashboard | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | | Kubernetes metrics-scraper | 1.0.7 | https://github.com/kubernetes-sigs/dashboard-metrics-scraper | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Containerd | 1.4.12 | https://github.com/containerd/containerd | [Apache License 2.0](https://github.com/containerd/containerd/blob/main/LICENSE) | | Calico | 3.20.3 | https://github.com/projectcalico/calico | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | | Flannel | 0.14.0 | https://github.com/coreos/flannel/ | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | | Canal | 3.20.3 | https://github.com/projectcalico/calico | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | From 25c157f70f21380412b73328b92508591c1301dc Mon Sep 17 00:00:00 2001 From: rafzei Date: Mon, 7 Feb 2022 21:21:56 +0100 Subject: [PATCH 13/22] Changes after review --- .../roles/containerd/defaults/main.yml | 2 ++ .../roles/containerd/handlers/main.yml | 2 +- .../containerd/tasks/configure-containerd.yml | 6 +++--- .../tasks/configure-prerequisites.yml | 4 +++- 
.../playbooks/roles/containerd/tasks/main.yml | 18 ++++++++++++++--- .../roles/containerd/templates/config.toml.j2 | 2 +- .../filebeat/tasks/configure-filebeat.yml | 9 +++++---- .../playbooks/roles/filebeat/tasks/main.yml | 10 ++-------- .../templates/custom-chart-values.yml.j2 | 10 ++++------ .../roles/filebeat/templates/filebeat.yml.j2 | 4 +--- .../tasks/generate-certificates.yml | 2 +- .../roles/kubernetes_master/tasks/main.yml | 8 +++----- .../kubernetes_promote/handlers/main.yml | 20 +++++++++---------- docs/changelogs/CHANGELOG-2.0.md | 2 ++ docs/home/COMPONENTS.md | 2 +- 15 files changed, 54 insertions(+), 47 deletions(-) create mode 100644 ansible/playbooks/roles/containerd/defaults/main.yml diff --git a/ansible/playbooks/roles/containerd/defaults/main.yml b/ansible/playbooks/roles/containerd/defaults/main.yml new file mode 100644 index 0000000000..f2fc92a8b4 --- /dev/null +++ b/ansible/playbooks/roles/containerd/defaults/main.yml @@ -0,0 +1,2 @@ +--- +containerd_version: "1.4.12" diff --git a/ansible/playbooks/roles/containerd/handlers/main.yml b/ansible/playbooks/roles/containerd/handlers/main.yml index 1c18e8ef6c..49f7022e0e 100644 --- a/ansible/playbooks/roles/containerd/handlers/main.yml +++ b/ansible/playbooks/roles/containerd/handlers/main.yml @@ -1,4 +1,4 @@ -- name: Restart Containerd +- name: Restart containerd systemd: name: containerd state: restarted diff --git a/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml b/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml index a4e032fe67..5d1ada26c7 100644 --- a/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml +++ b/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml @@ -1,5 +1,5 @@ --- -- name: Create Containerd dir +- name: Create containerd dir file: path: /etc/containerd state: directory @@ -7,7 +7,7 @@ group: root mode: u=rw,go=r -- name: Provide Containerd config +- name: Provide containerd config template: src: config.toml.j2 dest: 
/etc/containerd/config.toml @@ -15,4 +15,4 @@ owner: root group: root notify: - - Restart Containerd + - Restart containerd diff --git a/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml b/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml index ba418e1b32..c65330cede 100644 --- a/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml +++ b/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml @@ -8,7 +8,9 @@ mode: u=rw,go= - name: Load modules - command: modprobe {{ item }} + modprobe: + name: "{{ item }}" + state: present loop: - overlay - br_netfilter diff --git a/ansible/playbooks/roles/containerd/tasks/main.yml b/ansible/playbooks/roles/containerd/tasks/main.yml index bf35fe08e3..3d1e8a15b4 100644 --- a/ansible/playbooks/roles/containerd/tasks/main.yml +++ b/ansible/playbooks/roles/containerd/tasks/main.yml @@ -1,4 +1,8 @@ --- +- name: Get information on installed packages + package_facts: + when: ansible_facts.packages is undefined + - name: Remove Docker block: - name: Stop Kubelet before Docker removal @@ -15,20 +19,28 @@ when: - is_upgrade_run - inventory_hostname not in groups.image_registry + - ansible_facts.packages['docker-ce'] is defined -- name: Install Containerd package +- name: Install containerd package package: - name: containerd.io + name: "{{ _packages[ansible_os_family] }}" state: present + vars: + _packages: + Debian: + - containerd.io={{ containerd_version }}-* + RedHat: + - containerd.io-{{ containerd_version }} module_defaults: yum: { lock_timeout: "{{ yum_lock_timeout }}" } - name: Configure prerequisites include_tasks: configure-prerequisites.yml -- name: Configure Containerd +- name: Configure containerd include_tasks: configure-containerd.yml +# To be replaced by filter plugin (https://github.com/epiphany-platform/epiphany/issues/2943) - name: Reconfigure kubelet args when: is_upgrade_run block: diff --git a/ansible/playbooks/roles/containerd/templates/config.toml.j2 
b/ansible/playbooks/roles/containerd/templates/config.toml.j2 index 1434ec685d..25160cb4f3 100644 --- a/ansible/playbooks/roles/containerd/templates/config.toml.j2 +++ b/ansible/playbooks/roles/containerd/templates/config.toml.j2 @@ -55,7 +55,7 @@ oom_score = 0 stream_idle_timeout = "4h0m0s" enable_selinux = false selinux_category_range = 1024 - sandbox_image = "k8s.gcr.io/pause:3.2" + sandbox_image = "{{ image_registry_address }}/k8s.gcr.io/pause:3.5" stats_collect_period = 10 systemd_cgroup = false enable_tls_streaming = false diff --git a/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml b/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml index 2be8f37ee8..cb7e2a723e 100644 --- a/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml +++ b/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml @@ -26,12 +26,13 @@ register: modify_filebeat_yml - name: Append new field definition + when: k8s_as_cloud_service is not defined blockinfile: path: /etc/filebeat/fields.yml backup: true block: |2 - key: containerd - title: "Containerd" + title: "containerd" description: > Reading data from containerd log filepath. short_config: true @@ -41,13 +42,13 @@ description: > Contains extra fields for containerd logs. 
fields: - - name: container.pod.name + - name: kubernetes.pod.name type: text format: string - - name: container.uuid + - name: kubernetes.namespace type: text format: string - - name: container.namespace + - name: container.id type: text format: string diff --git a/ansible/playbooks/roles/filebeat/tasks/main.yml b/ansible/playbooks/roles/filebeat/tasks/main.yml index 73124a9a21..4cdfe32550 100644 --- a/ansible/playbooks/roles/filebeat/tasks/main.yml +++ b/ansible/playbooks/roles/filebeat/tasks/main.yml @@ -8,19 +8,13 @@ name: opendistro_for_logging_vars when: groups.logging is defined -- name: Filebeat as DaemonSet +- name: Include installation tasks for Filebeat as DaemonSet for "k8s as cloud service" + include_tasks: install-filebeat-as-daemonset.yml when: - k8s_as_cloud_service is defined - k8s_as_cloud_service - groups.logging is defined - groups.logging | length > 0 - block: - - name: Include installation tasks for Filebeat as DaemonSet for "k8s as cloud service" - include_tasks: install-filebeat-as-daemonset.yml - - - name: Set fact filebeat_as_daemonset - set_fact: - filebeat_as_daemonset: true - name: Include auditd configuration tasks include_tasks: configure-auditd.yml diff --git a/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 b/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 index 52b3f2c8e3..831897a347 100644 --- a/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 +++ b/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 @@ -64,12 +64,10 @@ filebeatConfig: processors: - add_kubernetes_metadata: - in_cluster: {{ 'true' if filebeat_as_daemonset is defined else 'false' }} - - dissect: - tokenizer: "/var/log/containers/%{container.pod.name}_%{container.namespace}_%{container.uuid}.log" - field: "log.file.path" - target_prefix: "" - overwrite_keys: true + in_cluster: true + matchers: + - logs_path: + logs_path: "/var/log/containers/" {% endif %} {# -------------------------- 
Filebeat modules -------------------------- #} diff --git a/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 b/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 index 4b2586d5b2..a6715edf20 100644 --- a/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 +++ b/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 @@ -123,10 +123,8 @@ filebeat.inputs: {% endif %} processors: - - add_kubernetes_metadata: - in_cluster: {{ 'true' if filebeat_as_daemonset is defined else 'false' }} - dissect: - tokenizer: "/var/log/containers/%{container.pod.name}_%{container.namespace}_%{container.uuid}.log" + tokenizer: "/var/log/containers/%{kubernetes.pod.name}_%{kubernetes.namespace}_%{container.id}.log" field: "log.file.path" target_prefix: "" overwrite_keys: true diff --git a/ansible/playbooks/roles/kubernetes_master/tasks/generate-certificates.yml b/ansible/playbooks/roles/kubernetes_master/tasks/generate-certificates.yml index 9e01e11d8a..53837e80b9 100644 --- a/ansible/playbooks/roles/kubernetes_master/tasks/generate-certificates.yml +++ b/ansible/playbooks/roles/kubernetes_master/tasks/generate-certificates.yml @@ -212,7 +212,7 @@ - name: Restart systemd services when: - services_to_restart is defined - - services_to_restart | difference(['docker', 'kubelet']) | length == 0 + - services_to_restart | difference(['containerd', 'kubelet']) | length == 0 block: - name: Restart services systemd: diff --git a/ansible/playbooks/roles/kubernetes_master/tasks/main.yml b/ansible/playbooks/roles/kubernetes_master/tasks/main.yml index e3a32f6d37..e74c1285f6 100644 --- a/ansible/playbooks/roles/kubernetes_master/tasks/main.yml +++ b/ansible/playbooks/roles/kubernetes_master/tasks/main.yml @@ -86,10 +86,8 @@ - name: Restart apiserver shell: |- set -o pipefail && \ - docker ps \ - --filter 'name=kube-apiserver_kube-apiserver' \ - --format '{{ "{{.ID}}" }}' \ - | xargs --no-run-if-empty docker kill + crictl ps --name='kube-apiserver' -q \ + | xargs 
--no-run-if-empty crictl stop --timeout=0 args: executable: /bin/bash @@ -109,7 +107,7 @@ vars: valid_days: "{{ specification.advanced.certificates.expiration_days }}" services_to_restart: - - docker + - containerd include_tasks: generate-certificates.yml # kubeadm-config.yml can appear not only on 'automation_designated_master' in 2 cases: diff --git a/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml b/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml index 91177d484a..eaad9e045a 100644 --- a/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml +++ b/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml @@ -1,16 +1,16 @@ --- - name: Restart controller-manager - shell: | - crictl ps \ - --name='kube-controller-manager' -q \ + shell: >- + set -o pipefail && + crictl ps --name='kube-controller-manager' -q \ | xargs --no-run-if-empty crictl stop --timeout=0 args: executable: /bin/bash - name: Restart scheduler - shell: | - crictl ps \ - --name='kube-scheduler' -q \ + shell: >- + set -o pipefail && + crictl ps --name='kube-scheduler' -q \ | xargs --no-run-if-empty crictl stop --timeout=0 args: executable: /bin/bash @@ -21,10 +21,10 @@ state: restarted - name: Restart kube-proxy - shell: | - crictl ps \ - --name='kube-proxy' -q \ - | xargs --no-run-if-empty crictl stop --timeout=0 + shell: >- + set -o pipefail && + crictl ps --name='kube-proxy' -q \ + | xargs --no-run-if-empty crictl stop --timeout=0 args: executable: /bin/bash diff --git a/docs/changelogs/CHANGELOG-2.0.md b/docs/changelogs/CHANGELOG-2.0.md index 6a8ed04530..af6a1c2dc8 100644 --- a/docs/changelogs/CHANGELOG-2.0.md +++ b/docs/changelogs/CHANGELOG-2.0.md @@ -50,5 +50,7 @@ ### Breaking changes - Upgrade of Terraform components in issue [#2825](https://github.com/epiphany-platform/epiphany/issues/2825) and [#2853](https://github.com/epiphany-platform/epiphany/issues/2853) will make running re-apply with infrastructure break on existing 1.x clusters. 
The advice is to deploy a new cluster and migrate data. If needed a manual upgrade path is described [here.](../home/howto/UPGRADE.md#terraform-upgrade-from-epiphany-1.x-to-2.x) +- Kubernetes container runtime changed. Dockershim and Docker are no longer on Kubernetes hosts. +- Filebeat docker input replaced by container input. New field provided for Filebeat as system service installation: container.id. Field kubernetes.container.name is no longer valid. ### Known issues diff --git a/docs/home/COMPONENTS.md b/docs/home/COMPONENTS.md index 5e77ef75f2..7328ff5628 100644 --- a/docs/home/COMPONENTS.md +++ b/docs/home/COMPONENTS.md @@ -9,7 +9,7 @@ Note that versions are default versions and can be changed in certain cases thro | Kubernetes | 1.22.4 | https://github.com/kubernetes/kubernetes | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | | Kubernetes Dashboard | 2.3.1 | https://github.com/kubernetes/dashboard | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | | Kubernetes metrics-scraper | 1.0.7 | https://github.com/kubernetes-sigs/dashboard-metrics-scraper | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | -| Containerd | 1.4.12 | https://github.com/containerd/containerd | [Apache License 2.0](https://github.com/containerd/containerd/blob/main/LICENSE) | +| containerd | 1.4.12 | https://github.com/containerd/containerd | [Apache License 2.0](https://github.com/containerd/containerd/blob/main/LICENSE) | | Calico | 3.20.3 | https://github.com/projectcalico/calico | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | | Flannel | 0.14.0 | https://github.com/coreos/flannel/ | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | | Canal | 3.20.3 | https://github.com/projectcalico/calico | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) | From b5b004a42ed28f93ecd1c82f6b376c0ff0a98cf3 Mon Sep 17 00:00:00 2001 From: rafzei Date: Tue, 18 Jan 2022 14:48:46 +0100 Subject: 
[PATCH 14/22] Adjust filebeat.yml after docker removal --- docs/changelogs/CHANGELOG-2.0.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/changelogs/CHANGELOG-2.0.md b/docs/changelogs/CHANGELOG-2.0.md index af6a1c2dc8..b6def088a5 100644 --- a/docs/changelogs/CHANGELOG-2.0.md +++ b/docs/changelogs/CHANGELOG-2.0.md @@ -10,7 +10,8 @@ - [#2950](https://github.com/epiphany-platform/epiphany/issues/2950) - CLI refactor to make it more consistant - [#2844](https://github.com/epiphany-platform/epiphany/issues/2844) - Refactor K8s upgrade task in order to simplify its flow - [#2716](https://github.com/epiphany-platform/epiphany/issues/2716) - Change container runtime to containerd - +- [#2701](https://github.com/epiphany-platform/epiphany/issues/2701) - Epicli prepare - generate files in separate directory +- [#2716](https://github.com/epiphany-platform/epiphany/issues/2716) - Change container runtime to containerd ### Fixed - [#2653](https://github.com/epiphany-platform/epiphany/issues/2653) - Epicli is failing in air-gapped infra mode From fbec80d2febf18914cc603917b8518ccabc0ee4a Mon Sep 17 00:00:00 2001 From: rafzei Date: Tue, 1 Feb 2022 22:27:04 +0100 Subject: [PATCH 15/22] Changes after review --- ansible/playbooks/roles/containerd/tasks/main.yml | 2 +- .../roles/containerd/templates/config.toml.j2 | 1 + .../roles/docker/tasks/remove-docker.yml | 4 +++- docs/changelogs/CHANGELOG-2.0.md | 3 +-- .../kubernetes_master/kubernetes_master_spec.rb | 15 +++++++++++++-- .../spec/kubernetes_node/kubernetes_node_spec.rb | 4 ++++ 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/ansible/playbooks/roles/containerd/tasks/main.yml b/ansible/playbooks/roles/containerd/tasks/main.yml index 3d1e8a15b4..fb19d51222 100644 --- a/ansible/playbooks/roles/containerd/tasks/main.yml +++ b/ansible/playbooks/roles/containerd/tasks/main.yml @@ -58,7 +58,7 @@ path: /var/lib/kubelet/kubeadm-flags.env regexp: '{{ item.regexp }}' replace: '{{ item.replace }}' - 
backup: yes + backup: true loop: - { regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(--container-runtime=[a-zA-Z0-9_]+)(.*)', replace: '\1\2--container-runtime=remote\4' } - { regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(--container-runtime-endpoint=.*\.sock)(.*)', replace: '\1\2--container-runtime-endpoint=/run/containerd/containerd.sock\4' } diff --git a/ansible/playbooks/roles/containerd/templates/config.toml.j2 b/ansible/playbooks/roles/containerd/templates/config.toml.j2 index 25160cb4f3..52893a8b8c 100644 --- a/ansible/playbooks/roles/containerd/templates/config.toml.j2 +++ b/ansible/playbooks/roles/containerd/templates/config.toml.j2 @@ -89,6 +89,7 @@ oom_score = 0 privileged_without_host_devices = false base_runtime_spec = "" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + SystemdCgroup = true [plugins."io.containerd.grpc.v1.cri".cni] bin_dir = "/opt/cni/bin" conf_dir = "/etc/cni/net.d" diff --git a/ansible/playbooks/roles/docker/tasks/remove-docker.yml b/ansible/playbooks/roles/docker/tasks/remove-docker.yml index 0a6f952112..3e7c8e7939 100644 --- a/ansible/playbooks/roles/docker/tasks/remove-docker.yml +++ b/ansible/playbooks/roles/docker/tasks/remove-docker.yml @@ -9,7 +9,9 @@ name: docker state: stopped enabled: no - when: "'docker.service' in ansible_facts.services" + when: + - "'docker.service' in ansible_facts.services" + - ansible_facts.services['docker.service']['status'] != 'not-found' - name: Remove Docker packages package: diff --git a/docs/changelogs/CHANGELOG-2.0.md b/docs/changelogs/CHANGELOG-2.0.md index b6def088a5..af6a1c2dc8 100644 --- a/docs/changelogs/CHANGELOG-2.0.md +++ b/docs/changelogs/CHANGELOG-2.0.md @@ -10,8 +10,7 @@ - [#2950](https://github.com/epiphany-platform/epiphany/issues/2950) - CLI refactor to make it more consistant - [#2844](https://github.com/epiphany-platform/epiphany/issues/2844) - Refactor K8s upgrade task in order to simplify its flow - [#2716](https://github.com/epiphany-platform/epiphany/issues/2716) 
- Change container runtime to containerd -- [#2701](https://github.com/epiphany-platform/epiphany/issues/2701) - Epicli prepare - generate files in separate directory -- [#2716](https://github.com/epiphany-platform/epiphany/issues/2716) - Change container runtime to containerd + ### Fixed - [#2653](https://github.com/epiphany-platform/epiphany/issues/2653) - Epicli is failing in air-gapped infra mode diff --git a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb index 6759ee3ca4..f9a2d33a46 100644 --- a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb +++ b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb @@ -187,7 +187,6 @@ its(:exit_status) { should eq 0 } end end - end describe 'Check the kubelet cgroup driver' do @@ -215,12 +214,24 @@ describe command("kubectl get nodes -o jsonpath='{.items[].status.nodeInfo.containerRuntimeVersion}'") do its(:stdout) { should include('containerd://1.4.12') } end + describe file('/etc/containerd/config.toml') do + let(:disable_sudo) { false } + its(:content) { should match(/SystemdCgroup = true/) } + end end describe 'Check the OCI-spec' do describe command('crictl --runtime-endpoint unix:///run/containerd/containerd.sock info') do let(:disable_sudo) { false } - its(:stdout) { should contain('\"defaultRuntimeName\": \"runc\"') } + its(:stdout) { should match('\"defaultRuntimeName\": \"runc\"') } + end +end + +describe 'Check the kubelet cgroup driver' do + describe file('/var/lib/kubelet/config.yaml') do + let(:disable_sudo) { false } + its(:content_as_yaml) { should include('cgroupDriver' => 'systemd') } + its(:content_as_yaml) { should_not include('cgroupDriver' => 'cgroupfs') } end end diff --git a/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb b/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb index 3e80e995e6..25b74aef7a 100644 --- a/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb +++ 
b/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb @@ -5,6 +5,10 @@ let(:disable_sudo) { false } its(:stdout) { should include('RuntimeName: containerd') } end + describe file('/etc/containerd/config.toml') do + let(:disable_sudo) { false } + its(:content) { should match(/SystemdCgroup = true/) } + end end describe 'Check the kubelet cgroup driver' do From 8abe62078c09c19239dbe10b9b0bffb85cd9ef10 Mon Sep 17 00:00:00 2001 From: rafzei Date: Thu, 10 Feb 2022 13:45:24 +0100 Subject: [PATCH 16/22] Add crictl config file, add containerd version in docker role --- ansible/playbooks/roles/containerd/files/crictl.yaml | 1 + ansible/playbooks/roles/containerd/tasks/main.yml | 8 ++++++++ ansible/playbooks/roles/docker/tasks/main.yml | 10 +++++++--- docs/changelogs/CHANGELOG-2.0.md | 2 +- .../spec/kubernetes_master/kubernetes_master_spec.rb | 4 ++-- .../spec/spec/kubernetes_node/kubernetes_node_spec.rb | 2 +- 6 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 ansible/playbooks/roles/containerd/files/crictl.yaml diff --git a/ansible/playbooks/roles/containerd/files/crictl.yaml b/ansible/playbooks/roles/containerd/files/crictl.yaml new file mode 100644 index 0000000000..8bfa215950 --- /dev/null +++ b/ansible/playbooks/roles/containerd/files/crictl.yaml @@ -0,0 +1 @@ +runtime-endpoint: unix:///run/containerd/containerd.sock diff --git a/ansible/playbooks/roles/containerd/tasks/main.yml b/ansible/playbooks/roles/containerd/tasks/main.yml index fb19d51222..20efe4463a 100644 --- a/ansible/playbooks/roles/containerd/tasks/main.yml +++ b/ansible/playbooks/roles/containerd/tasks/main.yml @@ -77,3 +77,11 @@ regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(\")$' replace: '\1\2 --container-runtime-endpoint=/run/containerd/containerd.sock"' when: kubelet_kubeadmn_args_content.find('--container-runtime-endpoint') == -1 + +- name: Provide crictl.yaml file + copy: + src: crictl.yaml + dest: /etc/crictl.yaml + owner: root + group: root + mode: u=rw,go=r diff --git 
a/ansible/playbooks/roles/docker/tasks/main.yml b/ansible/playbooks/roles/docker/tasks/main.yml index aef03c16b2..ead5dae074 100644 --- a/ansible/playbooks/roles/docker/tasks/main.yml +++ b/ansible/playbooks/roles/docker/tasks/main.yml @@ -1,5 +1,9 @@ --- -# Docker (used by master & worker as dependency) +# Docker (used by image_registry) +- name: Include containerd defaults + include_vars: + file: roles/containerd/defaults/main.yml + name: containerd_defaults - name: Install Docker packages package: @@ -8,12 +12,12 @@ vars: _packages: Debian: - - containerd.io + - containerd.io={{ containerd_defaults.containerd_version }}-* - docker-ce-cli={{ docker_version.Debian }} - docker-ce-rootless-extras={{ docker_version.Debian }} - docker-ce={{ docker_version.Debian }} RedHat: - - containerd.io + - containerd.io-{{ containerd_defaults.containerd_version }} - docker-ce-cli-{{ docker_version.RedHat }} - docker-ce-rootless-extras-{{ docker_version.RedHat }} - docker-ce-{{ docker_version.RedHat }} diff --git a/docs/changelogs/CHANGELOG-2.0.md b/docs/changelogs/CHANGELOG-2.0.md index af6a1c2dc8..3837f479d6 100644 --- a/docs/changelogs/CHANGELOG-2.0.md +++ b/docs/changelogs/CHANGELOG-2.0.md @@ -51,6 +51,6 @@ - Upgrade of Terraform components in issue [#2825](https://github.com/epiphany-platform/epiphany/issues/2825) and [#2853](https://github.com/epiphany-platform/epiphany/issues/2853) will make running re-apply with infrastructure break on existing 1.x clusters. The advice is to deploy a new cluster and migrate data. If needed a manual upgrade path is described [here.](../home/howto/UPGRADE.md#terraform-upgrade-from-epiphany-1.x-to-2.x) - Kubernetes container runtime changed. Dockershim and Docker are no longer on Kubernetes hosts. -- Filebeat docker input replaced by container input. New field provided for Filebeat as system service installation: container.id. Field kubernetes.container.name is no longer valid. +- Filebeat `docker` input replaced by `container` input.
New field provided for Filebeat as system service installation: `container.id`. Field `kubernetes.container.name` is no longer valid. ### Known issues diff --git a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb index f9a2d33a46..5d07e330a1 100644 --- a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb +++ b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb @@ -207,7 +207,7 @@ end describe 'Check the containerd' do - describe command('crictl --runtime-endpoint unix:///run/containerd/containerd.sock version') do + describe command('crictl version') do let(:disable_sudo) { false } its(:stdout) { should include('RuntimeName: containerd') } end @@ -221,7 +221,7 @@ end describe 'Check the OCI-spec' do - describe command('crictl --runtime-endpoint unix:///run/containerd/containerd.sock info') do + describe command('crictl info') do let(:disable_sudo) { false } its(:stdout) { should match('\"defaultRuntimeName\": \"runc\"') } end diff --git a/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb b/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb index 25b74aef7a..3990bda5a6 100644 --- a/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb +++ b/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb @@ -1,7 +1,7 @@ require 'spec_helper' describe 'Check the containerd' do - describe command('crictl --runtime-endpoint unix:///run/containerd/containerd.sock version') do + describe command('crictl version') do let(:disable_sudo) { false } its(:stdout) { should include('RuntimeName: containerd') } end From 65d8f41721d216cca5f39d88ae13c627ff7937a1 Mon Sep 17 00:00:00 2001 From: rafzei Date: Thu, 10 Feb 2022 14:37:31 +0100 Subject: [PATCH 17/22] Add ansible_managed comment --- ansible/playbooks/roles/containerd/files/containerd.conf | 1 + ansible/playbooks/roles/containerd/files/crictl.yaml | 1 + 2 files changed, 2 insertions(+) diff --git 
a/ansible/playbooks/roles/containerd/files/containerd.conf b/ansible/playbooks/roles/containerd/files/containerd.conf index 43dd5433bc..60514b5ed2 100644 --- a/ansible/playbooks/roles/containerd/files/containerd.conf +++ b/ansible/playbooks/roles/containerd/files/containerd.conf @@ -1,2 +1,3 @@ +# Ansible managed overlay br_netfilter diff --git a/ansible/playbooks/roles/containerd/files/crictl.yaml b/ansible/playbooks/roles/containerd/files/crictl.yaml index 8bfa215950..e081be4438 100644 --- a/ansible/playbooks/roles/containerd/files/crictl.yaml +++ b/ansible/playbooks/roles/containerd/files/crictl.yaml @@ -1 +1,2 @@ +# Ansible managed runtime-endpoint: unix:///run/containerd/containerd.sock From 435a639a154556c98e20f35aad3992cdb07518f4 Mon Sep 17 00:00:00 2001 From: webdler Date: Tue, 15 Feb 2022 15:23:07 +0100 Subject: [PATCH 18/22] Enable containerd service, Add fix for single machine --- .../roles/containerd/handlers/main.yml | 1 + .../tasks/configure-kubelet-env.yml | 39 +++++++++++++++++++ .../playbooks/roles/containerd/tasks/main.yml | 38 ------------------ .../tasks/configure-kubelet.yml | 7 ++++ 4 files changed, 47 insertions(+), 38 deletions(-) create mode 100644 ansible/playbooks/roles/containerd/tasks/configure-kubelet-env.yml diff --git a/ansible/playbooks/roles/containerd/handlers/main.yml b/ansible/playbooks/roles/containerd/handlers/main.yml index 49f7022e0e..c4258bc0d4 100644 --- a/ansible/playbooks/roles/containerd/handlers/main.yml +++ b/ansible/playbooks/roles/containerd/handlers/main.yml @@ -2,6 +2,7 @@ systemd: name: containerd state: restarted + enabled: true - name: Restart kubelet systemd: diff --git a/ansible/playbooks/roles/containerd/tasks/configure-kubelet-env.yml b/ansible/playbooks/roles/containerd/tasks/configure-kubelet-env.yml new file mode 100644 index 0000000000..47f38b53f0 --- /dev/null +++ b/ansible/playbooks/roles/containerd/tasks/configure-kubelet-env.yml @@ -0,0 +1,39 @@ +# To be replaced by filter plugin 
(https://github.com/epiphany-platform/epiphany/issues/2943) +- name: Reconfigure kubelet args + notify: + - Restart containerd + - Restart kubelet + block: + - name: Get kubeadm-flags.env file content + slurp: + src: /var/lib/kubelet/kubeadm-flags.env + register: kubelet_kubeadm_args + + - name: Set kubelet_kubeadmn_args_content + set_fact: + kubelet_kubeadmn_args_content: "{{ kubelet_kubeadm_args.content | b64decode }}" + + - name: Modify container-runtime + replace: + path: /var/lib/kubelet/kubeadm-flags.env + regexp: '{{ item.regexp }}' + replace: '{{ item.replace }}' + backup: true + loop: + - { regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(--container-runtime=[a-zA-Z0-9_]+)(.*)', replace: '\1\2--container-runtime=remote\4' } + - { regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(--container-runtime-endpoint=.*\.sock)(.*)', replace: '\1\2--container-runtime-endpoint=/run/containerd/containerd.sock\4' } + when: kubelet_kubeadmn_args_content.find('--container-runtime') != -1 + + - name: Append container-runtime to kubelet config + replace: + path: /var/lib/kubelet/kubeadm-flags.env + regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(\")$' + replace: '\1\2 --container-runtime=remote"' + when: kubelet_kubeadmn_args_content.find('--container-runtime') == -1 + + - name: Append container-runtime-endpoint to kubelet config + replace: + path: /var/lib/kubelet/kubeadm-flags.env + regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(\")$' + replace: '\1\2 --container-runtime-endpoint=/run/containerd/containerd.sock"' + when: kubelet_kubeadmn_args_content.find('--container-runtime-endpoint') == -1 diff --git a/ansible/playbooks/roles/containerd/tasks/main.yml b/ansible/playbooks/roles/containerd/tasks/main.yml index 20efe4463a..f8e314d822 100644 --- a/ansible/playbooks/roles/containerd/tasks/main.yml +++ b/ansible/playbooks/roles/containerd/tasks/main.yml @@ -40,44 +40,6 @@ - name: Configure containerd include_tasks: configure-containerd.yml -# To be replaced by filter plugin 
(https://github.com/epiphany-platform/epiphany/issues/2943) -- name: Reconfigure kubelet args - when: is_upgrade_run - block: - - name: Get kubeadm-flags.env file content - slurp: - src: /var/lib/kubelet/kubeadm-flags.env - register: kubelet_kubeadm_args - - - name: Set kubelet_kubeadmn_args_content - set_fact: - kubelet_kubeadmn_args_content: "{{ kubelet_kubeadm_args.content | b64decode }}" - - - name: Modify container-runtime - replace: - path: /var/lib/kubelet/kubeadm-flags.env - regexp: '{{ item.regexp }}' - replace: '{{ item.replace }}' - backup: true - loop: - - { regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(--container-runtime=[a-zA-Z0-9_]+)(.*)', replace: '\1\2--container-runtime=remote\4' } - - { regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(--container-runtime-endpoint=.*\.sock)(.*)', replace: '\1\2--container-runtime-endpoint=/run/containerd/containerd.sock\4' } - when: kubelet_kubeadmn_args_content.find('--container-runtime') != -1 - - - name: Append container-runtime to kubelet config - replace: - path: /var/lib/kubelet/kubeadm-flags.env - regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(\")$' - replace: '\1\2 --container-runtime=remote"' - when: kubelet_kubeadmn_args_content.find('--container-runtime') == -1 - - - name: Append container-runtime-endpoint to kubelet config - replace: - path: /var/lib/kubelet/kubeadm-flags.env - regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(\")$' - replace: '\1\2 --container-runtime-endpoint=/run/containerd/containerd.sock"' - when: kubelet_kubeadmn_args_content.find('--container-runtime-endpoint') == -1 - - name: Provide crictl.yaml file copy: src: crictl.yaml diff --git a/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml b/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml index 9a73fe0ee5..7b4d0e1132 100644 --- a/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml +++ b/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml @@ -36,6 +36,13 @@ backup: true register: 
apply_kubelet_custom_config +- name: Configure kubelet-env + include_role: + name: containerd + tasks_from: configure-kubelet-env + when: + - is_upgrade_run or inventory_hostname in groups.image_registry + - name: Restart kubelet service systemd: name: kubelet From 0c48e9ddedf0fde05375226b90a68b120beba3b8 Mon Sep 17 00:00:00 2001 From: webdler Date: Tue, 15 Feb 2022 16:57:33 +0100 Subject: [PATCH 19/22] Add spec tests for containerd service
---
# Review notes (placed after the three-dash line, so not part of the commit message):
# - Adds the same serverspec block to kubernetes_master_spec.rb and kubernetes_node_spec.rb:
#   service('containerd') is expected to be both enabled and running.
# - The block follows the `it { should ... }` form of the neighbouring kubelet service check.
.../spec/spec/kubernetes_master/kubernetes_master_spec.rb | 7 +++++++ tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb index 5d07e330a1..c3750b39d3 100644 --- a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb +++ b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb @@ -11,6 +11,13 @@ end end +describe 'Check if containerd service is enabled/running' do + describe service('containerd') do + it { should be_enabled } + it { should be_running } + end +end + describe 'Checking if kubelet service is running' do describe service('kubelet') do it { should be_enabled } diff --git a/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb b/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb index 3990bda5a6..93b4b61a42 100644 --- a/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb +++ b/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb @@ -1,5 +1,12 @@ require 'spec_helper' +describe 'Check if containerd service is enabled/running' do + describe service('containerd') do + it { should be_enabled } + it { should be_running } + end +end + describe 'Check the containerd' do describe command('crictl version') do let(:disable_sudo) { false } From 4cede5c4f48aec87b64378a751b82107afc125c3 Mon Sep 17 00:00:00 2001 From: webdler Date: Wed, 16 Feb 2022 11:42:57 +0100 Subject: [PATCH 20/22] Add annotation to k8s/masterN, fix
for upgrade
---
# Review notes (placed after the three-dash line, so not part of the commit message):
# - containerd/tasks/main.yml: includes configure-kubelet-env.yml, guarded by `when: is_upgrade_run`.
# - kubernetes_common/tasks/configure-kubelet.yml: the include_role condition drops `is_upgrade_run or`
#   and keeps only `inventory_hostname in groups.image_registry`.
# - upgrade-masterN.yml: adds a `kubectl annotate node {{ inventory_hostname }} --overwrite` task that sets
#   kubeadm.alpha.kubernetes.io/cri-socket to unix:///run/containerd/containerd.sock, placed directly
#   before the "Upgrade master" task. The new task is labelled "k8s/master0" although it is added to the
#   masterN file.
ansible/playbooks/roles/containerd/tasks/main.yml | 4 ++++ .../roles/kubernetes_common/tasks/configure-kubelet.yml | 2 +- .../roles/upgrade/tasks/kubernetes/upgrade-masterN.yml | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ansible/playbooks/roles/containerd/tasks/main.yml b/ansible/playbooks/roles/containerd/tasks/main.yml index f8e314d822..1262eba0ab 100644 --- a/ansible/playbooks/roles/containerd/tasks/main.yml +++ b/ansible/playbooks/roles/containerd/tasks/main.yml @@ -40,6 +40,10 @@ - name: Configure containerd include_tasks: configure-containerd.yml +- name: Configure kubelet-env + include_tasks: configure-kubelet-env.yml + when: is_upgrade_run + - name: Provide crictl.yaml file copy: src: crictl.yaml diff --git a/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml b/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml index 7b4d0e1132..5334f35e0e 100644 --- a/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml +++ b/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml @@ -41,7 +41,7 @@ name: containerd tasks_from: configure-kubelet-env when: - - is_upgrade_run or inventory_hostname in groups.image_registry + - inventory_hostname in groups.image_registry - name: Restart kubelet service systemd: diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml index 099a6b8626..cd34a6eab4 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml @@ -7,6 +7,10 @@ - name: k8s/masterN | Install kubeadm include_tasks: "{{ ansible_os_family }}/install-kubeadm.yml" + - name: k8s/master0 | Add k8s annotation for containerd + command: >- + kubectl annotate node {{ inventory_hostname }} --overwrite kubeadm.alpha.kubernetes.io/cri-socket=unix:///run/containerd/containerd.sock + -
name: k8s/masterN | Upgrade master {{ inventory_hostname }} command: >- kubeadm upgrade node From 6ea36a8545e673a0402405576511d7456103ceee Mon Sep 17 00:00:00 2001 From: webdler Date: Wed, 16 Feb 2022 12:20:11 +0100 Subject: [PATCH 21/22] k8s/master0 -> k8s/masterN
---
# Review notes (placed after the three-dash line, so not part of the commit message):
# - Rename only: the annotation task in upgrade-masterN.yml changes its name prefix from
#   "k8s/master0" to "k8s/masterN"; the command itself appears as unchanged context.
.../roles/upgrade/tasks/kubernetes/upgrade-masterN.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml index cd34a6eab4..c615ecc858 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml @@ -7,7 +7,7 @@ - name: k8s/masterN | Install kubeadm include_tasks: "{{ ansible_os_family }}/install-kubeadm.yml" - - name: k8s/master0 | Add k8s annotation for containerd + - name: k8s/masterN | Add k8s annotation for containerd command: >- kubectl annotate node {{ inventory_hostname }} --overwrite kubeadm.alpha.kubernetes.io/cri-socket=unix:///run/containerd/containerd.sock From 1cc4d743c7b59a166a02a721a4b09a84970d87ca Mon Sep 17 00:00:00 2001 From: webdler Date: Thu, 17 Feb 2022 13:21:44 +0100 Subject: [PATCH 22/22] Add retry to kubectl command
---
# Review notes (placed after the three-dash line, so not part of the commit message):
# - backup-kubeadm-config.yml: `kubectl get cm kubeadm-config` is retried until the registered
#   result `is succeeded` (retries: 30, delay: 1).
# - upgrade-master0.yml and upgrade-masterN.yml: the `kubectl annotate node` task gains
#   `register: result` and the same until/retries/delay settings.
# - Each change carries the comment "Retries needed for HA deployment (random failures)".
.../upgrade/tasks/kubernetes/backup-kubeadm-config.yml | 5 +++++ .../roles/upgrade/tasks/kubernetes/upgrade-master0.yml | 6 ++++++ .../roles/upgrade/tasks/kubernetes/upgrade-masterN.yml | 6 ++++++ 3 files changed, 17 insertions(+) diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/backup-kubeadm-config.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/backup-kubeadm-config.yml index ad88bcd302..6f9b6c5e5f 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/backup-kubeadm-config.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/backup-kubeadm-config.yml @@ -1,10 +1,15 @@ --- # The kubeadm-config.yml file is no longer used during upgrade process, # but we keep it for backup and
reference purposes. +# Retries needed for HA deployment (random failures) - name: k8s/master | Collect kubeadm-config ConfigMap command: >- kubectl get cm kubeadm-config -o yaml -n kube-system register: kubeadm_config + until: + - kubeadm_config is succeeded + retries: 30 + delay: 1 - name: k8s/master | Save kubeadm-config ConfigMap to file copy: diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml index 2c32b4768b..8d447738ef 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml @@ -41,9 +41,15 @@ - name: k8s/master0 | Include set-cluster-version.yml include_tasks: set-cluster-version.yml # sets cluster_version + # Retries needed for HA deployment (random failures) - name: k8s/master0 | Add k8s annotation for containerd command: >- kubectl annotate node {{ inventory_hostname }} --overwrite kubeadm.alpha.kubernetes.io/cri-socket=unix:///run/containerd/containerd.sock + register: result + until: + - result is succeeded + retries: 30 + delay: 1 # Note: Usage of the --config flag for reconfiguring the cluster during upgrade is not recommended since v1.16 - name: k8s/master0 | Upgrade K8s cluster to v{{ version }} diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml index c615ecc858..4364b8066e 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml @@ -7,9 +7,15 @@ - name: k8s/masterN | Install kubeadm include_tasks: "{{ ansible_os_family }}/install-kubeadm.yml" + # Retries needed for HA deployment (random failures) - name: k8s/masterN | Add k8s annotation for containerd command: >- kubectl annotate node {{ inventory_hostname }} --overwrite 
kubeadm.alpha.kubernetes.io/cri-socket=unix:///run/containerd/containerd.sock + register: result + until: + - result is succeeded + retries: 30 + delay: 1 - name: k8s/masterN | Upgrade master {{ inventory_hostname }} command: >-