From d2ac0d973490b10a819b9f928e9daa7fe287e045 Mon Sep 17 00:00:00 2001 From: Rafal Zeidler Date: Thu, 17 Feb 2022 16:31:27 +0100 Subject: [PATCH] Containerd change (#2919) * Switch to containerd (#2769) * Add containerd * Switch to containerd if upgrade * Add changelog entry * Spec test update * Rebase CHANGELOG-2.0.md * Adjust filebeat.yml after docker removal * Adjust K8s cert renewal + fix in spec test * Change filebeat custom chart values and spec test, fix backup k8s * Doc update * Add extra condition for docker removal * Adjust filebeat.yml after docker removal * Changes after review * Doc update * Hardcode containerd version, update components.md * Changes after review * Adjust filebeat.yml after docker removal * Changes after review * Add crictl config file, add containerd version in docker role * Add ansible_managed comment * Enable containerd service, Add fix for single machine * Add spec tests for containerd service * Add annotation to k8s/masterN, fix for upgrade * k8s/master0 -> k8s/masterN * Add retry to kubectl command --- ansible/playbooks/kubernetes_master.yml | 2 +- ansible/playbooks/kubernetes_node.yml | 2 +- .../roles/backup/tasks/kubernetes.yml | 8 +- .../roles/containerd/defaults/main.yml | 2 + .../roles/containerd/files/containerd.conf | 3 + .../roles/containerd/files/crictl.yaml | 2 + .../roles/containerd/handlers/main.yml | 10 ++ .../containerd/tasks/configure-containerd.yml | 18 +++ .../tasks/configure-kubelet-env.yml | 39 ++++++ .../tasks/configure-prerequisites.yml | 16 +++ .../playbooks/roles/containerd/tasks/main.yml | 53 +++++++ .../roles/containerd/templates/config.toml.j2 | 131 ++++++++++++++++++ ansible/playbooks/roles/docker/tasks/main.yml | 10 +- .../roles/docker/tasks/remove-docker.yml | 33 +++++ .../filebeat/tasks/configure-filebeat.yml | 46 +++--- .../templates/custom-chart-values.yml.j2 | 37 ++--- .../templates/extra-dependencies.conf.j2 | 4 - .../roles/filebeat/templates/filebeat.yml.j2 | 23 +-- .../tasks/configure-kubelet.yml | 7 + .../roles/kubernetes_common/tasks/main.yml | 3 +- .../tasks/generate-certificates.yml | 2 +- .../roles/kubernetes_master/tasks/main.yml | 8 +- .../kubernetes_promote/handlers/main.yml | 27 ++-- .../centos-7/requirements.aarch64.txt | 2 +- .../centos-7/requirements.x86_64.txt | 2 +- .../redhat-7/requirements.x86_64.txt | 2 +- .../ubuntu-20.04/requirements.x86_64.txt | 2 +- .../roles/upgrade/tasks/image-registry.yml | 39 ------ .../kubernetes/backup-kubeadm-config.yml | 5 + .../tasks/kubernetes/patch-cgroup-driver.yml | 39 +----- .../tasks/kubernetes/upgrade-master0.yml | 17 ++- .../tasks/kubernetes/upgrade-masterN.yml | 17 ++- .../upgrade/tasks/kubernetes/upgrade-node.yml | 3 - ...let-and-docker.yml => restart-kubelet.yml} | 11 -- .../tasks/kubernetes/verify-upgrade.yml | 2 + ansible/playbooks/upgrade.yml | 4 +- docs/architecture/logical-view.md | 2 +- docs/changelogs/CHANGELOG-2.0.md | 4 +- docs/home/COMPONENTS.md | 1 + docs/home/howto/MAINTENANCE.md | 91 ++++++------ tests/spec/spec/filebeat/filebeat_spec.rb | 10 -- .../kubernetes_master_spec.rb | 44 ++++-- .../kubernetes_node/kubernetes_node_spec.rb | 31 +++-- 43 files changed, 534 insertions(+), 280 deletions(-) create mode 100644 ansible/playbooks/roles/containerd/defaults/main.yml create mode 100644 ansible/playbooks/roles/containerd/files/containerd.conf create mode 100644 ansible/playbooks/roles/containerd/files/crictl.yaml create mode 100644 ansible/playbooks/roles/containerd/handlers/main.yml create mode 100644 
ansible/playbooks/roles/containerd/tasks/configure-containerd.yml create mode 100644 ansible/playbooks/roles/containerd/tasks/configure-kubelet-env.yml create mode 100644 ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml create mode 100644 ansible/playbooks/roles/containerd/tasks/main.yml create mode 100644 ansible/playbooks/roles/containerd/templates/config.toml.j2 create mode 100644 ansible/playbooks/roles/docker/tasks/remove-docker.yml delete mode 100644 ansible/playbooks/roles/filebeat/templates/extra-dependencies.conf.j2 delete mode 100644 ansible/playbooks/roles/upgrade/tasks/image-registry.yml rename ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/{restart-kubelet-and-docker.yml => restart-kubelet.yml} (58%) diff --git a/ansible/playbooks/kubernetes_master.yml b/ansible/playbooks/kubernetes_master.yml index b4df086f6b..5dcfc49be6 100644 --- a/ansible/playbooks/kubernetes_master.yml +++ b/ansible/playbooks/kubernetes_master.yml @@ -8,7 +8,7 @@ become_method: sudo pre_tasks: - import_role: - name: docker + name: containerd tasks_from: main - import_role: name: kubernetes_common diff --git a/ansible/playbooks/kubernetes_node.yml b/ansible/playbooks/kubernetes_node.yml index 2c5623714a..a5f3fb831e 100644 --- a/ansible/playbooks/kubernetes_node.yml +++ b/ansible/playbooks/kubernetes_node.yml @@ -20,7 +20,7 @@ become_method: sudo pre_tasks: - import_role: - name: docker + name: containerd tasks_from: main - import_role: name: kubernetes_common diff --git a/ansible/playbooks/roles/backup/tasks/kubernetes.yml b/ansible/playbooks/roles/backup/tasks/kubernetes.yml index 1a0062dae7..1d27b640f8 100644 --- a/ansible/playbooks/roles/backup/tasks/kubernetes.yml +++ b/ansible/playbooks/roles/backup/tasks/kubernetes.yml @@ -49,11 +49,13 @@ - name: Save etcd snapshot shell: | - docker run \ - -v "{{ backup_temp_dir.path }}/:/backup/" \ - --network host \ + ctr --namespace k8s.io \ + run \ + --mount type=bind,src={{ backup_temp_dir.path }}/,dst=/backup/,options=rbind:rw \ + --net-host \ --env ETCDCTL_API=3 \ --rm "{{ etcd_image_name.stdout | trim }}" \ + etcd \ etcdctl \ --endpoints https://127.0.0.1:2379 \ --cacert /backup/pki/etcd/ca.crt \ diff --git a/ansible/playbooks/roles/containerd/defaults/main.yml b/ansible/playbooks/roles/containerd/defaults/main.yml new file mode 100644 index 0000000000..f2fc92a8b4 --- /dev/null +++ b/ansible/playbooks/roles/containerd/defaults/main.yml @@ -0,0 +1,2 @@ +--- +containerd_version: "1.4.12" diff --git a/ansible/playbooks/roles/containerd/files/containerd.conf b/ansible/playbooks/roles/containerd/files/containerd.conf new file mode 100644 index 0000000000..60514b5ed2 --- /dev/null +++ b/ansible/playbooks/roles/containerd/files/containerd.conf @@ -0,0 +1,3 @@ +# Ansible managed +overlay +br_netfilter diff --git a/ansible/playbooks/roles/containerd/files/crictl.yaml b/ansible/playbooks/roles/containerd/files/crictl.yaml new file mode 100644 index 0000000000..e081be4438 --- /dev/null +++ b/ansible/playbooks/roles/containerd/files/crictl.yaml @@ -0,0 +1,2 @@ +# Ansible managed +runtime-endpoint: unix:///run/containerd/containerd.sock diff --git a/ansible/playbooks/roles/containerd/handlers/main.yml b/ansible/playbooks/roles/containerd/handlers/main.yml new file mode 100644 index 0000000000..c4258bc0d4 --- /dev/null +++ b/ansible/playbooks/roles/containerd/handlers/main.yml @@ -0,0 +1,10 @@ +- name: Restart containerd + systemd: + name: containerd + state: restarted + enabled: true + +- name: Restart kubelet + systemd: + name: kubelet + 
    state: restarted
diff --git a/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml b/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml
new file mode 100644
index 0000000000..5d1ada26c7
--- /dev/null
+++ b/ansible/playbooks/roles/containerd/tasks/configure-containerd.yml
@@ -0,0 +1,18 @@
+---
+- name: Create containerd dir
+  file:
+    path: /etc/containerd
+    state: directory
+    owner: root
+    group: root
+    mode: u=rwx,go=rx
+
+- name: Provide containerd config
+  template:
+    src: config.toml.j2
+    dest: /etc/containerd/config.toml
+    mode: u=rw,go=
+    owner: root
+    group: root
+  notify:
+    - Restart containerd
diff --git a/ansible/playbooks/roles/containerd/tasks/configure-kubelet-env.yml b/ansible/playbooks/roles/containerd/tasks/configure-kubelet-env.yml
new file mode 100644
index 0000000000..47f38b53f0
--- /dev/null
+++ b/ansible/playbooks/roles/containerd/tasks/configure-kubelet-env.yml
@@ -0,0 +1,39 @@
+# To be replaced by filter plugin (https://github.com/epiphany-platform/epiphany/issues/2943)
+- name: Reconfigure kubelet args
+  notify:
+    - Restart containerd
+    - Restart kubelet
+  block:
+    - name: Get kubeadm-flags.env file content
+      slurp:
+        src: /var/lib/kubelet/kubeadm-flags.env
+      register: kubelet_kubeadm_args
+
+    - name: Set kubelet_kubeadm_args_content
+      set_fact:
+        kubelet_kubeadm_args_content: "{{ kubelet_kubeadm_args.content | b64decode }}"
+
+    - name: Modify container-runtime
+      replace:
+        path: /var/lib/kubelet/kubeadm-flags.env
+        regexp: '{{ item.regexp }}'
+        replace: '{{ item.replace }}'
+        backup: true
+      loop:
+        - { regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(--container-runtime=[a-zA-Z0-9_]+)(.*)', replace: '\1\2--container-runtime=remote\4' }
+        - { regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(--container-runtime-endpoint=.*\.sock)(.*)', replace: '\1\2--container-runtime-endpoint=/run/containerd/containerd.sock\4' }
+      when: kubelet_kubeadm_args_content.find('--container-runtime') != -1
+
+    - name: Append container-runtime to kubelet config
+      replace:
+        path: /var/lib/kubelet/kubeadm-flags.env
+        regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(\")$'
+        replace: '\1\2 --container-runtime=remote"'
+      when: kubelet_kubeadm_args_content.find('--container-runtime') == -1
+
+    - name: Append container-runtime-endpoint to kubelet config
+      replace:
+        path: /var/lib/kubelet/kubeadm-flags.env
+        regexp: '^(KUBELET_KUBEADM_ARGS=\")(.*)(\")$'
+        replace: '\1\2 --container-runtime-endpoint=/run/containerd/containerd.sock"'
+      when: kubelet_kubeadm_args_content.find('--container-runtime-endpoint') == -1
diff --git a/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml b/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml
new file mode 100644
index 0000000000..c65330cede
--- /dev/null
+++ b/ansible/playbooks/roles/containerd/tasks/configure-prerequisites.yml
@@ -0,0 +1,16 @@
+---
+- name: Provide containerd.conf file
+  copy:
+    src: containerd.conf
+    dest: /etc/modules-load.d/containerd.conf
+    owner: root
+    group: root
+    mode: u=rw,go=
+
+- name: Load modules
+  modprobe:
+    name: "{{ item }}"
+    state: present
+  loop:
+    - overlay
+    - br_netfilter
diff --git a/ansible/playbooks/roles/containerd/tasks/main.yml b/ansible/playbooks/roles/containerd/tasks/main.yml
new file mode 100644
index 0000000000..1262eba0ab
--- /dev/null
+++ b/ansible/playbooks/roles/containerd/tasks/main.yml
@@ -0,0 +1,53 @@
+---
+- name: Get information on installed packages
+  package_facts:
+  when: ansible_facts.packages is undefined
+
+- name: Remove Docker
+  block:
+    - name: Stop
Kubelet before Docker removal + systemd: + name: kubelet + state: stopped + notify: + - Restart kubelet + + - name: Remove Docker + include_role: + name: docker + tasks_from: remove-docker.yml + when: + - is_upgrade_run + - inventory_hostname not in groups.image_registry + - ansible_facts.packages['docker-ce'] is defined + +- name: Install containerd package + package: + name: "{{ _packages[ansible_os_family] }}" + state: present + vars: + _packages: + Debian: + - containerd.io={{ containerd_version }}-* + RedHat: + - containerd.io-{{ containerd_version }} + module_defaults: + yum: { lock_timeout: "{{ yum_lock_timeout }}" } + +- name: Configure prerequisites + include_tasks: configure-prerequisites.yml + +- name: Configure containerd + include_tasks: configure-containerd.yml + +- name: Configure kubelet-env + include_tasks: configure-kubelet-env.yml + when: is_upgrade_run + +- name: Provide crictl.yaml file + copy: + src: crictl.yaml + dest: /etc/crictl.yaml + owner: root + group: root + mode: u=rw,go=r diff --git a/ansible/playbooks/roles/containerd/templates/config.toml.j2 b/ansible/playbooks/roles/containerd/templates/config.toml.j2 new file mode 100644 index 0000000000..52893a8b8c --- /dev/null +++ b/ansible/playbooks/roles/containerd/templates/config.toml.j2 @@ -0,0 +1,131 @@ +# {{ ansible_managed }} +version = 2 +root = "/var/lib/containerd" +state = "/run/containerd" +plugin_dir = "" +disabled_plugins = [] +required_plugins = [] +oom_score = 0 + +[grpc] + address = "/run/containerd/containerd.sock" + tcp_address = "" + tcp_tls_cert = "" + tcp_tls_key = "" + uid = 0 + gid = 0 + max_recv_message_size = 16777216 + max_send_message_size = 16777216 + +[ttrpc] + address = "" + uid = 0 + gid = 0 + +[debug] + address = "" + uid = 0 + gid = 0 + level = "" + +[metrics] + address = "" + grpc_histogram = false + +[cgroup] + path = "" + +[timeouts] + "io.containerd.timeout.shim.cleanup" = "5s" + "io.containerd.timeout.shim.load" = "5s" + "io.containerd.timeout.shim.shutdown" = "3s" + "io.containerd.timeout.task.state" = "2s" + +[plugins] + [plugins."io.containerd.gc.v1.scheduler"] + pause_threshold = 0.02 + deletion_threshold = 0 + mutation_threshold = 100 + schedule_delay = "0s" + startup_delay = "100ms" + [plugins."io.containerd.grpc.v1.cri"] + disable_tcp_service = true + stream_server_address = "127.0.0.1" + stream_server_port = "0" + stream_idle_timeout = "4h0m0s" + enable_selinux = false + selinux_category_range = 1024 + sandbox_image = "{{ image_registry_address }}/k8s.gcr.io/pause:3.5" + stats_collect_period = 10 + systemd_cgroup = false + enable_tls_streaming = false + max_container_log_line_size = 16384 + disable_cgroup = false + disable_apparmor = false + restrict_oom_score_adj = false + max_concurrent_downloads = 3 + disable_proc_mount = false + unset_seccomp_profile = "" + tolerate_missing_hugetlb_controller = true + disable_hugetlb_controller = true + ignore_image_defined_volumes = false + [plugins."io.containerd.grpc.v1.cri".containerd] + snapshotter = "overlayfs" + default_runtime_name = "runc" + no_pivot = false + disable_snapshot_annotations = true + discard_unpacked_layers = false + [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime] + runtime_type = "" + runtime_engine = "" + runtime_root = "" + privileged_without_host_devices = false + base_runtime_spec = "" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + runtime_engine = "" + runtime_root = "" 
+        privileged_without_host_devices = false
+        base_runtime_spec = ""
+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+          SystemdCgroup = true
+    [plugins."io.containerd.grpc.v1.cri".cni]
+      bin_dir = "/opt/cni/bin"
+      conf_dir = "/etc/cni/net.d"
+      max_conf_num = 1
+      conf_template = ""
+    [plugins."io.containerd.grpc.v1.cri".registry]
+      [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
+        [plugins."io.containerd.grpc.v1.cri".registry.mirrors."{{ image_registry_address }}"]
+          endpoint = ["http://{{ image_registry_address }}"]
+        [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
+          endpoint = ["https://registry-1.docker.io"]
+    [plugins."io.containerd.grpc.v1.cri".image_decryption]
+      key_model = ""
+    [plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
+      tls_cert_file = ""
+      tls_key_file = ""
+  [plugins."io.containerd.internal.v1.opt"]
+    path = "/opt/containerd"
+  [plugins."io.containerd.internal.v1.restart"]
+    interval = "10s"
+  [plugins."io.containerd.metadata.v1.bolt"]
+    content_sharing_policy = "shared"
+  [plugins."io.containerd.monitor.v1.cgroups"]
+    no_prometheus = false
+  [plugins."io.containerd.runtime.v1.linux"]
+    shim = "containerd-shim"
+    runtime = "runc"
+    runtime_root = ""
+    no_shim = false
+    shim_debug = false
+  [plugins."io.containerd.runtime.v2.task"]
+    platforms = ["linux/amd64"]
+  [plugins."io.containerd.service.v1.diff-service"]
+    default = ["walking"]
+  [plugins."io.containerd.snapshotter.v1.devmapper"]
+    root_path = ""
+    pool_name = ""
+    base_image_size = ""
+    async_remove = false
diff --git a/ansible/playbooks/roles/docker/tasks/main.yml b/ansible/playbooks/roles/docker/tasks/main.yml
index aef03c16b2..ead5dae074 100644
--- a/ansible/playbooks/roles/docker/tasks/main.yml
+++ b/ansible/playbooks/roles/docker/tasks/main.yml
@@ -1,5 +1,9 @@
 ---
-# Docker (used by master & worker as dependency)
+# Docker (used by image_registry)
+- name: Include containerd defaults
+  include_vars:
+    file: roles/containerd/defaults/main.yml
+    name: containerd_defaults
 
 - name: Install Docker packages
   package:
@@ -8,12 +12,12 @@
   vars:
     _packages:
       Debian:
-        - containerd.io
+        - containerd.io={{ containerd_defaults.containerd_version }}-*
         - docker-ce-cli={{ docker_version.Debian }}
         - docker-ce-rootless-extras={{ docker_version.Debian }}
         - docker-ce={{ docker_version.Debian }}
       RedHat:
-        - containerd.io
+        - containerd.io-{{ containerd_defaults.containerd_version }}
        - docker-ce-cli-{{ docker_version.RedHat }}
        - docker-ce-rootless-extras-{{ docker_version.RedHat }}
        - docker-ce-{{ docker_version.RedHat }}
diff --git a/ansible/playbooks/roles/docker/tasks/remove-docker.yml b/ansible/playbooks/roles/docker/tasks/remove-docker.yml
new file mode 100644
index 0000000000..3e7c8e7939
--- /dev/null
+++ b/ansible/playbooks/roles/docker/tasks/remove-docker.yml
@@ -0,0 +1,32 @@
+# Included in containerd role
+---
+- name: Populate service facts
+  service_facts:
+  when: ansible_facts.services is undefined
+
+- name: Stop Docker daemon
+  systemd:
+    name: docker
+    state: stopped
+    enabled: no
+  when:
+    - "'docker.service' in ansible_facts.services"
+    - ansible_facts.services['docker.service']['status'] != 'not-found'
+
+- name: Remove Docker packages
+  package:
+    name: "{{ _packages }}"
+    state: absent
+  vars:
+    _packages:
+      - docker-ce-cli
+      - docker-ce-rootless-extras
+      - docker-ce
+
+- name: Remove Docker sockets leftovers
+  file:
+    path: "{{ item }}"
+    state: absent
+  loop:
+    - /var/run/docker.sock
+    - /var/run/dockershim.sock
diff --git a/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml b/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml
index 47f622cca5..cb7e2a723e 100644
--- a/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml
+++ b/ansible/playbooks/roles/filebeat/tasks/configure-filebeat.yml
@@ -25,25 +25,32 @@
     mode: u=rw,go=
   register: modify_filebeat_yml
 
-- name: Set Filebeat to be started after Docker
-  when: (groups['kubernetes_master'] is defined and inventory_hostname in groups['kubernetes_master'])
-    or (groups['kubernetes_node'] is defined and inventory_hostname in groups['kubernetes_node'])
-  block:
-    - name: Create directory (filebeat.service.d)
-      file:
-        path: /etc/systemd/system/filebeat.service.d
-        state: directory
-
-    - name: Copy drop-in configuration file (extra-dependencies.conf)
-      template:
-        dest: /etc/systemd/system/filebeat.service.d/extra-dependencies.conf
-        src: extra-dependencies.conf.j2
-      register: modify_filebeat_unit_dependencies
-
-    - name: Run systemctl daemon-reload
-      systemd:
-        daemon_reload: true
-      when: modify_filebeat_unit_dependencies.changed
+- name: Append new field definition
+  when: k8s_as_cloud_service is not defined
+  blockinfile:
+    path: /etc/filebeat/fields.yml
+    backup: true
+    block: |2
+      - key: containerd
+        title: "containerd"
+        description: >
+          Fields read from the containerd log file path.
+        short_config: true
+        fields:
+          - name: containerd
+            type: group
+            description: >
+              Contains extra fields for containerd logs.
+            fields:
+              - name: kubernetes.pod.name
+                type: text
+                format: string
+              - name: kubernetes.namespace
+                type: text
+                format: string
+              - name: container.id
+                type: text
+                format: string
 
 - name: Start/restart and enable filebeat service
   when: groups.logging[0] is defined
@@ -58,7 +65,6 @@
       name: filebeat
       state: restarted
     when: modify_filebeat_yml.changed
-      or modify_filebeat_unit_dependencies.changed
       or enable_module.changed
       or install_filebeat_package.changed
 
diff --git a/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2 b/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2
index 37bd8447a4..831897a347 100644
--- a/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2
+++ b/ansible/playbooks/roles/filebeat/templates/custom-chart-values.yml.j2
@@ -50,39 +50,24 @@ filebeatConfig:
 
 {# -------------------------- Kubernetes input -------------------------- #}
 {% if (k8s_as_cloud_service is defined and k8s_as_cloud_service) or ('kubernetes_master' in groups or 'kubernetes_node' in groups) %}
-    - type: docker
+    - type: container
       enabled: true
-      containers.ids: "*"
-{% if specification.docker_input.multiline is defined %}
+      format: cri
+      paths:
+        - /var/log/containers/*.log
+{% if specification.container_input.multiline is defined %}
       multiline:
-{% for k, v in specification.docker_input.multiline.items() %}
+{% for k, v in specification.container_input.multiline.items() %}
         {{ k }}: {{ v }}
 {% endfor %}
 {% endif %}
       processors:
-        - add_docker_metadata:
-        - rename:
-            fields:
-              - from: docker.container.labels.io.kubernetes.container.name
-                to: kubernetes.container.name
-              - from: container.labels.io_kubernetes_container_name
-                to: kubernetes.container.name
-
-              - from: docker.container.labels.io.kubernetes.pod.name
-                to: kubernetes.pod.name
-              - from: container.labels.io_kubernetes_pod_name
-                to: kubernetes.pod.name
-
-              - from: docker.container.labels.io.kubernetes.pod.namespace
-                to: kubernetes.namespace
-              - from: container.labels.io_kubernetes_pod_namespace
-                to: kubernetes.namespace
-
ignore_missing: true - fail_on_error: true - - drop_fields: - fields: - - docker + - add_kubernetes_metadata: + in_cluster: true + matchers: + - logs_path: + logs_path: "/var/log/containers/" {% endif %} {# -------------------------- Filebeat modules -------------------------- #} diff --git a/ansible/playbooks/roles/filebeat/templates/extra-dependencies.conf.j2 b/ansible/playbooks/roles/filebeat/templates/extra-dependencies.conf.j2 deleted file mode 100644 index d171edae51..0000000000 --- a/ansible/playbooks/roles/filebeat/templates/extra-dependencies.conf.j2 +++ /dev/null @@ -1,4 +0,0 @@ -# {{ ansible_managed }} - -[Unit] -After=docker.service \ No newline at end of file diff --git a/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 b/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 index f59e8bdfdd..a6715edf20 100644 --- a/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 +++ b/ansible/playbooks/roles/filebeat/templates/filebeat.yml.j2 @@ -112,8 +112,9 @@ filebeat.inputs: - type: container enabled: true + format: cri paths: - - /var/lib/docker/containers/*/*.log + - /var/log/containers/*.log {% if specification.container_input.multiline is defined %} multiline: {% for k, v in specification.container_input.multiline.items() %} @@ -122,21 +123,11 @@ filebeat.inputs: {% endif %} processors: - - add_docker_metadata: - labels.dedot: false - - rename: - fields: - - from: container.labels.io.kubernetes.container.name - to: kubernetes.container.name - - from: container.labels.io.kubernetes.pod.name - to: kubernetes.pod.name - - from: container.labels.io.kubernetes.pod.namespace - to: kubernetes.namespace - ignore_missing: true - fail_on_error: true - - drop_fields: - fields: - - container # Drop all fields added by 'add_docker_metadata' that were not renamed + - dissect: + tokenizer: "/var/log/containers/%{kubernetes.pod.name}_%{kubernetes.namespace}_%{container.id}.log" + field: "log.file.path" + target_prefix: "" + overwrite_keys: true {% endif %} # ============================== Filebeat modules ============================== diff --git a/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml b/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml index 9a73fe0ee5..5334f35e0e 100644 --- a/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml +++ b/ansible/playbooks/roles/kubernetes_common/tasks/configure-kubelet.yml @@ -36,6 +36,13 @@ backup: true register: apply_kubelet_custom_config +- name: Configure kubelet-env + include_role: + name: containerd + tasks_from: configure-kubelet-env + when: + - inventory_hostname in groups.image_registry + - name: Restart kubelet service systemd: name: kubelet diff --git a/ansible/playbooks/roles/kubernetes_common/tasks/main.yml b/ansible/playbooks/roles/kubernetes_common/tasks/main.yml index 344b2ea946..5a7656c2f7 100644 --- a/ansible/playbooks/roles/kubernetes_common/tasks/main.yml +++ b/ansible/playbooks/roles/kubernetes_common/tasks/main.yml @@ -8,6 +8,7 @@ state: present reload: yes +# Check needed only for RHEL - name: Check if bridge-nf-call-iptables key exists command: "sysctl net.bridge.bridge-nf-call-iptables" failed_when: false @@ -21,7 +22,7 @@ value: "1" reload: yes when: sysctl_bridge_nf_call_iptables.rc == 0 - with_items: + loop: - net.bridge.bridge-nf-call-iptables - net.bridge.bridge-nf-call-ip6tables diff --git a/ansible/playbooks/roles/kubernetes_master/tasks/generate-certificates.yml b/ansible/playbooks/roles/kubernetes_master/tasks/generate-certificates.yml index 
9e01e11d8a..53837e80b9 100644
--- a/ansible/playbooks/roles/kubernetes_master/tasks/generate-certificates.yml
+++ b/ansible/playbooks/roles/kubernetes_master/tasks/generate-certificates.yml
@@ -212,7 +212,7 @@
 - name: Restart systemd services
   when:
     - services_to_restart is defined
-    - services_to_restart | difference(['docker', 'kubelet']) | length == 0
+    - services_to_restart | difference(['containerd', 'kubelet']) | length == 0
   block:
     - name: Restart services
       systemd:
diff --git a/ansible/playbooks/roles/kubernetes_master/tasks/main.yml b/ansible/playbooks/roles/kubernetes_master/tasks/main.yml
index e3a32f6d37..e74c1285f6 100644
--- a/ansible/playbooks/roles/kubernetes_master/tasks/main.yml
+++ b/ansible/playbooks/roles/kubernetes_master/tasks/main.yml
@@ -86,10 +86,8 @@
     - name: Restart apiserver
      shell: |-
        set -o pipefail && \
-        docker ps \
-          --filter 'name=kube-apiserver_kube-apiserver' \
-          --format '{{ "{{.ID}}" }}' \
-        | xargs --no-run-if-empty docker kill
+        crictl ps --name='kube-apiserver' -q \
+        | xargs --no-run-if-empty crictl stop --timeout=0
      args:
        executable: /bin/bash
 
@@ -109,7 +107,7 @@
   vars:
     valid_days: "{{ specification.advanced.certificates.expiration_days }}"
     services_to_restart:
-      - docker
+      - containerd
   include_tasks: generate-certificates.yml
 
 # kubeadm-config.yml can appear not only on 'automation_designated_master' in 2 cases:
diff --git a/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml b/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml
index eda1a4eeb0..eaad9e045a 100644
--- a/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml
+++ b/ansible/playbooks/roles/kubernetes_promote/handlers/main.yml
@@ -1,19 +1,17 @@
 ---
 - name: Restart controller-manager
-  shell: |
-    docker ps \
-      --filter 'name=kube-controller-manager_kube-controller-manager' \
-      --format '{{ "{{.ID}}" }}' \
-    | xargs --no-run-if-empty docker kill
+  shell: >-
+    set -o pipefail &&
+    crictl ps --name='kube-controller-manager' -q
+    | xargs --no-run-if-empty crictl stop --timeout=0
   args:
     executable: /bin/bash
 
 - name: Restart scheduler
-  shell: |
-    docker ps \
-      --filter 'name=kube-scheduler_kube-scheduler' \
-      --format '{{ "{{.ID}}" }}' \
-    | xargs --no-run-if-empty docker kill
+  shell: >-
+    set -o pipefail &&
+    crictl ps --name='kube-scheduler' -q
+    | xargs --no-run-if-empty crictl stop --timeout=0
   args:
     executable: /bin/bash
 
@@ -23,11 +21,10 @@
     state: restarted
 
 - name: Restart kube-proxy
-  shell: |
-    docker ps \
-      --filter 'name=kube-proxy_kube-proxy' \
-      --format '{{ "{{.ID}}" }}' \
-    | xargs --no-run-if-empty docker kill
+  shell: >-
+    set -o pipefail &&
+    crictl ps --name='kube-proxy' -q
+    | xargs --no-run-if-empty crictl stop --timeout=0
   args:
     executable: /bin/bash
 
diff --git a/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.aarch64.txt b/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.aarch64.txt
index 7ecc3fe7f3..c75b511532 100644
--- a/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.aarch64.txt
+++ b/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.aarch64.txt
@@ -26,7 +26,7 @@ bash-completion
 ca-certificates
 cifs-utils
 conntrack-tools # for kubelet
-containerd.io
+containerd.io-1.4.12
 container-selinux
 #cri-tools-1.13.0
 curl
diff --git a/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.x86_64.txt
b/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.x86_64.txt index fe4ac78f84..fa77cfea97 100644 --- a/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.x86_64.txt +++ b/ansible/playbooks/roles/repository/files/download-requirements/centos-7/requirements.x86_64.txt @@ -26,7 +26,7 @@ bash-completion ca-certificates cifs-utils conntrack-tools # for kubelet -containerd.io +containerd.io-1.4.12 container-selinux cri-tools-1.13.0 curl diff --git a/ansible/playbooks/roles/repository/files/download-requirements/redhat-7/requirements.x86_64.txt b/ansible/playbooks/roles/repository/files/download-requirements/redhat-7/requirements.x86_64.txt index 86698c69fa..0d76c63999 100644 --- a/ansible/playbooks/roles/repository/files/download-requirements/redhat-7/requirements.x86_64.txt +++ b/ansible/playbooks/roles/repository/files/download-requirements/redhat-7/requirements.x86_64.txt @@ -24,7 +24,7 @@ bash-completion ca-certificates cifs-utils conntrack-tools # for kubelet -containerd.io +containerd.io-1.4.12 container-selinux cri-tools-1.13.0 curl diff --git a/ansible/playbooks/roles/repository/files/download-requirements/ubuntu-20.04/requirements.x86_64.txt b/ansible/playbooks/roles/repository/files/download-requirements/ubuntu-20.04/requirements.x86_64.txt index a37ec22874..6de70d5e4f 100644 --- a/ansible/playbooks/roles/repository/files/download-requirements/ubuntu-20.04/requirements.x86_64.txt +++ b/ansible/playbooks/roles/repository/files/download-requirements/ubuntu-20.04/requirements.x86_64.txt @@ -8,7 +8,7 @@ auditd bash-completion ca-certificates cifs-utils -containerd.io +containerd.io 1.4.12 cri-tools 1.13.0 curl docker-ce 5:20.10.8 diff --git a/ansible/playbooks/roles/upgrade/tasks/image-registry.yml b/ansible/playbooks/roles/upgrade/tasks/image-registry.yml deleted file mode 100644 index c43ba58244..0000000000 --- a/ansible/playbooks/roles/upgrade/tasks/image-registry.yml +++ /dev/null @@ -1,39 +0,0 @@ ---- - -- name: image-registry | Include get-registries.yml from docker role # this sets result - include_role: - name: docker - tasks_from: get-registries - -- name: Reconfigure Docker for pulling images from local registry - block: - - name: image-registry | Drain node in preparation for Docker reconfiguration - include_tasks: kubernetes/utils/drain.yml - when: - - groups.kubernetes_node is defined - - inventory_hostname in groups.kubernetes_node - - - name: image-registry | Wait for cluster's readiness - include_tasks: kubernetes/utils/wait.yml - when: - - groups.kubernetes_node is defined - - inventory_hostname in groups.kubernetes_node - - - name: image-registry | Reconfigure Docker if necessary # this restarts Docker daemon - include_role: - name: docker - tasks_from: configure-docker - - - name: Include wait-for-kube-apiserver.yml - include_tasks: kubernetes/utils/wait-for-kube-apiserver.yml - when: - - inventory_hostname in groups.kubernetes_master - - - name: image-registry | Uncordon node - mark node as schedulable - include_tasks: kubernetes/utils/uncordon.yml - when: - - groups.kubernetes_node is defined - - inventory_hostname in groups.kubernetes_node - - when: - - not image_registry_address in result.stdout diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/backup-kubeadm-config.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/backup-kubeadm-config.yml index ad88bcd302..6f9b6c5e5f 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/backup-kubeadm-config.yml +++ 
b/ansible/playbooks/roles/upgrade/tasks/kubernetes/backup-kubeadm-config.yml @@ -1,10 +1,15 @@ --- # The kubeadm-config.yml file is no longer used during upgrade process, # but we keep it for backup and reference purposes. +# Retries needed for HA deployment (random failures) - name: k8s/master | Collect kubeadm-config ConfigMap command: >- kubectl get cm kubeadm-config -o yaml -n kube-system register: kubeadm_config + until: + - kubeadm_config is succeeded + retries: 30 + delay: 1 - name: k8s/master | Save kubeadm-config ConfigMap to file copy: diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/patch-cgroup-driver.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/patch-cgroup-driver.yml index 1a86d895d3..9ad5c88165 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/patch-cgroup-driver.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/patch-cgroup-driver.yml @@ -25,32 +25,8 @@ _output_str: >- {{ _input | combine(_cgroup_driver) | to_nice_yaml(indent=2) }} -- name: k8s/cgroups | Read /etc/docker/daemon.json - slurp: - path: /etc/docker/daemon.json - register: slurp_etc_docker_daemon_json - -- name: k8s/cgroups | Process /etc/docker/daemon.json - set_fact: - etc_docker_daemon_json: - output: "{{ _output }}" - changed: "{{ _output['exec-opts'] != _exec_opts }}" - vars: - _input: >- - {{ slurp_etc_docker_daemon_json.content | b64decode | from_json }} - - _exec_opts: >- - {{ _input['exec-opts'] | default([]) }} - - _update: - exec-opts: >- - {{ _exec_opts | difference(['native.cgroupdriver=cgroupfs']) | union(['native.cgroupdriver=systemd']) }} - - _output: >- - {{ _input | combine(_update, recursive=true) }} - - name: k8s/cgroups | Perform cgroup driver patching (switch to systemd) - when: var_lib_kubelet_config_yaml.changed or etc_docker_daemon_json.changed + when: var_lib_kubelet_config_yaml.changed block: # At this point we assume that currently processed node has been drained already. 
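A quick way to verify the result of this patching step by hand (a minimal sketch; the file path and the expected value match the spec tests at the end of this patch):

```shell
# On a patched node: kubelet must report the systemd cgroup driver.
grep cgroupDriver /var/lib/kubelet/config.yaml
# Expected output:
#   cgroupDriver: systemd
```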
@@ -63,16 +39,7 @@ group: root mode: preserve - - name: k8s/cgroups | Write /etc/docker/daemon.json - copy: - dest: /etc/docker/daemon.json - content: | - {{ etc_docker_daemon_json.output | to_nice_json(indent=2) }} - owner: root - group: root - mode: preserve - - - name: k8s/cgroups | Restart kubelet and docker - include_tasks: utils/restart-kubelet-and-docker.yml + - name: k8s/cgroups | Restart kubelet + include_tasks: utils/restart-kubelet.yml when: - (_requires_restart is undefined) or _requires_restart diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml index 4fef7acc15..8d447738ef 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-master0.yml @@ -41,6 +41,16 @@ - name: k8s/master0 | Include set-cluster-version.yml include_tasks: set-cluster-version.yml # sets cluster_version + # Retries needed for HA deployment (random failures) + - name: k8s/master0 | Add k8s annotation for containerd + command: >- + kubectl annotate node {{ inventory_hostname }} --overwrite kubeadm.alpha.kubernetes.io/cri-socket=unix:///run/containerd/containerd.sock + register: result + until: + - result is succeeded + retries: 30 + delay: 1 + # Note: Usage of the --config flag for reconfiguring the cluster during upgrade is not recommended since v1.16 - name: k8s/master0 | Upgrade K8s cluster to v{{ version }} command: >- @@ -81,9 +91,6 @@ - name: k8s/master0 | Backup kubeadm-config.yml include_tasks: backup-kubeadm-config.yml -- name: k8s/master0 | Upgrade Docker # this may restart Docker daemon - include_tasks: docker.yml - - name: k8s/master0 | Patch kubelet ConfigMap with systemd cgroup driver include_tasks: patch-kubelet-cm.yml @@ -93,8 +100,8 @@ when: - upgrade_to_final_version -- name: k8s/master0 | Restart kubelet and docker - include_tasks: utils/restart-kubelet-and-docker.yml +- name: k8s/master0 | Restart kubelet + include_tasks: utils/restart-kubelet.yml - name: k8s/master0 | Uncordon master - mark master as schedulable include_tasks: utils/uncordon.yml diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml index 850ba8427d..4364b8066e 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-masterN.yml @@ -7,6 +7,16 @@ - name: k8s/masterN | Install kubeadm include_tasks: "{{ ansible_os_family }}/install-kubeadm.yml" + # Retries needed for HA deployment (random failures) + - name: k8s/masterN | Add k8s annotation for containerd + command: >- + kubectl annotate node {{ inventory_hostname }} --overwrite kubeadm.alpha.kubernetes.io/cri-socket=unix:///run/containerd/containerd.sock + register: result + until: + - result is succeeded + retries: 30 + delay: 1 + - name: k8s/masterN | Upgrade master {{ inventory_hostname }} command: >- kubeadm upgrade node @@ -32,17 +42,14 @@ - name: k8s/masterN | Backup kubeadm-config.yml include_tasks: backup-kubeadm-config.yml -- name: k8s/masterN | Upgrade Docker # this may restart Docker daemon - include_tasks: docker.yml - - name: k8s/masterN | Replace cgroupfs driver with systemd driver include_tasks: patch-cgroup-driver.yml vars: { _requires_restart: false } # it will be properly restarted anyways when: - upgrade_to_final_version -- name: k8s/masterN | Restart kubelet and docker - 
include_tasks: utils/restart-kubelet-and-docker.yml +- name: k8s/masterN | Restart kubelet + include_tasks: utils/restart-kubelet.yml - name: k8s/masterN | Wait for cluster's readiness include_tasks: utils/wait.yml diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-node.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-node.yml index 6e8e38589a..3c491b95f5 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-node.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/upgrade-node.yml @@ -2,9 +2,6 @@ - name: k8s/node | Drain node in preparation for maintenance include_tasks: utils/drain.yml -- name: k8s/node | Upgrade Docker # this may restart Docker daemon - include_tasks: docker.yml - - name: k8s/node | Replace cgroupfs driver with systemd driver include_tasks: patch-cgroup-driver.yml vars: { _requires_restart: true } diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet-and-docker.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet.yml similarity index 58% rename from ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet-and-docker.yml rename to ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet.yml index f5c7731f84..cd1c222d20 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet-and-docker.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/utils/restart-kubelet.yml @@ -4,21 +4,10 @@ state: stopped name: kubelet -- name: k8s/utils | Stop Docker - systemd: - state: stopped - name: docker - - name: k8s/utils | Reload daemon systemd: daemon_reload: true -- name: k8s/utils | Start Docker - systemd: - name: docker - state: started - enabled: true - - name: k8s/utils | Start Kubelet systemd: name: kubelet diff --git a/ansible/playbooks/roles/upgrade/tasks/kubernetes/verify-upgrade.yml b/ansible/playbooks/roles/upgrade/tasks/kubernetes/verify-upgrade.yml index 02dfad56b0..2455749f82 100644 --- a/ansible/playbooks/roles/upgrade/tasks/kubernetes/verify-upgrade.yml +++ b/ansible/playbooks/roles/upgrade/tasks/kubernetes/verify-upgrade.yml @@ -64,3 +64,5 @@ check_mode: true register: cgroup_driver failed_when: cgroup_driver.changed + when: + - upgrade_to_final_version diff --git a/ansible/playbooks/upgrade.yml b/ansible/playbooks/upgrade.yml index 1b8fb40e20..6639327668 100644 --- a/ansible/playbooks/upgrade.yml +++ b/ansible/playbooks/upgrade.yml @@ -41,8 +41,8 @@ become_method: sudo tasks: - import_role: - name: upgrade - tasks_from: image-registry + name: containerd + tasks_from: main when: "'kubernetes' in upgrade_components or upgrade_components|length == 0" environment: KUBECONFIG: "{{ kubeconfig.remote }}" diff --git a/docs/architecture/logical-view.md b/docs/architecture/logical-view.md index d82c707d16..47d9acde34 100644 --- a/docs/architecture/logical-view.md +++ b/docs/architecture/logical-view.md @@ -49,7 +49,7 @@ Source | Purpose /var/log/secure | Logs from authentication and authorization /var/log/syslog | System logs and events /var/log/zookeeper/version-2/* | Zookeeper's logs -Docker containers | Kubernetes components that run in a container +Containers | Kubernetes components that run in a container `Filebeat`, unlike `Grafana`, pushes data to database (`Elasticsearch`) instead of pulling them. [Read more](https://www.elastic.co/products/beats/filebeat) about `Filebeat`. 
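As a sanity check after the upgrade play switches the runtime, each node's reported runtime can be inspected (a minimal sketch; the expected value mirrors the spec tests further below):

```shell
# List every node with the container runtime it reports to the API server.
kubectl get nodes \
  -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.nodeInfo.containerRuntimeVersion}{"\n"}{end}' \
  --kubeconfig=/etc/kubernetes/admin.conf
# Expected second column on every node: containerd://1.4.12
```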
diff --git a/docs/changelogs/CHANGELOG-2.0.md b/docs/changelogs/CHANGELOG-2.0.md
index d02c779ce0..cd93b49d0f 100644
--- a/docs/changelogs/CHANGELOG-2.0.md
+++ b/docs/changelogs/CHANGELOG-2.0.md
@@ -9,6 +9,7 @@
 - [#2812](https://github.com/epiphany-platform/epiphany/issues/2812) - Extend K8s config validation
 - [#2950](https://github.com/epiphany-platform/epiphany/issues/2950) - CLI refactor to make it more consistent
 - [#2844](https://github.com/epiphany-platform/epiphany/issues/2844) - Refactor K8s upgrade task in order to simplify its flow
+- [#2716](https://github.com/epiphany-platform/epiphany/issues/2716) - Change container runtime to containerd
 
 ### Fixed
 
@@ -49,9 +50,10 @@
 
 ### Deprecated
 
-
 ### Breaking changes
 
 - Upgrade of Terraform components in issue [#2825](https://github.com/epiphany-platform/epiphany/issues/2825) and [#2853](https://github.com/epiphany-platform/epiphany/issues/2853) will make running re-apply with infrastructure break on existing 1.x clusters. The advice is to deploy a new cluster and migrate data. If needed, a manual upgrade path is described [here.](../home/howto/UPGRADE.md#terraform-upgrade-from-epiphany-1.x-to-2.x)
+- Kubernetes container runtime changed. Dockershim and Docker are no longer installed on Kubernetes hosts.
+- Filebeat `docker` input replaced by `container` input. For Filebeat installed as a system service, a new field is provided: `container.id`. The field `kubernetes.container.name` is no longer valid.
 
 ### Known issues
 
diff --git a/docs/home/COMPONENTS.md b/docs/home/COMPONENTS.md
index 8511441380..7328ff5628 100644
--- a/docs/home/COMPONENTS.md
+++ b/docs/home/COMPONENTS.md
@@ -9,6 +9,7 @@
 | Kubernetes | 1.22.4 | https://github.com/kubernetes/kubernetes | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
 | Kubernetes Dashboard | 2.3.1 | https://github.com/kubernetes/dashboard | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
 | Kubernetes metrics-scraper | 1.0.7 | https://github.com/kubernetes-sigs/dashboard-metrics-scraper | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
+| containerd | 1.4.12 | https://github.com/containerd/containerd | [Apache License 2.0](https://github.com/containerd/containerd/blob/main/LICENSE) |
 | Calico | 3.20.3 | https://github.com/projectcalico/calico | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
 | Flannel | 0.14.0 | https://github.com/coreos/flannel/ | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
 | Canal | 3.20.3 | https://github.com/projectcalico/calico | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
diff --git a/docs/home/howto/MAINTENANCE.md b/docs/home/howto/MAINTENANCE.md
index e9497faa6c..52cc3de205 100644
--- a/docs/home/howto/MAINTENANCE.md
+++ b/docs/home/howto/MAINTENANCE.md
@@ -4,33 +4,17 @@
 
 This part of the documentation covers how to check if each component is working properly.
 
-#### - Docker
-
-To verify that Docker services are up and running you can first check the status of the Docker service with the
-following command:
-
-```shell
-systemctl status docker
-```
-
-Additionally, you can check also if the command:
-
-```shell
-docker info
-```
-
-doesn't return any error. You can also find there useful information about your Docker configuration.
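With the Docker checks above removed, the rough containerd equivalents are (a sketch of manual checks; all of these commands ship with the containerd.io and cri-tools packages installed by this patch):

```shell
systemctl status containerd   # containerd service health
crictl info                   # CRI runtime status and configuration
crictl ps -a                  # all containers known to the CRI runtime
```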
-
 #### - Kubernetes
 
-First to check if everything is working fine we need to check verify status of Kubernetes kubelet service with the
-command:
+Verify status of Kubernetes kubelet service with the command:
 
 ```shell
 systemctl status kubelet
 ```
 
+##### - kubectl
+
-We can also check state of Kubernetes nodes using the command:
+Check state of Kubernetes nodes using the `kubectl` command:
 
 ```shell
 root@primary01:~# kubectl get nodes --kubeconfig=/etc/kubernetes/admin.conf
 NAME     STATUS   ROLES    AGE   VERSION
 node01   Ready    <none>   23h   vx.xx.x
 node02   Ready    <none>   23h   vx.xx.x
 ```
 
-We can get additional information about Kubernetes components:
+Get additional information about Kubernetes components:
 
 ```shell
 root@primary01:~# kubectl cluster-info --kubeconfig=/etc/kubernetes/admin.conf
 Kubernetes control plane is running at https://primary01:6443
 CoreDNS is running at https://primary01:6443/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy
 ```
 
-We can also check status of pods in all namespaces using the command:
+Check status of pods in all namespaces using the command:
 
 ```shell
 kubectl get pods -A --kubeconfig=/etc/kubernetes/admin.conf
 ```
 
 For more detailed information please refer
-to [official documentation](https://kubernetes.io/docs/reference/kubectl/overview/)
+to [the official documentation](https://kubernetes.io/docs/reference/kubectl/overview/).
+
+##### - crictl
+
+Check state of Kubernetes components using the `crictl` command:
+
+List all pods:
+
+```shell
+crictl pods
+```
+
+List all images:
+
+```shell
+crictl images
+```
+
+List all containers:
+
+```shell
+crictl ps -a
+```
+
+The crictl tool also allows you to run a pod sandbox, which may be useful for debugging purposes.
+For more information, refer to [the official documentation](https://kubernetes.io/docs/tasks/debug-application-cluster/crictl).
 
 #### - Keycloak
 
@@ -67,24 +76,24 @@
 kubectl get pods --kubeconfig=/etc/kubernetes/admin.conf --namespace=keycloak_se
 
 #### - HAProxy
 
-To check status of HAProxy we can use the command:
+To check status of HAProxy, use the command:
 
 ```shell
 systemctl status haproxy
 ```
 
-Additionally, we can check if the application is listening on ports defined in the file haproxy.cfg running netstat
-command.
+Additionally, you can check if the application is listening on ports defined in the file haproxy.cfg by running the
+netstat command.
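For example, following the netstat convention used throughout this guide (`-p` prints the owning program, so no specific port has to be assumed):

```shell
netstat -antup | grep haproxy
```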
 
 #### - Prometheus
 
-To check status of Prometheus we can use the command:
+To check status of Prometheus, use the command:
 
 ```shell
 systemctl status prometheus
 ```
 
-We can also check if Prometheus service is listening at the port 9090:
+Check if Prometheus service is listening at the port 9090:
 
 ```shell
 netstat -antup | grep 9090
@@ -92,13 +101,13 @@
 
 #### - Grafana
 
-To check status of Grafana we can use the command:
+To check status of Grafana, use the command:
 
 ```shell
 systemctl status grafana-server
 ```
 
-We can also check if Grafana service is listening at the port 3000:
+Check if Grafana service is listening at the port 3000:
 
 ```shell
 netstat -antup | grep 3000
@@ -106,7 +115,7 @@
 
 #### - Prometheus Node Exporter
 
-To check status of Node Exporter we can use the command:
+To check status of Node Exporter, use the command:
 
 ```shell
 systemctl status prometheus-node-exporter
@@ -114,41 +123,41 @@
 
 #### - Elasticsearch
 
-To check status of Elasticsearch we can use the command:
+To check status of Elasticsearch, use the command:
 
 ```shell
 systemctl status elasticsearch
 ```
 
-We can check if service is listening on 9200 (API communication port):
+Check if service is listening on 9200 (API communication port):
 
 ```shell
 netstat -antup | grep 9200
 ```
 
-We can also check if service is listening on 9300 (nodes communication port):
+Check if service is listening on 9300 (nodes communication port):
 
 ```shell
 netstat -antup | grep 9300
 ```
 
-We can also check status of Elasticsearch cluster:
+Check status of Elasticsearch cluster:
 
 ```shell
 :9200/_cluster/health
 ```
 
-We can do this using curl or any other equivalent tool.
+You can do this using curl or any other equivalent tool.
 
 #### - Kibana
 
-To check status of Kibana we can use the command:
+To check status of Kibana, use the command:
 
 ```shell
 systemctl status kibana
 ```
 
-We can also check if Kibana service is listening at the port 5601:
+Check if Kibana service is listening at the port 5601:
 
 ```shell
 netstat -antup | grep 5601
@@ -156,7 +165,7 @@
 
 #### - Filebeat
 
-To check status of Filebeat we can use the command:
+To check status of Filebeat, use the command:
 
 ```shell
 systemctl status filebeat
@@ -164,7 +173,7 @@
 
 #### - PostgreSQL
 
-To check status of PostgreSQL we can use commands:
+To check status of PostgreSQL, use commands:
 
 - on Ubuntu:
 
 ```shell
 systemctl status postgresql
 ```
 
 - on Red Hat:
 
 ```shell
 systemctl status postgresql-10
 ```
 
 where postgresql-10 is only an example, because the number differs from version to version. Please refer to your
 version number in case of using this command.
 
-We can also check if PostgreSQL service is listening at the port 5432:
+Check if PostgreSQL service is listening at the port 5432:
 
 ```shell
 netstat -antup | grep 5432
 ```
 
-We can also use the pg_isready command, to get information if the PostgreSQL server is running and accepting connections
-with command:
+Use the pg_isready command to check whether the PostgreSQL server is running and accepting connections:
 
 - on Ubuntu:
diff --git a/tests/spec/spec/filebeat/filebeat_spec.rb b/tests/spec/spec/filebeat/filebeat_spec.rb
index 54432233ca..985662fcd7 100644
--- a/tests/spec/spec/filebeat/filebeat_spec.rb
+++ b/tests/spec/spec/filebeat/filebeat_spec.rb
@@ -44,16 +44,6 @@
   end
 end
 
-if hostInGroups?("kubernetes_master") || hostInGroups?("kubernetes_node")
-  describe 'Check extra configuration for master/worker roles - setting Filebeat to be started after Docker' do
-    describe file("/etc/systemd/system/filebeat.service.d/extra-dependencies.conf") do
-      it { should exist }
-      it { should be_a_file }
-      its(:content) { should match /After=docker\.service/ }
-    end
-  end
-end
-
 if es_logstash_user_is_active
   listInventoryHosts("logging").each do |val|
     describe 'Check the connection to the Elasticsearch hosts' do
diff --git a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb
index b4e9393b3e..c3750b39d3 100644
--- a/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb
+++ b/tests/spec/spec/kubernetes_master/kubernetes_master_spec.rb
@@ -11,6 +11,13 @@
   end
 end
 
+describe 'Check if containerd service is enabled/running' do
+  describe service('containerd') do
+    it { should be_enabled }
+    it { should be_running }
+  end
+end
+
 describe 'Checking if kubelet service is running' do
   describe service('kubelet') do
     it { should be_enabled }
@@ -187,7 +194,6 @@
     its(:exit_status) { should eq 0 }
   end
 end
-
 end
 
 describe 'Check the kubelet cgroup driver' do
@@ -207,18 +213,32 @@
   end
 end
 
-describe 'Check the docker cgroup and logging driver' do
-  describe file('/etc/docker/daemon.json') do
+describe 'Check containerd' do
+  describe command('crictl version') do
     let(:disable_sudo) { false }
-    its(:content_as_json) { should include('exec-opts' => include('native.cgroupdriver=systemd')) }
-    its(:content_as_json) { should include('log-driver' => 'json-file') }
-    its(:content_as_json) { should_not include('exec-opts' => include('native.cgroupdriver=cgroupfs')) }
+    its(:stdout) { should include('RuntimeName: containerd') }
+  end
+  describe command("kubectl get nodes -o jsonpath='{.items[*].status.nodeInfo.containerRuntimeVersion}'") do
+    its(:stdout) { should include('containerd://1.4.12') }
   end
-  describe command('docker info | grep -i driver') do
+  describe file('/etc/containerd/config.toml') do
     let(:disable_sudo) { false }
-    its(:stdout) { should match(/Cgroup Driver: systemd/) }
-    its(:stdout) { should match(/Logging Driver: json-file/) }
-    its(:exit_status) { should eq 0 }
+    its(:content) { should match(/SystemdCgroup = true/) }
+  end
+end
+
+describe 'Check the OCI-spec' do
+  describe command('crictl info') do
+    let(:disable_sudo) { false }
+    its(:stdout) { should match('\"defaultRuntimeName\": \"runc\"') }
+  end
+end
+
+describe 'Check the kubelet cgroup driver' do
+  describe file('/var/lib/kubelet/config.yaml') do
+    let(:disable_sudo) { false }
+    its(:content_as_yaml) { should include('cgroupDriver' => 'systemd') }
+    its(:content_as_yaml) { should_not include('cgroupDriver' => 'cgroupfs') }
   end
 end
its(:content_as_yaml) { should include('rotateCertificates' => true) } end describe command("kubectl describe cm $(kubectl get cm -n kube-system \ - | awk '/kubelet-config/{print $1}') -n kube-system | grep -i rotateCertificates") do - its(:stdout) { should match(/rotateCertificates: true/) } + | awk '/kubelet-config/{print $1}') -n kube-system") do + its(:stdout) { should contain('rotateCertificates: true') } its(:exit_status) { should eq 0 } end end diff --git a/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb b/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb index cf708c8156..93b4b61a42 100644 --- a/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb +++ b/tests/spec/spec/kubernetes_node/kubernetes_node_spec.rb @@ -1,25 +1,28 @@ require 'spec_helper' -describe 'Check the kubelet cgroup driver' do - describe file('/var/lib/kubelet/config.yaml') do - let(:disable_sudo) { false } - its(:content_as_yaml) { should include('cgroupDriver' => 'systemd') } - its(:content_as_yaml) { should_not include('cgroupDriver' => 'cgroupfs') } +describe 'Check if containerd service is enabled/running' do + describe service('containerd') do + it { should be_enabled } + it { should be_running } end end -describe 'Check the docker cgroup and logging driver' do - describe file('/etc/docker/daemon.json') do +describe 'Check the containerd' do + describe command('crictl version') do let(:disable_sudo) { false } - its(:content_as_json) { should include('exec-opts' => include('native.cgroupdriver=systemd')) } - its(:content_as_json) { should include('log-driver' => 'json-file') } - its(:content_as_json) { should_not include('exec-opts' => include('native.cgroupdriver=cgroupfs')) } + its(:stdout) { should include('RuntimeName: containerd') } end - describe command('docker info | grep -i driver') do + describe file('/etc/containerd/config.toml') do let(:disable_sudo) { false } - its(:stdout) { should match(/Cgroup Driver: systemd/) } - its(:stdout) { should match(/Logging Driver: json-file/) } - its(:exit_status) { should eq 0 } + its(:content) { should match(/SystemdCgroup = true/) } + end +end + +describe 'Check the kubelet cgroup driver' do + describe file('/var/lib/kubelet/config.yaml') do + let(:disable_sudo) { false } + its(:content_as_yaml) { should include('cgroupDriver' => 'systemd') } + its(:content_as_yaml) { should_not include('cgroupDriver' => 'cgroupfs') } end end