Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

K8s improvements #2918

Merged
merged 11 commits into from
Feb 4, 2022
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
# Save the in-cluster kubeadm ClusterConfiguration to a local file.
# Caller contract: the 'dest_file' variable must be provided (asserted below).

- name: Assert that dest_file variable is defined
  assert:
    that:
      - dest_file is defined
    fail_msg: Variable 'dest_file' must be defined

# Read-only query against the kube-system 'kubeadm-config' ConfigMap,
# hence 'changed_when: false'.
- name: Collect kubeadm-config
  command: |-
    kubectl get configmap kubeadm-config \
      --namespace kube-system \
      --output jsonpath={{ jsonpath }}
  vars:
    # NOTE(review): the surrounding single quotes are stripped by the command
    # module's shlex parsing before kubectl sees the argument — verify; with a
    # shell they would be stripped by the shell instead.
    jsonpath: >-
      '{.data.ClusterConfiguration}'
  register: kubeadm_config
  changed_when: false

# Write the collected ClusterConfiguration to {{ dest_file }};
# mode u=rw,go= keeps the file readable/writable by the owner only.
- name: Create {{ dest_file }}
  copy:
    dest: "{{ dest_file }}"
    mode: u=rw,go=
    content: >-
      {{ kubeadm_config.stdout }}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
---
- name: Generate certificates block
- name: Set pki_backup_dir fact
set_fact:
pki_backup_dir: "{{ pki.location | regex_replace('\\/$', '') }}-backup-{{ ansible_date_time.iso8601_basic_short }}"

- name: Back up current certificates
synchronize:
src: "{{ pki.location }}/"
dest: "{{ pki_backup_dir }}"
delegate_to: "{{ inventory_hostname }}"

- name: Generate certificates
vars:
# https://kubernetes.io/docs/setup/best-practices/certificates/#all-certificates
_certificates_opt_mapping:
Expand Down Expand Up @@ -62,13 +72,6 @@
certificates_renewal_list: "{{ _certificates_opt_mapping | map(attribute='name') }}"
when: certificates_renewal_list is not defined

- name: Save old certificates
synchronize:
src: "{{ pki.location }}/"
dest: >-
{{ pki.location | regex_replace('\\/$', '') }}-backup-{{ ansible_date_time.iso8601_basic_short }}
delegate_to: "{{ inventory_hostname }}"

- name: Ensure necessary directories exist
file:
path: "{{ item }}"
Expand Down Expand Up @@ -195,20 +198,31 @@
- 'csr'
- 'ext'

- name: Restart systemd services
when:
- services_to_restart is defined
- services_to_restart | difference(['docker', 'kubelet']) | length == 0
block:
- name: Restart services
systemd:
name: "{{ item }}"
state: restarted
loop: "{{ services_to_restart }}"

- name: Wait until cluster is available
command: kubectl cluster-info
retries: 50
delay: 1
register: result
until: result is succeeded and "running" in result.stdout
rescue:
atsikham marked this conversation as resolved.
Show resolved Hide resolved
- name: Restore certificates
synchronize:
src: "{{ pki_backup_dir }}/"
dest: "{{ pki.location | regex_replace('\\/$', '') }}"
delegate_to: "{{ inventory_hostname }}"

- name: Fail certificates generation
fail:
msg: Certificates generation failed, restored an initial state

- name: Restart systemd services
when:
- services_to_restart is defined
- services_to_restart | difference(['docker', 'kubelet']) | length == 0
block:
- name: Restart services
systemd:
name: "{{ item }}"
state: restarted
loop: "{{ services_to_restart }}"

- name: Wait until cluster is available
command: kubectl cluster-info
retries: 60
delay: 1
register: result
until: result is succeeded and "running" in result.stdout
121 changes: 74 additions & 47 deletions ansible/playbooks/roles/kubernetes_master/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
---
- name: Set is_first_deployment fact
set_fact:
is_first_deployment: false

- when: use_ha_control_plane
block:
- name: Configure internal load-balancer (HAProxy)
Expand All @@ -28,71 +24,102 @@
- import_tasks: copy-kubernetes-pki.yml
- import_tasks: master-join.yml

- name: Collect current apiserver certificate 'not_after' date by openssl
command: openssl x509 -enddate -noout -in apiserver.crt
- name: Collect subject alternative names of apiserver certificate
# -ext option is not used as it requires a newer openssl version
shell: |-
set -o pipefail && \
openssl x509 -in apiserver.crt -text -noout \
| grep DNS:
args:
executable: /bin/bash
chdir: "{{ pki.location }}"
register: apiserver_certificate_info
changed_when: false
register: apiserver_certificate_san

- name: Check presence of each control plane address in SANs list
set_fact:
san_search_results: "{{ san_search_results | default([]) + [apiserver_certificate_san.stdout is search(item)] }}"
loop: >-
{{ (groups['kubernetes_master'] | map('extract', hostvars, ['ansible_default_ipv4', 'address']) | list)
+ (groups['kubernetes_master'] | map('extract', hostvars, ['ansible_host']) | list) }}

- name: Regenerate apiserver certificates
when: kubernetes_common.automation_designated_master != inventory_hostname or not is_first_deployment
# It's almost always necessary to regenerate apiserver certificates for designated and non-designated masters
# because of a few points:
# a. Updating certificates for old clusters has to be supported
# b. Execution order is not defined, so when cluster is promoted to HA,
# non-designated masters may join cluster before designated master's certificate update
when: "not (san_search_results is all)"
block:
# Executed on all hosts as /etc/kubeadm/kubeadm-config.yml is required for apiserver certificate generation
- name: Extend kubeadm config
vars:
update:
apiServer:
certSANs: >-
{{ (groups['kubernetes_master'] | map('extract', hostvars, ['ansible_default_ipv4', 'address']) | list)
+ (groups['kubernetes_master'] | map('extract', hostvars, ['ansible_host']) | list)
+ [ 'localhost', '127.0.0.1' ] | unique }}
+ [ '127.0.0.1', 'localhost' ] }}
include_role:
name: kubernetes_common
tasks_from: extend-kubeadm-config

- name: Backup and generate apiserver certificates with latest kubeadm config
include_tasks: apiserver-certificates.yml
- name: Update in-cluster configuration
run_once: true
include_role:
name: kubernetes_common
tasks_from: update-in-cluster-config

# kubeadm certs renewal uses the existing certificates as the authoritative source for attributes (Common Name, Organization, SAN, etc.)
# instead of the kubeadm-config ConfigMap, so it's not possible to combine this step with previous ones
# See https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/kubeadm-certs/#manual-certificate-renewal
- name: Update apiserver certificate expiration date
when: not (specification.advanced.certificates.renew | bool)
block:
- name: Regenerate apiserver certificate with previous expiration value
vars:
certificates_renewal_list:
- apiserver
valid_days: "{{ apiserver_certificate_info.stdout | openssl_date2days }}"
include_tasks: generate-certificates.yml
# When specification.advanced.certificates.renew is set to true, the certificate will be regenerated later
- name: Update apiserver certificate expiration date
when: not specification.advanced.certificates.renew
block:
- name: Collect current apiserver certificate 'not_after' date by openssl
command: openssl x509 -enddate -noout -in apiserver.crt
args:
chdir: "{{ pki.location }}"
register: apiserver_certificate_enddate
changed_when: false

- name: Restart apiserver
shell: |
docker ps \
--filter 'name=kube-apiserver_kube-apiserver' \
--format '{{ "{{.ID}}" }}' \
| xargs --no-run-if-empty docker kill
args:
executable: /bin/bash
- name: Regenerate apiserver certificate with previous expiration value
vars:
certificates_renewal_list:
- apiserver
valid_days: "{{ apiserver_certificate_enddate.stdout | openssl_date2days }}"
include_tasks: generate-certificates.yml

- name: Update in-cluster configuration
when: kubernetes_common.automation_designated_master == inventory_hostname
include_role:
name: kubernetes_common
tasks_from: update-in-cluster-config
- name: Restart apiserver
shell: |-
set -o pipefail && \
docker ps \
--filter 'name=kube-apiserver_kube-apiserver' \
--format '{{ "{{.ID}}" }}' \
| xargs --no-run-if-empty docker kill
args:
executable: /bin/bash

- name: Regenerate all certificates
when: specification.advanced.certificates.renew | bool
vars:
valid_days: "{{ specification.advanced.certificates.expiration_days }}"
services_to_restart:
- docker
include_tasks: generate-certificates.yml
when: specification.advanced.certificates.renew
block:
- name: Save kubeadm config to file
when:
- san_search_results is all # kubeadm config was not extended/saved for apiserver cert generation
vars:
dest_file: /etc/kubeadm/kubeadm-config.yml
include_role:
name: kubernetes_common
tasks_from: save-in-cluster-config

- name: Regenerate certificates
vars:
valid_days: "{{ specification.advanced.certificates.expiration_days }}"
services_to_restart:
- docker
include_tasks: generate-certificates.yml

# kubeadm-config.yml can appear not only on 'automation_designated_master' in 2 cases:
# - the number of control-plane nodes was changed -> apiserver certificate had to be updated
# - the file was created to renew all certificates
- name: Ensure kubeadm-config.yml exists only on 'automation_designated_master'
when: kubernetes_common.automation_designated_master != inventory_hostname
file:
path: /etc/kubeadm/kubeadm-config.yml
state: absent

- import_tasks: master-untaint.yml

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@

- when: not stat_kube_apiserver_yaml.stat.exists
block:
- name: Set is_first_deployment fact
set_fact:
is_first_deployment: true

- name: Ensure /etc/kubeadm/ directory exists
file:
path: /etc/kubeadm/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ controlPlaneEndpoint: "localhost:3446"
apiServer:
timeoutForControlPlane: 4m0s
certSANs:
{% set ip_address_list = ['127.0.0.1', 'localhost'] %}
{% set address_list = ['127.0.0.1', 'localhost'] %}
{% for host in groups['kubernetes_master'] %}
{% set _ = ip_address_list.extend([ hostvars[host]['ansible_default_ipv4']['address'], hostvars[host]['ansible_host'] ]) %}
{% set _ = address_list.extend([ hostvars[host]['ansible_default_ipv4']['address'], hostvars[host]['ansible_host'] ]) %}
{% endfor %}
{% for ip in ip_address_list|unique %}
- {{ ip }}
{% for address in address_list|unique %}
- {{ address }}
{% endfor %}
extraArgs: # https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/
{% if specification.advanced.etcd_args.encrypted | bool %}
Expand Down
45 changes: 16 additions & 29 deletions ansible/playbooks/roles/kubernetes_node/tasks/node-join.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
{{ hostvars[groups.kubernetes_master.0].ansible_default_ipv4.address }}:6443
{%- endif -%}
block:
- name: Creates directory
- name: Create kubeadm directory
file:
path: /etc/kubeadm/
state: directory
Expand All @@ -38,35 +38,22 @@
register: kubeadm_join_result

rescue:
- name: Attempt to join with kubeadm reset
when: kubeadm_join_result.stderr is search('/etc/kubernetes/kubelet.conf already exists')
block:
- name: Reset node
command: kubeadm reset --force

- <<: *soft-join

rescue:
- name: Join to cluster with ignores
command: |-
kubeadm join \
--config /etc/kubeadm/kubeadm-join-node.yml \
--ignore-preflight-errors all
register: kubeadm_join_result

atsikham marked this conversation as resolved.
Show resolved Hide resolved
always:
- name: Display kubeadm join stderr if any
- name: Display kubeadm join stderr
debug:
msg: |
Joined with warnings
Node join attempt failed:
{{ kubeadm_join_result.stderr_lines }}
when: kubeadm_join_result is failed

- name: Mark node regardless of join result
set_fact:
kubernetes_common: >-
{{ kubernetes_common | default({}) | combine(set_fact, recursive=true) }}
vars:
set_fact:
node_already_joined: >-
{{ kubeadm_join_result is succeeded }}
- name: Reset node
command: kubeadm reset --force

- <<: *soft-join

- name: Mark node as joined
set_fact:
kubernetes_common: >-
{{ kubernetes_common | default({}) | combine(set_fact, recursive=true) }}
vars:
set_fact:
node_already_joined: >-
{{ kubeadm_join_result is succeeded }}
Loading