Skip to content

Commit

Permalink
K8s improvements (hitachienergy#2918)
Browse files Browse the repository at this point in the history
  • Loading branch information
atsikham authored and rafzei committed Feb 8, 2022
1 parent 739bde7 commit 0d74d4a
Show file tree
Hide file tree
Showing 10 changed files with 189 additions and 138 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
# Saves the in-cluster kubeadm ClusterConfiguration to a local file.
#
# Required variables:
#   dest_file - path of the file to create on the target host

# Fail early with a clear message when the caller did not pass dest_file.
- name: Assert that dest_file variable is defined
  assert:
    that:
      - dest_file is defined
    fail_msg: Variable 'dest_file' must be defined

# Read-only query: extracts the ClusterConfiguration key from the kubeadm-config
# ConfigMap in the kube-system namespace. The jsonpath expression is kept in a
# task variable and interpolated into the command line.
- name: Collect kubeadm-config
  command: |-
    kubectl get configmap kubeadm-config \
      --namespace kube-system \
      --output jsonpath={{ jsonpath }}
  vars:
    jsonpath: >-
      '{.data.ClusterConfiguration}'
  register: kubeadm_config
  changed_when: false

# Write the collected configuration to dest_file, readable and writable by the owner only.
- name: Create {{ dest_file }}
  copy:
    dest: "{{ dest_file }}"
    mode: u=rw,go=
    content: >-
      {{ kubeadm_config.stdout }}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
---
- name: Generate certificates block
# Build the backup directory path: pki.location with a trailing '/' removed,
# suffixed with '-backup-' and the ansible_date_time.iso8601_basic_short timestamp.
- name: Set pki_backup_dir fact
set_fact:
pki_backup_dir: "{{ pki.location | regex_replace('\\/$', '') }}-backup-{{ ansible_date_time.iso8601_basic_short }}"

# Copy the contents of pki.location into pki_backup_dir before any certificate is regenerated.
# The 'Restore certificates' rescue task later in this file copies them back from pki_backup_dir.
- name: Back up current certificates
synchronize:
src: "{{ pki.location }}/"
dest: "{{ pki_backup_dir }}"
# NOTE(review): delegating to the host itself presumably makes synchronize copy
# between two paths on the target rather than from the controller - confirm.
delegate_to: "{{ inventory_hostname }}"

- name: Generate certificates
vars:
# https://kubernetes.io/docs/setup/best-practices/certificates/#all-certificates
_certificates_opt_mapping:
Expand Down Expand Up @@ -62,13 +72,6 @@
certificates_renewal_list: "{{ _certificates_opt_mapping | map(attribute='name') }}"
when: certificates_renewal_list is not defined

- name: Save old certificates
synchronize:
src: "{{ pki.location }}/"
dest: >-
{{ pki.location | regex_replace('\\/$', '') }}-backup-{{ ansible_date_time.iso8601_basic_short }}
delegate_to: "{{ inventory_hostname }}"

- name: Ensure necessary directories exist
file:
path: "{{ item }}"
Expand Down Expand Up @@ -195,20 +198,31 @@
- 'csr'
- 'ext'

- name: Restart systemd services
when:
- services_to_restart is defined
- services_to_restart | difference(['kubelet']) | length == 0
block:
- name: Restart services
systemd:
name: "{{ item }}"
state: restarted
loop: "{{ services_to_restart }}"

- name: Wait until cluster is available
command: kubectl cluster-info
retries: 50
delay: 1
register: result
until: result is succeeded and "running" in result.stdout
rescue:
- name: Restore certificates
synchronize:
src: "{{ pki_backup_dir }}/"
dest: "{{ pki.location | regex_replace('\\/$', '') }}"
delegate_to: "{{ inventory_hostname }}"

- name: Fail certificates generation
fail:
msg: Certificates generation failed, restored an initial state

# Restarts the services requested by the caller and waits for the cluster to respond.
#
# Optional variables:
#   services_to_restart - list of systemd unit names; the block runs only when
#                         the list is defined and contains nothing other than
#                         'docker' and/or 'kubelet'
- name: Restart systemd services
  when:
    - services_to_restart is defined
    - services_to_restart | difference(['docker', 'kubelet']) | length == 0
  block:
    - name: Restart services
      systemd:
        name: "{{ item }}"
        state: restarted
      loop: "{{ services_to_restart }}"

    # Poll once per second, up to 60 times, until kubectl succeeds and its
    # output contains "running".
    - name: Wait until cluster is available
      command: kubectl cluster-info
      retries: 60
      delay: 1
      register: result
      until: result is succeeded and "running" in result.stdout
      # Read-only query: never report this task as a change.
      changed_when: false
117 changes: 74 additions & 43 deletions ansible/playbooks/roles/kubernetes_master/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
---
- name: Set is_first_deployment fact
set_fact:
is_first_deployment: false

- when: use_ha_control_plane
block:
- name: Configure internal load-balancer (HAProxy)
Expand All @@ -28,67 +24,102 @@
- import_tasks: copy-kubernetes-pki.yml
- import_tasks: master-join.yml

- name: Collect current apiserver certificate 'not_after' date by openssl
command: openssl x509 -enddate -noout -in apiserver.crt
- name: Collect subject alternative names of apiserver certificate
# -ext option is not used as it requires a newer openssl version
shell: |-
set -o pipefail && \
openssl x509 -in apiserver.crt -text -noout \
| grep DNS:
args:
executable: /bin/bash
chdir: "{{ pki.location }}"
register: apiserver_certificate_info
changed_when: false
register: apiserver_certificate_san

# Builds san_search_results: one boolean per control plane address (default IPv4
# address and ansible_host of every host in the kubernetes_master group), true
# when that address is present in the SAN line collected from apiserver.crt.
#
# openssl prints each SAN entry as '<type>:<value>' separated by ', ', so the
# address is regex-escaped and anchored between ':' and ',' / ' ' / end of line.
# Without this, '.' matches any character and 10.0.0.1 would also match
# 10.0.0.10, which would wrongly skip certificate regeneration.
- name: Check presence of each control plane address in SANs list
  set_fact:
    san_search_results: >-
      {{ san_search_results | default([])
      + [apiserver_certificate_san.stdout is search(':' ~ (item | regex_escape) ~ '(,| |$)', multiline=true)] }}
  loop: >-
    {{ (groups['kubernetes_master'] | map('extract', hostvars, ['ansible_default_ipv4', 'address']) | list)
    + (groups['kubernetes_master'] | map('extract', hostvars, ['ansible_host']) | list) }}
- name: Regenerate apiserver certificates
when: kubernetes_common.automation_designated_master != inventory_hostname or not is_first_deployment
# It's almost always necessary to regenerate apiserver certificates for designated and non-designated masters
# because of a few points:
# a. Certificate updates for old clusters have to be supported
# b. Execution order is not defined, so when cluster is promoted to HA,
# non-designated masters may join cluster before designated master's certificate update
when: "not (san_search_results is all)"
block:
# Executed on all hosts as /etc/kubeadm/kubeadm-config.yml is required for apiserver certificate generation
- name: Extend kubeadm config
vars:
update:
apiServer:
certSANs: >-
{{ (groups['kubernetes_master'] | map('extract', hostvars, ['ansible_default_ipv4', 'address']) | list)
+ (groups['kubernetes_master'] | map('extract', hostvars, ['ansible_host']) | list)
+ [ 'localhost', '127.0.0.1' ] | unique }}
+ [ '127.0.0.1', 'localhost' ] }}
include_role:
name: kubernetes_common
tasks_from: extend-kubeadm-config

- name: Backup and generate apiserver certificates with latest kubeadm config
include_tasks: apiserver-certificates.yml
- name: Update in-cluster configuration
run_once: true
include_role:
name: kubernetes_common
tasks_from: update-in-cluster-config

# kubeadm certs renewal uses the existing certificates as the authoritative source for attributes (Common Name, Organization, SAN, etc.)
# instead of the kubeadm-config ConfigMap, so it's not possible to combine this step with previous ones
# See https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/kubeadm-certs/#manual-certificate-renewal
- name: Update apiserver certificate expiration date
when: not (specification.advanced.certificates.renew | bool)
block:
- name: Regenerate apiserver certificate with previous expiration value
vars:
certificates_renewal_list:
- apiserver
valid_days: "{{ apiserver_certificate_info.stdout | openssl_date2days }}"
include_tasks: generate-certificates.yml
# When specification.advanced.certificates.renew is set to true, certificate will be re-generated later
- name: Update apiserver certificate expiration date
when: not specification.advanced.certificates.renew
block:
- name: Collect current apiserver certificate 'not_after' date by openssl
command: openssl x509 -enddate -noout -in apiserver.crt
args:
chdir: "{{ pki.location }}"
register: apiserver_certificate_enddate
changed_when: false

- name: Restart apiserver
shell: |
crictl ps --name 'kube-apiserver' -q \
| xargs --no-run-if-empty crictl stop --timeout=0
args:
executable: /bin/bash
- name: Regenerate apiserver certificate with previous expiration value
vars:
certificates_renewal_list:
- apiserver
valid_days: "{{ apiserver_certificate_enddate.stdout | openssl_date2days }}"
include_tasks: generate-certificates.yml

- name: Update in-cluster configuration
when: kubernetes_common.automation_designated_master == inventory_hostname
include_role:
name: kubernetes_common
tasks_from: update-in-cluster-config
- name: Restart apiserver
shell: |-
set -o pipefail && \
docker ps \
--filter 'name=kube-apiserver_kube-apiserver' \
--format '{{ "{{.ID}}" }}' \
| xargs --no-run-if-empty docker kill
args:
executable: /bin/bash

- name: Regenerate all certificates
when: specification.advanced.certificates.renew | bool
vars:
valid_days: "{{ specification.advanced.certificates.expiration_days }}"
include_tasks: generate-certificates.yml
when: specification.advanced.certificates.renew
block:
- name: Save kubeadm config to file
when:
- san_search_results is all # kubeadm config was not extended/saved for apiserver cert generation
vars:
dest_file: /etc/kubeadm/kubeadm-config.yml
include_role:
name: kubernetes_common
tasks_from: save-in-cluster-config

- name: Regenerate certificates
vars:
valid_days: "{{ specification.advanced.certificates.expiration_days }}"
services_to_restart:
- docker
include_tasks: generate-certificates.yml

# kubeadm-config.yml may have been created on hosts other than 'automation_designated_master' in 2 cases:
# - the number of control-plane nodes was changed -> apiserver certificate had to be updated
# - the file was created to renew all certificates
# Remove /etc/kubeadm/kubeadm-config.yml on every host that is not the designated master.
- name: Ensure kubeadm-config.yml exists only on 'automation_designated_master'
when: kubernetes_common.automation_designated_master != inventory_hostname
file:
path: /etc/kubeadm/kubeadm-config.yml
state: absent

- import_tasks: master-untaint.yml

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@

- when: not stat_kube_apiserver_yaml.stat.exists
block:
- name: Set is_first_deployment fact
set_fact:
is_first_deployment: true

- name: Ensure /etc/kubeadm/ directory exists
file:
path: /etc/kubeadm/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ controlPlaneEndpoint: "localhost:3446"
apiServer:
timeoutForControlPlane: 4m0s
certSANs:
{% set ip_address_list = ['127.0.0.1', 'localhost'] %}
{% set address_list = ['127.0.0.1', 'localhost'] %}
{% for host in groups['kubernetes_master'] %}
{% set _ = ip_address_list.extend([ hostvars[host]['ansible_default_ipv4']['address'], hostvars[host]['ansible_host'] ]) %}
{% set _ = address_list.extend([ hostvars[host]['ansible_default_ipv4']['address'], hostvars[host]['ansible_host'] ]) %}
{% endfor %}
{% for ip in ip_address_list|unique %}
- {{ ip }}
{% for address in address_list|unique %}
- {{ address }}
{% endfor %}
extraArgs: # https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/
{% if specification.advanced.etcd_args.encrypted | bool %}
Expand Down
45 changes: 16 additions & 29 deletions ansible/playbooks/roles/kubernetes_node/tasks/node-join.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
{{ hostvars[groups.kubernetes_master.0].ansible_default_ipv4.address }}:6443
{%- endif -%}
block:
- name: Creates directory
- name: Create kubeadm directory
file:
path: /etc/kubeadm/
state: directory
Expand All @@ -38,35 +38,22 @@
register: kubeadm_join_result

rescue:
- name: Attempt to join with kubeadm reset
when: kubeadm_join_result.stderr is search('/etc/kubernetes/kubelet.conf already exists')
block:
- name: Reset node
command: kubeadm reset --force

- <<: *soft-join

rescue:
- name: Join to cluster with ignores
command: |-
kubeadm join \
--config /etc/kubeadm/kubeadm-join-node.yml \
--ignore-preflight-errors all
register: kubeadm_join_result

always:
- name: Display kubeadm join stderr if any
- name: Display kubeadm join stderr
debug:
msg: |
Joined with warnings
Node join attempt failed:
{{ kubeadm_join_result.stderr_lines }}
when: kubeadm_join_result is failed
- name: Mark node regardless of join result
set_fact:
kubernetes_common: >-
{{ kubernetes_common | default({}) | combine(set_fact, recursive=true) }}
vars:
set_fact:
node_already_joined: >-
{{ kubeadm_join_result is succeeded }}
- name: Reset node
command: kubeadm reset --force

- <<: *soft-join

- name: Mark node as joined
set_fact:
kubernetes_common: >-
{{ kubernetes_common | default({}) | combine(set_fact, recursive=true) }}
vars:
set_fact:
node_already_joined: >-
{{ kubeadm_join_result is succeeded }}
Loading

0 comments on commit 0d74d4a

Please sign in to comment.