From f1a6a13cf87eb00cb8ab1566a77259041b06f541 Mon Sep 17 00:00:00 2001
From: Przemek
Date: Mon, 8 Jul 2019 16:24:10 +0200
Subject: [PATCH] Fixed upgrade and recovery roles

recovery role:
  * ignore the NumCPU preflight check when re-initializing the master
  * wait until every node reports Ready, retrying while kubectl itself
    still fails, and record the server version
  * on a 1.13 cluster, regenerate the etcd server certificates when
    `kubeadm upgrade plan` fails (kubernetes/kubeadm issue 1471)

upgrade role:
  * apply the same etcd certificate workaround before the upgrade plan
  * include wait.yml instead of an inline `kubectl cluster-info` loop
  * retry `kubectl uncordon` up to 5 times, 5 seconds apart
  * wait.yml first waits for `kubectl cluster-info` to report "running"

---
 .../src/ansible/roles/recovery/tasks/main.yml | 44 +++++++++++++++++--
 .../roles/upgrade/tasks/upgrade_master.yml    | 37 +++++++++++++---
 .../roles/upgrade/tasks/upgrade_nodes.yml     | 12 +++--
 .../src/ansible/roles/upgrade/tasks/wait.yml  |  9 ++++
 4 files changed, 85 insertions(+), 17 deletions(-)

diff --git a/core/core/src/ansible/roles/recovery/tasks/main.yml b/core/core/src/ansible/roles/recovery/tasks/main.yml
index 50ff99e609..48d47ce37a 100644
--- a/core/core/src/ansible/roles/recovery/tasks/main.yml
+++ b/core/core/src/ansible/roles/recovery/tasks/main.yml
@@ -61,11 +61,49 @@
     /bin/sh -c "etcdctl snapshot restore '/backup/etcd-snapshot.db'; mv /default.etcd/member/ /var/lib/etcd/"
 
 - name: Initialize the master with backup
-  shell: kubeadm init --ignore-preflight-errors=DirAvailable--var-lib-etcd
+  shell: kubeadm init --ignore-preflight-errors=DirAvailable--var-lib-etcd,NumCPU
+
+- name: Wait for all nodes to be ready
+  environment:
+    KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
+  shell: kubectl get nodes -o json
+  register: output
+  until: output is succeeded and (output.stdout|from_json|json_query("items[*].status.conditions[?(@.type=='Ready')].status[]")|unique == ["True"])
+  retries: 120
+  delay: 10
+
+- name: Check cluster version
+  environment:
+    KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
+  shell: kubectl version --short | grep -i server
+  register: cluster_version
+
+# Workaround for https://github.com/kubernetes/kubeadm/issues/1471 (upgrading a 1.12 cluster through 1.13 to 1.14 fails)
+
+- name: Validate whether current cluster is upgradeable (from ver. 1.13)
+  # If the plan fails, the rescue section recreates the etcd server certificates.
+  block:
+    - name: Show upgrade plan
+      shell: kubeadm upgrade plan
+      when: '"1.13" in cluster_version.stdout'
+  rescue:
+    - name: Find the existing etcd server certificates
+      find:
+        paths: /etc/kubernetes/pki/etcd
+        patterns: "*server.*"
+      register: files_to_delete
+
+    - name: Remove the existing etcd server certificates
+      file:
+        path: "{{ item.path }}"
+        state: absent
+      with_items: "{{ files_to_delete.files }}"
+
+    - name: Regenerate the etcd server certificates
+      shell: kubeadm init phase certs etcd-server
+
 
 - name: Clean temporary directory
   file:
     state: absent
     path: "{{ backup_dir }}/tmp/"
-
-
diff --git a/core/core/src/ansible/roles/upgrade/tasks/upgrade_master.yml b/core/core/src/ansible/roles/upgrade/tasks/upgrade_master.yml
index 6f00a8c357..ba4e8c63f3 100644
--- a/core/core/src/ansible/roles/upgrade/tasks/upgrade_master.yml
+++ b/core/core/src/ansible/roles/upgrade/tasks/upgrade_master.yml
@@ -55,6 +55,31 @@
 - name: Wait for the cluster's readiness
   include_tasks: "wait.yml"
 
+# Workaround for https://github.com/kubernetes/kubeadm/issues/1471 (upgrading a 1.12 cluster through 1.13 to 1.14 fails)
+
+- name: Validate whether current cluster is upgradeable (from ver. 1.13)
+  # If the plan fails, the rescue section recreates the etcd server certificates.
+  block:
+    - name: Show upgrade plan
+      shell: kubeadm upgrade plan v{{ version }}
+      when: '"1.13" in cluster_version.stdout'
+  # NOTE(review): cluster_version is not registered in the visible hunks of this file - confirm it is set earlier.
+  rescue:
+    - name: Find the existing etcd server certificates
+      find:
+        paths: /etc/kubernetes/pki/etcd
+        patterns: "*server.*"
+      register: files_to_delete
+
+    - name: Remove the existing etcd server certificates
+      file:
+        path: "{{ item.path }}"
+        state: absent
+      with_items: "{{ files_to_delete.files }}"
+
+    - name: Regenerate the etcd server certificates
+      shell: kubeadm init phase certs etcd-server
+
 - name: Validate whether current cluster is upgradeable
   shell: kubeadm upgrade plan v{{ version }}
 
@@ -78,18 +103,16 @@
   shell: systemctl status kubelet
 
 - name: Wait for the cluster's readiness
-  environment:
-    KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
-  shell: kubectl cluster-info
-  retries: 10
-  delay: 10
-  register: output
-  until: output is succeeded
+  include_tasks: "wait.yml"
 
 - name: Uncordon master - mark master as schedulable
   environment:
     KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
   shell: kubectl uncordon {{ inventory_hostname }}
+  retries: 5
+  delay: 5
+  register: output
+  until: output is succeeded
 
 - name: Verify cluster version
   include_tasks: "verify.yml"
diff --git a/core/core/src/ansible/roles/upgrade/tasks/upgrade_nodes.yml b/core/core/src/ansible/roles/upgrade/tasks/upgrade_nodes.yml
index bceacccff2..614a929de3 100644
--- a/core/core/src/ansible/roles/upgrade/tasks/upgrade_nodes.yml
+++ b/core/core/src/ansible/roles/upgrade/tasks/upgrade_nodes.yml
@@ -66,18 +66,16 @@
   shell: systemctl status kubelet
 
 - name: Wait for the cluster's readiness
-  environment:
-    KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
-  shell: kubectl cluster-info
-  retries: 10
-  delay: 10
-  register: output
-  until: output is succeeded
+  include_tasks: "wait.yml"
 
 - name: Uncordon node - mark node as schedulable
   environment:
     KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
   shell: kubectl uncordon {{ inventory_hostname }}
+  retries: 5
+  delay: 5
+  register: output
+  until: output is succeeded
   delegate_to: "{{ groups['master'][0] }}"
 
 - name: Verify cluster version
diff --git a/core/core/src/ansible/roles/upgrade/tasks/wait.yml b/core/core/src/ansible/roles/upgrade/tasks/wait.yml
index 513a656b19..3019f114a9 100644
--- a/core/core/src/ansible/roles/upgrade/tasks/wait.yml
+++ b/core/core/src/ansible/roles/upgrade/tasks/wait.yml
@@ -1,4 +1,13 @@
 ---
+- name: Wait for kubectl to find and access a Kubernetes cluster
+  environment:
+    KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
+  shell: kubectl cluster-info
+  retries: 10
+  delay: 10
+  register: output
+  until: output is succeeded and "running" in output.stdout
+
 - name: Wait for all nodes to be ready
   environment:
     KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"