Skip to content

Commit

Permalink
Merge pull request #337 from przemyslawdy/back-upgr-recov
Browse files Browse the repository at this point in the history
Fixed upgrade and recovery roles
  • Loading branch information
toszo authored Jul 9, 2019
2 parents c13b0ab + f1a6a13 commit 2879d3b
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 17 deletions.
44 changes: 41 additions & 3 deletions core/core/src/ansible/roles/recovery/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,49 @@
/bin/sh -c "etcdctl snapshot restore '/backup/etcd-snapshot.db'; mv /default.etcd/member/ /var/lib/etcd/"
# Re-initialise the control plane with kubeadm after the etcd snapshot has
# been restored into /var/lib/etcd. The DirAvailable--var-lib-etcd and NumCPU
# preflight errors are ignored so that init proceeds on the restored data dir.
# The task previously carried two `shell` keys; only the last one is kept,
# because duplicate mapping keys are invalid YAML (most parsers take the last).
- name: Initialize the master with backup
  shell: kubeadm init --ignore-preflight-errors=DirAvailable--var-lib-etcd,NumCPU

# Poll the node list until the unique set of "Ready" condition statuses across
# all nodes equals ["True"]. Up to 120 attempts, 10 seconds apart.
- name: Wait for all nodes to be ready
  environment:
    KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
  # No pipes or redirections are involved, so `command` replaces `shell`.
  command: kubectl get nodes -o json
  register: output
  # Read-only query: never report this task as "changed".
  changed_when: false
  until: output.stdout|from_json|json_query("items[*].status.conditions[?(@.type=='Ready')].status[]")|unique == ["True"]
  retries: 120
  delay: 10

# Capture the server line of `kubectl version --short` in `cluster_version`
# so later tasks can test which Kubernetes version the cluster is running.
- name: Check cluster version
  environment:
    KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
  # The pipe to grep requires the `shell` module here.
  shell: kubectl version --short | grep -i server
  register: cluster_version
  # Read-only query: never report this task as "changed".
  changed_when: false

# https://github.com/kubernetes/kubeadm/issues/1471 Upgrading a 1.12 cluster thru 1.13 to 1.14 fails

# Workaround for kubeadm issue #1471: run `kubeadm upgrade plan` when the
# registered cluster version contains "1.13". If that task fails, the rescue
# section removes the etcd server certificates and has kubeadm regenerate them.
- name: Validate whether current cluster is upgradeable (from ver. 1.13)

block:
- name: Show upgrade plan
shell: kubeadm upgrade plan
# cluster_version is the registered output of the "Check cluster version" task.
when: '"1.13" in cluster_version.stdout'

rescue:
# Collect every file under /etc/kubernetes/pki/etcd whose name matches "*server.*".
- name: Find the existing etcd server certificates
find:
paths: /etc/kubernetes/pki/etcd
patterns: "*server.*"
register: files_to_delete

# Delete each file found by the previous task.
- name: Remove the existing etcd server certificates
file:
path: "{{ item.path }}"
state: absent
with_items: "{{ files_to_delete.files }}"

# Have kubeadm create a fresh etcd server certificate.
- name: Regenerate the etcd server certificates
shell: kubeadm init phase certs etcd-server

# Delete the tmp/ directory located under the backup directory.
- name: Clean temporary directory
  file:
    path: "{{ backup_dir }}/tmp/"
    state: absent


37 changes: 30 additions & 7 deletions core/core/src/ansible/roles/upgrade/tasks/upgrade_master.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,31 @@
# Run the tasks from wait.yml before continuing with the upgrade.
- name: Wait for the cluster's readiness
  include_tasks: wait.yml

# https://github.com/kubernetes/kubeadm/issues/1471 Upgrading a 1.12 cluster thru 1.13 to 1.14 fails

# Workaround for kubeadm issue #1471: run `kubeadm upgrade plan` for the target
# version when the registered cluster version contains "1.13". If that task
# fails, the rescue section removes the etcd server certificates and has
# kubeadm regenerate them.
- name: Validate whether current cluster is upgradeable (from ver. 1.13)

block:
- name: Show upgrade plan
shell: kubeadm upgrade plan v{{ version }}
# NOTE(review): cluster_version is not registered in the visible part of this
# file - confirm it is set before this task runs.
when: '"1.13" in cluster_version.stdout'

rescue:
# Collect every file under /etc/kubernetes/pki/etcd whose name matches "*server.*".
- name: Find the existing etcd server certificates
find:
paths: /etc/kubernetes/pki/etcd
patterns: "*server.*"
register: files_to_delete

# Delete each file found by the previous task.
- name: Remove the existing etcd server certificates
file:
path: "{{ item.path }}"
state: absent
with_items: "{{ files_to_delete.files }}"

# Have kubeadm create a fresh etcd server certificate.
- name: Regenerate the etcd server certificates
shell: kubeadm init phase certs etcd-server

# Ask kubeadm for the upgrade plan towards the requested target version.
- name: Validate whether current cluster is upgradeable
  shell: >-
    kubeadm upgrade plan v{{ version }}

Expand All @@ -78,18 +103,16 @@
shell: systemctl status kubelet

# Delegate the readiness wait to wait.yml, whose first task performs the
# `kubectl cluster-info` retry loop that used to be inlined here. A task may
# carry only one action, so the leftover `shell` action and its retry
# settings are dropped in favour of the include.
- name: Wait for the cluster's readiness
  include_tasks: "wait.yml"

# Mark the current host as schedulable again. The command is retried until it
# succeeds, at most 5 times with a 5 second pause between attempts.
- name: Uncordon master - mark master as schedulable
  shell: kubectl uncordon {{ inventory_hostname }}
  environment:
    KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
  register: output
  until: output is succeeded
  retries: 5
  delay: 5

# Run the tasks from verify.yml.
- name: Verify cluster version
  include_tasks: verify.yml
12 changes: 5 additions & 7 deletions core/core/src/ansible/roles/upgrade/tasks/upgrade_nodes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,16 @@
shell: systemctl status kubelet

# Delegate the readiness wait to wait.yml, whose first task performs the
# `kubectl cluster-info` retry loop that used to be inlined here. A task may
# carry only one action, so the leftover `shell` action and its retry
# settings are dropped in favour of the include.
- name: Wait for the cluster's readiness
  include_tasks: "wait.yml"

# Mark the current node as schedulable again. kubectl is executed on the first
# host of the "master" group, and the command is retried until it succeeds,
# at most 5 times with a 5 second pause between attempts.
- name: Uncordon node - mark node as schedulable
  shell: kubectl uncordon {{ inventory_hostname }}
  delegate_to: "{{ groups['master'][0] }}"
  environment:
    KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
  register: output
  until: output is succeeded
  retries: 5
  delay: 5

- name: Verify cluster version
Expand Down
9 changes: 9 additions & 0 deletions core/core/src/ansible/roles/upgrade/tasks/wait.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
---
# Retry `kubectl cluster-info` until it exits successfully and its output
# contains "running". Up to 10 attempts, 10 seconds apart.
- name: Wait for kubectl to find and access a Kubernetes cluster
  environment:
    KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
  # No pipes or redirections are involved, so `command` replaces `shell`.
  command: kubectl cluster-info
  register: output
  # Read-only probe: never report this task as "changed".
  changed_when: false
  until: output is succeeded and "running" in output.stdout
  retries: 10
  delay: 10

- name: Wait for all nodes to be ready
environment:
KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
Expand Down

0 comments on commit 2879d3b

Please sign in to comment.