Kubernetes HA upgrades #1456

Merged 19 commits on Jul 23, 2020

1 change: 1 addition & 0 deletions CHANGELOG-0.7.md
@@ -13,6 +13,7 @@
- [#1399](https://github.com/epiphany-platform/epiphany/issues/1399) - Epicli upgrade: Kubernetes upgrade may hang
- [#1398](https://github.com/epiphany-platform/epiphany/issues/1398) - Vault installation fails when using canal/calico network plugin
- [#1412](https://github.com/epiphany-platform/epiphany/issues/1412) - Certificate in Vault is also generated or copied even if flag in configuration tls_disable is set to true
- [#1408](https://github.com/epiphany-platform/epiphany/issues/1408) - Epiphany does not support upgrades for Kubernetes in HA mode

### Added

@@ -108,4 +108,4 @@ def upgrade(self):
# save new inventory
save_inventory(new_inventory, self.cluster_model, self.build_dir)

return 0
return 0
42 changes: 35 additions & 7 deletions core/src/epicli/cli/engine/ansible/AnsibleVarsGenerator.py
@@ -2,13 +2,15 @@
import copy

from cli.helpers.Step import Step
from cli.helpers.build_saver import get_ansible_path, get_ansible_path_for_build, get_ansible_vault_path
from cli.helpers.doc_list_helpers import select_first
from cli.helpers.build_saver import get_ansible_path, get_ansible_path_for_build, get_ansible_vault_path, MANIFEST_FILE_NAME
from cli.helpers.doc_list_helpers import select_first, select_single
from cli.helpers.naming_helpers import to_feature_name, to_role_name
from cli.helpers.ObjDict import ObjDict
from cli.helpers.yaml_helpers import dump
from cli.helpers.Config import Config
from cli.helpers.data_loader import load_yaml_obj, types, load_all_documents_from_folder
from cli.helpers.data_loader import load_yaml_obj, types, load_yamls_file, load_all_documents_from_folder

from cli.engine.schema.DefaultMerger import DefaultMerger


class AnsibleVarsGenerator(Step):
@@ -93,10 +95,14 @@ def populate_group_vars(self, ansible_dir):
main_vars['is_upgrade_run'] = self.is_upgrade_run
main_vars['roles_with_generated_vars'] = sorted(self.roles_with_generated_vars)

shared_config_doc = select_first(self.config_docs, lambda x: x.kind == 'configuration/shared-config')
if shared_config_doc == None:
if self.is_upgrade_run:
shared_config_doc = self.get_shared_config_from_manifest()
else:
shared_config_doc = select_first(self.config_docs, lambda x: x.kind == 'configuration/shared-config')

if shared_config_doc is None:
shared_config_doc = load_yaml_obj(types.DEFAULT, 'common', 'configuration/shared-config')

self.set_vault_path(shared_config_doc)
main_vars.update(shared_config_doc.specification)

@@ -115,7 +121,7 @@ def set_vault_path(self, shared_config):
shared_config.specification.vault_tmp_file_location = Config().vault_password_location
cluster_name = self.get_cluster_name()
shared_config.specification.vault_location = get_ansible_vault_path(cluster_name)

def get_cluster_name(self):
if 'name' in self.cluster_model.specification.keys():
return self.cluster_model.specification.name
@@ -128,6 +134,28 @@ def get_clean_cluster_model(self):
self.clear_object(cluster_model, 'credentials')
return cluster_model

def get_shared_config_from_manifest(self):
# Reuse shared config from existing manifest
# Shared config contains the use_ha_control_plane flag which is required during upgrades

path_to_manifest = os.path.join(self.inventory_upgrade.build_dir, MANIFEST_FILE_NAME)
if not os.path.isfile(path_to_manifest):
raise Exception('No manifest.yml inside the build folder')

manifest_docs = load_yamls_file(path_to_manifest)

cluster_model = select_single(manifest_docs, lambda x: x.kind == 'epiphany-cluster')

shared_config_doc = select_single(manifest_docs, lambda x: x.kind == 'configuration/shared-config')
shared_config_doc['provider'] = cluster_model['provider']

# Merge the shared config doc with defaults
with DefaultMerger([shared_config_doc]) as doc_merger:
shared_config_doc = doc_merger.run()[0]
del shared_config_doc['provider']

return shared_config_doc

def clear_object(self, obj_to_clean, key_to_clean):
for key, val in obj_to_clean.items():
if key == key_to_clean:
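
For orientation, the new get_shared_config_from_manifest method above reads the manifest.yml that epicli saved in the build folder and picks two documents out of it. A rough sketch of how those documents could look is given below; all field values are illustrative examples, not taken from this PR.

# Illustrative excerpt of the manifest.yml read during upgrades (example values only)
kind: epiphany-cluster
provider: azure                  # copied onto the shared-config doc before merging defaults
name: default
specification:
  name: example-cluster
---
kind: configuration/shared-config
specification:
  use_ha_control_plane: true     # the flag the upgrade needs, per the comment in the method
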
@@ -16,3 +16,5 @@ kubelet_custom_config:
systemReserved:
cpu: 50m
memory: 768Mi # based on RedHat 7.5 on Standard_DS1_v2 Azure VM with =~ 30 pods

epiphany_manifests_dir: /etc/epiphany/manifests
@@ -5,9 +5,8 @@
include_tasks: deployments/deploy-template.yml

- name: Check if kubernetes-dashboard is already deployed
become_user: "{{ admin_user.name }}"
environment:
KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
KUBECONFIG: /etc/kubernetes/admin.conf
shell: |
kubectl get pods \
--namespace kubernetes-dashboard \
@@ -1,25 +1,7 @@
---
- name: Apply network plugin configured by user
include_tasks: "./cni-plugins/{{ network_plugin }}.yml"
include_tasks: cni-plugins/{{ network_plugin }}.yml

# Wait for CNI plugin become ready to prevent failure of 'Get token from master' task on node before joining master
- name: Wait for CNI plugin become ready
shell: >-
kubectl wait --for=condition=Ready pods -l {{ selectors[network_plugin] }}
--field-selector=spec.nodeName=$(hostname --long) -n kube-system --timeout=10s
args:
executable: /bin/bash
environment:
KUBECONFIG: /home/{{ admin_user.name }}/.kube/config
register: wait_for_cni_plugin
until: wait_for_cni_plugin is succeeded
retries: 30
delay: 1
changed_when: false
vars:
selectors:
calico: k8s-app=calico-node
canal: k8s-app=canal
flannel: app=flannel
when:
- network_plugin in selectors.keys()
# Wait for CNI plugin to become ready to prevent failure of 'Get token from master' task on node before joining master
- name: Include wait-for-cni-plugin.yml
include_tasks: cni-plugins/wait-for-cni-plugin.yml
@@ -47,5 +47,5 @@

- name: Apply calico definition
environment:
KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
command: kubectl apply -f /home/{{ admin_user.name }}/calico.yml
KUBECONFIG: /etc/kubernetes/admin.conf
command: kubectl apply -f /home/{{ admin_user.name }}/calico.yml
@@ -15,5 +15,5 @@

- name: Apply canal deployment
environment:
KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
command: kubectl apply -f /home/{{ admin_user.name }}/canal.yml
KUBECONFIG: /etc/kubernetes/admin.conf
command: kubectl apply -f /home/{{ admin_user.name }}/canal.yml
@@ -8,5 +8,5 @@

- name: Apply flannel definition
environment:
KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
command: kubectl apply -f /home/{{ admin_user.name }}/kube-flannel.yml
KUBECONFIG: /etc/kubernetes/admin.conf
command: kubectl apply -f /home/{{ admin_user.name }}/kube-flannel.yml
@@ -0,0 +1,21 @@
---
# This file is meant to be also used by upgrade role

- name: Wait for CNI plugin to become ready
environment:
KUBECONFIG: /etc/kubernetes/admin.conf
shell: >-
kubectl wait --for=condition=Ready pods -l {{ selectors[network_plugin] }}
--field-selector=spec.nodeName=$(hostname --long) -n kube-system --timeout=10s
args:
executable: /bin/bash
register: wait_for_cni_plugin
until: wait_for_cni_plugin is succeeded
retries: 30
delay: 1
changed_when: false
vars:
selectors:
calico: k8s-app=calico-node
canal: k8s-app=canal
flannel: app=flannel
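
The comment at the top of this new file notes that it is also meant to be reused by the upgrade role. A hypothetical include from an upgrade task could look like the sketch below; the role name and the network_plugin value are assumptions for illustration, not taken from this PR.

- name: upgrade | Wait for CNI plugin to become ready
  include_role:
    name: kubernetes_master                          # assumed role owning cni-plugins/
    tasks_from: cni-plugins/wait-for-cni-plugin.yml
  vars:
    network_plugin: calico                           # illustrative; normally set from cluster configuration
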
@@ -1,9 +1,6 @@
---
- name: "Apply /etc/epiphany/manifests/{{ file_name }} file"
- name: Apply {{ file_path }} file
environment:
KUBECONFIG: "/home/{{ admin_user.name }}/.kube/config"
shell: |
kubectl apply \
-f /etc/epiphany/manifests/{{ file_name }}
args:
executable: /bin/bash
KUBECONFIG: /etc/kubernetes/admin.conf
shell: >-
kubectl apply -f {{ file_path }}
@@ -2,20 +2,22 @@
- name: Create directory for files
become: true
file:
path: /etc/epiphany/manifests
path: "{{ epiphany_manifests_dir }}"
state: directory
owner: root
group: root
mode: u=rw,go=r
mode: u=rwx,go=r

- name: "Copy {{ file_name }}"
- name: Upload {{ file_name }} file
become: true
copy:
src: "{{ file_name }}"
dest: "/etc/epiphany/manifests/{{ file_name }}"
dest: "{{ epiphany_manifests_dir }}/{{ file_name }}"
owner: "{{ admin_user.name }}"
group: "{{ admin_user.name }}"
mode: u=rw,go=r

- name: Apply file
include_tasks: apply-file.yml
vars:
file_path: "{{ epiphany_manifests_dir }}/{{ file_name }}"
@@ -2,20 +2,22 @@
- name: Create directory for files
become: true
file:
path: /etc/epiphany/manifests
path: "{{ epiphany_manifests_dir }}"
state: directory
owner: root
group: root
mode: u=rwx,go=r

- name: "Upload {{ file_name }} file"
- name: Upload {{ file_name }} file
become: true
template:
src: "{{ file_name }}"
dest: "/etc/epiphany/manifests/{{ file_name }}"
dest: "{{ epiphany_manifests_dir }}/{{ file_name | regex_replace('.j2$') }}"
owner: "{{ admin_user.name }}"
group: "{{ admin_user.name }}"
mode: u=rw,go=r

- name: Apply file
include_tasks: apply-file.yml
vars:
file_path: "{{ epiphany_manifests_dir }}/{{ file_name | regex_replace('.j2$') }}"

This file was deleted.

@@ -8,32 +8,32 @@
- name: Reconfigure Docker for pulling images from local registry
block:
- name: image-registry | Drain node in preparation for Docker reconfiguration
include_tasks: kubernetes/node/drain.yml
include_tasks: kubernetes/utils/drain.yml
when:
- groups['kubernetes_node'] is defined
- inventory_hostname in groups['kubernetes_node']
- groups.kubernetes_node is defined
- inventory_hostname in groups.kubernetes_node

- name: image-registry | Wait for cluster's readiness
include_tasks: kubernetes/wait.yml
include_tasks: kubernetes/utils/wait.yml
when:
- groups['kubernetes_node'] is defined
- inventory_hostname in groups['kubernetes_node']
- groups.kubernetes_node is defined
- inventory_hostname in groups.kubernetes_node

- name: image-registry | Reconfigure Docker if necessary # this restarts Docker daemon
include_role:
name: docker
tasks_from: configure-docker

- name: Include wait-for-kube-apiserver.yml
include_tasks: kubernetes/wait-for-kube-apiserver.yml
include_tasks: kubernetes/utils/wait-for-kube-apiserver.yml
when:
- inventory_hostname in groups['kubernetes_master']
- inventory_hostname in groups.kubernetes_master

- name: image-registry | Uncordon node - mark node as schedulable
include_tasks: kubernetes/node/uncordon.yml
include_tasks: kubernetes/utils/uncordon.yml
when:
- groups['kubernetes_node'] is defined
- inventory_hostname in groups['kubernetes_node']
- groups.kubernetes_node is defined
- inventory_hostname in groups.kubernetes_node

when:
- not image_registry_address in result.stdout
- not image_registry_address in result.stdout
@@ -1,37 +1,48 @@
---
- name: Include wait-for-kube-apiserver.yml
import_tasks: kubernetes/wait-for-kube-apiserver.yml
delegate_to: "{{ groups['kubernetes_master'][0] }}"
- name: k8s | Wait for kube-apiserver then get cluster and kubelet version
delegate_to: "{{ groups.kubernetes_master[0] }}"
block:
- name: k8s | Include wait-for-kube-apiserver.yml
import_tasks: kubernetes/utils/wait-for-kube-apiserver.yml

- name: Include get-cluster-version.yml
import_tasks: kubernetes/get-cluster-version.yml # sets cluster_version
delegate_to: "{{ groups['kubernetes_master'][0] }}"
- name: k8s | Include get-cluster-version.yml
import_tasks: kubernetes/get-cluster-version.yml # sets cluster_version

- name: Check if upgrade from current K8s version is supported
- name: k8s | Check if upgrade from current K8s version is supported
assert:
that: cluster_version is version('v1.14.6', '>=')
fail_msg: Your Kubernetes version ({{ cluster_version }}) is not supported by this version of Epiphany which requires at least version 1.14.6 (Epiphany v0.4.4). For more information, refer to the documentation.
quiet: true

- name: Include get-kubelet-version.yml
- name: k8s | Include get-kubelet-version.yml
import_tasks: kubernetes/get-kubelet-version.yml # sets kubelet_version
delegate_to: "{{ groups['kubernetes_master'][0] }}"

- name: Upgrade master to v{{ version }}
include_tasks: kubernetes/upgrade-master.yml
- name: k8s | Upgrade masters then nodes
vars:
version: "{{ ver }}"
cni_version: "{{ cni_ver }}"
when:
- groups['kubernetes_master'][0] == inventory_hostname
- cluster_version is version('v' + version, '<=')
block:
- name: k8s | Upgrade masters
when: cluster_version is version('v' + version, '<=')
block:
- name: k8s | Upgrade first master to v{{ version }}
include_tasks: kubernetes/upgrade-master0.yml
when:
- inventory_hostname == groups.kubernetes_master[0]

- name: Upgrade node to v{{ version }}
include_tasks: kubernetes/upgrade-node.yml
vars:
version: "{{ ver }}"
cni_version: "{{ cni_ver }}"
when:
- groups['kubernetes_node'] is defined
- inventory_hostname in groups['kubernetes_node']
- kubelet_version is version('v' + version, '<=')
- name: k8s | Upgrade next master to v{{ version }}
include_tasks: kubernetes/upgrade-masterN.yml
when:
- inventory_hostname in groups.kubernetes_master[1:]

- name: k8s | Upgrade nodes
when: kubelet_version is version('v' + version, '<=')
block:
- name: k8s | Upgrade node to v{{ version }}
include_tasks: kubernetes/upgrade-node.yml
when:
- groups.kubernetes_node is defined
- inventory_hostname in groups.kubernetes_node

# TODO: Create a flag file that the upgrade completed to not run it again for the same version next time
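
Regarding the TODO above, one possible shape for such a completion flag is sketched below; the flag path and task wording are assumptions for illustration, not part of this PR.

# Hypothetical follow-up: record on the host that the upgrade to this version finished
- name: k8s | Mark upgrade to v{{ version }} as completed
  become: true
  copy:
    content: "{{ version }}"
    dest: /var/lib/epiphany/k8s-upgrade-v{{ version }}.done   # assumed flag location
    mode: u=rw,go=r

# A later run could then stat this file and add "not flag.stat.exists" to the upgrade
# blocks' when: conditions so the same version is not upgraded twice.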