Skip to content

Commit

Permalink
Stopping control plane on HA (#1406)
Browse files Browse the repository at this point in the history
  • Loading branch information
ktsakalozos committed Jul 22, 2020
1 parent e7653bf commit 8cf2616
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 16 deletions.
12 changes: 12 additions & 0 deletions microk8s-resources/wrappers/control-plane-kicker
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash

# Wrapper for the control-plane-kicker daemon: prepares the snap's PATH and
# library search path, then hands over to the Python implementation.

# Abort on the first failing command and on any use of an unset variable
# (e.g. when $SNAP is not provided by the environment).
set -eu

export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
ARCH="$("$SNAP/bin/uname" -m)"
export IN_SNAP_LD_LIBRARY_PATH="$SNAP/lib:$SNAP/usr/lib:$SNAP/lib/$ARCH-linux-gnu:$SNAP/usr/lib/$ARCH-linux-gnu"
# NOTE(review): Python appears to treat any non-empty PYTHONNOUSERSITE value as
# "set", so "false" still disables the user site-packages directory - confirm.
export PYTHONNOUSERSITE=false

source "$SNAP/actions/common/utils.sh"

# "$@" is quoted so that every argument reaches the Python script unchanged,
# without word splitting or glob expansion.
LD_LIBRARY_PATH="$IN_SNAP_LD_LIBRARY_PATH" "${SNAP}/usr/bin/python3" "${SNAP}/scripts/wrappers/control-plane-kicker.py" "$@"
26 changes: 26 additions & 0 deletions scripts/wrappers/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,29 @@ def get_addon_by_name(addons, name):
if name == addon["name"]:
filtered_addon.append(addon)
return filtered_addon


def is_service_expected_to_start(service):
    """
    Tell whether a service is meant to be running.

    The answer is derived from the ``${SNAP_DATA}/var/lock`` directory: the
    service is expected to start only when that directory exists and holds
    no regular file named after the service.

    :param service: the service name
    :return: True if the service is meant to start, False otherwise
    """
    lock_dir = os.path.expandvars("${SNAP_DATA}/var/lock")
    if not os.path.exists(lock_dir):
        # Without the lock directory no service is reported as expected to start.
        return False
    service_lock = "{}/{}".format(lock_dir, service)
    return not os.path.isfile(service_lock)


def set_service_expected_to_start(service, start=True):
    """
    Record whether a service is expected to start.

    The state is kept as a lock file named after the service inside
    ``${SNAP_DATA}/var/lock``: the file is removed to mark the service as
    expected to start and created to mark it as not expected to start.

    :param service: the service name
    :param start: should the service start or not
    :raises OSError: if the lock file cannot be created (for instance when
                     the lock directory is missing) or cannot be removed for
                     a reason other than it not existing
    """
    lock_path = os.path.expandvars("${SNAP_DATA}/var/lock")
    lock = "{}/{}".format(lock_path, service)
    if start:
        try:
            os.remove(lock)
        except FileNotFoundError:
            # No lock file means the service is already allowed to start;
            # clearing the lock must be idempotent instead of failing.
            pass
    else:
        fd = os.open(lock, os.O_CREAT, mode=0o700)
        os.close(fd)
94 changes: 94 additions & 0 deletions scripts/wrappers/control-plane-kicker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/python3
import netifaces
import subprocess

from time import sleep

from common.utils import (
is_cluster_ready,
get_dqlite_info,
is_ha_enabled,
is_service_expected_to_start,
set_service_expected_to_start,
)

# Names of the services this script starts and stops; each one is turned into
# a "microk8s.daemon-<name>" unit name before being passed to snapctl.
services = [
'controller-manager',
'scheduler',
]


def start_control_plane_services():
    """
    Start every control plane service that is not marked as expected to start.

    Each such service is started through ``snapctl start`` and then flagged
    as expected to start. A failing ``snapctl`` call raises
    ``subprocess.CalledProcessError`` and leaves the remaining services
    untouched.
    """
    for name in services:
        if is_service_expected_to_start(name):
            # Already flagged as expected to start, nothing to do.
            continue
        unit = "microk8s.daemon-{}".format(name)
        print("Starting {}".format(unit), flush=True)
        command = "snapctl start {}".format(unit).split()
        subprocess.check_output(command)
        set_service_expected_to_start(name, True)


def stop_control_plane_services():
    """
    Stop every control plane service that is marked as expected to start.

    Each such service is stopped through ``snapctl stop`` and then flagged
    as not expected to start. A failing ``snapctl`` call raises
    ``subprocess.CalledProcessError`` and leaves the remaining services
    untouched.
    """
    for name in services:
        if not is_service_expected_to_start(name):
            # Already flagged as not expected to start, nothing to do.
            continue
        unit = "microk8s.daemon-{}".format(name)
        print("Stopping {}".format(unit), flush=True)
        command = "snapctl stop {}".format(unit).split()
        subprocess.check_output(command)
        set_service_expected_to_start(name, False)


if __name__ == '__main__':
    while True:
        # Poll for changes every 10 seconds
        sleep(10)
        try:
            # The control plane services are kept running (never stopped) when:
            # 1. The cluster is not ready
            # 2. We are not on an HA cluster
            # 3. The control plane kicker is disabled
            # 4. dqlite has less than 4 nodes
            kicker_applies = (
                is_cluster_ready()
                and is_ha_enabled()
                and is_service_expected_to_start('control-plane-kicker')
            )
            if not kicker_applies:
                start_control_plane_services()
                continue

            dqlite_nodes = get_dqlite_info()
            if len(dqlite_nodes) < 4:
                start_control_plane_services()
                continue

            # Collect every IPv4 address configured on this machine.
            local_ips = []
            for iface in netifaces.interfaces():
                if netifaces.AF_INET in netifaces.ifaddresses(iface):
                    local_ips.extend(
                        link['addr']
                        for link in netifaces.ifaddresses(iface)[netifaces.AF_INET]
                    )

            # Keep the part before the first ':' of each voter's address.
            voter_ips = [
                node[0].split(':')[0] for node in dqlite_nodes if node[1] == "voter"
            ]

            # The services run only if one of the local addresses is a voter's.
            if any(ip in voter_ips for ip in local_ips):
                start_control_plane_services()
            else:
                stop_control_plane_services()

        except Exception as err:
            # Log the failure and retry on the next iteration.
            print(err, flush=True)
16 changes: 0 additions & 16 deletions scripts/wrappers/status.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,22 +180,6 @@ def ha_cluster_formed(info):
return ha_formed


def print_ha_pretty(ha_ready, info):
    """
    Print a human readable summary of the HA state of this node.

    :param ha_ready: truthy when HA is enabled on this node
    :param info: iterable of node entries; for each entry, element 0 and
                 element 1 are printed as the node and its role
    """
    formed = ha_cluster_formed(info)

    if not ha_ready:
        print("HA is not enabled on this node, enable it with 'microk8s enable ha-cluster'.")
        return

    if formed:
        print("The cluster is highly available.")
    else:
        print("HA is enabled on this node but an HA cluster has not formed yet.")

    print("Cluster nodes:")
    for entry in info:
        print(" - {} is a {} node".format(entry[0], entry[1]))


if __name__ == '__main__':
exit_if_no_permission()
is_cluster_locked()
Expand Down
3 changes: 3 additions & 0 deletions snap/snapcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ apps:
daemon-apiserver-kicker:
command: apiservice-kicker
daemon: simple
daemon-control-plane-kicker:
command: control-plane-kicker
daemon: simple
daemon-cluster-agent:
command: run-cluster-agent-with-args
daemon: simple
Expand Down

0 comments on commit 8cf2616

Please sign in to comment.