From b4c15027b6e5c651efed501efa9ddfe802f135ef Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Wed, 14 Oct 2020 12:20:49 -0700 Subject: [PATCH 1/8] Add teamd as a dependent service to swss and take care of warm-restart scenarios. --- files/image_config/misc/docker-wait-any | 70 ++++++++++++++++++++----- files/scripts/swss.sh | 8 ++- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/files/image_config/misc/docker-wait-any b/files/image_config/misc/docker-wait-any index b6a2d95c8821..a22a92d0e5c8 100755 --- a/files/image_config/misc/docker-wait-any +++ b/files/image_config/misc/docker-wait-any @@ -14,39 +14,81 @@ NOTE: This script is written against docker Python package 4.1.0. Newer versions of docker may have a different API. """ - import sys +import time +import argparse import threading from docker import APIClient +from swsssdk import SonicV2Connector # Instantiate a global event to share among our threads g_thread_exit_event = threading.Event() +g_service = [] +g_dep_services = [] +def check_warm_restart_state(container_name): + state_db = SonicV2Connector(host='127.0.0.1') + state_db.connect(state_db.STATE_DB, False) -def usage(): - print("Usage: {} [ ...]".format(sys.argv[0])) - sys.exit(1) + # Get the system warm reboot enable state + TABLE_NAME_SEPARATOR = '|' + prefix = 'WARM_RESTART_ENABLE_TABLE' + TABLE_NAME_SEPARATOR + _hash = '{}{}'.format(prefix, 'system') + wr_system_state = state_db.get(state_db.STATE_DB, _hash, "enable") + wr_enable_state = True if wr_system_state == "true" else False + # Get the container warm reboot enable state + prefix = 'WARM_RESTART_ENABLE_TABLE' + TABLE_NAME_SEPARATOR + _hash = '{}{}'.format(prefix, container_name) + wr_container_state = state_db.get(state_db.STATE_DB, _hash, "enable") + wr_enable_state |= True if wr_container_state == "true" else False -def wait_for_container(docker_client, container_name): - docker_client.wait(container_name) + state_db.close(state_db.STATE_DB) + return wr_enable_state -
print("No longer waiting on container '{}'".format(container_name)) +def wait_for_container(docker_client, container_name): + while True: + while docker_client.inspect_container(container_name)['State']['Status'] != "running": + time.sleep(1) - # Signal the main thread to exit - g_thread_exit_event.set() + docker_client.wait(container_name) + print("No longer waiting on container '{}'".format(container_name)) + # If this is a dependent service and WR is enabled, DON'T signal main thread to exit + if container_name in g_dep_services and check_warm_restart_state(container_name): + continue + else: + # Signal the main thread to exit + g_thread_exit_event.set() def main(): thread_list = [] docker_client = APIClient(base_url='unix://var/run/docker.sock') - # Ensure we were passed at least one argument - if len(sys.argv) < 2: - usage() - - container_names = sys.argv[1:] + parser = argparse.ArgumentParser(description='Wait for dependent docker services', + version='1.0.0', + formatter_class=argparse.RawTextHelpFormatter, + epilog=""" +Examples: + docker-wait-any -s swss -d syncd teamd +""") + + parser.add_argument('-s','--service', nargs='+', default=None, help='The service which is waiting for dependent services') + parser.add_argument('-d','--dependent', nargs='*', default=None, help='The dependent services') + + args = parser.parse_args() + global g_service + global g_dep_services + if args.service is not None: + g_service = args.service + if args.dependent is not None: + g_dep_services = args.dependent + + container_names = g_service + g_dep_services + + if container_names == []: + sys.exit(0) for container_name in container_names: t = threading.Thread(target=wait_for_container, args=[docker_client, container_name]) diff --git a/files/scripts/swss.sh b/files/scripts/swss.sh index 129e5d148dc8..1a84e017bef3 100755 --- a/files/scripts/swss.sh +++ b/files/scripts/swss.sh @@ -179,10 +179,14 @@ wait() { # NOTE: This assumes Docker containers share the same names as their # 
corresponding services + for dep in ${MULTI_INST_DEPENDENT}; do + ALL_DEPS="$ALL_DEPS $dep$DEV" + done + if [[ ! -z $DEV ]]; then - /usr/bin/docker-wait-any ${SERVICE}$DEV ${PEER}$DEV + /usr/bin/docker-wait-any -s ${SERVICE}$DEV -d ${PEER}$DEV ${ALL_DEPS} else - /usr/bin/docker-wait-any ${SERVICE} ${PEER} + /usr/bin/docker-wait-any -s ${SERVICE} -d ${PEER} ${ALL_DEPS} fi } From 65b62d4bc0d01caf3f3ea9216fe2ed93ad4664f4 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Fri, 16 Oct 2020 16:54:49 -0700 Subject: [PATCH 2/8] Updates to take care of fast reboot and adding logger instead of print. --- files/image_config/misc/docker-wait-any | 47 +++++++++++++++++++------ 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/files/image_config/misc/docker-wait-any b/files/image_config/misc/docker-wait-any index a22a92d0e5c8..1a67b0f37fe2 100755 --- a/files/image_config/misc/docker-wait-any +++ b/files/image_config/misc/docker-wait-any @@ -17,28 +17,36 @@ import sys import time import argparse +import subprocess import threading from docker import APIClient from swsssdk import SonicV2Connector +from sonic_py_common import logger + +SYSLOG_IDENTIFIER = 'docker-wait-any' + +# Global logger instance +log = logger.Logger(SYSLOG_IDENTIFIER) # Instantiate a global event to share among our threads g_thread_exit_event = threading.Event() g_service = [] g_dep_services = [] -def check_warm_restart_state(container_name): +# Check if System warm reboot, or Container warm restart is enabled. 
+def is_warm_restart_enabled(container_name): state_db = SonicV2Connector(host='127.0.0.1') state_db.connect(state_db.STATE_DB, False) - # Get the system warm reboot enable state TABLE_NAME_SEPARATOR = '|' prefix = 'WARM_RESTART_ENABLE_TABLE' + TABLE_NAME_SEPARATOR + + # Get the system warm reboot enable state _hash = '{}{}'.format(prefix, 'system') wr_system_state = state_db.get(state_db.STATE_DB, _hash, "enable") wr_enable_state = True if wr_system_state == "true" else False # Get the container warm reboot enable state - prefix = 'WARM_RESTART_ENABLE_TABLE' + TABLE_NAME_SEPARATOR _hash = '{}{}'.format(prefix, container_name) wr_container_state = state_db.get(state_db.STATE_DB, _hash, "enable") wr_enable_state |= True if wr_container_state == "true" else False @@ -46,20 +54,35 @@ def check_warm_restart_state(container_name): state_db.close(state_db.STATE_DB) return wr_enable_state +# Check if System fast reboot is enabled. +def is_fast_reboot_enabled(): + fb_system_state = 0 + cmd='sonic-db-cli STATE_DB get "FAST_REBOOT|system"' + proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) + (stdout, stderr) = proc.communicate() + + if proc.returncode != 0: + log.log_error("Error running command '{}'".format(cmd)) + elif stdout: + fb_system_state = stdout.rstrip('\n') + + return fb_system_state + def wait_for_container(docker_client, container_name): while True: while docker_client.inspect_container(container_name)['State']['Status'] != "running": time.sleep(1) docker_client.wait(container_name) - print("No longer waiting on container '{}'".format(container_name)) + + log.log_info("No longer waiting on container '{}'".format(container_name)) # If this is a dependent service and WR is enabled, DON'T signal main thread to exit - if container_name in g_dep_services and check_warm_restart_state(container_name): + if container_name in g_dep_services and (is_warm_restart_enabled(container_name) or is_fast_reboot_enabled()): continue - else: - # Signal the main 
thread to exit - g_thread_exit_event.set() + + # Signal the main thread to exit + g_thread_exit_event.set() def main(): thread_list = [] @@ -74,12 +97,13 @@ Examples: docker-wait-any -s swss -d syncd teamd """) - parser.add_argument('-s','--service', nargs='+', default=None, help='The service which is waiting for dependent services') - parser.add_argument('-d','--dependent', nargs='*', default=None, help='The dependent services') - + parser.add_argument('-s','--service', nargs='+', default=None, help='name of the service') + parser.add_argument('-d','--dependent', nargs='*', default=None, help='other dependent services') args = parser.parse_args() + global g_service global g_dep_services + if args.service is not None: g_service = args.service if args.dependent is not None: @@ -87,6 +111,7 @@ Examples: container_names = g_service + g_dep_services + #If the service and dependents passed as args is empty, then exit if container_names == []: sys.exit(0) From 4088afed688bead1248ec7cd4e42667314c87ee6 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Fri, 16 Oct 2020 18:11:54 -0700 Subject: [PATCH 3/8] Comments update --- files/image_config/misc/docker-wait-any | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/files/image_config/misc/docker-wait-any b/files/image_config/misc/docker-wait-any index 1a67b0f37fe2..f1e86b93893a 100755 --- a/files/image_config/misc/docker-wait-any +++ b/files/image_config/misc/docker-wait-any @@ -14,14 +14,15 @@ NOTE: This script is written against docker Python package 4.1.0. Newer versions of docker may have a different API. """ -import sys -import time import argparse import subprocess +import sys import threading +import time + from docker import APIClient -from swsssdk import SonicV2Connector from sonic_py_common import logger +from swsssdk import SonicV2Connector SYSLOG_IDENTIFIER = 'docker-wait-any' @@ -57,7 +58,7 @@ def is_warm_restart_enabled(container_name): # Check if System fast reboot is enabled. 
def is_fast_reboot_enabled(): fb_system_state = 0 - cmd='sonic-db-cli STATE_DB get "FAST_REBOOT|system"' + cmd = 'sonic-db-cli STATE_DB get "FAST_REBOOT|system"' proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) (stdout, stderr) = proc.communicate() @@ -77,7 +78,8 @@ def wait_for_container(docker_client, container_name): log.log_info("No longer waiting on container '{}'".format(container_name)) - # If this is a dependent service and WR is enabled, DON'T signal main thread to exit + # If this is a dependent service and warm restart is enabled for the system/container, + # OR if the system is going through a fast-reboot, DON'T signal main thread to exit if container_name in g_dep_services and (is_warm_restart_enabled(container_name) or is_fast_reboot_enabled()): continue @@ -111,7 +113,7 @@ Examples: container_names = g_service + g_dep_services - #If the service and dependents passed as args is empty, then exit + # If the service and dependents passed as args is empty, then exit if container_names == []: sys.exit(0) From 9be883d466bb590c3821e0acbc3cc18963b623e5 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Mon, 19 Oct 2020 22:12:40 -0700 Subject: [PATCH 4/8] Move common WR/FB API's to sonic-py-common --- files/image_config/misc/docker-wait-any | 41 ++----------------- .../sonic_py_common/device_info.py | 37 ++++++++++++++++- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/files/image_config/misc/docker-wait-any b/files/image_config/misc/docker-wait-any index f1e86b93893a..04f4cc0921cd 100755 --- a/files/image_config/misc/docker-wait-any +++ b/files/image_config/misc/docker-wait-any @@ -21,8 +21,7 @@ import threading import time from docker import APIClient -from sonic_py_common import logger -from swsssdk import SonicV2Connector +from sonic_py_common import logger, device_info SYSLOG_IDENTIFIER = 'docker-wait-any' @@ -34,41 +33,6 @@ g_thread_exit_event = threading.Event() g_service = [] g_dep_services = [] -# Check if System warm 
reboot, or Container warm restart is enabled. -def is_warm_restart_enabled(container_name): - state_db = SonicV2Connector(host='127.0.0.1') - state_db.connect(state_db.STATE_DB, False) - - TABLE_NAME_SEPARATOR = '|' - prefix = 'WARM_RESTART_ENABLE_TABLE' + TABLE_NAME_SEPARATOR - - # Get the system warm reboot enable state - _hash = '{}{}'.format(prefix, 'system') - wr_system_state = state_db.get(state_db.STATE_DB, _hash, "enable") - wr_enable_state = True if wr_system_state == "true" else False - - # Get the container warm reboot enable state - _hash = '{}{}'.format(prefix, container_name) - wr_container_state = state_db.get(state_db.STATE_DB, _hash, "enable") - wr_enable_state |= True if wr_container_state == "true" else False - - state_db.close(state_db.STATE_DB) - return wr_enable_state - -# Check if System fast reboot is enabled. -def is_fast_reboot_enabled(): - fb_system_state = 0 - cmd = 'sonic-db-cli STATE_DB get "FAST_REBOOT|system"' - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) - (stdout, stderr) = proc.communicate() - - if proc.returncode != 0: - log.log_error("Error running command '{}'".format(cmd)) - elif stdout: - fb_system_state = stdout.rstrip('\n') - - return fb_system_state - def wait_for_container(docker_client, container_name): while True: while docker_client.inspect_container(container_name)['State']['Status'] != "running": @@ -80,7 +44,8 @@ def wait_for_container(docker_client, container_name): # If this is a dependent service and warm restart is enabled for the system/container, # OR if the system is going through a fast-reboot, DON'T signal main thread to exit - if container_name in g_dep_services and (is_warm_restart_enabled(container_name) or is_fast_reboot_enabled()): + if container_name in g_dep_services and + (device_info.is_warm_restart_enabled(container_name) or device_info.is_fast_reboot_enabled()): continue # Signal the main thread to exit diff --git a/src/sonic-py-common/sonic_py_common/device_info.py 
b/src/sonic-py-common/sonic_py_common/device_info.py index 4d1c9f862953..5c29a45169f9 100644 --- a/src/sonic-py-common/sonic_py_common/device_info.py +++ b/src/sonic-py-common/sonic_py_common/device_info.py @@ -7,7 +7,7 @@ from natsort import natsorted # TODO: Replace with swsscommon -from swsssdk import ConfigDBConnector, SonicDBConfig +from swsssdk import ConfigDBConnector, SonicDBConfig, SonicV2Connector USR_SHARE_SONIC_PATH = "/usr/share/sonic" HOST_DEVICE_PATH = USR_SHARE_SONIC_PATH + "/device" @@ -428,3 +428,38 @@ def get_system_routing_stack(): raise OSError("Cannot detect routing stack") return result + +# Check if System warm reboot or Container warm restart is enabled. +def is_warm_restart_enabled(container_name): + state_db = SonicV2Connector(host='127.0.0.1') + state_db.connect(state_db.STATE_DB, False) + + TABLE_NAME_SEPARATOR = '|' + prefix = 'WARM_RESTART_ENABLE_TABLE' + TABLE_NAME_SEPARATOR + + # Get the system warm reboot enable state + _hash = '{}{}'.format(prefix, 'system') + wr_system_state = state_db.get(state_db.STATE_DB, _hash, "enable") + wr_enable_state = True if wr_system_state == "true" else False + + # Get the container warm reboot enable state + _hash = '{}{}'.format(prefix, container_name) + wr_container_state = state_db.get(state_db.STATE_DB, _hash, "enable") + wr_enable_state |= True if wr_container_state == "true" else False + + state_db.close(state_db.STATE_DB) + return wr_enable_state + +# Check if System fast reboot is enabled. 
+def is_fast_reboot_enabled(): + fb_system_state = 0 + cmd = 'sonic-db-cli STATE_DB get "FAST_REBOOT|system"' + proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) + (stdout, stderr) = proc.communicate() + + if proc.returncode != 0: + log.log_error("Error running command '{}'".format(cmd)) + elif stdout: + fb_system_state = stdout.rstrip('\n') + + return fb_system_state From 9c7c59eb365fabd98d3295798771d1d12c9de51a Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Mon, 19 Oct 2020 22:41:26 -0700 Subject: [PATCH 5/8] fix error in python multi-line condition. --- files/image_config/misc/docker-wait-any | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/files/image_config/misc/docker-wait-any b/files/image_config/misc/docker-wait-any index 04f4cc0921cd..3b13c53aa1b4 100755 --- a/files/image_config/misc/docker-wait-any +++ b/files/image_config/misc/docker-wait-any @@ -44,8 +44,8 @@ def wait_for_container(docker_client, container_name): # If this is a dependent service and warm restart is enabled for the system/container, # OR if the system is going through a fast-reboot, DON'T signal main thread to exit - if container_name in g_dep_services and - (device_info.is_warm_restart_enabled(container_name) or device_info.is_fast_reboot_enabled()): + if (container_name in g_dep_services and + (device_info.is_warm_restart_enabled(container_name) or device_info.is_fast_reboot_enabled())): continue # Signal the main thread to exit From 312eadcc1e34feb00e21620ed90e18d5d6ec9f2d Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Mon, 19 Oct 2020 23:00:44 -0700 Subject: [PATCH 6/8] Remove unused import. 
--- files/image_config/misc/docker-wait-any | 1 - 1 file changed, 1 deletion(-) diff --git a/files/image_config/misc/docker-wait-any b/files/image_config/misc/docker-wait-any index 3b13c53aa1b4..ff883d91c15d 100755 --- a/files/image_config/misc/docker-wait-any +++ b/files/image_config/misc/docker-wait-any @@ -15,7 +15,6 @@ versions of docker may have a different API. """ import argparse -import subprocess import sys import threading import time From c1422e23ae2a1a7f365113567322c4135431da70 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Wed, 21 Oct 2020 12:21:10 -0700 Subject: [PATCH 7/8] Additional safety checks to make sure the dependent services are UP as well. --- files/scripts/swss.sh | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/files/scripts/swss.sh b/files/scripts/swss.sh index 1a84e017bef3..11613b5a8a5c 100755 --- a/files/scripts/swss.sh +++ b/files/scripts/swss.sh @@ -170,7 +170,20 @@ wait() { else RUNNING=$(docker inspect -f '{{.State.Running}}' ${PEER}) fi - if [[ x"$RUNNING" == x"true" ]]; then + ALL_DEPS_RUNNING=true + for dep in ${MULTI_INST_DEPENDENT}; do + if [[ ! -z $DEV ]]; then + DEP_RUNNING=$(docker inspect -f '{{.State.Running}}' ${dep}$DEV) + else + DEP_RUNNING=$(docker inspect -f '{{.State.Running}}' ${dep}) + fi + if [[ x"$DEP_RUNNING" != x"true" ]]; then + ALL_DEPS_RUNNING=false + break + fi + done + + if [[ x"$RUNNING" == x"true" && x"$ALL_DEPS_RUNNING" == x"true" ]]; then break else sleep 1 @@ -180,7 +193,11 @@ wait() { # NOTE: This assumes Docker containers share the same names as their # corresponding services for dep in ${MULTI_INST_DEPENDENT}; do - ALL_DEPS="$ALL_DEPS $dep$DEV" + if [[ ! -z $DEV ]]; then + ALL_DEPS="$ALL_DEPS ${dep}$DEV" + else + ALL_DEPS="$ALL_DEPS ${dep}" + fi done if [[ !
-z $DEV ]]; then From 82ea6422bf736ed53e01dd0552e41ac0bf720693 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Wed, 21 Oct 2020 18:25:31 -0700 Subject: [PATCH 8/8] Modify the comments in the docker-wait-any script. --- files/image_config/misc/docker-wait-any | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/files/image_config/misc/docker-wait-any b/files/image_config/misc/docker-wait-any index ff883d91c15d..d006aec47a5a 100755 --- a/files/image_config/misc/docker-wait-any +++ b/files/image_config/misc/docker-wait-any @@ -3,14 +3,26 @@ """ docker-wait-any This script takes one or more Docker container names as arguments, - and it will block indefinitely while all of the specified containers - are running. If any of the specified containers stop, the script will + [-s] argument is for the service which invokes this script + [-d] argument is to list the dependent services for the above service. + It will block indefinitely while all of the specified containers + are running. If any of the specified containers stop, the script will exit. + This script was created because the 'docker wait' command is lacking this functionality. It will block until ALL specified containers have stopped running. Here, we spawn multiple threads and wait on one container per thread. If any of the threads exit, the entire - application will exit. + application will exit, unless we are in a scenario where the following + conditions are met. + (i) the container is a dependent service + (ii) warm restart is enabled at system level or for that container OR + fast reboot is enabled at system level + In this scenario, the g_thread_exit_event won't be propagated to the parent, + instead the thread will continue to do docker_client.wait again. This helps + cases where we need the dependent container to be warm-restarted without + affecting other services (e.g. warm restart of teamd service) + NOTE: This script is written against docker Python package 4.1.0.
Newer versions of docker may have a different API. """