diff --git a/files/image_config/misc/docker-wait-any b/files/image_config/misc/docker-wait-any index b6a2d95c8821..d006aec47a5a 100755 --- a/files/image_config/misc/docker-wait-any +++ b/files/image_config/misc/docker-wait-any @@ -3,50 +3,95 @@ """ docker-wait-any This script takes one or more Docker container names as arguments, - and it will block indefinitely while all of the specified containers - are running. If any of the specified containers stop, the script will + [-s] argument is for the service which invokes this script + [-d] argument is to list the dependent services for the above service. + It will block indefinitely while all of the specified containers + are running.If any of the specified containers stop, the script will exit. + This script was created because the 'docker wait' command is lacking this functionality. It will block until ALL specified containers have stopped running. Here, we spawn multiple threads and wait on one container per thread. If any of the threads exit, the entire - application will exit. + application will exit, unless we are in a scenario where the following + conditions are met. + (i) the container is a dependent service + (ii) warm restart is enabled at system level or for that container OR + fast reboot is enabled system level + In this scenario, the g_thread_exit_event won't be propogated to the parent, + instead the thread will continue to do docker_client.wait again.This help's + cases where we need the dependent container to be warm-restarted without + affecting other services (eg: warm restart of teamd service) + NOTE: This script is written against docker Python package 4.1.0. Newer versions of docker may have a different API. """ - +import argparse import sys import threading -from docker import APIClient +import time -# Instantiate a global event to share among our threads -g_thread_exit_event = threading.Event() +from docker import APIClient +from sonic_py_common import logger, device_info +SYSLOG_IDENTIFIER = 'docker-wait-any' -def usage(): - print("Usage: {} [ ...]".format(sys.argv[0])) - sys.exit(1) +# Global logger instance +log = logger.Logger(SYSLOG_IDENTIFIER) +# Instantiate a global event to share among our threads +g_thread_exit_event = threading.Event() +g_service = [] +g_dep_services = [] def wait_for_container(docker_client, container_name): - docker_client.wait(container_name) + while True: + while docker_client.inspect_container(container_name)['State']['Status'] != "running": + time.sleep(1) - print("No longer waiting on container '{}'".format(container_name)) + docker_client.wait(container_name) - # Signal the main thread to exit - g_thread_exit_event.set() + log.log_info("No longer waiting on container '{}'".format(container_name)) + # If this is a dependent service and warm restart is enabled for the system/container, + # OR if the system is going through a fast-reboot, DON'T signal main thread to exit + if (container_name in g_dep_services and + (device_info.is_warm_restart_enabled(container_name) or device_info.is_fast_reboot_enabled())): + continue + + # Signal the main thread to exit + g_thread_exit_event.set() def main(): thread_list = [] docker_client = APIClient(base_url='unix://var/run/docker.sock') - # Ensure we were passed at least one argument - if len(sys.argv) < 2: - usage() + parser = argparse.ArgumentParser(description='Wait for dependent docker services', + version='1.0.0', + formatter_class=argparse.RawTextHelpFormatter, + epilog=""" +Examples: + docker-wait-any -s swss -d syncd teamd +""") + + parser.add_argument('-s','--service', nargs='+', default=None, help='name of the service') + parser.add_argument('-d','--dependent', nargs='*', default=None, help='other dependent services') + args = parser.parse_args() + + global g_service + global g_dep_services + + if args.service is not None: + g_service = args.service + if args.dependent is not None: + g_dep_services = args.dependent + + container_names = g_service + g_dep_services - container_names = sys.argv[1:] + # If the service and dependents passed as args is empty, then exit + if container_names == []: + sys.exit(0) for container_name in container_names: t = threading.Thread(target=wait_for_container, args=[docker_client, container_name]) diff --git a/files/scripts/swss.sh b/files/scripts/swss.sh index 129e5d148dc8..11613b5a8a5c 100755 --- a/files/scripts/swss.sh +++ b/files/scripts/swss.sh @@ -170,7 +170,20 @@ wait() { else RUNNING=$(docker inspect -f '{{.State.Running}}' ${PEER}) fi - if [[ x"$RUNNING" == x"true" ]]; then + ALL_DEPS_RUNNING=true + for dep in ${MULTI_INST_DEPENDENT}; do + if [[ ! -z $DEV ]]; then + DEP_RUNNING=$(docker inspect -f '{{.State.Running}}' ${dep}$DEV) + else + DEP_RUNNING=$(docker inspect -f '{{.State.Running}}' ${dep}) + fi + if [[ x"$DEP_RUNNING" != x"true" ]]; then + ALL_DEPS_RUNNING=false + break + fi + done + + if [[ x"$RUNNING" == x"true" && x"$ALL_DEPS_RUNNING" == x"true" ]]; then break else sleep 1 @@ -179,10 +192,18 @@ wait() { # NOTE: This assumes Docker containers share the same names as their # corresponding services + for dep in ${MULTI_INST_DEPENDENT}; do + if [[ ! -z $DEV ]]; then + ALL_DEPS="$ALL_DEPS ${dep}$DEV" + else + ALL_DEPS="$ALL_DEPS ${dep}" + fi + done + if [[ ! -z $DEV ]]; then - /usr/bin/docker-wait-any ${SERVICE}$DEV ${PEER}$DEV + /usr/bin/docker-wait-any -s ${SERVICE}$DEV -d ${PEER}$DEV ${ALL_DEPS} else - /usr/bin/docker-wait-any ${SERVICE} ${PEER} + /usr/bin/docker-wait-any -s ${SERVICE} -d ${PEER} ${ALL_DEPS} fi } diff --git a/src/sonic-py-common/sonic_py_common/device_info.py b/src/sonic-py-common/sonic_py_common/device_info.py index 4d1c9f862953..5c29a45169f9 100644 --- a/src/sonic-py-common/sonic_py_common/device_info.py +++ b/src/sonic-py-common/sonic_py_common/device_info.py @@ -7,7 +7,7 @@ from natsort import natsorted # TODO: Replace with swsscommon -from swsssdk import ConfigDBConnector, SonicDBConfig +from swsssdk import ConfigDBConnector, SonicDBConfig, SonicV2Connector USR_SHARE_SONIC_PATH = "/usr/share/sonic" HOST_DEVICE_PATH = USR_SHARE_SONIC_PATH + "/device" @@ -428,3 +428,38 @@ def get_system_routing_stack(): raise OSError("Cannot detect routing stack") return result + +# Check if System warm reboot or Container warm restart is enabled. +def is_warm_restart_enabled(container_name): + state_db = SonicV2Connector(host='127.0.0.1') + state_db.connect(state_db.STATE_DB, False) + + TABLE_NAME_SEPARATOR = '|' + prefix = 'WARM_RESTART_ENABLE_TABLE' + TABLE_NAME_SEPARATOR + + # Get the system warm reboot enable state + _hash = '{}{}'.format(prefix, 'system') + wr_system_state = state_db.get(state_db.STATE_DB, _hash, "enable") + wr_enable_state = True if wr_system_state == "true" else False + + # Get the container warm reboot enable state + _hash = '{}{}'.format(prefix, container_name) + wr_container_state = state_db.get(state_db.STATE_DB, _hash, "enable") + wr_enable_state |= True if wr_container_state == "true" else False + + state_db.close(state_db.STATE_DB) + return wr_enable_state + +# Check if System fast reboot is enabled. +def is_fast_reboot_enabled(): + fb_system_state = 0 + cmd = 'sonic-db-cli STATE_DB get "FAST_REBOOT|system"' + proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) + (stdout, stderr) = proc.communicate() + + if proc.returncode != 0: + log.log_error("Error running command '{}'".format(cmd)) + elif stdout: + fb_system_state = stdout.rstrip('\n') + + return fb_system_state