From 1e70c8ea29ad0c071f0b36f48315ab5f2a8b5d5c Mon Sep 17 00:00:00 2001
From: Mahesh Maddikayala <10645050+smaheshm@users.noreply.github.com>
Date: Mon, 8 Feb 2021 09:53:02 -0800
Subject: [PATCH] [syncd swap] Added MASIC platform support for swapping syncd with syncd-rpc (#2929)

---
 Reviewer notes (text between the "---" line above and the first
 "diff --git" line is not part of the commit message):

  * devices.py gains stop_service / delete_container /
    is_container_present / is_service_running helpers at host level, at
    SonicAsic level and at MultiAsicSonicHost level, plus is_bgp_state_idle
    at host level and on MultiAsicSonicHost. SonicAsic maps a service to
    "<service>@<asic_index>" (systemd unit) and "<service><asic_index>"
    (container) when sonichost.is_multi_asic is true.
  * docker.py: swap_syncd, restore_default_syncd and the shutdown/liveness
    checks call those helpers instead of issuing raw commands; the liveness
    wait goes from 10s to 30s.
  * conftest.py: enum_frontend_asic_index is parametrized with
    scope="class".
  * NOTE(review): this file had lost its line breaks and indentation. The
    line structure below was rebuilt from the hunk headers and the diffstat
    (all counts reconcile), but indentation, the placement of blank context
    lines and intra-line spacing are inferred. If it does not apply cleanly,
    regenerate it with `git format-patch -1 1e70c8ea29ad`.
  * NOTE(review): the is_container_present docstring typo ("where ... exits")
    is corrected here, so the post-image blob id on the devices.py "index"
    line no longer matches the patched content.

 tests/common/devices.py             | 141 +++++++++++++++++++++++++++-
 tests/common/system_utils/docker.py |  58 ++++--------
 tests/conftest.py                   |   9 +-
 3 files changed, 163 insertions(+), 45 deletions(-)

diff --git a/tests/common/devices.py b/tests/common/devices.py
index 78677dd1a82..81ade41f8a8 100644
--- a/tests/common/devices.py
+++ b/tests/common/devices.py
@@ -24,9 +24,9 @@
 
 from errors import RunAnsibleModuleFail
 from errors import UnsupportedAnsibleModule
+from tests.common.cache import cached
 from tests.common.helpers.constants import DEFAULT_ASIC_ID, DEFAULT_NAMESPACE, NAMESPACE_PREFIX
 from tests.common.helpers.dut_utils import is_supervisor_node
-from tests.common.cache import cached
 
 # HACK: This is a hack for issue https://github.com/Azure/sonic-mgmt/issues/1941 and issue
 # https://github.com/ansible/pytest-ansible/issues/47
@@ -390,6 +390,29 @@ def is_service_fully_started(self, service):
         except:
             return False
 
+    def is_container_present(self, service):
+        """
+        Checks whether a container exists.
+
+        @param service: Container name
+
+        Returns:
+            True or False
+        """
+        status = self.command(
+            "docker ps -f name={}".format(service),
+            module_ignore_errors=True
+        )
+
+        if len(status["stdout_lines"]) > 1:
+            logging.info("container {} status: {}".format(
+                service, status["stdout"])
+            )
+        else:
+            logging.info("container {} does not exist".format(service))
+
+        return len(status["stdout_lines"]) > 1
+
     def critical_services_status(self):
         result = {}
         for service in self.critical_services:
@@ -1261,6 +1284,45 @@ def get_crm_facts(self):
 
         return crm_facts
 
+    def stop_service(self, service_name, docker_name):
+        logging.debug("Stopping {}".format(service_name))
+        if self.is_service_fully_started(docker_name):
+            self.command("systemctl stop {}".format(service_name))
+        logging.debug("Stopped {}".format(service_name))
+
+    def delete_container(self, service):
+        if self.is_container_present(service):
+            self.command("docker rm {}".format(service))
+
+    def is_bgp_state_idle(self):
+        bgp_summary = self.command("show ip bgp summary")["stdout_lines"]
+
+        idle_count = 0
+        expected_idle_count = 0
+        for line in bgp_summary:
+            if "Idle (Admin)" in line:
+                idle_count += 1
+
+            if "Total number of neighbors" in line:
+                tokens = line.split()
+                expected_idle_count = int(tokens[-1])
+
+        return idle_count == expected_idle_count
+
+    def is_service_running(self, service_name, docker_name):
+        service_status = self.command(
+            "docker exec {} supervisorctl status {}".format(
+                docker_name, service_name
+            ),
+            module_ignore_errors=True
+        )["stdout"]
+
+        logging.info("service {}:{} status: {} ".format(
+            docker_name, service_name, service_status)
+        )
+
+        return "RUNNING" in service_status
+
 
 class K8sMasterHost(AnsibleHostBase):
     """
@@ -1688,6 +1750,9 @@ class SonicAsic(object):
     """
 
     _DEFAULT_ASIC_SERVICES = ["bgp", "database", "lldp", "swss", "syncd", "teamd"]
+    _MULTI_ASIC_SERVICE_NAME = "{}@{}"  # service name, asic_id
+    _MULTI_ASIC_DOCKER_NAME = "{}{}"  # docker name, asic_id
+
     def __init__(self, sonichost, asic_index):
         """
         Initializing a ASIC on a SONiC host.
@@ -1839,6 +1904,41 @@ def interface_facts(self, *module_args, **complex_args):
 
         return self.sonichost.interface_facts(*module_args, **complex_args)
 
+    def stop_service(self, service):
+        if not self.sonichost.is_multi_asic:
+            service_name = service
+            docker_name = service
+        else:
+            service_name = self._MULTI_ASIC_SERVICE_NAME.format(
+                service, self.asic_index
+            )
+            docker_name = self._MULTI_ASIC_DOCKER_NAME.format(
+                service, self.asic_index
+            )
+        return self.sonichost.stop_service(service_name, docker_name)
+
+    def delete_container(self, service):
+        if self.sonichost.is_multi_asic:
+            service = self._MULTI_ASIC_DOCKER_NAME.format(
+                service, self.asic_index
+            )
+        return self.sonichost.delete_container(service)
+
+    def is_container_present(self, service):
+        if self.sonichost.is_multi_asic:
+            service = self._MULTI_ASIC_DOCKER_NAME.format(
+                service, self.asic_index
+            )
+        return self.sonichost.is_container_present(service)
+
+    def is_service_running(self, service_name, docker_name):
+        if self.sonichost.is_multi_asic:
+            docker_name = self._MULTI_ASIC_DOCKER_NAME.format(
+                docker_name, self.asic_index
+            )
+        return self.sonichost.is_service_running(service_name, docker_name)
+
+
 class MultiAsicSonicHost(object):
     """ This class represents a Multi-asic SonicHost It has two attributes:
     sonic_host: a SonicHost instance. This object is for interacting with the SONiC host through pytest_ansible.
@@ -2008,6 +2108,45 @@ def get_asic(self, asic_id):
             return self.asics[0]
         return self.asics[asic_id]
 
+    def stop_service(self, service):
+        if service in self._DEFAULT_SERVICES:
+            return self.sonichost.stop_service(service, service)
+
+        for asic in self.asics:
+            asic.stop_service(service)
+
+    def delete_container(self, service):
+        if service in self._DEFAULT_SERVICES:
+            return self.sonichost.delete_container(service)
+
+        for asic in self.asics:
+            asic.delete_container(service)
+
+    def is_container_present(self, service):
+        if service in self._DEFAULT_SERVICES:
+            return self.sonichost.is_container_present(service)
+
+        for asic in self.asics:
+            if asic.is_container_present(service):
+                return True
+
+        return False
+
+    def is_bgp_state_idle(self):
+        return self.sonichost.is_bgp_state_idle()
+
+    def is_service_running(self, service_name, docker_name=None):
+        docker_name = service_name if docker_name is None else docker_name
+
+        if docker_name in self._DEFAULT_SERVICES:
+            return self.sonichost.is_service_running(service_name, docker_name)
+
+        for asic in self.asics:
+            if not asic.is_service_running(service_name, docker_name):
+                return False
+
+        return True
+
 
 class DutHosts(object):
     """ Represents all the DUTs (nodes) in a testbed. class has 3 important attributes:
diff --git a/tests/common/system_utils/docker.py b/tests/common/system_utils/docker.py
index 349dbec6592..d088a00702b 100644
--- a/tests/common/system_utils/docker.py
+++ b/tests/common/system_utils/docker.py
@@ -112,7 +112,8 @@ def tag_image(duthost, tag, image_name, image_version="latest"):
 def swap_syncd(duthost, creds):
     """Replaces the running syncd container with the RPC version of it.
 
-    This will download a new Docker image to the duthost and restart the swss service.
+    This will download a new Docker image to the duthost and restart the swss
+    service.
 
     Args:
         duthost (SonicHost): The target device.
@@ -123,9 +124,10 @@ def swap_syncd(duthost, creds):
     docker_syncd_name = "docker-syncd-{}".format(vendor_id)
     docker_rpc_image = docker_syncd_name + "-rpc"
 
-    duthost.command("config bgp shutdown all")  # Force image download to go through mgmt network
-    duthost.command("systemctl stop swss", module_ignore_errors=True)
-    delete_container(duthost, "syncd")
+    # Force image download to go through mgmt network
+    duthost.command("config bgp shutdown all")
+    duthost.stop_service("swss")
+    duthost.delete_container("syncd")
 
     # Set sysctl RCVBUF parameter for tests
     duthost.command("sysctl -w net.core.rmem_max=609430500")
@@ -164,8 +166,8 @@ def restore_default_syncd(duthost, creds):
 
     docker_syncd_name = "docker-syncd-{}".format(vendor_id)
 
-    duthost.command("systemctl stop swss", module_ignore_errors=True)
-    delete_container(duthost, "syncd")
+    duthost.stop_service("swss")
+    duthost.delete_container("syncd")
 
     tag_image(
         duthost,
@@ -188,52 +190,24 @@ def restore_default_syncd(duthost, creds):
 
 def _perform_swap_syncd_shutdown_check(duthost):
     def ready_for_swap():
-        syncd_status = duthost.command("docker ps -f name=syncd")["stdout_lines"]
-        if len(syncd_status) > 1:
+        if any([
+            duthost.is_container_present("syncd"),
+            duthost.is_container_present("swss"),
+            not duthost.is_bgp_state_idle()
+        ]):
             return False
 
-        swss_status = duthost.command("docker ps -f name=swss")["stdout_lines"]
-        if len(swss_status) > 1:
-            return False
-
-        bgp_summary = duthost.command("show ip bgp summary")["stdout_lines"]
-        idle_count = 0
-        expected_idle_count = 0
-        for line in bgp_summary:
-            if "Idle (Admin)" in line:
-                idle_count += 1
-
-            if "Total number of neighbors" in line:
-                tokens = line.split()
-                expected_idle_count = int(tokens[-1])
-
-        return idle_count == expected_idle_count
+        return True
 
     shutdown_check = wait_until(30, 3, ready_for_swap)
-
-    logging.info("syncd status:\n%s", duthost.command("docker ps -f name=syncd", module_ignore_errors=True)["stdout"])
-    logging.info("swss status:\n%s", duthost.command("docker ps -f name=swss", module_ignore_errors=True)["stdout"])
-    logging.info("bgp status:\n%s", duthost.command("show ip bgp summary", module_ignore_errors=True)["stdout"])
-
     pytest_assert(shutdown_check, "Docker and/or BGP failed to shut down in 30s")
 
 
 def _perform_syncd_liveness_check(duthost):
     def check_liveness():
-        syncd_status = duthost.command(
-            "docker exec syncd supervisorctl status syncd",
-            module_ignore_errors=True
-        )["stdout"]
-
-        return "RUNNING" in syncd_status
-
-    liveness_check = wait_until(10, 1, check_liveness)
-
-    logging.info(
-        "syncd status:\n%s",
-        duthost.command("docker exec syncd supervisorctl status syncd", module_ignore_errors=True)["stdout"]
-    )
+        return duthost.is_service_running("syncd")
 
+    liveness_check = wait_until(30, 1, check_liveness)
     pytest_assert(liveness_check, "syncd crashed after swap_syncd")
 
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 47a299805fc..69f337d9460 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -836,8 +836,13 @@ def pytest_generate_tests(metafunc):
     if "enum_asic_index" in metafunc.fixturenames:
         metafunc.parametrize("enum_asic_index", generate_param_asic_index(metafunc, dut_indices, ASIC_PARAM_TYPE_ALL))
     if "enum_frontend_asic_index" in metafunc.fixturenames:
-        metafunc.parametrize("enum_frontend_asic_index",generate_param_asic_index(metafunc, dut_indices, ASIC_PARAM_TYPE_FRONTEND))
-
+        metafunc.parametrize(
+            "enum_frontend_asic_index",
+            generate_param_asic_index(
+                metafunc, dut_indices, ASIC_PARAM_TYPE_FRONTEND
+            ),
+            scope="class"
+        )
     if "enum_dut_portname" in metafunc.fixturenames:
         metafunc.parametrize("enum_dut_portname", generate_port_lists(metafunc, "all_ports"))
     if "enum_dut_portname_oper_up" in metafunc.fixturenames: