Skip to content

Commit

Permalink
[syncd swap] Added MASIC platform support for swapping syncd with syn…
Browse files Browse the repository at this point in the history
…cd-rpc (#2929)
  • Loading branch information
smaheshm authored Feb 8, 2021
1 parent 2b00e37 commit 1e70c8e
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 45 deletions.
141 changes: 140 additions & 1 deletion tests/common/devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@

from errors import RunAnsibleModuleFail
from errors import UnsupportedAnsibleModule
from tests.common.cache import cached
from tests.common.helpers.constants import DEFAULT_ASIC_ID, DEFAULT_NAMESPACE, NAMESPACE_PREFIX
from tests.common.helpers.dut_utils import is_supervisor_node
from tests.common.cache import cached

# HACK: This is a hack for issue https://github.com/Azure/sonic-mgmt/issues/1941 and issue
# https://github.com/ansible/pytest-ansible/issues/47
Expand Down Expand Up @@ -390,6 +390,29 @@ def is_service_fully_started(self, service):
except:
return False

def is_container_present(self, service):
    """
    Check whether ``docker ps`` lists a container matching the given name.

    @param service: Container name, passed as the ``docker ps`` name filter

    Returns:
        True if the command printed more than one line of output (i.e. at
        least one row in addition to the first line), False otherwise.

    NOTE(review): ``docker ps`` is run without ``-a`` and the docker name
    filter matches partial names, so this reports running containers whose
    name merely contains ``service`` -- confirm callers expect that.
    """
    result = self.command(
        "docker ps -f name={}".format(service),
        module_ignore_errors=True
    )

    # A single output line is just the table header: nothing matched.
    present = len(result["stdout_lines"]) > 1

    if not present:
        logging.info("container {} does not exist".format(service))
    else:
        logging.info("container {} status: {}".format(
            service, result["stdout"])
        )

    return present

def critical_services_status(self):
result = {}
for service in self.critical_services:
Expand Down Expand Up @@ -1261,6 +1284,45 @@ def get_crm_facts(self):

return crm_facts

def stop_service(self, service_name, docker_name):
    """
    Stop a systemd service, but only if its container is fully started.

    @param service_name: Unit name handed to ``systemctl stop``
    @param docker_name: Name checked with ``is_service_fully_started``

    Returns:
        None. Nothing is stopped when the check reports the service is
        not fully started.
    """
    logging.debug("Stopping {}".format(service_name))

    # Guard clause: skip the systemctl call when there is nothing running.
    if not self.is_service_fully_started(docker_name):
        return

    self.command("systemctl stop {}".format(service_name))
    logging.debug("Stopped {}".format(service_name))

def delete_container(self, service):
    """
    Best-effort removal of a container via ``docker rm``.

    The removal is attempted unconditionally. Gating it on
    ``is_container_present`` was counter-productive: that check uses
    ``docker ps`` without ``-a`` and therefore only sees *running*
    containers, while ``docker rm`` without ``--force`` can only remove
    *stopped* ones. The gate skipped exactly the containers that could be
    removed and attempted exactly the removals that had to fail.

    @param service: Container name handed to ``docker rm``

    Returns:
        None. Failures (e.g. the container does not exist or is still
        running) are ignored via ``module_ignore_errors``.
    """
    self.command(
        "docker rm {}".format(service),
        module_ignore_errors=True
    )

def is_bgp_state_idle(self):
    """
    Check whether every BGP neighbor is reported as ``Idle (Admin)``.

    Runs ``show ip bgp summary`` and compares the number of output lines
    containing ``Idle (Admin)`` with the number at the end of the
    ``Total number of neighbors`` line.

    Returns:
        True when the two counts match, False otherwise. If no
        ``Total number of neighbors`` line is printed the expected count
        stays 0.
    """
    summary_lines = self.command("show ip bgp summary")["stdout_lines"]

    idle_seen = sum(1 for line in summary_lines if "Idle (Admin)" in line)

    neighbors_expected = 0
    for line in summary_lines:
        if "Total number of neighbors" in line:
            # The neighbor total is the last whitespace-separated token.
            neighbors_expected = int(line.split()[-1])

    return idle_seen == neighbors_expected

def is_service_running(self, service_name, docker_name):
    """
    Check a supervisord-managed process inside a container.

    Runs ``supervisorctl status <service_name>`` through ``docker exec``
    in ``docker_name`` and looks for ``RUNNING`` in the output.

    @param service_name: Process name passed to ``supervisorctl status``
    @param docker_name: Container in which the command is executed

    Returns:
        True if the command's stdout contains ``RUNNING``, else False.
    """
    status_cmd = "docker exec {} supervisorctl status {}".format(
        docker_name, service_name
    )
    # Errors are ignored so a missing/stopped container reads as "not running".
    result = self.command(status_cmd, module_ignore_errors=True)
    service_status = result["stdout"]

    logging.info("service {}:{} status: {} ".format(
        docker_name, service_name, service_status)
    )

    return "RUNNING" in service_status


class K8sMasterHost(AnsibleHostBase):
"""
Expand Down Expand Up @@ -1688,6 +1750,9 @@ class SonicAsic(object):
"""

_DEFAULT_ASIC_SERVICES = ["bgp", "database", "lldp", "swss", "syncd", "teamd"]
_MULTI_ASIC_SERVICE_NAME = "{}@{}" # service name, asic_id
_MULTI_ASIC_DOCKER_NAME = "{}{}" # docker name, asic_id

def __init__(self, sonichost, asic_index):
""" Initializing a ASIC on a SONiC host.
Expand Down Expand Up @@ -1839,6 +1904,41 @@ def interface_facts(self, *module_args, **complex_args):
return self.sonichost.interface_facts(*module_args, **complex_args)


def stop_service(self, service):
    """
    Stop this ASIC's instance of a service through the underlying host.

    On a multi-ASIC host the service and container names are derived from
    ``_MULTI_ASIC_SERVICE_NAME`` / ``_MULTI_ASIC_DOCKER_NAME`` and this
    ASIC's index; otherwise ``service`` is used for both names.

    @param service: Base service name

    Returns:
        Whatever ``sonichost.stop_service`` returns.
    """
    if self.sonichost.is_multi_asic:
        service_name = self._MULTI_ASIC_SERVICE_NAME.format(
            service, self.asic_index
        )
        docker_name = self._MULTI_ASIC_DOCKER_NAME.format(
            service, self.asic_index
        )
    else:
        service_name = docker_name = service

    return self.sonichost.stop_service(service_name, docker_name)

def delete_container(self, service):
    """
    Delete this ASIC's container for a service through the underlying host.

    @param service: Base container name; on a multi-ASIC host it is
        expanded with ``_MULTI_ASIC_DOCKER_NAME`` and this ASIC's index.

    Returns:
        Whatever ``sonichost.delete_container`` returns.
    """
    container = (
        self._MULTI_ASIC_DOCKER_NAME.format(service, self.asic_index)
        if self.sonichost.is_multi_asic
        else service
    )
    return self.sonichost.delete_container(container)

def is_container_present(self, service):
    """
    Check for this ASIC's container for a service through the underlying host.

    @param service: Base container name; on a multi-ASIC host it is
        expanded with ``_MULTI_ASIC_DOCKER_NAME`` and this ASIC's index.

    Returns:
        Whatever ``sonichost.is_container_present`` returns.
    """
    host = self.sonichost
    container = service
    if host.is_multi_asic:
        container = self._MULTI_ASIC_DOCKER_NAME.format(
            service, self.asic_index
        )
    return host.is_container_present(container)

def is_service_running(self, service_name, docker_name):
    """
    Check a process inside this ASIC's container through the underlying host.

    @param service_name: Process name, forwarded unchanged
    @param docker_name: Base container name; on a multi-ASIC host it is
        expanded with ``_MULTI_ASIC_DOCKER_NAME`` and this ASIC's index.

    Returns:
        Whatever ``sonichost.is_service_running`` returns.
    """
    container = (
        self._MULTI_ASIC_DOCKER_NAME.format(docker_name, self.asic_index)
        if self.sonichost.is_multi_asic
        else docker_name
    )
    return self.sonichost.is_service_running(service_name, container)


class MultiAsicSonicHost(object):
""" This class represents a Multi-asic SonicHost It has two attributes:
sonic_host: a SonicHost instance. This object is for interacting with the SONiC host through pytest_ansible.
Expand Down Expand Up @@ -2008,6 +2108,45 @@ def get_asic(self, asic_id):
return self.asics[0]
return self.asics[asic_id]

def stop_service(self, service):
    """
    Stop a service on the host or on every ASIC.

    A service listed in ``_DEFAULT_SERVICES`` is stopped once via
    ``sonichost.stop_service``; any other service is stopped on each
    entry of ``self.asics``.

    @param service: Base service name

    Returns:
        The host call's result for a default service, otherwise None.
    """
    if service not in self._DEFAULT_SERVICES:
        for asic in self.asics:
            asic.stop_service(service)
        return None

    return self.sonichost.stop_service(service, service)

def delete_container(self, service):
    """
    Delete a container on the host or on every ASIC.

    A service listed in ``_DEFAULT_SERVICES`` is deleted once via
    ``sonichost.delete_container``; any other service is deleted on each
    entry of ``self.asics``.

    @param service: Base container name

    Returns:
        The host call's result for a default service, otherwise None.
    """
    if service not in self._DEFAULT_SERVICES:
        for asic in self.asics:
            asic.delete_container(service)
        return None

    return self.sonichost.delete_container(service)

def is_container_present(self, service):
    """
    Check for a container on the host or on any ASIC.

    A service listed in ``_DEFAULT_SERVICES`` is checked once via
    ``sonichost.is_container_present``; any other service counts as
    present as soon as one entry of ``self.asics`` reports it.

    @param service: Base container name

    Returns:
        The host call's result for a default service; otherwise True if
        at least one ASIC reports the container, else False.
    """
    if service in self._DEFAULT_SERVICES:
        return self.sonichost.is_container_present(service)

    # any() stops at the first ASIC that reports the container.
    return any(asic.is_container_present(service) for asic in self.asics)

def is_bgp_state_idle(self):
    """
    Delegate the BGP idle check to the underlying ``sonichost``.

    Returns:
        Whatever ``sonichost.is_bgp_state_idle`` returns.
    """
    host = self.sonichost
    return host.is_bgp_state_idle()

def is_service_running(self, service_name, docker_name=None):
    """
    Check a process on the host or on every ASIC.

    When ``docker_name`` is listed in ``_DEFAULT_SERVICES`` the check is
    made once via ``sonichost.is_service_running``; otherwise every entry
    of ``self.asics`` must report the process as running.

    @param service_name: Process name to check
    @param docker_name: Base container name; defaults to ``service_name``
        when None

    Returns:
        The host call's result for a default service; otherwise True only
        if all ASICs report the process running, else False.
    """
    if docker_name is None:
        docker_name = service_name

    if docker_name in self._DEFAULT_SERVICES:
        return self.sonichost.is_service_running(service_name, docker_name)

    # all() stops at the first ASIC that reports the process is not running.
    return all(
        asic.is_service_running(service_name, docker_name)
        for asic in self.asics
    )


class DutHosts(object):
""" Represents all the DUTs (nodes) in a testbed. class has 3 important attributes:
Expand Down
58 changes: 16 additions & 42 deletions tests/common/system_utils/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ def tag_image(duthost, tag, image_name, image_version="latest"):
def swap_syncd(duthost, creds):
"""Replaces the running syncd container with the RPC version of it.
This will download a new Docker image to the duthost and restart the swss service.
This will download a new Docker image to the duthost and restart the swss
service.
Args:
duthost (SonicHost): The target device.
Expand All @@ -123,9 +124,10 @@ def swap_syncd(duthost, creds):
docker_syncd_name = "docker-syncd-{}".format(vendor_id)
docker_rpc_image = docker_syncd_name + "-rpc"

duthost.command("config bgp shutdown all") # Force image download to go through mgmt network
duthost.command("systemctl stop swss", module_ignore_errors=True)
delete_container(duthost, "syncd")
# Force image download to go through mgmt network
duthost.command("config bgp shutdown all")
duthost.stop_service("swss")
duthost.delete_container("syncd")

# Set sysctl RCVBUF parameter for tests
duthost.command("sysctl -w net.core.rmem_max=609430500")
Expand Down Expand Up @@ -164,8 +166,8 @@ def restore_default_syncd(duthost, creds):

docker_syncd_name = "docker-syncd-{}".format(vendor_id)

duthost.command("systemctl stop swss", module_ignore_errors=True)
delete_container(duthost, "syncd")
duthost.stop_service("swss")
duthost.delete_container("syncd")

tag_image(
duthost,
Expand All @@ -188,52 +190,24 @@ def restore_default_syncd(duthost, creds):

def _perform_swap_syncd_shutdown_check(duthost):
def ready_for_swap():
syncd_status = duthost.command("docker ps -f name=syncd")["stdout_lines"]
if len(syncd_status) > 1:
if any([
duthost.is_container_present("syncd"),
duthost.is_container_present("swss"),
not duthost.is_bgp_state_idle()
]):
return False

swss_status = duthost.command("docker ps -f name=swss")["stdout_lines"]
if len(swss_status) > 1:
return False

bgp_summary = duthost.command("show ip bgp summary")["stdout_lines"]
idle_count = 0
expected_idle_count = 0
for line in bgp_summary:
if "Idle (Admin)" in line:
idle_count += 1

if "Total number of neighbors" in line:
tokens = line.split()
expected_idle_count = int(tokens[-1])

return idle_count == expected_idle_count
return True

shutdown_check = wait_until(30, 3, ready_for_swap)

logging.info("syncd status:\n%s", duthost.command("docker ps -f name=syncd", module_ignore_errors=True)["stdout"])
logging.info("swss status:\n%s", duthost.command("docker ps -f name=swss", module_ignore_errors=True)["stdout"])
logging.info("bgp status:\n%s", duthost.command("show ip bgp summary", module_ignore_errors=True)["stdout"])

pytest_assert(shutdown_check, "Docker and/or BGP failed to shut down in 30s")


def _perform_syncd_liveness_check(duthost):
def check_liveness():
syncd_status = duthost.command(
"docker exec syncd supervisorctl status syncd",
module_ignore_errors=True
)["stdout"]

return "RUNNING" in syncd_status

liveness_check = wait_until(10, 1, check_liveness)

logging.info(
"syncd status:\n%s",
duthost.command("docker exec syncd supervisorctl status syncd", module_ignore_errors=True)["stdout"]
)
return duthost.is_service_running("syncd")

liveness_check = wait_until(30, 1, check_liveness)
pytest_assert(liveness_check, "syncd crashed after swap_syncd")


Expand Down
9 changes: 7 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,8 +836,13 @@ def pytest_generate_tests(metafunc):
if "enum_asic_index" in metafunc.fixturenames:
metafunc.parametrize("enum_asic_index", generate_param_asic_index(metafunc, dut_indices, ASIC_PARAM_TYPE_ALL))
if "enum_frontend_asic_index" in metafunc.fixturenames:
metafunc.parametrize("enum_frontend_asic_index",generate_param_asic_index(metafunc, dut_indices, ASIC_PARAM_TYPE_FRONTEND))

metafunc.parametrize(
"enum_frontend_asic_index",
generate_param_asic_index(
metafunc, dut_indices, ASIC_PARAM_TYPE_FRONTEND
),
scope="class"
)
if "enum_dut_portname" in metafunc.fixturenames:
metafunc.parametrize("enum_dut_portname", generate_port_lists(metafunc, "all_ports"))
if "enum_dut_portname_oper_up" in metafunc.fixturenames:
Expand Down

0 comments on commit 1e70c8e

Please sign in to comment.