Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[reload] Improve reload by using sonic.target. #1199

Merged
merged 11 commits into from
Mar 1, 2021
Merged
171 changes: 25 additions & 146 deletions config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import re
import subprocess
import sys
import threading
import time

from socket import AF_INET, AF_INET6
Expand Down Expand Up @@ -49,10 +48,6 @@

INIT_CFG_FILE = '/etc/sonic/init_cfg.json'

SYSTEMCTL_ACTION_STOP="stop"
SYSTEMCTL_ACTION_RESTART="restart"
SYSTEMCTL_ACTION_RESET_FAILED="reset-failed"

DEFAULT_NAMESPACE = ''
CFG_LOOPBACK_PREFIX = "Loopback"
CFG_LOOPBACK_PREFIX_LEN = len(CFG_LOOPBACK_PREFIX)
Expand Down Expand Up @@ -204,54 +199,6 @@ def breakout_Ports(cm, delPorts=list(), portJson=dict(), force=False, \
# Helper functions
#

# Execute action per NPU instance for multi instance services.
def execute_systemctl_per_asic_instance(inst, event, service, action):
try:
click.echo("Executing {} of service {}@{}...".format(action, service, inst))
clicommon.run_command("systemctl {} {}@{}.service".format(action, service, inst))
except SystemExit as e:
log.log_error("Failed to execute {} of service {}@{} with error {}".format(action, service, inst, e))
# Set the event object if there is a failure and exception was raised.
event.set()

# Execute action on list of systemd services
def execute_systemctl(list_of_services, action):
num_asic = multi_asic.get_num_asics()
generated_services_list, generated_multi_instance_services = _get_sonic_generated_services(num_asic)
if ((generated_services_list == []) and
(generated_multi_instance_services == [])):
log.log_error("Failed to get generated services")
return

for service in list_of_services:
if (service + '.service' in generated_services_list):
try:
click.echo("Executing {} of service {}...".format(action, service))
clicommon.run_command("systemctl {} {}".format(action, service))
except SystemExit as e:
log.log_error("Failed to execute {} of service {} with error {}".format(action, service, e))
raise

if (service + '.service' in generated_multi_instance_services):
# With Multi NPU, Start a thread per instance to do the "action" on multi instance services.
if multi_asic.is_multi_asic():
threads = []
# Use this event object to co-ordinate if any threads raised exception
e = threading.Event()

kwargs = {'service': service, 'action': action}
for inst in range(num_asic):
t = threading.Thread(target=execute_systemctl_per_asic_instance, args=(inst, e), kwargs=kwargs)
threads.append(t)
t.start()

# Wait for all the threads to finish.
for inst in range(num_asic):
threads[inst].join()

# Check if any of the threads have raised exception, if so exit the process.
if e.is_set():
sys.exit(1)

def _get_device_type():
"""
Expand Down Expand Up @@ -659,93 +606,25 @@ def _get_disabled_services_list(config_db):

return disabled_services_list

def _stop_services(config_db):
# This list is order-dependent. Please add services in the order they should be stopped
# on Mellanox platform pmon is stopped by syncd
services_to_stop = [
'telemetry',
'restapi',
'swss',
'lldp',
'pmon',
'bgp',
'hostcfgd',
'nat'
]

if asic_type == 'mellanox' and 'pmon' in services_to_stop:
services_to_stop.remove('pmon')

disabled_services = _get_disabled_services_list(config_db)

for service in disabled_services:
if service in services_to_stop:
services_to_stop.remove(service)

execute_systemctl(services_to_stop, SYSTEMCTL_ACTION_STOP)


def _reset_failed_services(config_db):
# This list is order-independent. Please keep list in alphabetical order
services_to_reset = [
'bgp',
'dhcp_relay',
'hostcfgd',
'hostname-config',
'interfaces-config',
'lldp',
'nat',
'ntp-config',
'pmon',
'radv',
'restapi',
'rsyslog-config',
'sflow',
'snmp',
'swss',
'syncd',
'teamd',
'telemetry'
]

disabled_services = _get_disabled_services_list(config_db)

for service in disabled_services:
if service in services_to_reset:
services_to_reset.remove(service)

execute_systemctl(services_to_reset, SYSTEMCTL_ACTION_RESET_FAILED)


def _restart_services(config_db):
# This list is order-dependent. Please add services in the order they should be started
# on Mellanox platform pmon is started by syncd
services_to_restart = [
'hostname-config',
'interfaces-config',
'ntp-config',
'rsyslog-config',
'swss',
'bgp',
'pmon',
'lldp',
'hostcfgd',
'nat',
'sflow',
'restapi',
'telemetry'
]

disabled_services = _get_disabled_services_list(config_db)

for service in disabled_services:
if service in services_to_restart:
services_to_restart.remove(service)

if asic_type == 'mellanox' and 'pmon' in services_to_restart:
services_to_restart.remove('pmon')

execute_systemctl(services_to_restart, SYSTEMCTL_ACTION_RESTART)
def _stop_services():
click.echo("Stopping SONiC target ...")
clicommon.run_command("sudo systemctl stop sonic.target")


def _get_sonic_services():
out = clicommon.run_command("systemctl list-dependencies --plain sonic.target | sed '1d'", return_cmd=True)
return [unit.strip() for unit in out.splitlines()]


def _reset_failed_services():
for service in _get_sonic_services():
click.echo("Resetting failed status on {}".format(service))
clicommon.run_command("systemctl reset-failed {}".format(service))


def _restart_services():
click.echo("Restarting SONiC target ...")
clicommon.run_command("sudo systemctl restart sonic.target")

# Reload Monit configuration to pick up new hostname in case it changed
click.echo("Reloading Monit configuration ...")
Expand Down Expand Up @@ -1046,7 +925,7 @@ def reload(db, filename, yes, load_sysinfo, no_service_restart):
#Stop services before config push
if not no_service_restart:
log.log_info("'reload' stopping services...")
_stop_services(db.cfgdb)
_stop_services()

# In Single ASIC platforms we have single DB service. In multi-ASIC platforms we have a global DB
# service running in the host + DB services running in each ASIC namespace created per ASIC.
Expand Down Expand Up @@ -1117,9 +996,9 @@ def reload(db, filename, yes, load_sysinfo, no_service_restart):
# We first run "systemctl reset-failed" to remove the "failed"
# status from all services before we attempt to restart them
if not no_service_restart:
_reset_failed_services(db.cfgdb)
_reset_failed_services()
log.log_info("'reload' restarting services...")
_restart_services(db.cfgdb)
_restart_services()

@config.command("load_mgmt_config")
@click.option('-y', '--yes', is_flag=True, callback=_abort_if_false,
Expand Down Expand Up @@ -1158,7 +1037,7 @@ def load_minigraph(db, no_service_restart):
#Stop services before config push
if not no_service_restart:
log.log_info("'load_minigraph' stopping services...")
_stop_services(db.cfgdb)
_stop_services()

# For Single Asic platform the namespace list has the empty string
# for mulit Asic platform the empty string to generate the config
Expand Down Expand Up @@ -1214,10 +1093,10 @@ def load_minigraph(db, no_service_restart):
# We first run "systemctl reset-failed" to remove the "failed"
# status from all services before we attempt to restart them
if not no_service_restart:
_reset_failed_services(db.cfgdb)
_reset_failed_services()
#FIXME: After config DB daemon is implemented, we'll no longer need to restart every service.
log.log_info("'load_minigraph' restarting services...")
_restart_services(db.cfgdb)
_restart_services()
click.echo("Please note setting loaded from minigraph will be lost after system reboot. To preserve setting, run `config save`.")


Expand Down
54 changes: 3 additions & 51 deletions tests/config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,43 +8,11 @@
from utilities_common.db import Db

load_minigraph_command_output="""\
Executing stop of service telemetry...
Executing stop of service swss...
Executing stop of service lldp...
Executing stop of service pmon...
Executing stop of service bgp...
Executing stop of service hostcfgd...
Executing stop of service nat...
Stopping SONiC target ...
Running command: /usr/local/bin/sonic-cfggen -H -m --write-to-db
Running command: pfcwd start_default
Running command: config qos reload --no-dynamic-buffer
Executing reset-failed of service bgp...
Executing reset-failed of service dhcp_relay...
Executing reset-failed of service hostcfgd...
Executing reset-failed of service hostname-config...
Executing reset-failed of service interfaces-config...
Executing reset-failed of service lldp...
Executing reset-failed of service nat...
Executing reset-failed of service ntp-config...
Executing reset-failed of service pmon...
Executing reset-failed of service radv...
Executing reset-failed of service rsyslog-config...
Executing reset-failed of service snmp...
Executing reset-failed of service swss...
Executing reset-failed of service syncd...
Executing reset-failed of service teamd...
Executing reset-failed of service telemetry...
Executing restart of service hostname-config...
Executing restart of service interfaces-config...
Executing restart of service ntp-config...
Executing restart of service rsyslog-config...
Executing restart of service swss...
Executing restart of service bgp...
Executing restart of service pmon...
Executing restart of service lldp...
Executing restart of service hostcfgd...
Executing restart of service nat...
Executing restart of service telemetry...
Restarting SONiC target ...
Reloading Monit configuration ...
Please note setting loaded from minigraph will be lost after system reboot. To preserve setting, run `config save`.
"""
Expand Down Expand Up @@ -74,24 +42,8 @@ def test_load_minigraph(self, get_cmd_module, setup_single_broacom_asic):
assert "\n".join([l.rstrip() for l in result.output.split('\n')]) == load_minigraph_command_output
assert mock_run_command.call_count == 38

def test_load_minigraph_with_disabled_telemetry(self, get_cmd_module, setup_single_broacom_asic):
with mock.patch("utilities_common.cli.run_command", mock.MagicMock(side_effect=mock_run_command_side_effect)) as mock_run_command:
(config, show) = get_cmd_module
db = Db()
runner = CliRunner()
result = runner.invoke(config.config.commands["feature"].commands["state"], ["telemetry", "disabled"], obj=db)
assert result.exit_code == 0
result = runner.invoke(show.cli.commands["feature"].commands["status"], ["telemetry"], obj=db)
print(result.output)
assert result.exit_code == 0
result = runner.invoke(config.config.commands["load_minigraph"], ["-y"], obj=db)
print(result.exit_code)
print(result.output)
assert result.exit_code == 0
assert "telemetry" not in result.output
assert mock_run_command.call_count == 35

@classmethod
def teardown_class(cls):
os.environ['UTILITIES_UNIT_TESTING'] = "0"
print("TEARDOWN")

3 changes: 3 additions & 0 deletions utilities_common/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,9 @@ def run_command(command, display_cmd=False, ignore_error=False, return_cmd=False
if display_cmd == True:
click.echo(click.style("Running command: ", fg='cyan') + click.style(command, fg='green'))

if os.getenv("UTILITIES_UNIT_TESTING") == "1":
return None if not return_cmd else ""

# No conversion needed for intfutil commands as it already displays
# both SONiC interface name and alias name for all interfaces.
if get_interface_naming_mode() == "alias" and not command.startswith("intfutil"):
Expand Down