Skip to content

Commit

Permalink
Improve load_minigraph to wait for eth0 restart before exit (sonic-net#3365)
Browse files Browse the repository at this point in the history
* Improve load_minigraph to wait for eth0 restart before exit
  • Loading branch information
liuh-80 authored Jun 18, 2024
1 parent d0856af commit 31f5fa8
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 2 deletions.
37 changes: 37 additions & 0 deletions config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,10 +898,47 @@ def _reset_failed_services():
for service in _get_sonic_services():
clicommon.run_command(['systemctl', 'reset-failed', str(service)])


def get_service_finish_timestamp(service):
    """Return the ExecMainExitTimestamp property systemd reports for a unit.

    Runs ``sudo systemctl show --no-pager <service> -p ExecMainExitTimestamp
    --value`` through ``clicommon.run_command`` and returns its output with
    leading/trailing spaces, tabs, newlines and carriage returns removed.

    Args:
        service: Name of the systemd unit to query.

    Returns:
        The stripped command output as a string.
    """
    show_cmd = [
        'sudo',
        'systemctl',
        'show',
        '--no-pager',
        service,
        '-p',
        'ExecMainExitTimestamp',
        '--value',
    ]
    # run_command(..., return_cmd=True) yields a pair; only the first element
    # (the command output) is needed here.
    raw_output, _unused = clicommon.run_command(show_cmd, return_cmd=True)
    return raw_output.strip(' \t\n\r')


def wait_service_restart_finish(service, last_timestamp, timeout=30):
    """Wait until a service reports a new finish timestamp, or give up.

    Polls ``get_service_finish_timestamp(service)`` about once per second and
    returns as soon as it yields a non-empty value that differs from
    ``last_timestamp``. If no such value is seen within ``timeout`` seconds,
    a warning is logged and the function returns anyway.

    Args:
        service: Name of the systemd unit to watch.
        last_timestamp: Finish timestamp captured before the restart was
            triggered; any different non-empty value counts as "restarted".
        timeout: Maximum number of seconds to keep polling (default 30).

    Returns:
        None, both on success and on timeout.
    """
    # Measure elapsed time on the monotonic clock: the wall clock
    # (time.time()) can be stepped backwards or forwards while we wait,
    # which would stretch or cut short the intended timeout.
    start_time = time.monotonic()
    elapsed_time = 0
    while elapsed_time < timeout:
        current_timestamp = get_service_finish_timestamp(service)
        # An empty value is not treated as a change; keep polling.
        if current_timestamp and (current_timestamp != last_timestamp):
            return

        time.sleep(1)
        elapsed_time = time.monotonic() - start_time

    log.log_warning("Service: {} does not restart in {} seconds, stop waiting".format(service, timeout))


def _restart_services():
last_interface_config_timestamp = get_service_finish_timestamp('interfaces-config')
last_networking_timestamp = get_service_finish_timestamp('networking')

click.echo("Restarting SONiC target ...")
clicommon.run_command(['sudo', 'systemctl', 'restart', 'sonic.target'])

# These services restart eth0, which causes the device to lose network connectivity for 10 seconds.
# When TACACS is enabled, every remote user command is authorized by the TACACS service over the network.
# If load_minigraph exits before eth0 has restarted, commands run after load_minigraph may fail.
wait_service_restart_finish('interfaces-config', last_interface_config_timestamp)
wait_service_restart_finish('networking', last_networking_timestamp)

try:
subprocess.check_call(['sudo', 'monit', 'status'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
click.echo("Enabling container monitoring ...")
Expand Down
9 changes: 7 additions & 2 deletions tests/config_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
import datetime
import pytest
import filecmp
import importlib
Expand Down Expand Up @@ -244,6 +245,10 @@ def mock_run_command_side_effect(*args, **kwargs):
return 'enabled', 0
elif command == 'cat /var/run/dhclient.eth0.pid':
return '101', 0
elif command == 'sudo systemctl show --no-pager interfaces-config -p ExecMainExitTimestamp --value':
return f'{datetime.datetime.now()}', 0
elif command == 'sudo systemctl show --no-pager networking -p ExecMainExitTimestamp --value':
return f'{datetime.datetime.now()}', 0
else:
return '', 0

Expand Down Expand Up @@ -656,7 +661,7 @@ def test_load_minigraph(self, get_cmd_module, setup_single_broadcom_asic):
assert "\n".join([l.rstrip() for l in result.output.split('\n')]) == load_minigraph_command_output
# Verify "systemctl reset-failed" is called for services under sonic.target
mock_run_command.assert_any_call(['systemctl', 'reset-failed', 'swss'])
assert mock_run_command.call_count == 8
assert mock_run_command.call_count == 12

@mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs', mock.MagicMock(return_value=(load_minigraph_platform_path, None)))
def test_load_minigraph_platform_plugin(self, get_cmd_module, setup_single_broadcom_asic):
Expand All @@ -671,7 +676,7 @@ def test_load_minigraph_platform_plugin(self, get_cmd_module, setup_single_broad
assert "\n".join([l.rstrip() for l in result.output.split('\n')]) == load_minigraph_platform_plugin_command_output
# Verify "systemctl reset-failed" is called for services under sonic.target
mock_run_command.assert_any_call(['systemctl', 'reset-failed', 'swss'])
assert mock_run_command.call_count == 8
assert mock_run_command.call_count == 12

@mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs', mock.MagicMock(return_value=(load_minigraph_platform_false_path, None)))
def test_load_minigraph_platform_plugin_fail(self, get_cmd_module, setup_single_broadcom_asic):
Expand Down

0 comments on commit 31f5fa8

Please sign in to comment.