From 3eb462f35d02ccaac0ed643fd5539911a94f5f77 Mon Sep 17 00:00:00 2001
From: Hua Liu <58683130+liuh-80@users.noreply.github.com>
Date: Wed, 19 Jun 2024 09:39:41 +0800
Subject: [PATCH] Improve load_minigraph to wait eth0 restart before exit
 (#3365) (#3371)

Improve load_minigraph to wait for the eth0 restart before exiting.

Restarting sonic.target restarts the interfaces-config and networking
services, which restart eth0. _restart_services() now records each
service's ExecMainExitTimestamp before the restart and afterwards polls
(once per second, up to 30 seconds per service) until the timestamp
changes, logging a warning on timeout. The timeout is measured with
time.monotonic() so wall-clock adjustments cannot distort it.

This is a cherry-pick of #3365.
---
 config/main.py       | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 tests/config_test.py |  9 +++++++--
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/config/main.py b/config/main.py
index 34d95a58b3..dd87abc314 100644
--- a/config/main.py
+++ b/config/main.py
@@ -891,10 +891,57 @@ def _reset_failed_services():
     for service in _get_sonic_services():
         clicommon.run_command(['systemctl', 'reset-failed', str(service)])
 
+
+def get_service_finish_timestamp(service):
+    """Return the ExecMainExitTimestamp that systemd reports for `service`.
+
+    The value is the output of `systemctl show` with surrounding whitespace stripped.
+    """
+    out, _ = clicommon.run_command(['sudo',
+                                    'systemctl',
+                                    'show',
+                                    '--no-pager',
+                                    service,
+                                    '-p',
+                                    'ExecMainExitTimestamp',
+                                    '--value'],
+                                   return_cmd=True)
+    return out.strip(' \t\n\r')
+
+
+def wait_service_restart_finish(service, last_timestamp, timeout=30):
+    """Block until `service` reports a finish timestamp different from `last_timestamp`.
+
+    Polls once per second. If no new, non-empty timestamp shows up within
+    `timeout` seconds, a warning is logged and the function returns anyway.
+    """
+    # Monotonic clock: wall-clock adjustments must not shorten or stretch the timeout
+    start_time = time.monotonic()
+    elapsed_time = 0
+    while elapsed_time < timeout:
+        current_timestamp = get_service_finish_timestamp(service)
+        if current_timestamp and (current_timestamp != last_timestamp):
+            return
+
+        time.sleep(1)
+        elapsed_time = time.monotonic() - start_time
+
+    log.log_warning("Service: {} does not restart in {} seconds, stop waiting".format(service, timeout))
+
+
 def _restart_services():
+    last_interface_config_timestamp = get_service_finish_timestamp('interfaces-config')
+    last_networking_timestamp = get_service_finish_timestamp('networking')
+
     click.echo("Restarting SONiC target ...")
     clicommon.run_command(['sudo', 'systemctl', 'restart', 'sonic.target'])
 
+    # These services restart eth0, so the device loses network connectivity for about 10 seconds.
+    # With TACACS enabled, every remote user command is authorized by the TACACS service over the network.
+    # If load_minigraph exits before eth0 is back up, commands run right after it may fail.
+    wait_service_restart_finish('interfaces-config', last_interface_config_timestamp)
+    wait_service_restart_finish('networking', last_networking_timestamp)
+
     try:
         subprocess.check_call(['sudo', 'monit', 'status'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         click.echo("Enabling container monitoring ...")
diff --git a/tests/config_test.py b/tests/config_test.py
index cc0ac22e98..d19511a71b 100644
--- a/tests/config_test.py
+++ b/tests/config_test.py
@@ -1,3 +1,4 @@
+import datetime
 import pytest
 import filecmp
 import importlib
@@ -186,6 +187,10 @@ def mock_run_command_side_effect(*args, **kwargs):
             return 'enabled', 0
         elif command == 'cat /var/run/dhclient.eth0.pid':
             return '101', 0
+        elif command == 'sudo systemctl show --no-pager interfaces-config -p ExecMainExitTimestamp --value':
+            return f'{datetime.datetime.now()}', 0
+        elif command == 'sudo systemctl show --no-pager networking -p ExecMainExitTimestamp --value':
+            return f'{datetime.datetime.now()}', 0
         else:
             return '', 0
 
@@ -413,7 +418,7 @@ def test_load_minigraph(self, get_cmd_module, setup_single_broadcom_asic):
             assert "\n".join([l.rstrip() for l in result.output.split('\n')]) == load_minigraph_command_output
             # Verify "systemctl reset-failed" is called for services under sonic.target
             mock_run_command.assert_any_call(['systemctl', 'reset-failed', 'swss'])
-            assert mock_run_command.call_count == 8
+            assert mock_run_command.call_count == 12
 
     @mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs', mock.MagicMock(return_value=(load_minigraph_platform_path, None)))
     def test_load_minigraph_platform_plugin(self, get_cmd_module, setup_single_broadcom_asic):
@@ -428,7 +433,7 @@ def test_load_minigraph_platform_plugin(self, get_cmd_module, setup_single_broad
             assert "\n".join([l.rstrip() for l in result.output.split('\n')]) == load_minigraph_platform_plugin_command_output
             # Verify "systemctl reset-failed" is called for services under sonic.target
             mock_run_command.assert_any_call(['systemctl', 'reset-failed', 'swss'])
-            assert mock_run_command.call_count == 8
+            assert mock_run_command.call_count == 12
 
     @mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs', mock.MagicMock(return_value=(load_minigraph_platform_false_path, None)))
     def test_load_minigraph_platform_plugin_fail(self, get_cmd_module, setup_single_broadcom_asic):