Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[202305] [cherry-pick] Improve load_minigraph to wait for eth0 restart before exit (#3365) #3371

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,10 +891,47 @@ def _reset_failed_services():
for service in _get_sonic_services():
clicommon.run_command(['systemctl', 'reset-failed', str(service)])


def get_service_finish_timestamp(service):
    """Return the ``ExecMainExitTimestamp`` property systemd reports for *service*.

    The value is obtained from ``systemctl show`` and returned with leading
    and trailing whitespace (spaces, tabs, newlines, carriage returns)
    removed.

    Args:
        service: Name of the systemd unit to query.

    Returns:
        The stripped command output as a string.
    """
    # Assemble the query in two parts: the unit being inspected, then the
    # single property whose bare value we want printed.
    show_cmd = ['sudo', 'systemctl', 'show', '--no-pager', service]
    show_cmd += ['-p', 'ExecMainExitTimestamp', '--value']

    output, _ = clicommon.run_command(show_cmd, return_cmd=True)
    return output.strip(' \t\n\r')


def wait_service_restart_finish(service, last_timestamp, timeout=30):
    """Wait until *service* reports a finish timestamp different from *last_timestamp*.

    The service's finish timestamp is polled about once per second. The
    function returns as soon as a non-empty timestamp that differs from
    *last_timestamp* is observed. If that does not happen within *timeout*
    seconds, a warning is logged and the function returns anyway.

    Args:
        service: Name of the systemd unit to watch.
        last_timestamp: Finish timestamp captured before the restart was
            triggered.
        timeout: Maximum number of seconds to wait.

    Returns:
        None, whether the restart was observed or the wait timed out.
    """
    # Measure elapsed time on the monotonic clock: the wall clock may be
    # stepped (e.g. by a time sync) while we wait, which would otherwise cut
    # the wait short or stretch it well past the requested timeout.
    start_time = time.monotonic()
    while time.monotonic() - start_time < timeout:
        current_timestamp = get_service_finish_timestamp(service)
        # An empty timestamp is treated as "not finished yet", so keep
        # polling until a new, non-empty value appears.
        if current_timestamp and (current_timestamp != last_timestamp):
            return

        time.sleep(1)

    log.log_warning("Service: {} does not restart in {} seconds, stop waiting".format(service, timeout))


def _restart_services():
last_interface_config_timestamp = get_service_finish_timestamp('interfaces-config')
last_networking_timestamp = get_service_finish_timestamp('networking')

click.echo("Restarting SONiC target ...")
clicommon.run_command(['sudo', 'systemctl', 'restart', 'sonic.target'])

# These services restart eth0, which causes the device to lose network connectivity for about 10 seconds.
# When TACACS is enabled, every remote user command is authorized by the TACACS service over the network.
# If load_minigraph exits before eth0 has finished restarting, commands run after load_minigraph may fail.
wait_service_restart_finish('interfaces-config', last_interface_config_timestamp)
wait_service_restart_finish('networking', last_networking_timestamp)

try:
subprocess.check_call(['sudo', 'monit', 'status'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
click.echo("Enabling container monitoring ...")
Expand Down
9 changes: 7 additions & 2 deletions tests/config_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import pytest
import filecmp
import importlib
Expand Down Expand Up @@ -186,6 +187,10 @@ def mock_run_command_side_effect(*args, **kwargs):
return 'enabled', 0
elif command == 'cat /var/run/dhclient.eth0.pid':
return '101', 0
elif command == 'sudo systemctl show --no-pager interfaces-config -p ExecMainExitTimestamp --value':
return f'{datetime.datetime.now()}', 0
elif command == 'sudo systemctl show --no-pager networking -p ExecMainExitTimestamp --value':
return f'{datetime.datetime.now()}', 0
else:
return '', 0

Expand Down Expand Up @@ -413,7 +418,7 @@ def test_load_minigraph(self, get_cmd_module, setup_single_broadcom_asic):
assert "\n".join([l.rstrip() for l in result.output.split('\n')]) == load_minigraph_command_output
# Verify "systemctl reset-failed" is called for services under sonic.target
mock_run_command.assert_any_call(['systemctl', 'reset-failed', 'swss'])
assert mock_run_command.call_count == 8
assert mock_run_command.call_count == 12

@mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs', mock.MagicMock(return_value=(load_minigraph_platform_path, None)))
def test_load_minigraph_platform_plugin(self, get_cmd_module, setup_single_broadcom_asic):
Expand All @@ -428,7 +433,7 @@ def test_load_minigraph_platform_plugin(self, get_cmd_module, setup_single_broad
assert "\n".join([l.rstrip() for l in result.output.split('\n')]) == load_minigraph_platform_plugin_command_output
# Verify "systemctl reset-failed" is called for services under sonic.target
mock_run_command.assert_any_call(['systemctl', 'reset-failed', 'swss'])
assert mock_run_command.call_count == 8
assert mock_run_command.call_count == 12

@mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs', mock.MagicMock(return_value=(load_minigraph_platform_false_path, None)))
def test_load_minigraph_platform_plugin_fail(self, get_cmd_module, setup_single_broadcom_asic):
Expand Down
Loading