diff --git a/src/system-health/pytest.ini b/src/system-health/pytest.ini index a9c5a74860a3..b5b94c91c307 100644 --- a/src/system-health/pytest.ini +++ b/src/system-health/pytest.ini @@ -1,2 +1,2 @@ [pytest] -addopts = --cov=health_checker --cov-report html --cov-report term --cov-report xml +addopts = --cov=health_checker --cov=healthd --cov-report html --cov-report term --cov-report xml diff --git a/src/system-health/scripts/healthd b/src/system-health/scripts/healthd index df52969d3aff..cb404de29ee1 100644 --- a/src/system-health/scripts/healthd +++ b/src/system-health/scripts/healthd @@ -7,6 +7,7 @@ import signal import threading +import time from sonic_py_common.daemon_base import DaemonBase from swsscommon.swsscommon import SonicV2Connector @@ -79,18 +80,27 @@ class HealthDaemon(DaemonBase): return sysmon = Sysmonitor() sysmon.task_run() - while 1: - stat = manager.check(chassis) - self._process_stat(chassis, manager.config, stat) - - if self.stop_event.wait(manager.config.interval): - break + while self._run_checker(manager, chassis): + pass except ImportError: self.log_warning("sonic_platform package not installed. Cannot start system-health daemon") self.deinit() sysmon.task_stop() + def _run_checker(self, manager, chassis): + begin = time.time() + stat = manager.check(chassis) + self._process_stat(chassis, manager.config, stat) + elapse = time.time() - begin + sleep_time_in_sec = manager.config.interval - elapse + if sleep_time_in_sec < 0: + self.log_notice(f'System health takes {elapse} seconds for one iteration') + sleep_time_in_sec = 1 + if self.stop_event.wait(sleep_time_in_sec): + return False + return True + def _process_stat(self, chassis, config, stat): from health_checker.health_checker import HealthChecker self._clear_system_health_table() diff --git a/src/system-health/tests/test_system_health.py b/src/system-health/tests/test_system_health.py index 4dd0462a89d0..cea80b48a349 100644 --- a/src/system-health/tests/test_system_health.py +++ b/src/system-health/tests/test_system_health.py @@ -12,6 +12,7 @@ import copy import os import sys +from imp import load_source from swsscommon import swsscommon from mock import Mock, MagicMock, patch @@ -23,7 +24,9 @@ test_path = os.path.dirname(os.path.abspath(__file__)) modules_path = os.path.dirname(test_path) +scripts_path = os.path.join(modules_path, 'scripts') sys.path.insert(0, modules_path) +sys.path.insert(0, scripts_path) from health_checker import utils from health_checker.config import Config from health_checker.hardware_checker import HardwareChecker @@ -35,6 +38,9 @@ from health_checker.sysmonitor import MonitorStateDbTask from health_checker.sysmonitor import MonitorSystemBusTask +load_source('healthd', os.path.join(scripts_path, 'healthd')) +from healthd import HealthDaemon + mock_supervisorctl_output = """ snmpd RUNNING pid 67, uptime 1:03:56 snmp-subagent EXITED Oct 19 01:53 AM @@ -740,3 +746,26 @@ def test_get_service_from_feature_table(): sysmon.get_service_from_feature_table(dir_list) assert 'bgp.service' in dir_list assert 'swss.service' not in dir_list + + +@patch('healthd.time.time') +def test_healthd_check_interval(mock_time): + daemon = HealthDaemon() + manager = MagicMock() + manager.check = MagicMock() + manager.config = MagicMock() + chassis = MagicMock() + daemon._process_stat = MagicMock() + daemon.stop_event = MagicMock() + daemon.stop_event.wait = MagicMock() + + daemon.stop_event.wait.return_value = False + manager.config.interval = 60 + mock_time.side_effect = [0, 3, 0, 61, 0, 1] + assert daemon._run_checker(manager, chassis) + daemon.stop_event.wait.assert_called_with(57) + assert daemon._run_checker(manager, chassis) + daemon.stop_event.wait.assert_called_with(1) + + daemon.stop_event.wait.return_value = True + assert not daemon._run_checker(manager, chassis)