Skip to content

Commit

Permalink
Reset failed API call counter on successful API call (#4862)
Browse files Browse the repository at this point in the history
* Reset failed API call counter on successful API call

Make sure to reset the failed API call counter after a successful
API call. While at it, also update the log messages to make it
clearer what exactly the problem is.

* Address pytest changes
  • Loading branch information
agners authored Jan 31, 2024
1 parent 7652657 commit 1908940
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 15 deletions.
23 changes: 14 additions & 9 deletions supervisor/misc/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@

_LOGGER: logging.Logger = logging.getLogger(__name__)

HASS_WATCHDOG_API = "HASS_WATCHDOG_API"
HASS_WATCHDOG_API_FAILURES = "HASS_WATCHDOG_API_FAILURES"
HASS_WATCHDOG_REANIMATE_FAILURES = "HASS_WATCHDOG_REANIMATE_FAILURES"
HASS_WATCHDOG_MAX_API_ATTEMPTS = 2
HASS_WATCHDOG_MAX_REANIMATE_ATTEMPTS = 5

RUN_UPDATE_SUPERVISOR = 29100
Expand Down Expand Up @@ -169,30 +170,34 @@ async def _watchdog_homeassistant_api(self):
if await self.sys_homeassistant.api.check_api_state():
# Home Assistant is running properly
self._cache[HASS_WATCHDOG_REANIMATE_FAILURES] = 0
self._cache[HASS_WATCHDOG_API_FAILURES] = 0
return

# Give up after 5 reanimation failures in a row. Supervisor cannot fix this issue.
reanimate_fails = self._cache.get(HASS_WATCHDOG_REANIMATE_FAILURES, 0)
if reanimate_fails >= HASS_WATCHDOG_MAX_REANIMATE_ATTEMPTS:
if reanimate_fails == HASS_WATCHDOG_MAX_REANIMATE_ATTEMPTS:
_LOGGER.critical(
"Watchdog cannot reanimate Home Assistant, failed all %s attempts.",
"Watchdog cannot reanimate Home Assistant Core, failed all %s attempts.",
reanimate_fails,
)
self._cache[HASS_WATCHDOG_REANIMATE_FAILURES] += 1
return

# Init cache data
retry_scan = self._cache.get(HASS_WATCHDOG_API, 0)
api_fails = self._cache.get(HASS_WATCHDOG_API_FAILURES, 0)

# Looks like we ran into a problem
retry_scan += 1
if retry_scan == 1:
self._cache[HASS_WATCHDOG_API] = retry_scan
_LOGGER.warning("Watchdog miss API response from Home Assistant")
api_fails += 1
if api_fails < HASS_WATCHDOG_MAX_API_ATTEMPTS:
self._cache[HASS_WATCHDOG_API_FAILURES] = api_fails
_LOGGER.warning("Watchdog missed an Home Assistant Core API response.")
return

_LOGGER.error("Watchdog found a problem with Home Assistant API!")
_LOGGER.error(
"Watchdog missed %s Home Assistant Core API responses in a row. Restarting Home Assistant Core API!",
HASS_WATCHDOG_MAX_API_ATTEMPTS,
)
try:
await self.sys_homeassistant.core.restart()
except HomeAssistantError as err:
Expand All @@ -203,7 +208,7 @@ async def _watchdog_homeassistant_api(self):
else:
self._cache[HASS_WATCHDOG_REANIMATE_FAILURES] = 0
finally:
self._cache[HASS_WATCHDOG_API] = 0
self._cache[HASS_WATCHDOG_API_FAILURES] = 0

@Job(name="tasks_update_cli", conditions=PLUGIN_AUTO_UPDATE_CONDITIONS)
async def _update_cli(self):
Expand Down
18 changes: 12 additions & 6 deletions tests/misc/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,21 @@ async def test_watchdog_homeassistant_api(
await tasks._watchdog_homeassistant_api()

restart.assert_not_called()
assert "Watchdog miss API response from Home Assistant" in caplog.text
assert "Watchdog found a problem with Home Assistant API!" not in caplog.text
assert "Watchdog missed an Home Assistant Core API response." in caplog.text
assert (
"Watchdog missed 2 Home Assistant Core API responses in a row. Restarting Home Assistant Core API!"
not in caplog.text
)

caplog.clear()
await tasks._watchdog_homeassistant_api()

restart.assert_called_once()
assert "Watchdog miss API response from Home Assistant" not in caplog.text
assert "Watchdog found a problem with Home Assistant API!" in caplog.text
assert "Watchdog missed an Home Assistant Core API response." not in caplog.text
assert (
"Watchdog missed 2 Home Assistant Core API responses in a row. Restarting Home Assistant Core API!"
in caplog.text
)


async def test_watchdog_homeassistant_api_off(tasks: Tasks, coresys: CoreSys):
Expand Down Expand Up @@ -120,10 +126,10 @@ async def test_watchdog_homeassistant_api_reanimation_limit(
await tasks._watchdog_homeassistant_api()

restart.assert_not_called()
assert "Watchdog miss API response from Home Assistant" not in caplog.text
assert "Watchdog missed an Home Assistant Core API response." not in caplog.text
assert "Watchdog found a problem with Home Assistant API!" not in caplog.text
assert (
"Watchdog cannot reanimate Home Assistant, failed all 5 attempts."
"Watchdog cannot reanimate Home Assistant Core, failed all 5 attempts."
in caplog.text
)

Expand Down

0 comments on commit 1908940

Please sign in to comment.