From d562f4719aefdf7d90c13ca6738a5e8c27ce352f Mon Sep 17 00:00:00 2001
From: sarayourfriend
Date: Thu, 20 Jun 2024 09:55:24 +1000
Subject: [PATCH] Log dead link verification request timings (#4508)

* Reduce logging from dead link checks

Each call to `get_aiohttp_session` logs a line about whether a new session is created or being reused. This adds a huge number of log lines with zero value, as for each request, they will be identical. Instead, they just fill up the logs and make them harder to read and visually scan

* Log dead link verification request timings

* Use perf counter
---
 api/api/utils/check_dead_links/__init__.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/api/api/utils/check_dead_links/__init__.py b/api/api/utils/check_dead_links/__init__.py
index d21e89b9d4b..ed65cf03f15 100644
--- a/api/api/utils/check_dead_links/__init__.py
+++ b/api/api/utils/check_dead_links/__init__.py
@@ -43,22 +43,34 @@ def _get_expiry(status, default):
 _timeout = aiohttp.ClientTimeout(total=2)
 
 
-async def _head(url: str) -> tuple[str, int]:
+async def _head(url: str, session: aiohttp.ClientSession) -> tuple[str, int]:
+    start_time = time.perf_counter()
+
     try:
-        session = await get_aiohttp_session()
         response = await session.head(
             url, allow_redirects=False, headers=HEADERS, timeout=_timeout
         )
-        return url, response.status
+        status = response.status
     except (aiohttp.ClientError, asyncio.TimeoutError) as exception:
         _log_validation_failure(exception)
-        return url, -1
+        status = -1
+
+    end_time = time.perf_counter()
+    logger.info(
+        "dead_link_validation_timing",
+        url=url,
+        status=status,
+        time=end_time - start_time,
+    )
+
+    return url, status
 
 
 # https://stackoverflow.com/q/55259755
 @async_to_sync
 async def _make_head_requests(urls: list[str]) -> list[tuple[str, int]]:
-    tasks = [asyncio.ensure_future(_head(url)) for url in urls]
+    session = await get_aiohttp_session()
+    tasks = [asyncio.ensure_future(_head(url, session)) for url in urls]
     responses = asyncio.gather(*tasks)
     await responses
     return responses.result()