Skip to content

Commit

Permalink
Make tallying of thumbnail errors Redis-resilient
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvkb committed Dec 16, 2023
1 parent cdc929d commit 0c652b1
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 16 deletions.
23 changes: 21 additions & 2 deletions api/api/utils/image_proxy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import itertools
import logging
from dataclasses import dataclass
from typing import Literal
Expand All @@ -12,6 +13,7 @@
import sentry_sdk
from aiohttp.client_exceptions import ClientResponseError
from asgiref.sync import sync_to_async
from redis.exceptions import ConnectionError
from sentry_sdk import push_scope, set_context

from api.utils.aiohttp import get_aiohttp_session
Expand All @@ -24,6 +26,8 @@

parent_logger = logging.getLogger(__name__)

exception_iterator = itertools.count()

HEADERS = {
"User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format(
purpose="ThumbnailGeneration"
Expand Down Expand Up @@ -113,6 +117,15 @@ def _tally_response(
)


@sync_to_async
def _tally_client_response_errors(tallies, month: str, domain: str, status: int):
logger = parent_logger.getChild("_tally_client_response_errors")
try:
tallies.incr(f"thumbnail_http_error:{domain}:{month}:{status}")
except ConnectionError:
logger.warning("Redis connect failed, thumbnail HTTP errors not tallied.")


_UPSTREAM_TIMEOUT = aiohttp.ClientTimeout(15)


Expand Down Expand Up @@ -178,7 +191,13 @@ async def get(
except Exception as exc:
exception_name = f"{exc.__class__.__module__}.{exc.__class__.__name__}"
key = f"thumbnail_error:{exception_name}:{domain}:{month}"
count = await tallies_incr(key)
try:
count = await tallies_incr(key)
except ConnectionError:
logger.warning("Redis connect failed, thumbnail errors not tallied.")
# We will use a counter to space out Sentry logs.
count = next(exception_iterator)

if count <= settings.THUMBNAIL_ERROR_INITIAL_ALERT_THRESHOLD or (
count % settings.THUMBNAIL_ERROR_REPEATED_ALERT_FREQUENCY == 0
):
Expand All @@ -198,7 +217,7 @@ async def get(
if isinstance(exc, ClientResponseError):
status = exc.status
do_not_wait_for(
tallies_incr(f"thumbnail_http_error:{domain}:{month}:{status}")
_tally_client_response_errors(tallies, month, domain, status)
)
logger.warning(
f"Failed to render thumbnail "
Expand Down
63 changes: 49 additions & 14 deletions api/test/unit/utils/test_image_proxy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import asyncio
import itertools
from dataclasses import replace
from test.factory.models.image import ImageFactory
from unittest.mock import patch
from urllib.parse import urlencode

from django.conf import settings
Expand Down Expand Up @@ -374,6 +376,7 @@ def test_get_successful_records_response_code(
),
],
)
@cache_availability_params
@alert_count_params
def test_get_exception_handles_error(
photon_get,
Expand All @@ -383,14 +386,25 @@ def test_get_exception_handles_error(
should_alert,
sentry_capture_exception,
setup_request_exception,
redis,
is_cache_reachable,
cache_name,
request,
caplog,
):
cache = request.getfixturevalue(cache_name)

setup_request_exception(exc)
month = get_monthly_timestamp()
key = f"thumbnail_error:{exc_name}:{TEST_IMAGE_DOMAIN}:{month}"
redis.set(key, count_start)
if is_cache_reachable:
cache.set(key, count_start)

with pytest.raises(UpstreamThumbnailException):
with (
pytest.raises(UpstreamThumbnailException),
patch(
"api.utils.image_proxy.exception_iterator", itertools.count(count_start + 1)
),
):
photon_get(TEST_MEDIA_INFO)

assert_func = (
Expand All @@ -399,9 +413,14 @@ def test_get_exception_handles_error(
else sentry_capture_exception.assert_not_called
)
assert_func()
assert redis.get(key) == str(count_start + 1).encode()

if is_cache_reachable:
assert cache.get(key) == str(count_start + 1).encode()
else:
assert "Redis connect failed, thumbnail errors not tallied." in caplog.text


@cache_availability_params
@alert_count_params
@pytest.mark.parametrize(
"status_code, text",
Expand All @@ -419,15 +438,23 @@ def test_get_http_exception_handles_error(
count_start,
should_alert,
sentry_capture_exception,
redis,
is_cache_reachable,
cache_name,
request,
caplog,
):
cache = request.getfixturevalue(cache_name)

month = get_monthly_timestamp()
key = f"thumbnail_error:aiohttp.client_exceptions.ClientResponseError:{TEST_IMAGE_DOMAIN}:{month}"
redis.set(key, count_start)
if is_cache_reachable:
cache.set(key, count_start)

with pytest.raises(UpstreamThumbnailException):
with pytest.raises(UpstreamThumbnailException), patch(
"api.utils.image_proxy.exception_iterator", itertools.count(count_start + 1)
):
with pook.use():
pook.get(PHOTON_URL_FOR_TEST_IMAGE).reply(status_code, text).mock
pook.get(PHOTON_URL_FOR_TEST_IMAGE).reply(status_code, text)
photon_get(TEST_MEDIA_INFO)

assert_func = (
Expand All @@ -436,13 +463,21 @@ def test_get_http_exception_handles_error(
else sentry_capture_exception.assert_not_called
)
assert_func()
assert redis.get(key) == str(count_start + 1).encode()

# Assertions about the HTTP error specific message
assert (
redis.get(f"thumbnail_http_error:{TEST_IMAGE_DOMAIN}:{month}:{status_code}")
== b"1"
)
if is_cache_reachable:
assert cache.get(key) == str(count_start + 1).encode()
assert (
cache.get(f"thumbnail_http_error:{TEST_IMAGE_DOMAIN}:{month}:{status_code}")
== b"1"
)
else:
assert all(
message in caplog.text
for message in [
"Redis connect failed, thumbnail HTTP errors not tallied.",
"Redis connect failed, thumbnail errors not tallied.",
]
)


@pook.on
Expand Down

0 comments on commit 0c652b1

Please sign in to comment.