From 82300a3697c1c8835ddbf64c73fa60d3dcce0115 Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Fri, 12 Aug 2022 18:44:38 -0400 Subject: [PATCH] Extend valid link cache time to 30 days and make cache expiration times minimally configurable (#878) * Make link validation cache configurable * Extend valid link cache time to 30 days --- api/catalog/api/utils/validate_images.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/api/catalog/api/utils/validate_images.py b/api/catalog/api/utils/validate_images.py index 6ad23f6f7..763be48fc 100644 --- a/api/catalog/api/utils/validate_images.py +++ b/api/catalog/api/utils/validate_images.py @@ -3,6 +3,7 @@ import django_redis import grequests +from decouple import config from catalog.api.utils.dead_link_mask import get_query_mask, save_query_mask @@ -18,6 +19,10 @@ def _get_cached_statuses(redis, image_urls): return [int(b.decode("utf-8")) if b is not None else None for b in cached_statuses] +def _get_expiry(status, default): + return config(f"LINK_VALIDATION_CACHE_EXPIRY__{status}", default=default, cast=int) + + def validate_images(query_hash, start_slice, results, image_urls): """ Make sure images exist before we display them. Treat redirects as broken @@ -69,14 +74,17 @@ def validate_images(query_hash, start_slice, results, image_urls): # Cache successful links for a day, and broken links for 120 days. if status == 200: logger.debug("healthy link " f"key={key} ") - pipe.expire(key, twenty_four_hours_seconds) + expiry = _get_expiry(200, twenty_four_hours_seconds * 30) elif status == -1: logger.debug("no response from provider " f"key={key}") # Content provider failed to respond; try again in a short interval - pipe.expire(key, thirty_minutes) + expiry = _get_expiry("_1", thirty_minutes) else: logger.debug("broken link " f"key={key} ") - pipe.expire(key, twenty_four_hours_seconds * 120) + expiry = _get_expiry("DEFAULT", twenty_four_hours_seconds * 120) + + pipe.expire(key, expiry) + pipe.execute() # Merge newly verified results with cached statuses