Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Commit

Permalink
Tally provider occurrences in results
Browse files Browse the repository at this point in the history
  • Loading branch information
sarayourfriend committed Jan 14, 2023
1 parent d0e2e5e commit 0965bd0
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 20 deletions.
3 changes: 3 additions & 0 deletions api/catalog/api/controllers/search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from elasticsearch_dsl.response import Hit, Response

import catalog.api.models as models
from catalog.api.utils import tallies
from catalog.api.utils.dead_link_mask import get_query_hash, get_query_mask
from catalog.api.utils.validate_images import validate_images

Expand Down Expand Up @@ -412,6 +413,8 @@ def search(
result_count, page_count = _get_result_and_page_count(
search_response, results, page_size, page
)

tallies.count_provider_occurrences(results)
return results or [], page_count, result_count


Expand Down
39 changes: 39 additions & 0 deletions api/catalog/api/utils/tallies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from collections import defaultdict
from datetime import datetime, timedelta

from django_redis import get_redis_connection
from django_redis.client.default import Redis


def _get_weekly_timestamp(now: "datetime | None" = None) -> str:
    """Get a timestamp for the Monday of the week containing ``now``.

    :param now: the moment whose week should be located; defaults to
        ``datetime.now()`` when omitted.
    :return: the date of that week's Monday formatted as ``YYYY-MM-DD``.
    """
    if now is None:
        now = datetime.now()

    # Step back with a ``timedelta`` instead of subtracting from ``now.day``:
    # when the week's Monday falls in the previous month (or year), the
    # subtraction produces a day of zero or less and ``datetime()`` raises
    # ``ValueError``.
    monday = now - timedelta(days=now.weekday())
    return monday.strftime("%Y-%m-%d")


def count_provider_occurrences(results: list[dict]) -> None:
    """Tally how often each provider appears in a set of search results.

    For every provider present in ``results`` two Redis counters are
    incremented, both keyed by the weekly timestamp:

    - ``provider_occurrences:{week}:{provider}`` by the number of results
      that came from that provider; and
    - ``provider_appeared_in_searches:{week}:{provider}`` by one.

    :param results: the search results, each exposing a ``"provider"`` key.
        ``None`` or an empty list is accepted and results in no writes.
    """
    # Nothing to tally: bail out before opening a Redis connection. This
    # also protects against callers that pass ``None`` for "no results".
    if not results:
        return

    # Use ``get_redis_connection`` rather than Django's caches
    # so that we can open a pipeline rather than sending off ``n``
    # writes and because the RedisPy client's ``incr`` method
    # is safe by default rather than Django's handspun method which:
    # 1. Takes two requests to execute; and
    # 2. Raises a ``ValueError`` if the key doesn't exist rather than
    # just initialising the key to the value like Redis's behaviour.
    tallies: Redis = get_redis_connection("tallies")

    provider_occurrences = defaultdict(int)
    for result in results:
        provider_occurrences[result["provider"]] += 1

    week = _get_weekly_timestamp()
    with tallies.pipeline() as pipe:
        for provider, occurrences in provider_occurrences.items():
            pipe.incr(f"provider_occurrences:{week}:{provider}", occurrences)
            pipe.incr(f"provider_appeared_in_searches:{week}:{provider}", 1)

        # Send every queued increment in a single round trip.
        pipe.execute()
39 changes: 19 additions & 20 deletions api/catalog/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,32 +178,31 @@
# Redis connection settings, each read through ``config`` with a fallback:
# host defaults to "localhost", port to 6379 (cast to ``int``) and the
# password to an empty string.
REDIS_HOST = config("REDIS_HOST", default="localhost")
REDIS_PORT = config("REDIS_PORT", default=6379, cast=int)
REDIS_PASSWORD = config("REDIS_PASSWORD", default="")
def _make_cache_config(dbnum: int, **overrides) -> dict:
    """Build a cache configuration entry for the Redis database ``dbnum``.

    :param dbnum: index of the Redis database, used as the path component
        of the connection URL.
    :param overrides: extra top-level settings merged over the defaults. An
        ``OPTIONS`` override is merged into the default options instead of
        replacing them wholesale.
    :return: the cache configuration dictionary.
    """
    # Pop ``OPTIONS`` out of the overrides first so that the final merge
    # cannot replace the merged options with the raw override.
    options = {
        "CLIENT_CLASS": "django_redis.client.DefaultClient",
    } | overrides.pop("OPTIONS", {})

    return {
        "BACKEND": "django_redis.cache.RedisCache",
        "LOCATION": f"redis://{REDIS_HOST}:{REDIS_PORT}/{dbnum}",
        "OPTIONS": options,
    } | overrides


# Every cache lives in its own Redis database so that the key spaces of the
# different use cases cannot collide.
CACHES = {
    # Site cache writes to 'default'
    "default": _make_cache_config(0),
    # For rapidly changing stats that we don't want to hammer the database with
    "traffic_stats": _make_cache_config(1),
    # For ensuring consistency among multiple Django workers and servers.
    # Used by Redlock.
    "locks": _make_cache_config(2),
    # Used for tracking tallied figures that shouldn't expire and are indexed
    # with a timestamp range (for example, the key could be a timestamp valid
    # for a given week), allowing historical data analysis.
    # ``TIMEOUT`` has to be passed as a keyword: ``_make_cache_config`` takes
    # its overrides as ``**overrides``, so a positional dict raises
    # ``TypeError``.
    "tallies": _make_cache_config(3, TIMEOUT=None),
}

# If key is not present then the authentication header won't be sent
Expand Down

0 comments on commit 0965bd0

Please sign in to comment.