Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Commit

Permalink
Save individual searches and result IDs in Postgres
Browse files Browse the repository at this point in the history
  • Loading branch information
sarayourfriend committed Jan 17, 2023
1 parent 0965bd0 commit 59e8569
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 42 deletions.
9 changes: 7 additions & 2 deletions api/catalog/api/controllers/search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from elasticsearch_dsl.response import Hit, Response

import catalog.api.models as models
from catalog.api.utils import tallies
from catalog.api.utils.dead_link_mask import get_query_hash, get_query_mask
from catalog.api.utils.validate_images import validate_images

Expand Down Expand Up @@ -414,7 +413,13 @@ def search(
search_response, results, page_size, page
)

tallies.count_provider_occurrences(results)
models.Search(
query_hash=get_query_hash(s),
page=page,
results=[r["identifier"] for r in results],
query_params=search_params.data,
).save()

return results or [], page_count, result_count


Expand Down
25 changes: 25 additions & 0 deletions api/catalog/api/migrations/0053_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 4.1.4 on 2023-01-17 07:16

import django.contrib.postgres.fields
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``Search`` table.

    The table holds one record per saved search: an auto-incrementing ``id``,
    a ``created_at`` timestamp, the ``query_hash``, the ``query_params`` JSON
    document, the ``results`` array of UUIDs and the ``page`` number.
    """

    dependencies = [
        ('api', '0052_relational_fields'),
    ]

    operations = [
        migrations.CreateModel(
            name='Search',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('created_at', models.DateTimeField(auto_now=True, help_text='The date the query was executed.')),
                # ``max_length`` must equal the value declared on the ``Search``
                # model's ``query_hash`` field (128); otherwise the column that
                # gets created does not match the model definition.
                ('query_hash', models.CharField(help_text='The query hash generated by ``dead_link_mask.get_query_hash``.', max_length=128)),
                ('query_params', models.JSONField(help_text='The query parameters used to execute the query.')),
                ('results', django.contrib.postgres.fields.ArrayField(base_field=models.UUIDField(), help_text='List of result IDs for the query.', size=None)),
                ('page', models.IntegerField(help_text='The page number of the request. Extracted from ``query_params`` for ease of querying.')),
            ],
        ),
    ]
1 change: 1 addition & 0 deletions api/catalog/api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@
OAuth2Verification,
ThrottledApplication,
)
from catalog.api.models.search import Search
53 changes: 53 additions & 0 deletions api/catalog/api/models/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from django.contrib.postgres.fields import ArrayField
from django.db import models


class Search(models.Model):
    """
    An individual query with attached results.

    Each record stores the hash of the query, the parameters it was executed
    with, the requested page number and the identifiers of the results.

    TODO(sarayourfriend): Which fields need indexes? :thinking:
    """

    created_at = models.DateTimeField(
        # ``auto_now_add`` stamps the record once, when it is first created.
        # ``auto_now`` would overwrite the value on every ``save()``, so the
        # field would stop meaning "when the query was executed" as soon as a
        # record is saved a second time.
        # NOTE: this option does not alter the database column, but it is part
        # of the migration state, so ``makemigrations`` will report the change.
        auto_now_add=True,
        help_text="The date the query was executed.",
    )

    query_hash = models.CharField(
        # We use SHA256 which only needs 64, but we can leave
        # room for changes (maybe not necessary?)
        max_length=128,
        help_text="The query hash generated by ``dead_link_mask.get_query_hash``.",
    )

    query_params = models.JSONField(
        help_text="The query parameters used to execute the query.",
    )

    results = ArrayField(
        # Is this going to be a pain to join to retrieve the providers?
        # If we rely on this approach then we'd probably forever be reliant
        # on catalogue data being duplicated between ES and Postgres API.
        # Maybe that wouldn't be the case if we did weekly extractions
        # of the data from here into the catalogue DB and then did the joining
        # etc, over there? In that case, is there somewhere other than
        # Postgres we can keep the data? Is that option expedient (would we be
        # able to use it in a matter of a week or two so that we can actually
        # benefit from this w/r/t iNaturalist?)
        models.UUIDField(),
        help_text="List of result IDs for the query.",
    )

    page = models.IntegerField(
        help_text=(
            "The page number of the request. "
            "Extracted from ``query_params`` for ease of querying."
        ),
    )
39 changes: 0 additions & 39 deletions api/catalog/api/utils/tallies.py

This file was deleted.

2 changes: 1 addition & 1 deletion api/catalog/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def _make_cache_config(dbnum: int, **overrides) -> dict:
# Used for tracking tallied figures that shouldn't expire and are indexed
# with a timestamp range (for example, the key could be a timestamp valid
# for a given week), allowing historical data analysis.
"tallies": _make_cache_config(3, {"TIMEOUT": None}),
"tallies": _make_cache_config(3, TIMEOUT=None),
}

# If key is not present then the authentication header won't be sent
Expand Down

0 comments on commit 59e8569

Please sign in to comment.