Skip to content

Commit

Permalink
Merge branch 'main' into add/nuxt_time_resp_alarms_runbooks
Browse files Browse the repository at this point in the history
  • Loading branch information
obulat authored Sep 19, 2023
2 parents 72da513 + aa16d4f commit 6ad416a
Show file tree
Hide file tree
Showing 148 changed files with 3,136 additions and 3,142 deletions.
8 changes: 3 additions & 5 deletions api/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,20 @@ django-cors-headers = "~=4.2"
django-log-request-id = "~=2.0"
django-oauth-toolkit = "~=2.3"
django-redis = "~=5.3"
django-sslserver = "~=0.22"
django-storages = "~=1.13"
django-tqdm = "~=1.3"
django-uuslug = "~=2.0"
djangorestframework = "~=3.14"
drf-spectacular = "*"
elasticsearch-dsl = "~=7.4"
elasticsearch = "==8.8.2"
elasticsearch-dsl = "~=8.9"
future = "~=0.18"
gunicorn = "~=21.2"
limit = "~=0.2"
Pillow = "~=10.0"
Pillow = "~=10.0.1"
psycopg2 = "~=2.9"
python-decouple = "~=3.8"
python-xmp-toolkit = "~=2.0"
redlock-py = "~=1.0"
requests-oauthlib = "~=1.3"
sentry-sdk = "~=1.30"
django-split-settings = "*"

Expand Down
630 changes: 287 additions & 343 deletions api/Pipfile.lock

Large diffs are not rendered by default.

16 changes: 12 additions & 4 deletions api/api/controllers/search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,9 +486,13 @@ def search(
# check things like provider density for a set of queries.
tallies.count_provider_occurrences(results_to_tally, index)

search_context = SearchContext.build(results, origin_index)
if not results:
results = []

result_ids = [result.identifier for result in results]
search_context = SearchContext.build(result_ids, origin_index)

return results or [], page_count, result_count, search_context.asdict()
return results, page_count, result_count, search_context.asdict()


def related_media(uuid, index, filter_dead):
Expand Down Expand Up @@ -522,8 +526,12 @@ def related_media(uuid, index, filter_dead):

result_count, _ = _get_result_and_page_count(response, results, page_size, page)

search_context = SearchContext.build(results, index)
return results or [], result_count, search_context.asdict()
if not results:
results = []

result_ids = [result.identifier for result in results]
search_context = SearchContext.build(result_ids, index)
return results, result_count, search_context.asdict()


def get_sources(index):
Expand Down
4 changes: 2 additions & 2 deletions api/api/migrations/0052_relational_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ class Migration(migrations.Migration):
migrations.AlterField(
model_name='matureaudio',
name='identifier',
field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the mature audio.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_audio', serialize=False, to='api.audio', to_field='identifier'),
field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the sensitive audio.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_audio', serialize=False, to='api.audio', to_field='identifier'),
),
migrations.AlterField(
model_name='matureimage',
name='identifier',
field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the mature image.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_image', serialize=False, to='api.image', to_field='identifier'),
field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the sensitive image.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_image', serialize=False, to='api.image', to_field='identifier'),
),
migrations.RenameField(
model_name="audioreport",
Expand Down
2 changes: 1 addition & 1 deletion api/api/models/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ class MatureAudio(AbstractMatureMedia):
db_constraint=False,
db_column="identifier",
related_name="mature_audio",
help_text="The reference to the mature audio.",
help_text="The reference to the sensitive audio.",
)

class Meta:
Expand Down
22 changes: 10 additions & 12 deletions api/api/models/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from django.db import models
from django.utils.html import format_html

from elasticsearch import Elasticsearch, TransportError
from elasticsearch import Elasticsearch, NotFoundError

from api.models.base import OpenLedgerModel
from api.models.mixins import ForeignIdentifierMixin, IdentifierMixin, MediaMixin
Expand Down Expand Up @@ -275,16 +275,14 @@ def _perform_index_update(self, method: str, raise_errors: bool, **es_method_arg
refresh=True,
**es_method_args,
)
except TransportError as e:
if e.status_code == 404:
# This is expected for the filtered index, but we should still
# log, just in case.
logger.warning(
f"Document with _id {document_id} not found "
f"in {index} index. No update performed."
)
else:
raise e
except NotFoundError:
# This is expected for the filtered index, but we should still
# log, just in case.
logger.warning(
f"Document with _id {document_id} not found "
f"in {index} index. No update performed."
)
continue


class AbstractDeletedMedia(PerformIndexUpdateMixin, OpenLedgerModel):
Expand Down Expand Up @@ -353,7 +351,7 @@ class AbstractMatureMedia(PerformIndexUpdateMixin, models.Model):
db_constraint=False,
db_column="identifier",
related_name="mature_abstract_media",
help_text="The reference to the mature media.",
help_text="The reference to the sensitive media.",
)
"""
Sub-classes must override this field to point to a concrete sub-class of
Expand Down
42 changes: 41 additions & 1 deletion api/api/serializers/media_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ class MediaSearchRequestSerializer(serializers.Serializer):
label="mature",
default=False,
required=False,
help_text="Whether to include content for mature audiences.",
help_text="Whether to include sensitive content.",
)

# The ``unstable__`` prefix is used in the query params.
Expand Down Expand Up @@ -365,6 +365,17 @@ class Meta:
fields = ["identifier", "reason", "description"]
read_only_fields = ["identifier"]

def to_internal_value(self, data):
    """
    Map data before validation.

    See ``MediaReportRequestSerializer::_map_reason`` docstring for
    further explanation.

    The incoming payload is never modified in place: the reason is mapped
    on a copy, so the caller's data is left untouched and immutable
    payloads (e.g. Django's ``QueryDict`` for form-encoded requests) do
    not raise on assignment.

    :param data: the raw, not-yet-validated request payload
    :return: whatever the parent ``to_internal_value`` returns for the
        payload with its ``reason`` mapped
    """

    # Only map when there is a reason to map. A payload without ``reason``
    # (or one that is not a dict at all) is handed to the parent unchanged
    # so that it is reported through regular validation rather than as a
    # ``KeyError``/``TypeError`` raised from this method.
    if isinstance(data, dict) and "reason" in data:
        data = data.copy()
        data["reason"] = self._map_reason(data["reason"])
    return super().to_internal_value(data)

def validate(self, attrs):
if (
attrs["reason"] == "other"
Expand All @@ -373,8 +384,37 @@ def validate(self, attrs):
raise serializers.ValidationError(
"Description must be at least be 20 characters long"
)

return attrs

def _map_reason(self, value):
"""
Map `sensitive` to `mature` for forwards compatibility.
This is an interim implementation until the API is updated
to use the new "sensitive" terminology.
Once the API is updated to use "sensitive" as the designator
rather than the current "mature" term, this function should
be updated to reverse the mapping, that is, map `mature` to
`sensitive`, for backwards compatibility.
Note: This cannot be implemented as a simpler `validate_reason` method
on the serializer because field validation runs _before_ validators
declared on the serializer. This means the choice field's validation
will complain about `reason` set to the incorrect value before we have
a chance to map it to the correct value.
This could be mitigated by adding all values, current, future, and
deprecated, to the model field. However, that requires a migration
each time we make that change, and would send an incorrect message
about our data expectations. It's cleaner and more consistent to map
the data up-front, at serialization time, to prevent any confusion at
the data model level.
"""

return "mature" if value == "sensitive" else value


########################
# Response serializers #
Expand Down
19 changes: 10 additions & 9 deletions api/api/utils/search_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from django.conf import settings

from elasticsearch_dsl import Q, Search
from elasticsearch_dsl.response import Hit

from api.constants.media_types import OriginIndex

Expand All @@ -15,18 +14,18 @@ class SearchContext:
# to convey that it is the Openverse result identifier and
# not the document _id

all_result_identifiers: set[str]
all_result_identifiers: list[str]
"""All the result identifiers gathered for the search."""

sensitive_text_result_identifiers: set[str]
"""Subset of result identifiers for results with sensitive textual content."""

@classmethod
def build(cls, results: list[Hit], origin_index: OriginIndex) -> Self:
if not results:
return cls(set(), set())

all_result_identifiers = {r.identifier for r in results}
def build(
cls, all_result_identifiers: list[str], origin_index: OriginIndex
) -> Self:
if not all_result_identifiers:
return cls(list(), set())

if not settings.ENABLE_FILTERED_INDEX_QUERIES:
return cls(all_result_identifiers, set())
Expand All @@ -41,14 +40,16 @@ def build(cls, results: list[Hit], origin_index: OriginIndex) -> Self:
# cf: https://github.com/WordPress/openverse/issues/2154
Q(
"terms",
**{"identifier.keyword": [result.identifier for result in results]},
**{"identifier.keyword": all_result_identifiers},
)
)

# The default query size is 10, so we need to slice the query
# to change the size to be big enough to encompass all the
# results.
results_in_filtered_index = filtered_index_search[: len(results)].execute()
results_in_filtered_index = filtered_index_search[
: len(all_result_identifiers)
].execute()
filtered_index_identifiers = {
result.identifier for result in results_in_filtered_index
}
Expand Down
11 changes: 11 additions & 0 deletions api/api/views/media_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from api.serializers.provider_serializers import ProviderSerializer
from api.utils import image_proxy
from api.utils.pagination import StandardPagination
from api.utils.search_context import SearchContext


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -87,6 +88,16 @@ def get_db_results(self, results):

# Standard actions

def retrieve(self, request, *_, **__):
    """
    Return the serialized representation of a single media item.

    A search context is built for just this item's identifier against the
    view's default index, and merged into the serializer context. On a key
    clash the view's own serializer context takes precedence, because it
    is the right-hand operand of the merge.
    """
    instance = self.get_object()

    identifiers = [str(instance.identifier)]
    context = SearchContext.build(identifiers, self.default_index).asdict()
    context = context | self.get_serializer_context()

    serializer = self.get_serializer(instance, context=context)
    return Response(serializer.data)

def list(self, request, *_, **__):
params = self._get_request_serializer(request)

Expand Down
2 changes: 0 additions & 2 deletions api/conf/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
"django.contrib.sessions",
"django.contrib.messages",
"django.contrib.staticfiles",
# Third-party installed apps, more can be added in other settings files.
"sslserver",
]

MIDDLEWARE = [
Expand Down
35 changes: 12 additions & 23 deletions api/conf/settings/elasticsearch.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,44 @@
"""This file contains configuration pertaining to Elasticsearch."""

from aws_requests_auth.aws_auth import AWSRequestsAuth
from decouple import config
from elasticsearch import Elasticsearch, RequestsHttpConnection
from elasticsearch import Elasticsearch
from elasticsearch_dsl import connections

from api.constants.media_types import MEDIA_TYPES
from conf.settings.aws import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY


def _elasticsearch_connect():
def _elasticsearch_connect() -> tuple[Elasticsearch, str]:
"""
Connect to configured Elasticsearch domain.
:return: A tuple of the Elasticsearch client and the endpoint URL it connects to.
"""

es_scheme = config("ELASTICSEARCH_SCHEME", default="http://")
es_url = config("ELASTICSEARCH_URL", default="localhost")
es_port = config("ELASTICSEARCH_PORT", default=9200, cast=int)
es_aws_region = config("ELASTICSEARCH_AWS_REGION", default="us-east-1")

auth = AWSRequestsAuth(
aws_access_key=AWS_ACCESS_KEY_ID,
aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
aws_host=es_url,
aws_region=es_aws_region,
aws_service="es",
)
auth.encode = lambda x: bytes(x.encode("utf-8"))

es_endpoint = f"{es_scheme}{es_url}:{es_port}"

_es = Elasticsearch(
host=es_url,
port=es_port,
connection_class=RequestsHttpConnection,
timeout=10,
es_endpoint,
request_timeout=10,
max_retries=1,
retry_on_timeout=True,
http_auth=auth,
wait_for_status="yellow",
)
_es.info()
return _es
_es.cluster.health(wait_for_status="yellow")
return _es, es_endpoint


SETUP_ES = config("SETUP_ES", default=True, cast=bool)
if SETUP_ES:
ES = _elasticsearch_connect()
ES, ES_ENDPOINT = _elasticsearch_connect()
#: Elasticsearch client, also aliased to connection 'default'

connections.add_connection("default", ES)
else:
ES = None
ES, ES_ENDPOINT = None, None

MEDIA_INDEX_MAPPING = {
media_type: config(f"{media_type.upper()}_INDEX_NAME", default=media_type)
Expand Down
7 changes: 6 additions & 1 deletion api/test/factory/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from test.factory.models.audio import (
AudioAddOnFactory,
AudioFactory,
AudioReportFactory,
MatureAudioFactory,
)
from test.factory.models.image import ImageFactory, MatureImageFactory
from test.factory.models.image import (
ImageFactory,
ImageReportFactory,
MatureImageFactory,
)
from test.factory.models.oauth2 import (
AccessTokenFactory,
OAuth2RegistrationFactory,
Expand Down
9 changes: 8 additions & 1 deletion api/test/factory/models/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import factory
from factory.django import DjangoModelFactory

from api.models.audio import Audio, AudioAddOn, MatureAudio
from api.models.audio import Audio, AudioAddOn, AudioReport, MatureAudio


class MatureAudioFactory(DjangoModelFactory):
Expand All @@ -28,3 +28,10 @@ class Meta:
audio_identifier = IdentifierFactory(AudioFactory)

waveform_peaks = Faker("waveform")


# Test factory that produces ``AudioReport`` model instances.
class AudioReportFactory(DjangoModelFactory):
class Meta:
model = AudioReport

# The reported audio is supplied by an ``AudioFactory`` sub-factory.
media_obj = factory.SubFactory(AudioFactory)
Loading

0 comments on commit 6ad416a

Please sign in to comment.