Merge branch 'main' into add/nuxt_time_resp_alarms_runbooks

WordPress · Sep 19, 2023 · 6ad416a
2 parents 72da513 + aa16d4f
commit 6ad416a
Show file tree

Hide file tree

Showing 148 changed files with 3,136 additions and 3,142 deletions.
diff --git a/api/Pipfile b/api/Pipfile
@@ -27,22 +27,20 @@ django-cors-headers = "~=4.2"
 django-log-request-id = "~=2.0"
 django-oauth-toolkit = "~=2.3"
 django-redis = "~=5.3"
-django-sslserver = "~=0.22"
 django-storages = "~=1.13"
 django-tqdm = "~=1.3"
 django-uuslug = "~=2.0"
 djangorestframework = "~=3.14"
 drf-spectacular = "*"
-elasticsearch-dsl = "~=7.4"
+elasticsearch = "==8.8.2"
+elasticsearch-dsl = "~=8.9"
 future = "~=0.18"
 gunicorn = "~=21.2"
 limit = "~=0.2"
-Pillow = "~=10.0"
+Pillow = "~=10.0.1"
 psycopg2 = "~=2.9"
 python-decouple = "~=3.8"
 python-xmp-toolkit = "~=2.0"
-redlock-py = "~=1.0"
-requests-oauthlib = "~=1.3"
 sentry-sdk = "~=1.30"
 django-split-settings = "*"
 

diff --git a/api/Pipfile.lock b/api/Pipfile.lock
diff --git a/api/api/controllers/search_controller.py b/api/api/controllers/search_controller.py
@@ -486,9 +486,13 @@ def search(
         # check things like provider density for a set of queries.
         tallies.count_provider_occurrences(results_to_tally, index)
 
-    search_context = SearchContext.build(results, origin_index)
+    if not results:
+        results = []
+
+    result_ids = [result.identifier for result in results]
+    search_context = SearchContext.build(result_ids, origin_index)
 
-    return results or [], page_count, result_count, search_context.asdict()
+    return results, page_count, result_count, search_context.asdict()
 
 
 def related_media(uuid, index, filter_dead):
@@ -522,8 +526,12 @@ def related_media(uuid, index, filter_dead):
 
     result_count, _ = _get_result_and_page_count(response, results, page_size, page)
 
-    search_context = SearchContext.build(results, index)
-    return results or [], result_count, search_context.asdict()
+    if not results:
+        results = []
+
+    result_ids = [result.identifier for result in results]
+    search_context = SearchContext.build(result_ids, index)
+    return results, result_count, search_context.asdict()
 
 
 def get_sources(index):

diff --git a/api/api/migrations/0052_relational_fields.py b/api/api/migrations/0052_relational_fields.py
@@ -39,12 +39,12 @@ class Migration(migrations.Migration):
         migrations.AlterField(
             model_name='matureaudio',
             name='identifier',
-            field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the mature audio.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_audio', serialize=False, to='api.audio', to_field='identifier'),
+            field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the sensitive audio.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_audio', serialize=False, to='api.audio', to_field='identifier'),
         ),
         migrations.AlterField(
             model_name='matureimage',
             name='identifier',
-            field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the mature image.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_image', serialize=False, to='api.image', to_field='identifier'),
+            field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the sensitive image.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_image', serialize=False, to='api.image', to_field='identifier'),
         ),
         migrations.RenameField(
             model_name="audioreport",

diff --git a/api/api/models/audio.py b/api/api/models/audio.py
@@ -279,7 +279,7 @@ class MatureAudio(AbstractMatureMedia):
         db_constraint=False,
         db_column="identifier",
         related_name="mature_audio",
-        help_text="The reference to the mature audio.",
+        help_text="The reference to the sensitive audio.",
     )
 
     class Meta:

diff --git a/api/api/models/media.py b/api/api/models/media.py
@@ -6,7 +6,7 @@
 from django.db import models
 from django.utils.html import format_html
 
-from elasticsearch import Elasticsearch, TransportError
+from elasticsearch import Elasticsearch, NotFoundError
 
 from api.models.base import OpenLedgerModel
 from api.models.mixins import ForeignIdentifierMixin, IdentifierMixin, MediaMixin
@@ -275,16 +275,14 @@ def _perform_index_update(self, method: str, raise_errors: bool, **es_method_arg
                     refresh=True,
                     **es_method_args,
                 )
-            except TransportError as e:
-                if e.status_code == 404:
-                    # This is expected for the filtered index, but we should still
-                    # log, just in case.
-                    logger.warning(
-                        f"Document with _id {document_id} not found "
-                        f"in {index} index. No update performed."
-                    )
-                else:
-                    raise e
+            except NotFoundError:
+                # This is expected for the filtered index, but we should still
+                # log, just in case.
+                logger.warning(
+                    f"Document with _id {document_id} not found "
+                    f"in {index} index. No update performed."
+                )
+                continue
 
 
 class AbstractDeletedMedia(PerformIndexUpdateMixin, OpenLedgerModel):
@@ -353,7 +351,7 @@ class AbstractMatureMedia(PerformIndexUpdateMixin, models.Model):
         db_constraint=False,
         db_column="identifier",
         related_name="mature_abstract_media",
-        help_text="The reference to the mature media.",
+        help_text="The reference to the sensitive media.",
     )
     """
     Sub-classes must override this field to point to a concrete sub-class of

diff --git a/api/api/serializers/media_serializers.py b/api/api/serializers/media_serializers.py
@@ -122,7 +122,7 @@ class MediaSearchRequestSerializer(serializers.Serializer):
         label="mature",
         default=False,
         required=False,
-        help_text="Whether to include content for mature audiences.",
+        help_text="Whether to include sensitive content.",
     )
 
     # The ``unstable__`` prefix is used in the query params.
@@ -365,6 +365,17 @@ class Meta:
         fields = ["identifier", "reason", "description"]
         read_only_fields = ["identifier"]
 
+    def to_internal_value(self, data):
+        """
+        Map data before validation.
+
+        See ``MediaReportRequestSerializer::_map_reason`` docstring for
+        further explanation.
+        """
+
+        data["reason"] = self._map_reason(data["reason"])
+        return super().to_internal_value(data)
+
     def validate(self, attrs):
         if (
             attrs["reason"] == "other"
@@ -373,8 +384,37 @@ def validate(self, attrs):
             raise serializers.ValidationError(
                 "Description must be at least be 20 characters long"
             )
+
         return attrs
 
+    def _map_reason(self, value):
+        """
+        Map `sensitive` to `mature` for forwards compatibility.
+
+        This is an interim implementation until the API is updated
+        to use the new "sensitive" terminology.
+
+        Once the API is updated to use "sensitive" as the designator
+        rather than the current "mature" term, this function should
+        be updated to reverse the mapping, that is, map `mature` to
+        `sensitive`, for backwards compatibility.
+
+        Note: This cannot be implemented as a simpler `validate_reason` method
+        on the serializer because field validation runs _before_ validators
+        declared on the serializer. This means the choice field's validation
+        will complain about `reason` set to the incorrect value before we have
+        a chance to map it to the correct value.
+
+        This could be mitigated by adding all values, current, future, and
+        deprecated, to the model field. However, that requires a migration
+        each time we make that change, and would send an incorrect message
+        about our data expectations. It's cleaner and more consistent to map
+        the data up-front, at deserialization time, to prevent any confusion at
+        the data model level.
+        """
+
+        return "mature" if value == "sensitive" else value
+
 
 ########################
 # Response serializers #

diff --git a/api/api/utils/search_context.py b/api/api/utils/search_context.py
@@ -4,7 +4,6 @@
 from django.conf import settings
 
 from elasticsearch_dsl import Q, Search
-from elasticsearch_dsl.response import Hit
 
 from api.constants.media_types import OriginIndex
 
@@ -15,18 +14,18 @@ class SearchContext:
     # to convey that it is the Openverse result identifier and
     # not the document _id
 
-    all_result_identifiers: set[str]
+    all_result_identifiers: list[str]
     """All the result identifiers gathered for the search."""
 
     sensitive_text_result_identifiers: set[str]
     """Subset of result identifiers for results with sensitive textual content."""
 
     @classmethod
-    def build(cls, results: list[Hit], origin_index: OriginIndex) -> Self:
-        if not results:
-            return cls(set(), set())
-
-        all_result_identifiers = {r.identifier for r in results}
+    def build(
+        cls, all_result_identifiers: list[str], origin_index: OriginIndex
+    ) -> Self:
+        if not all_result_identifiers:
+            return cls(list(), set())
 
         if not settings.ENABLE_FILTERED_INDEX_QUERIES:
             return cls(all_result_identifiers, set())
@@ -41,14 +40,16 @@ def build(cls, results: list[Hit], origin_index: OriginIndex) -> Self:
             # cf: https://github.com/WordPress/openverse/issues/2154
             Q(
                 "terms",
-                **{"identifier.keyword": [result.identifier for result in results]},
+                **{"identifier.keyword": all_result_identifiers},
             )
         )
 
         # The default query size is 10, so we need to slice the query
         # to change the size to be big enough to encompass all the
         # results.
-        results_in_filtered_index = filtered_index_search[: len(results)].execute()
+        results_in_filtered_index = filtered_index_search[
+            : len(all_result_identifiers)
+        ].execute()
         filtered_index_identifiers = {
             result.identifier for result in results_in_filtered_index
         }

diff --git a/api/api/views/media_views.py b/api/api/views/media_views.py
@@ -12,6 +12,7 @@
 from api.serializers.provider_serializers import ProviderSerializer
 from api.utils import image_proxy
 from api.utils.pagination import StandardPagination
+from api.utils.search_context import SearchContext
 
 
 logger = logging.getLogger(__name__)
@@ -87,6 +88,16 @@ def get_db_results(self, results):
 
     # Standard actions
 
+    def retrieve(self, request, *_, **__):
+        instance = self.get_object()
+        search_context = SearchContext.build(
+            [str(instance.identifier)], self.default_index
+        ).asdict()
+        serializer_context = search_context | self.get_serializer_context()
+        serializer = self.get_serializer(instance, context=serializer_context)
+
+        return Response(serializer.data)
+
     def list(self, request, *_, **__):
         params = self._get_request_serializer(request)
 

diff --git a/api/conf/settings/base.py b/api/conf/settings/base.py
@@ -12,8 +12,6 @@
     "django.contrib.sessions",
     "django.contrib.messages",
     "django.contrib.staticfiles",
-    # Third-party installed apps, more can be added in other settings files.
-    "sslserver",
 ]
 
 MIDDLEWARE = [

diff --git a/api/conf/settings/elasticsearch.py b/api/conf/settings/elasticsearch.py
@@ -1,55 +1,44 @@
 """This file contains configuration pertaining to Elasticsearch."""
 
-from aws_requests_auth.aws_auth import AWSRequestsAuth
 from decouple import config
-from elasticsearch import Elasticsearch, RequestsHttpConnection
+from elasticsearch import Elasticsearch
 from elasticsearch_dsl import connections
 
 from api.constants.media_types import MEDIA_TYPES
-from conf.settings.aws import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
 
 
-def _elasticsearch_connect():
+def _elasticsearch_connect() -> tuple[Elasticsearch, str]:
     """
     Connect to configured Elasticsearch domain.
 
     :return: An Elasticsearch connection object.
     """
 
+    es_scheme = config("ELASTICSEARCH_SCHEME", default="http://")
     es_url = config("ELASTICSEARCH_URL", default="localhost")
     es_port = config("ELASTICSEARCH_PORT", default=9200, cast=int)
-    es_aws_region = config("ELASTICSEARCH_AWS_REGION", default="us-east-1")
-
-    auth = AWSRequestsAuth(
-        aws_access_key=AWS_ACCESS_KEY_ID,
-        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
-        aws_host=es_url,
-        aws_region=es_aws_region,
-        aws_service="es",
-    )
-    auth.encode = lambda x: bytes(x.encode("utf-8"))
+
+    es_endpoint = f"{es_scheme}{es_url}:{es_port}"
+
     _es = Elasticsearch(
-        host=es_url,
-        port=es_port,
-        connection_class=RequestsHttpConnection,
-        timeout=10,
+        es_endpoint,
+        request_timeout=10,
         max_retries=1,
         retry_on_timeout=True,
-        http_auth=auth,
-        wait_for_status="yellow",
     )
     _es.info()
-    return _es
+    _es.cluster.health(wait_for_status="yellow")
+    return _es, es_endpoint
 
 
 SETUP_ES = config("SETUP_ES", default=True, cast=bool)
 if SETUP_ES:
-    ES = _elasticsearch_connect()
+    ES, ES_ENDPOINT = _elasticsearch_connect()
     #: Elasticsearch client, also aliased to connection 'default'
 
     connections.add_connection("default", ES)
 else:
-    ES = None
+    ES, ES_ENDPOINT = None, None
 
 MEDIA_INDEX_MAPPING = {
     media_type: config(f"{media_type.upper()}_INDEX_NAME", default=media_type)

diff --git a/api/test/factory/models/__init__.py b/api/test/factory/models/__init__.py
@@ -1,9 +1,14 @@
 from test.factory.models.audio import (
     AudioAddOnFactory,
     AudioFactory,
+    AudioReportFactory,
     MatureAudioFactory,
 )
-from test.factory.models.image import ImageFactory, MatureImageFactory
+from test.factory.models.image import (
+    ImageFactory,
+    ImageReportFactory,
+    MatureImageFactory,
+)
 from test.factory.models.oauth2 import (
     AccessTokenFactory,
     OAuth2RegistrationFactory,

diff --git a/api/test/factory/models/audio.py b/api/test/factory/models/audio.py
@@ -4,7 +4,7 @@
 import factory
 from factory.django import DjangoModelFactory
 
-from api.models.audio import Audio, AudioAddOn, MatureAudio
+from api.models.audio import Audio, AudioAddOn, AudioReport, MatureAudio
 
 
 class MatureAudioFactory(DjangoModelFactory):
@@ -28,3 +28,10 @@ class Meta:
     audio_identifier = IdentifierFactory(AudioFactory)
 
     waveform_peaks = Faker("waveform")
+
+
+class AudioReportFactory(DjangoModelFactory):
+    class Meta:
+        model = AudioReport
+
+    media_obj = factory.SubFactory(AudioFactory)