WordPress · sarayourfriend · Apr 8, 2024 · Apr 4, 2024 · Apr 4, 2024 · Apr 8, 2024
diff --git a/api/api/examples/audio_responses.py b/api/api/examples/audio_responses.py
@@ -64,6 +64,7 @@
         "results": [
             base_audio | {"fields_matched": ["title"]},
         ],
+        "warnings": [],
     },
 }
 

diff --git a/api/api/examples/image_responses.py b/api/api/examples/image_responses.py
@@ -72,6 +72,7 @@
         "page_size": 20,
         "page": 1,
         "results": [base_image | {"fields_matched": ["title"]}],
+        "warnings": [],
     },
 }
 

@@ -1,17 +1,21 @@
 import logging
 from collections import namedtuple
+from typing import TypedDict
 
 from django.conf import settings
 from django.core.exceptions import ValidationError as DjangoValidationError
 from django.core.validators import MaxValueValidator
+from django.urls import reverse
 from rest_framework import serializers
 from rest_framework.exceptions import NotAuthenticated, ValidationError
+from rest_framework.request import Request
 
 from drf_spectacular.utils import extend_schema_serializer
 from elasticsearch_dsl.response import Hit
 
 from api.constants import sensitivity
 from api.constants.licenses import LICENSE_GROUPS
+from api.constants.media_types import MediaType
 from api.constants.parameters import COLLECTION, TAG
 from api.constants.sorting import DESCENDING, RELEVANCE, SORT_DIRECTIONS, SORT_FIELDS
 from api.controllers import search_controller
@@ -294,8 +298,16 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer):
         required=False,
     )
 
+    class Context(TypedDict, total=True):
+        warnings: list[dict]
+        media_type: MediaType
+        request: Request
+
+    context: Context
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        self.context["warnings"] = []
         self.media_type = self.context.get("media_type")
         if not self.media_type:
             raise ValueError(
@@ -398,15 +410,31 @@ def validate_source(self, value):
                 )
             return value
         else:
-            sources = value.lower().split(",")
-            valid_sources = set(
-                [source for source in sources if source in allowed_sources]
-            )
-            if len(sources) > len(valid_sources):
-                invalid_sources = set(sources).difference(valid_sources)
-                logger.warning(
-                    f"Invalid sources in search query: {invalid_sources}; sources query: '{value}'"
+            sources = set(value.lower().split(","))
+            valid_sources = {source for source in sources if source in allowed_sources}
+            if not valid_sources:
+                # Raise only if there are _no_ valid sources selected.
+                # If the requester passed only `mispelled_museum_name1,mispelled_musesum_name2`,
+                # the request cannot move forward, as all the top responses will likely be from Flickr,
+                # which provides radically different responses than most other providers.
+                # If even one source is valid, it won't be a problem, in which case we'll issue a warning.
+                raise serializers.ValidationError(
+                    f"Invalid source parameter '{value}'. No valid sources selected. "
+                    f"Refer to the source list for valid options: {sources_list}."
+                )
+            elif invalid_sources := (sources - valid_sources):
+                self.context["warnings"].append(
+                    {
+                        "code": "partially invalid source parameter",
+                        "message": "The source parameter was partially invalid.",
+                        "invalid_sources": invalid_sources,
+                        "referenced_sources": valid_sources,
+                        "available_sources": self.context["request"].build_absolute_uri(
+                            reverse(f"{self.media_type}-stats")
+                        ),
+                    }
                 )
+
             return ",".join(valid_sources)
 
     def validate_excluded_source(self, input_sources):

@@ -7,11 +7,17 @@ class StandardPagination(PageNumberPagination):
     page_size_query_param = "page_size"
     page_query_param = "page"
 
+    result_count: int | None
+    page_count: int | None
+    page: int
+    warnings: list[dict]
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.result_count = None  # populated later
         self.page_count = None  # populated later
         self.page = 1  # default, gets updated when necessary
+        self.warnings = []  # populated later as needed
 
     def get_paginated_response(self, data):
         return Response(
@@ -21,6 +27,7 @@ def get_paginated_response(self, data):
                 "page_size": self.page_size,
                 "page": self.page,
                 "results": data,
+                "warnings": list(self.warnings),
             }
         )
 
@@ -39,15 +46,44 @@ def get_paginated_response_schema(self, schema):
             "page_size": ("The number of items per page.", 20),
             "page": ("The current page number returned in the response.", 1),
         }
+
+        properties = {
+            field: {
+                "type": "integer",
+                "description": description,
+                "example": example,
+            }
+            for field, (description, example) in field_descriptions.items()
+        } | {
+            "results": schema,
+            "warnings": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                },
+                "description": (
+                    "Warnings pertinent to the request. "
+                    "If there are no warnings, this list will be empty. "
+                    "Warnings are non-critical problems with the request. "
+                    "Responses with warnings should be treated as unstable. "
+                    "Warning descriptions must not be treated as machine readable "
+                    "and their schema can change at any time."
+                ),
+                "example": [
+                    {
+                        "code": "partially invalid request parameter",
+                        "message": (
+                            "Some of the request parameters were bad, "
+                            "but we processed the request anyway. "
+                            "Here's some information that might help you "
+                            "fix the problem for future requests."
+                        ),
+                    }
+                ],
+            },
+        }
+
         return {
             "type": "object",
-            "properties": {
-                field: {
-                    "type": "integer",
-                    "description": description,
-                    "example": example,
-                }
-                for field, (description, example) in field_descriptions.items()
-            }
-            | {"results": schema},
+            "properties": properties,
         }
@@ -160,6 +160,7 @@ def get_media_results(
     ):
         page_size = self.paginator.page_size = params.data["page_size"]
         page = self.paginator.page = params.data["page"]
+        self.paginator.warnings = params.context["warnings"]
 
         hashed_ip = hash(self._get_user_ip(request))
         filter_dead = params.validated_data.get("filter_dead", True)

@@ -4,7 +4,7 @@
 
 
 @pytest.fixture
-def api_client():
+def api_client() -> APIClient:
     return APIClient()
 
 

@@ -329,6 +329,50 @@ def test_detail_view_for_invalid_uuids_returns_not_found(
     assert res.status_code == 404
 
 
+def test_search_with_only_valid_sources_produces_no_warning(media_type, api_client):
+    search = api_client.get(
+        f"/v1/{media_type.path}/",
+        {"source": ",".join(media_type.providers)},
+    )
+    assert search.status_code == 200
+    assert search.json()["warnings"] == []
+
+
+def test_search_with_partially_invalid_sources_produces_warning_but_still_succeeds(
+    media_type: MediaType, api_client
+):
+    invalid_sources = [
+        "surely_neither_this_one",
+        "this_is_sure_not_to_ever_be_a_real_source_name",
+    ]
+
+    search = api_client.get(
+        f"/v1/{media_type.path}/",
+        {"source": ",".join([media_type.providers[0]] + invalid_sources)},
+    )
+    assert search.status_code == 200
+    result = search.json()
+
+    assert {w["code"] for w in result["warnings"]} == {
+        "partially invalid source parameter"
+    }
+    warning = result["warnings"][0]
+    assert set(warning["invalid_sources"]) == set(invalid_sources)
+    assert warning["referenced_sources"] == [media_type.providers[0]]
+    assert f"v1/{media_type.path}/stats/" in warning["available_sources"]
+
+
+def test_search_with_all_invalid_sources_fails(media_type, api_client):
+    invalid_sources = [
+        "this_is_sure_not_to_ever_be_a_real_source_name",
+        "surely_neither_this_one",
+    ]
+    search = api_client.get(
+        f"/v1/{media_type.path}/", {"source": ",".join(invalid_sources)}
+    )
+    assert search.status_code == 400
+
+
 def test_detail_view_returns_ok(single_result, api_client):
     media_type, item = single_result
     res = api_client.get(f"/v1/{media_type.path}/{item['id']}/")

@@ -99,25 +99,23 @@ def test_media_serializer_adds_license_url_if_missing(
     assert repr["license_url"] == "https://creativecommons.org/publicdomain/zero/1.0/"
 
 
-def test_media_serializer_logs_when_invalid_or_duplicate_source(media_type_config):
+def test_media_serializer_recovers_invalid_or_duplicate_source(
+    media_type_config, request_factory
+):
     sources = {
         "image": ("flickr,flickr,invalid", "flickr"),
         "audio": ("freesound,freesound,invalid", "freesound"),
     }
-    with patch("api.serializers.media_serializers.logger.warning") as mock_logger:
-        serializer_class = media_type_config.search_request_serializer(
-            context={"media_type": media_type_config.media_type},
-            data={"source": sources[media_type_config.media_type][0]},
-        )
-        assert serializer_class.is_valid()
-        assert (
-            serializer_class.validated_data["source"]
-            == sources[media_type_config.media_type][1]
-        )
-        mock_logger.assert_called_with(
-            f"Invalid sources in search query: {{'invalid'}}; "
-            f"sources query: '{sources[media_type_config.media_type][0]}'"
-        )
+    request = request_factory.get("/v1/images/")
+    serializer_class = media_type_config.search_request_serializer(
+        context={"media_type": media_type_config.media_type, "request": request},
+        data={"source": sources[media_type_config.media_type][0]},
+    )
+    assert serializer_class.is_valid()
+    assert (
+        serializer_class.validated_data["source"]
+        == sources[media_type_config.media_type][1]
+    )
 
 
 @pytest.mark.parametrize(
-Original file line number
+Diff line change
@@ Expand Up / @@ -64,6 +64,7 @@ @@
             "results": [
                 base_audio | {"fields_matched": ["title"]},
             ],
+            "warnings": [],
         },
     }
@@ Expand Down @@