diff --git a/api/api/admin/__init__.py b/api/api/admin/__init__.py index 64ac53e7a83..dc7eb7ef42f 100644 --- a/api/api/admin/__init__.py +++ b/api/api/admin/__init__.py @@ -2,7 +2,7 @@ from api.admin.site import openverse_admin from api.models import PENDING, Audio, AudioReport, ContentProvider, Image, ImageReport -from api.models.media import AbstractDeletedMedia, AbstractMatureMedia +from api.models.media import AbstractDeletedMedia, AbstractSensitiveMedia admin.site = openverse_admin @@ -72,7 +72,7 @@ def has_add_permission(self, *args, **kwargs): for klass in [ - *AbstractMatureMedia.__subclasses__(), + *AbstractSensitiveMedia.__subclasses__(), *AbstractDeletedMedia.__subclasses__(), ]: admin.site.register(klass, MediaSubreportAdmin) diff --git a/api/api/migrations/0055_alter_matureaudio_table_alter_matureimage_table.py b/api/api/migrations/0055_alter_matureaudio_table_alter_matureimage_table.py new file mode 100644 index 00000000000..0fc930a2ada --- /dev/null +++ b/api/api/migrations/0055_alter_matureaudio_table_alter_matureimage_table.py @@ -0,0 +1,21 @@ +# Generated by Django 4.2.9 on 2024-02-08 01:52 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0054_throttledapplication_post_logout_redirect_uris'), + ] + + operations = [ + migrations.AlterModelTable( + name='matureaudio', + table='api_matureaudio', + ), + migrations.AlterModelTable( + name='matureimage', + table='api_matureimage', + ), + ] diff --git a/api/api/migrations/0056_rename_mature_to_sensitive.py b/api/api/migrations/0056_rename_mature_to_sensitive.py new file mode 100644 index 00000000000..d0ef5a76e6b --- /dev/null +++ b/api/api/migrations/0056_rename_mature_to_sensitive.py @@ -0,0 +1,27 @@ +# Generated by Django 4.2.9 on 2024-02-08 18:26 + +import api.models.media +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0055_alter_matureaudio_table_alter_matureimage_table'), + ] + + operations = [ + migrations.RenameModel('MatureImage', 'SensitiveImage'), + migrations.RenameModel('MatureAudio', 'SensitiveAudio'), + migrations.AlterField( + model_name='sensitiveaudio', + name='media_obj', + field=models.OneToOneField(db_column='identifier', db_constraint=False, help_text='The reference to the sensitive audio.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='sensitive_audio', serialize=False, to='api.audio', to_field='identifier'), + ), + migrations.AlterField( + model_name='sensitiveimage', + name='media_obj', + field=models.OneToOneField(db_column='identifier', db_constraint=False, help_text='The reference to the sensitive image.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='sensitive_image', serialize=False, to='api.image', to_field='identifier'), + ), + ] diff --git a/api/api/models/__init__.py b/api/api/models/__init__.py index c34e863d798..e91d289a99d 100644 --- a/api/api/models/__init__.py +++ b/api/api/models/__init__.py @@ -6,9 +6,9 @@ AudioReport, AudioSet, DeletedAudio, - MatureAudio, + SensitiveAudio, ) -from api.models.image import DeletedImage, Image, ImageList, ImageReport, MatureImage +from api.models.image import DeletedImage, Image, ImageList, ImageReport, SensitiveImage from api.models.media import ( DEINDEXED, DMCA, diff --git a/api/api/models/audio.py b/api/api/models/audio.py index 377fc8d716f..fc25b1546f2 100644 --- a/api/api/models/audio.py +++ b/api/api/models/audio.py @@ -9,10 +9,10 @@ from api.models.media import ( AbstractAltFile, AbstractDeletedMedia, - AbstractMatureMedia, AbstractMedia, AbstractMediaList, AbstractMediaReport, + AbstractSensitiveMedia, ) from api.models.mixins import FileMixin, ForeignIdentifierMixin, MediaMixin from api.utils.waveform import generate_peaks @@ -189,8 +189,8 @@ class Audio(AudioFileMixin, AbstractMedia): ) @property - def mature(self) -> bool: - return hasattr(self, "mature_audio") + def sensitive(self) -> bool: + return hasattr(self, "sensitive_audio") @property def alternative_files(self): @@ -260,7 +260,7 @@ class Meta: verbose_name_plural = "Deleted audio" -class MatureAudio(AbstractMatureMedia): +class SensitiveAudio(AbstractSensitiveMedia): """ Stores all audio tracks that have been flagged as 'mature'. @@ -278,17 +278,18 @@ class MatureAudio(AbstractMatureMedia): primary_key=True, db_constraint=False, db_column="identifier", - related_name="mature_audio", + related_name="sensitive_audio", help_text="The reference to the sensitive audio.", ) class Meta: + db_table = "api_matureaudio" verbose_name_plural = "Mature audio" class AudioReport(AbstractMediaReport): media_class = Audio - mature_class = MatureAudio + sensitive_class = SensitiveAudio deleted_class = DeletedAudio media_obj = models.ForeignKey( diff --git a/api/api/models/image.py b/api/api/models/image.py index bc4fffb0755..21bafdcd9f9 100644 --- a/api/api/models/image.py +++ b/api/api/models/image.py @@ -6,10 +6,10 @@ from api.constants.media_types import IMAGE_TYPE from api.models.media import ( AbstractDeletedMedia, - AbstractMatureMedia, AbstractMedia, AbstractMediaList, AbstractMediaReport, + AbstractSensitiveMedia, ) from api.models.mixins import FileMixin @@ -53,8 +53,8 @@ class Meta(AbstractMedia.Meta): db_table = "image" @property - def mature(self) -> bool: - return hasattr(self, "mature_image") + def sensitive(self) -> bool: + return hasattr(self, "sensitive_image") class DeletedImage(AbstractDeletedMedia): @@ -80,7 +80,7 @@ class DeletedImage(AbstractDeletedMedia): ) -class MatureImage(AbstractMatureMedia): +class SensitiveImage(AbstractSensitiveMedia): """ Stores all images that have been flagged as 'mature'. @@ -98,14 +98,17 @@ class MatureImage(AbstractMatureMedia): primary_key=True, db_constraint=False, db_column="identifier", - related_name="mature_image", + related_name="sensitive_image", help_text="The reference to the sensitive image.", ) + class Meta: + db_table = "api_matureimage" + class ImageReport(AbstractMediaReport): media_class = Image - mature_class = MatureImage + sensitive_class = SensitiveImage deleted_class = DeletedImage media_obj = models.ForeignKey( diff --git a/api/api/models/media.py b/api/api/models/media.py index 187e0c6817f..fead18dcc87 100644 --- a/api/api/models/media.py +++ b/api/api/models/media.py @@ -133,15 +133,15 @@ class AbstractMediaReport(models.Model): """ Generic model from which to inherit all reported media classes. - 'Reported' here refers to content reports such as mature, copyright-violating or - deleted content. Subclasses must populate ``media_class``, ``mature_class`` and + 'Reported' here refers to content reports such as sensitive, copyright-violating or + deleted content. Subclasses must populate ``media_class``, ``sensitive_class`` and ``deleted_class`` fields. """ media_class: type[models.Model] = None """the model class associated with this media type e.g. ``Image`` or ``Audio``""" - mature_class: type[models.Model] = None - """the class storing mature media e.g. ``MatureImage`` or ``MatureAudio``""" + sensitive_class: type[models.Model] = None + """the class storing sensitive media e.g. ``SensitiveImage`` or ``SensitiveAudio``""" deleted_class: type[models.Model] = None """the class storing deleted media e.g. ``DeletedImage`` or ``DeletedAudio``""" @@ -213,8 +213,8 @@ def save(self, *args, **kwargs): Extend the built-in ``save()`` functionality of Django with Elasticsearch integration to update records and refresh indices. - Media marked as mature or deleted also leads to instantiation of their - corresponding mature or deleted classes. + Media marked as sensitive or deleted also leads to instantiation of their + corresponding sensitive or deleted classes. """ self.clean() @@ -222,9 +222,9 @@ def save(self, *args, **kwargs): super().save(*args, **kwargs) if self.status == MATURE_FILTERED: - # Create an instance of the mature class for this media. This will + # Create an instance of the sensitive class for this media. This will # automatically set the ``mature`` field in the ES document. - self.mature_class.objects.create(media_obj=self.media_obj) + self.sensitive_class.objects.create(media_obj=self.media_obj) elif self.status == DEINDEXED: # Create an instance of the deleted class for this media, so that we don't # reindex it later. This will automatically delete the ES document and the @@ -290,7 +290,7 @@ class AbstractDeletedMedia(PerformIndexUpdateMixin, OpenLedgerModel): Generic model from which to inherit all deleted media classes. 'Deleted' here refers to media which has been deleted at the source or intentionally - de-indexed by us. Unlike mature reports, this action is irreversible. Subclasses + de-indexed by us. Unlike sensitive reports, this action is irreversible. Subclasses must populate ``media_class`` and ``es_index`` fields. """ @@ -329,9 +329,9 @@ def save(self, *args, **kwargs): self.media_obj.delete() # remove the actual model instance -class AbstractMatureMedia(PerformIndexUpdateMixin, models.Model): +class AbstractSensitiveMedia(PerformIndexUpdateMixin, models.Model): """ - Generic model from which to inherit all mature media classes. + Generic model from which to inherit all sensitive media classes. Subclasses must populate ``media_class`` and ``es_index`` fields. """ @@ -350,7 +350,7 @@ class AbstractMatureMedia(PerformIndexUpdateMixin, models.Model): primary_key=True, db_constraint=False, db_column="identifier", - related_name="mature_abstract_media", + related_name="sensitive_abstract_media", help_text="The reference to the sensitive media.", ) """ diff --git a/api/api/serializers/media_serializers.py b/api/api/serializers/media_serializers.py index 1dd62e8b054..3b14e01dfa7 100644 --- a/api/api/serializers/media_serializers.py +++ b/api/api/serializers/media_serializers.py @@ -516,6 +516,7 @@ class Meta: mature = serializers.BooleanField( help_text="Whether the media item is marked as mature", + source="sensitive", ) # This should be promoted to a stable field alongside @@ -540,9 +541,9 @@ def get_unstable__sensitivity(self, obj: Hit | AbstractMedia) -> list[str]: ): result.append(sensitivity.TEXT) - # ``obj.mature`` will either be `mature` from the ES document - # or the ``mature`` property on the Image or Audio model. - if obj.mature: + # ``obj.sensitive`` will either be `mature` from the ES document (see below) + # or the ``sensitive`` property on the Image or Audio model. + if obj.sensitive: # We do not currently have any documents marked `mature=true` # that were not marked so as a result of a confirmed user report. # This is despite the fact that the ingestion server _does_ copy @@ -569,6 +570,16 @@ def get_unstable__sensitivity(self, obj: Hit | AbstractMedia) -> list[str]: return result def to_representation(self, *args, **kwargs): + # This serializer adapts both ES Hits *and* Media instances. Currently, + # ES has a `mature` field on it which represents if maturity was present on + # the record in the database. The attributes in the code have been renamed + # to `sensitive`, but for the time being this flag still exists on the ES index. + # In order to prevent failures in serialization (since the serializer is looking + # for the `sensitive` attribute), we rename it here. + obj = args[0] + if isinstance(obj, Hit): + obj.sensitive = obj.mature + output = super().to_representation(*args, **kwargs) # Ensure lists are ``[]`` instead of ``None`` diff --git a/api/api/views/audio_views.py b/api/api/views/audio_views.py index 9960ddb524e..390792b0687 100644 --- a/api/api/views/audio_views.py +++ b/api/api/views/audio_views.py @@ -50,7 +50,7 @@ class AudioViewSet(MediaViewSet): collection_serializer_class = AudioCollectionRequestSerializer def get_queryset(self): - return super().get_queryset().select_related("mature_audio", "audioset") + return super().get_queryset().select_related("sensitive_audio", "audioset") # Extra actions @creator_collection diff --git a/api/api/views/image_views.py b/api/api/views/image_views.py index 5d15b3185d4..fd9c88d03fe 100644 --- a/api/api/views/image_views.py +++ b/api/api/views/image_views.py @@ -63,7 +63,7 @@ class ImageViewSet(MediaViewSet): } def get_queryset(self): - return super().get_queryset().select_related("mature_image") + return super().get_queryset().select_related("sensitive_image") # Extra actions @creator_collection diff --git a/api/test/factory/models/__init__.py b/api/test/factory/models/__init__.py index 628c8d484eb..7560ee6cafb 100644 --- a/api/test/factory/models/__init__.py +++ b/api/test/factory/models/__init__.py @@ -2,12 +2,12 @@ AudioAddOnFactory, AudioFactory, AudioReportFactory, - MatureAudioFactory, + SensitiveAudioFactory, ) from test.factory.models.image import ( ImageFactory, ImageReportFactory, - MatureImageFactory, + SensitiveImageFactory, ) from test.factory.models.oauth2 import ( AccessTokenFactory, diff --git a/api/test/factory/models/audio.py b/api/test/factory/models/audio.py index 7f0ca49a728..0004e39f7ef 100644 --- a/api/test/factory/models/audio.py +++ b/api/test/factory/models/audio.py @@ -1,20 +1,20 @@ import factory from factory.django import DjangoModelFactory -from api.models.audio import Audio, AudioAddOn, AudioReport, MatureAudio +from api.models.audio import Audio, AudioAddOn, AudioReport, SensitiveAudio from test.factory.faker import Faker from test.factory.models.media import IdentifierFactory, MediaFactory -class MatureAudioFactory(DjangoModelFactory): +class SensitiveAudioFactory(DjangoModelFactory): class Meta: - model = MatureAudio + model = SensitiveAudio media_obj = factory.SubFactory("test.factory.models.audio.AudioFactory") class AudioFactory(MediaFactory): - _mature_factory = MatureAudioFactory + _sensitive_factory = SensitiveAudioFactory class Meta: model = Audio diff --git a/api/test/factory/models/image.py b/api/test/factory/models/image.py index e0758b6a6a6..856c42d686d 100644 --- a/api/test/factory/models/image.py +++ b/api/test/factory/models/image.py @@ -1,19 +1,19 @@ import factory from factory.django import DjangoModelFactory -from api.models.image import Image, ImageReport, MatureImage +from api.models.image import Image, ImageReport, SensitiveImage from test.factory.models.media import MediaFactory, MediaReportFactory -class MatureImageFactory(DjangoModelFactory): +class SensitiveImageFactory(DjangoModelFactory): class Meta: - model = MatureImage + model = SensitiveImage media_obj = factory.SubFactory("test.factory.models.image.ImageFactory") class ImageFactory(MediaFactory): - _mature_factory = MatureImageFactory + _sensitive_factory = SensitiveImageFactory class Meta: model = Image diff --git a/api/test/factory/models/media.py b/api/test/factory/models/media.py index a6e451fa825..075c3bf765a 100644 --- a/api/test/factory/models/media.py +++ b/api/test/factory/models/media.py @@ -37,8 +37,8 @@ class MediaFactory(DjangoModelFactory): ) # Sub-factories must set this to their corresponding - # ``AbstractMatureMedia`` subclass - _mature_factory = None + # ``AbstractSensitiveMedia`` subclass + _sensitive_factory = None _highest_pre_existing_pk = None @@ -126,7 +126,7 @@ def create(cls, *args, **kwargs) -> AbstractMedia | tuple[AbstractMedia, Hit]: hit = None if mature_reported: - cls._mature_factory.create(media_obj=model) + cls._sensitive_factory.create(media_obj=model) if pook_active: # Reactivate pook if it was active diff --git a/api/test/unit/conftest.py b/api/test/unit/conftest.py index 674b97a6eab..3d32872f518 100644 --- a/api/test/unit/conftest.py +++ b/api/test/unit/conftest.py @@ -10,10 +10,10 @@ DeletedAudio, DeletedImage, Image, - MatureAudio, - MatureImage, + SensitiveAudio, + SensitiveImage, ) -from api.models.media import AbstractDeletedMedia, AbstractMatureMedia, AbstractMedia +from api.models.media import AbstractDeletedMedia, AbstractMedia, AbstractSensitiveMedia from api.serializers.audio_serializers import ( AudioReportRequestSerializer, AudioSearchRequestSerializer, @@ -53,8 +53,8 @@ class MediaTypeConfig: filtered_index: str model_factory: MediaFactory model_class: AbstractMedia - mature_factory: MediaFactory - mature_class: AbstractMatureMedia + sensitive_factory: MediaFactory + sensitive_class: AbstractSensitiveMedia search_request_serializer: MediaSearchRequestSerializer model_serializer: MediaSerializer report_serializer: MediaReportRequestSerializer @@ -74,12 +74,12 @@ def indexes(self): filtered_index="image-filtered", model_factory=model_factories.ImageFactory, model_class=Image, - mature_factory=model_factories.MatureImageFactory, + sensitive_factory=model_factories.SensitiveImageFactory, search_request_serializer=ImageSearchRequestSerializer, model_serializer=ImageSerializer, report_serializer=ImageReportRequestSerializer, report_factory=model_factories.ImageReportFactory, - mature_class=MatureImage, + sensitive_class=SensitiveImage, deleted_class=DeletedImage, ), "audio": MediaTypeConfig( @@ -89,12 +89,12 @@ def indexes(self): filtered_index="audio-filtered", model_factory=model_factories.AudioFactory, model_class=Audio, - mature_factory=model_factories.MatureAudioFactory, + sensitive_factory=model_factories.SensitiveAudioFactory, search_request_serializer=AudioSearchRequestSerializer, model_serializer=AudioSerializer, report_serializer=AudioReportRequestSerializer, report_factory=model_factories.AudioReportFactory, - mature_class=MatureAudio, + sensitive_class=SensitiveAudio, deleted_class=DeletedAudio, ), } diff --git a/api/test/unit/models/test_media_report.py b/api/test/unit/models/test_media_report.py index e936c47d356..b8ead57113e 100644 --- a/api/test/unit/models/test_media_report.py +++ b/api/test/unit/models/test_media_report.py @@ -6,7 +6,7 @@ import pytest from elasticsearch import BadRequestError, NotFoundError -from api.models import DeletedAudio, DeletedImage, MatureAudio, MatureImage +from api.models import DeletedAudio, DeletedImage, SensitiveAudio, SensitiveImage from api.models.media import ( DEINDEXED, DMCA, @@ -15,7 +15,7 @@ OTHER, PENDING, AbstractDeletedMedia, - AbstractMatureMedia, + AbstractSensitiveMedia, ) @@ -42,18 +42,20 @@ def test_pending_reports_have_no_subreport_models( report = media_type_config.report_factory.create(media_obj=media, reason=reason) assert report.status == PENDING - assert not media_type_config.mature_class.objects.filter(media_obj=media).exists() + assert not media_type_config.sensitive_class.objects.filter( + media_obj=media + ).exists() assert not media_type_config.deleted_class.objects.filter(media_obj=media).exists() -def test_mature_filtering_creates_mature_image_instance(media_type_config, settings): +def test_mature_filtering_creates_sensitive_image_instance(media_type_config, settings): media = media_type_config.model_factory.create() media_type_config.report_factory.create( media_obj=media, reason=MATURE, status=MATURE_FILTERED ) - assert media_type_config.mature_class.objects.filter(media_obj=media).exists() + assert media_type_config.sensitive_class.objects.filter(media_obj=media).exists() for index in media_type_config.indexes: doc = settings.ES.get( @@ -67,17 +69,19 @@ def test_mature_filtering_creates_mature_image_instance(media_type_config, setti assert doc["found"] assert doc["_source"]["mature"] - assert media.mature + assert media.sensitive -def test_deleting_mature_image_instance_resets_mature_flag(media_type_config, settings): +def test_deleting_sensitive_image_instance_resets_mature_flag( + media_type_config, settings +): media = media_type_config.model_factory.create() # Mark as mature. media_type_config.report_factory.create( media_obj=media, reason=MATURE, status=MATURE_FILTERED ) - # Delete mature instance. - media_type_config.mature_class.objects.get(media_obj=media).delete() + # Delete sensitive instance. + media_type_config.sensitive_class.objects.get(media_obj=media).delete() # Assert the media are back to mature=False # The previous test asserts they get set to mature=True @@ -96,7 +100,7 @@ def test_deleting_mature_image_instance_resets_mature_flag(media_type_config, se assert not doc["_source"]["mature"] media.refresh_from_db() - assert not media.mature + assert not media.sensitive def test_deindexing_creates_deleted_image_instance(media_type_config, settings): @@ -135,13 +139,16 @@ def test_all_deleted_media_covered(): assert set(AbstractDeletedMedia.__subclasses__()) == {DeletedAudio, DeletedImage} -def test_all_mature_media_covered(): +def test_all_sensitive_media_covered(): """ Imperfect test to ensure all subclasses are covered by the tests in this module. Relies on all models being present in ``catalog.api.models`` (i.e., exported from `__init__`). """ - assert set(AbstractMatureMedia.__subclasses__()) == {MatureAudio, MatureImage} + assert set(AbstractSensitiveMedia.__subclasses__()) == { + SensitiveAudio, + SensitiveImage, + } def test_deleted_media_deletes_from_all_indexes( @@ -221,7 +228,7 @@ def test_deleted_media_raises_elasticsearch_400_errors(settings, media_type_conf @pook.on -def test_mature_media_ignores_elasticsearch_404_errors( +def test_sensitive_media_ignores_elasticsearch_404_errors( settings, media_type_config, ): @@ -238,7 +245,7 @@ def test_mature_media_ignores_elasticsearch_404_errors( ) # This should pass despite the 404 enforced above - media_type_config.mature_factory.create( + media_type_config.sensitive_factory.create( media_obj=media, ) @@ -247,7 +254,7 @@ def test_mature_media_ignores_elasticsearch_404_errors( @pook.on -def test_mature_media_reraises_elasticsearch_400_errors(settings, media_type_config): +def test_sensitive_media_reraises_elasticsearch_400_errors(settings, media_type_config): media = media_type_config.model_factory.create() es_mocks = [] @@ -262,7 +269,7 @@ def test_mature_media_reraises_elasticsearch_400_errors(settings, media_type_con # This should fail due to the 400 enforced above with pytest.raises(BadRequestError): - media_type_config.mature_factory.create( + media_type_config.sensitive_factory.create( media_obj=media, )