Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add backfillmoderationdecision management command #4415

Merged
merged 6 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions api/api/management/commands/backfillmoderationdecision.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import argparse

from django.contrib.auth import get_user_model

from django_tqdm import BaseCommand

from api.constants.moderation import DecisionAction
from api.models import (
AudioDecision,
AudioDecisionThrough,
AudioReport,
ImageDecision,
ImageDecisionThrough,
ImageReport,
)
from api.models.media import DMCA, MATURE_FILTERED, NO_ACTION, PENDING


class Command(BaseCommand):
    """
    Create moderation decisions for reports that were resolved without one.

    Every report of the chosen media type whose ``decision`` is unset and
    whose status is not ``PENDING`` gets a new decision record (with the
    action derived from the report's status and reason), is linked to that
    decision, and has a matching "through" row created for its media object.
    """

    help = "Back-fill the moderation decision table for a given media type."
    # Number of reports handled per loop iteration (one progress-bar tick).
    batch_size = 3

    @staticmethod
    def add_arguments(parser):
        """Register ``--dry-run``, ``--media-type`` and ``--moderator``."""
        # ``BooleanOptionalAction`` generates ``--dry-run`` / ``--no-dry-run``
        # and stores a real boolean by itself. Passing ``type`` to it is
        # deprecated since Python 3.12 and rejected from 3.14, and it was
        # never applied anyway because the action consumes no value.
        parser.add_argument(
            "--dry-run",
            help="Count reports to process, and don't do anything else.",
            default=True,
            action=argparse.BooleanOptionalAction,
        )
        parser.add_argument(
            "--media-type",
            help="The media type to back-fill moderation decisions.",
            type=str,
            default="image",
            choices=["image", "audio"],
        )
        parser.add_argument(
            "--moderator",
            help="The username of the moderator to attribute the decisions to.",
            type=str,
            default="opener",
        )

    def handle(self, *args, **options):
        """
        Run the back-fill, or only report the amount of work on a dry run.

        Reads ``dry_run``, ``moderator`` and ``media_type`` from ``options``.
        Returns early (without writing) on a dry run, when there is nothing
        to process, or when the moderator user cannot be found.
        """
        dry = options["dry_run"]
        username = options["moderator"]
        media_type = options["media_type"]

        # Image models are the default; swap all three for audio.
        MediaReport = ImageReport
        MediaDecision = ImageDecision
        MediaDecisionThrough = ImageDecisionThrough
        if media_type == "audio":
            MediaReport = AudioReport
            MediaDecision = AudioDecision
            MediaDecisionThrough = AudioDecisionThrough

        non_pending_reports = MediaReport.objects.filter(decision=None).exclude(
            status=PENDING
        )
        count_to_process = non_pending_reports.count()

        if dry:
            self.info(
                f"{count_to_process} {media_type} reports to back-fill. "
                f"This is a dry run, exiting without making changes."
            )
            return

        if not count_to_process:
            self.info("No reports to process.")
            return

        # Ceiling division: a final, partially filled batch still produces one
        # ``update(1)`` call, so it must be counted in the bar's total.
        batch_count = (count_to_process + self.batch_size - 1) // self.batch_size
        t = self.tqdm(total=batch_count)
        User = get_user_model()
        try:
            moderator = User.objects.get(username=username)
        except User.DoesNotExist:
            t.error(f"User '{username}' not found.")
            return

        # Processed reports get a decision and therefore drop out of the
        # ``decision=None`` filter, so re-slicing the queryset from the start
        # always yields the next unprocessed batch. The slice is materialised
        # once so that the very same instances are mutated and then saved.
        while reports_chunk := list(non_pending_reports[: self.batch_size]):
            decisions = MediaDecision.objects.bulk_create(
                [
                    MediaDecision(
                        action=self.get_action(report),
                        moderator=moderator,
                        notes="__backfilled_from_report_status",
                    )
                    for report in reports_chunk
                ]
            )
            # Decisions come back in the order they were passed in, so they
            # pair up positionally with the reports of this chunk.
            for report, decision in zip(reports_chunk, decisions):
                report.decision = decision
            MediaReport.objects.bulk_update(reports_chunk, ["decision"])
            MediaDecisionThrough.objects.bulk_create(
                [
                    MediaDecisionThrough(media_obj=report.media_obj, decision=decision)
                    for report, decision in zip(reports_chunk, decisions)
                ]
            )
            t.update(1)

        t.info(
            self.style.SUCCESS(
                f"Created {count_to_process} {media_type} moderation decisions from existing reports."
            )
        )

    @staticmethod
    def get_action(report):
        """
        Map a report's ``status`` and ``reason`` to a ``DecisionAction``.

        ``MATURE_FILTERED`` and ``NO_ACTION`` statuses map directly,
        whatever the reason. Any other status reaching this point is treated
        as de-indexed, where the reason selects between the copyright and
        the sensitive variant.
        """
        if report.status == MATURE_FILTERED:
            return DecisionAction.MARKED_SENSITIVE

        if report.status == NO_ACTION:
            return DecisionAction.REJECTED_REPORTS

        # Cases with status = DEINDEXED
        if report.reason == DMCA:
            return DecisionAction.DEINDEXED_COPYRIGHT

        return DecisionAction.DEINDEXED_SENSITIVE  # For reasons MATURE and OTHER
8 changes: 6 additions & 2 deletions api/test/factory/models/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@

from api.models.audio import Audio, AudioAddOn, AudioReport, SensitiveAudio
from test.factory.faker import Faker
from test.factory.models.media import IdentifierFactory, MediaFactory
from test.factory.models.media import (
IdentifierFactory,
MediaFactory,
MediaReportFactory,
)


class SensitiveAudioFactory(DjangoModelFactory):
Expand All @@ -29,7 +33,7 @@ class Meta:
waveform_peaks = Faker("waveform")


class AudioReportFactory(DjangoModelFactory):
class AudioReportFactory(MediaReportFactory):
class Meta:
model = AudioReport

Expand Down
6 changes: 6 additions & 0 deletions api/test/factory/models/oauth2.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from django.contrib.auth import get_user_model
from django.utils import timezone

import factory
Expand Down Expand Up @@ -67,3 +68,8 @@ class Meta:
tzinfo=timezone.get_current_timezone(),
)
application = factory.SubFactory(ThrottledApplicationFactory)


class UserFactory(DjangoModelFactory):
    """Factory producing instances of the project's active user model."""

    class Meta:
        # Resolved through Django so a swapped-in user model is honoured.
        model = get_user_model()
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from io import StringIO

from django.core.management import call_command

import pytest

from api.constants.moderation import DecisionAction
from api.models import (
DEINDEXED,
DMCA,
MATURE,
MATURE_FILTERED,
NO_ACTION,
OTHER,
AudioDecision,
AudioDecisionThrough,
ImageDecision,
ImageDecisionThrough,
)
from test.factory.models.audio import AudioReportFactory
from test.factory.models.image import ImageReportFactory
from test.factory.models.oauth2 import UserFactory


def call_cmd(**options):
    """
    Invoke the ``backfillmoderationdecision`` command with ``options``.

    Returns a ``(stdout, stderr)`` tuple holding the text the command wrote
    to each stream; the tuple is also printed.
    """
    stdout_buffer, stderr_buffer = StringIO(), StringIO()
    call_command(
        "backfillmoderationdecision",
        **options,
        stdout=stdout_buffer,
        stderr=stderr_buffer,
    )
    captured = (stdout_buffer.getvalue(), stderr_buffer.getvalue())
    print(captured)
    return captured


def make_reports(media_type, reason: str, status: str, count: int = 1):
    """
    Create ``count`` reports with the given ``reason`` and ``status``.

    Audio reports are built when ``media_type`` is ``"audio"``; any other
    value produces image reports. Returns the list of created reports.
    """
    report_factory = (
        AudioReportFactory if media_type == "audio" else ImageReportFactory
    )
    return report_factory.create_batch(count, status=status, reason=reason)


@pytest.mark.parametrize(
    ("reason", "status", "expected_action"),
    (
        (MATURE, MATURE_FILTERED, DecisionAction.MARKED_SENSITIVE),
        (DMCA, MATURE_FILTERED, DecisionAction.MARKED_SENSITIVE),
        (OTHER, MATURE_FILTERED, DecisionAction.MARKED_SENSITIVE),
        (MATURE, NO_ACTION, DecisionAction.REJECTED_REPORTS),
        (DMCA, NO_ACTION, DecisionAction.REJECTED_REPORTS),
        (OTHER, NO_ACTION, DecisionAction.REJECTED_REPORTS),
        (MATURE, DEINDEXED, DecisionAction.DEINDEXED_SENSITIVE),
        (DMCA, DEINDEXED, DecisionAction.DEINDEXED_COPYRIGHT),
        (OTHER, DEINDEXED, DecisionAction.DEINDEXED_SENSITIVE),
    ),
)
@pytest.mark.parametrize("media_type", ("image", "audio"))
@pytest.mark.django_db
def test_create_moderation_decision_for_reports(
    media_type, reason, status, expected_action
):
    """
    A single resolved report yields exactly one correctly populated decision.

    Covers every reason/status combination for both media types and checks
    the decision's action and moderator, the through row, and the link
    written back onto the report.
    """
    username = "opener"
    UserFactory.create(username=username)

    report = make_reports(media_type=media_type, reason=reason, status=status)[0]

    out, _ = call_cmd(dry_run=False, media_type=media_type, moderator=username)

    MediaDecision = ImageDecision if media_type == "image" else AudioDecision
    MediaDecisionThrough = (
        ImageDecisionThrough if media_type == "image" else AudioDecisionThrough
    )
    assert MediaDecision.objects.count() == 1
    assert f"Created 1 {media_type} moderation decisions from existing reports." in out

    decision = MediaDecision.objects.first()
    assert decision.media_objs.count() == 1
    assert decision.action == expected_action
    assert decision.moderator.username == username

    decision_through = MediaDecisionThrough.objects.first()
    assert decision_through.media_obj == report.media_obj
    assert decision_through.decision == decision

    # The command updates the report row in the database, not this in-memory
    # instance, so reload it before checking that it points at the decision.
    report.refresh_from_db()
    assert report.decision == decision


@pytest.mark.django_db
def test_catch_user_exception():
    """An unknown moderator username is reported on stderr."""
    # A resolved report must exist, otherwise the command exits before it
    # ever looks the moderator up.
    make_reports(media_type="image", reason=MATURE, status=MATURE_FILTERED)

    captured = call_cmd(dry_run=False, moderator="nonexistent")
    stderr_text = captured[1]

    assert "User 'nonexistent' not found." in stderr_text