Skip to content

Commit

Permalink
moderation: add CLI for link domains management
Browse files Browse the repository at this point in the history
  • Loading branch information
slint committed Nov 11, 2024
1 parent dcc93f9 commit 8ef8e40
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 26 deletions.
2 changes: 1 addition & 1 deletion site/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ tests =
[options.entry_points]
flask.commands =
zenodo-admin = zenodo_rdm.cli:zenodo_admin
moderation = zenodo_rdm.cli:moderation
moderation = zenodo_rdm.cli:moderation_cli
invenio_base.blueprints =
zenodo_rdm_legacy = zenodo_rdm.legacy.views:blueprint
zenodo_rdm_support = zenodo_rdm.views:create_blueprint
Expand Down
75 changes: 61 additions & 14 deletions site/zenodo_rdm/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from invenio_requests.records.models import RequestMetadata

from zenodo_rdm.api import ZenodoRDMRecord
from zenodo_rdm.moderation.models import ModerationQuery
from zenodo_rdm.moderation.models import LinkDomain, LinkDomainStatus, ModerationQuery
from zenodo_rdm.moderation.percolator import (
create_percolator_index,
get_percolator_index,
Expand Down Expand Up @@ -264,7 +264,7 @@ def moderation_cli():
"""Moderation commands."""


@moderation_cli.group()
@moderation_cli.group(name="queries")
def queries_cli():
    """Moderation queries commands."""
    # Intentionally empty: this function only serves as the "queries" click
    # group under ``moderation_cli``; the docstring above is its help text.

Expand Down Expand Up @@ -333,18 +333,14 @@ def add_query(record_cls, query_string, notes, score, active, file):
"""Command to add a moderation query from CSV or directly and index it."""
record_cls = ZenodoRDMRecord if record_cls == "records" else Community

try:
if file:
add_queries_from_csv(file, record_cls)
else:
create_and_index_query(record_cls, query_string, notes, score, active)
if file:
_add_queries_from_csv(file, record_cls)
else:
_create_and_index_query(record_cls, query_string, notes, score, active)
click.secho("Queries added and indexed successfully.", fg="green")

click.secho("Queries added and indexed successfully.")
except Exception as e:
click.secho(f"Error adding or indexing query: {e}")


def add_queries_from_csv(file_path, record_cls=ZenodoRDMRecord):
def _add_queries_from_csv(file_path, record_cls=ZenodoRDMRecord):
"""Load queries from a CSV file, add them to the database, and index them."""
with open(file_path, mode="r", newline="", encoding="utf-8") as csvfile:
csvreader = csv.reader(csvfile)
Expand All @@ -360,16 +356,67 @@ def add_queries_from_csv(file_path, record_cls=ZenodoRDMRecord):

# Ensure to add query only if there's a query string
if query_string:
create_and_index_query(
_create_and_index_query(
record_cls, query_string, notes, score, active
)


def create_and_index_query(record_cls, query_string, notes, score, active):
def _create_and_index_query(record_cls, query_string, notes, score, active):
    """Store one moderation query and register it with the percolator.

    The query is created through ``ModerationQuery.create`` and the database
    session is committed first; the id of the new query is then handed to
    ``index_percolate_query`` together with the remaining attributes.

    :param record_cls: Record class forwarded to ``index_percolate_query``.
    :param query_string: The query string to store and index.
    :param notes: Notes attached to the query.
    :param score: Score attached to the query.
    :param active: Whether the query is active.
    """
    new_query = ModerationQuery.create(
        query_string=query_string,
        notes=notes,
        score=score,
        active=active,
    )
    db.session.commit()

    index_percolate_query(
        record_cls,
        new_query.id,
        query_string,
        active,
        score,
        notes,
    )


@moderation_cli.group(name="domains")
def domains_cli():
    """Moderation domains commands."""
    # Intentionally empty: this function only serves as the "domains" click
    # group under ``moderation_cli``; the docstring above is its help text.


@domains_cli.command("add")
@click.option(
    "-d",
    "--domain",
    help="The domain to add (required unless --file is given).",
)
@click.option("-n", "--notes", help="Additional notes for the domain.")
@click.option(
    "-s",
    "--status",
    type=click.Choice(["banned", "safe"], case_sensitive=False),
    help="The status for the domain.",
    default="banned",
)
@click.option("--score", type=int, help="The score for the domain.")
@click.option(
    "-f",
    "--file",
    type=click.Path(exists=True, readable=True),
    help="Path to CSV file containing domains.",
)
@with_appcontext
def add_domain(domain, notes, status, score, file):
    """Command to add a moderated links domain."""
    # NOTE: the docstring above is shown by click as the command help, so
    # implementation notes live in comments instead.
    #
    # ``--domain`` is deliberately not marked ``required=True``: doing so
    # forced callers to pass a dummy domain even for bulk imports via
    # ``--file``. Instead we require that at least one of the two is given.
    if file:
        # Bulk mode: per-domain options (--domain/--notes/--status/--score)
        # are not used; each CSV row carries its own values.
        _add_domains_from_csv(file)
    elif domain:
        _create_domain(domain, notes, score, status)
    else:
        raise click.UsageError("Either --domain or --file must be provided.")


def _create_domain(domain, notes, score, status):
    """Create a moderated links domain and commit it to the database.

    :param domain: Domain name to register.
    :param notes: Optional notes, passed through to ``LinkDomain.create``.
    :param score: Optional score, passed through to ``LinkDomain.create``.
    :param status: ``"banned"`` or ``"safe"`` (case-insensitive, surrounding
        whitespace ignored). An empty or missing value falls back to
        ``"banned"``, matching the default of the ``--status`` CLI option.
    :raises click.BadParameter: If ``status`` is not a recognised value.
    """
    known_statuses = {
        "banned": LinkDomainStatus.BANNED,
        "safe": LinkDomainStatus.SAFE,
    }
    status_key = (status or "banned").strip().lower()
    # Fail loudly on unknown values: silently mapping anything that is not
    # exactly "banned" to SAFE would let a typo whitelist a domain.
    if status_key not in known_statuses:
        raise click.BadParameter(
            f"Unknown status {status!r} for domain {domain!r}; "
            "expected one of: banned, safe.",
            param_hint="status",
        )

    link_domain = LinkDomain.create(domain, known_statuses[status_key], score, notes)
    db.session.commit()
    click.secho(f"Domain {link_domain} added successfully.", fg="green")


def _add_domains_from_csv(file_path):
    """Load domains from a CSV file and add them to the database.

    The file must start with a header row containing a ``domain`` column.
    The ``notes``, ``score`` and ``status`` columns are optional; a missing
    or empty ``status`` defaults to ``"banned"`` and a missing or empty
    ``score`` is passed on as ``None``. Rows without a domain are skipped.

    :param file_path: Path to the CSV file to read.
    :raises click.UsageError: If the header has no ``domain`` column.
    :raises ValueError: If a non-empty ``score`` cell is not an integer.
    """
    # ``newline=""`` is what the csv module expects so that quoted fields
    # containing line breaks are parsed correctly.
    with open(file_path, mode="r", newline="", encoding="utf-8") as csvfile:
        reader = csv.DictReader(csvfile)
        if "domain" not in (reader.fieldnames or []):
            raise click.UsageError(
                f"CSV file {file_path} must have a 'domain' column."
            )

        for entry in reader:
            # Short rows yield ``None`` for missing cells, hence the ``or ""``.
            domain = (entry.get("domain") or "").strip()
            if not domain:
                continue

            notes = entry.get("notes")

            # CSV cells are strings; convert so the score has the same type
            # as the ``--score`` CLI option (int).
            raw_score = (entry.get("score") or "").strip()
            score = int(raw_score) if raw_score else None

            # ``or`` (rather than a .get default) also covers a status column
            # that exists but is empty for this row.
            status = (entry.get("status") or "banned").strip().lower()

            _create_domain(domain, notes, score, status)
4 changes: 4 additions & 0 deletions site/zenodo_rdm/moderation/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ def lookup_domain(cls, url):
.scalar()
)

def __repr__(self):
    """Return a debug representation showing the domain and its status."""
    return "<LinkDomain {} ({})>".format(self.domain, self.status)


class ModerationQuery(db.Model):
"""Moderation queries model."""
Expand Down
21 changes: 10 additions & 11 deletions site/zenodo_rdm/moderation/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

from flask import current_app
from invenio_search import current_search_client
from invenio_search.utils import build_alias_name

from .models import LinkDomain, LinkDomainStatus
from .percolator import get_percolator_index
Expand Down Expand Up @@ -70,16 +69,16 @@ def links_rule(identity, draft=None, record=None):
domain = LinkDomain.lookup_domain(link)
if domain is None:
continue
if domain.status == LinkDomainStatus.BANNED:
if domain.score is not None:
score += domain.score
else:
score += current_scores.spam_link
elif domain == LinkDomainStatus.SAFE:
if domain.score is not None:
score += domain.score
else:
score += current_scores.ham_link
default_score = (
current_scores.ham_link
if domain.status == LinkDomainStatus.SAFE
else current_scores.spam_link
)
if domain.score is not None:
score += domain.score
else:
score += default_score

return score


Expand Down

0 comments on commit 8ef8e40

Please sign in to comment.