Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[omm] background task placeholders #1370

Merged
merged 1 commit into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions open-media-match/src/OpenMediaMatch/app.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

import logging
import os
import sys
import warnings

import flask
from flask.logging import default_handler
import flask_migrate

# Import pdq first with its hash order warning squelched; it's before our time
with warnings.catch_warnings():
warnings.simplefilter("ignore")
from threatexchange.signal_type.pdq import signal as _

from OpenMediaMatch import database
from OpenMediaMatch.background_tasks import build_index, fetcher
from OpenMediaMatch.persistence import get_storage
from OpenMediaMatch.blueprints import hashing, matching, curation


Expand All @@ -15,6 +25,7 @@ def create_app():
Create and configure the Flask app
"""
app = flask.Flask(__name__)

migrate = flask_migrate.Migrate()

if "OMM_CONFIG" in os.environ:
Expand Down Expand Up @@ -66,10 +77,37 @@ def status():

@app.cli.command("seed")
def seed_data():
    """Insert plausible-looking data into the database layer"""
    # NOTE: the docstring above is left verbatim; Click shows a command
    # function's docstring as its help text.
    #
    # TODO: This is a placeholder for where some useful seed data can be loaded;
    # particularly important for development
    #
    # For now: add one enabled bank named "bad_stuff" and commit it.
    session = database.db.session
    session.add(database.Bank(name="bad_stuff", enabled=True))
    session.commit()

@app.cli.command("fetch")
def fetch():
    """Run the 'background task' to fetch from 3p data and sync to local banks"""
    # NOTE: the docstring above is left verbatim; Click shows a command
    # function's docstring as its help text.
    store = get_storage()

    # Attach Flask's default handler to the fetcher module's logger so its
    # records are emitted when run from the CLI. NOTSET on a non-root logger
    # defers the level decision to its ancestors; NOTSET on the root logger
    # means every record is processed.
    fetcher_log = logging.getLogger(fetcher.__name__)
    fetcher_log.addHandler(default_handler)
    fetcher_log.setLevel(logging.NOTSET)
    logging.getLogger().setLevel(logging.NOTSET)

    # Map each configured signal type's name to the signal type itself.
    signal_types_by_name = {}
    for cfg in store.get_signal_type_configs().values():
        type_name = cfg.signal_type.get_name()
        signal_types_by_name[type_name] = cfg.signal_type

    fetcher.fetch_all(store, signal_types_by_name)

@app.cli.command("build_indices")
def build_indices():
    """Run the 'background task' to rebuild indices from bank contents"""
    # NOTE: the docstring above is left verbatim; Click shows a command
    # function's docstring as its help text.
    store = get_storage()

    # Attach Flask's default handler to the build_index module's logger so its
    # records are emitted when run from the CLI. NOTSET on a non-root logger
    # defers the level decision to its ancestors; NOTSET on the root logger
    # means every record is processed.
    index_log = logging.getLogger(build_index.__name__)
    index_log.addHandler(default_handler)
    index_log.setLevel(logging.NOTSET)
    logging.getLogger().setLevel(logging.NOTSET)

    # The same storage object is passed as both the signal type config store
    # (first argument) and the index store (third argument); the bank store
    # (second argument) is not wired up yet, hence None.
    build_index.build_all_indices(
        signal_type_cfgs=store,
        bank_store=None,
        index_store=store,
    )

return app
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

import logging
import typing as t

from threatexchange.signal_type.signal_base import SignalType

from OpenMediaMatch.storage.interface import (
ISignalTypeIndexStore,
ISignalTypeConfigStore,
)

logger = logging.getLogger(__name__)


def build_all_indices(
    signal_type_cfgs: ISignalTypeConfigStore,
    bank_store: None,  # TODO
    index_store: ISignalTypeIndexStore,
) -> None:
    """
    Rebuild the index for every enabled signal type.

    Logs that the task is running, then calls build_index() once for each
    value returned by signal_type_cfgs.get_enabled_signal_types(), forwarding
    bank_store and index_store unchanged.

    Removing indices that belong to disabled or deleted signal types is
    intended but not implemented yet (see the TODO below).
    """
    logger.info("Running the %s background task", build_all_indices.__name__)
    for signal_type in signal_type_cfgs.get_enabled_signal_types().values():
        build_index(signal_type, bank_store, index_store)

    # TODO cleanup disabled / deleted signal types


def build_index(
    for_signal_type: t.Type[SignalType],
    bank_store: None,  # TODO
    index_store: ISignalTypeIndexStore,
) -> None:
    """
    Rebuild and persist the index for a single signal type.

    Intended to build the index from scratch out of the current bank
    contents. This is currently a placeholder: the body does nothing.
    """
    # TODO
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

import typing as t
import logging

from OpenMediaMatch.storage.interface import ICollaborationStore, SignalType
from threatexchange.exchanges.fetch_state import CollaborationConfigBase

logger = logging.getLogger(__name__)


def fetch_all(
collab_store: ICollaborationStore,
Expand All @@ -13,6 +16,7 @@ def fetch_all(
"""
For all collaborations registered with OMM, fetch()
"""
logger.info("Running the %s background task", fetch_all.__name__)
collabs = collab_store.get_collaborations()
for c in collabs.values():
fetch(collab_store, enabled_signal_types, c)
Expand Down
Loading