Skip to content

Commit

Permalink
[omm][api] Hash api to detect content type (#1359)
Browse files: browse the repository at this point in the history
  • Loading branch information
Dcallies authored Sep 14, 2023
1 parent cc04f20 commit 9af70ce
Showing 1 changed file with 23 additions and 7 deletions.
30 changes: 23 additions & 7 deletions open-media-match/src/OpenMediaMatch/blueprints/hashing.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,6 +13,8 @@
import requests

from threatexchange.content_type.content_base import ContentType
from threatexchange.content_type.photo import PhotoContent
from threatexchange.content_type.video import VideoContent
from threatexchange.signal_type.signal_base import FileHasher, SignalType

from OpenMediaMatch import app_resources
Expand All @@ -24,19 +26,21 @@
def hash_media():
"""
Fetch content and return its hash.
TODO: implement
"""

content_type = _parse_request_content_type()
signal_types = _parse_request_signal_type(content_type)

media_url = request.args.get("url", None)
if media_url is None:
abort(400, "url is required")

download_resp = requests.get(media_url, allow_redirects=True, timeout=30 * 1000)
download_resp.raise_for_status()

url_content_type = download_resp.headers["content-type"]

current_app.logger.debug("%s is type %s", media_url, url_content_type)

content_type = _parse_request_content_type(url_content_type)
signal_types = _parse_request_signal_type(content_type)

ret = {}

# For images, we may need to copy the file suffix (.png, jpeg, etc) for it to work
Expand All @@ -53,9 +57,21 @@ def hash_media():
return ret


def _parse_request_content_type() -> ContentType:
storage = app_resources.get_storage()
def _parse_request_content_type(url_content_type: str) -> ContentType:
arg = request.args.get("content_type", "")
if not arg:
if url_content_type.lower().startswith("image"):
arg = PhotoContent.get_name()
elif url_content_type.lower().startswith("video"):
arg = VideoContent.get_name()
else:
abort(
400,
f"unsupported url ContentType: '{url_content_type}', "
"if you know the expected type, provide it with content_type",
)

storage = app_resources.get_storage()
content_type_config = storage.get_content_type_configs().get(arg)
if content_type_config is None:
abort(400, f"no such content_type: '{arg}'")
Expand Down

0 comments on commit 9af70ce

Please sign in to comment.