Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Commit

Permalink
Add User-Agent header to all outbound requests
Browse files Browse the repository at this point in the history
  • Loading branch information
sarayourfriend committed Aug 11, 2022
1 parent cd76576 commit c4359de
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 12 deletions.
11 changes: 9 additions & 2 deletions api/catalog/api/utils/validate_images.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging
import time

from django.conf import settings

import django_redis
import grequests

Expand All @@ -11,6 +13,9 @@


CACHE_PREFIX = "valid:"
# Headers sent with the HEAD requests that validate_images issues to check
# image URLs. The User-Agent is built from the shared settings template with
# purpose="LinkValidation".
HEADERS = {
"User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format(purpose="LinkValidation")
}


def _get_cached_statuses(redis, image_urls):
Expand Down Expand Up @@ -45,8 +50,10 @@ def validate_images(query_hash, start_slice, results, image_urls):
to_verify[url] = idx
logger.debug(f"len(to_verify)={len(to_verify)}")
reqs = (
grequests.head(u, allow_redirects=False, timeout=2, verify=False)
for u in to_verify.keys()
grequests.head(
url, headers=HEADERS, allow_redirects=False, timeout=2, verify=False
)
for url in to_verify.keys()
)
verified = grequests.map(reqs, exception_handler=_validation_failure)
# Cache newly verified image statuses.
Expand Down
20 changes: 16 additions & 4 deletions api/catalog/api/utils/watermark.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
import logging
import os
from enum import Flag, auto
from io import BytesIO
from textwrap import wrap

from django.conf import settings

import piexif
import requests
from PIL import Image, ImageDraw, ImageFont
from sentry_sdk import capture_exception


parent_logger = logging.getLogger(__name__)


BREAKPOINT_DIMENSION = 400 # 400px
Expand All @@ -14,6 +21,9 @@

FRAME_COLOR = "#fff" # White frame
TEXT_COLOR = "#000" # Black text
# Headers sent when _open_image downloads the source image with requests.get.
# The User-Agent is built from the shared settings template with
# purpose="Watermark".
HEADERS = {
"User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format(purpose="Watermark")
}


class Dimension(Flag):
Expand Down Expand Up @@ -143,9 +153,9 @@ def _open_image(url):
:param url: the URL from where to read the image
:return: the PIL image object with the EXIF data
"""

logger = parent_logger.getChild("_open_image")
try:
response = requests.get(url)
response = requests.get(url, headers=HEADERS)
img_bytes = BytesIO(response.content)
img = Image.open(img_bytes)
# Preserve EXIF metadata
Expand All @@ -154,8 +164,10 @@ def _open_image(url):
else:
exif = None
return img, exif
except requests.exceptions.RequestException:
print("Error loading image data")
except requests.exceptions.RequestException as e:
capture_exception(e)
logger.error(f"Error loading image data: {e}")
return None, None


def _print_attribution_on_image(img, image_info):
Expand Down
4 changes: 3 additions & 1 deletion api/catalog/api/utils/waveform.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
import subprocess
from typing import List

from django.conf import settings

import requests


parent_logger = logging.getLogger(__name__)

TMP_DIR = pathlib.Path("/tmp").resolve()
UA_STRING = "OpenverseWaveform/0.0 (https://wordpress.org/openverse)"
UA_STRING = settings.OUTBOUND_USER_AGENT_TEMPLATE.format(purpose="Waveform")


def ext_from_url(url):
Expand Down
6 changes: 5 additions & 1 deletion api/catalog/api/views/image_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ class ImageViewSet(MediaViewSet):

serializer_class = ImageSerializer

# Headers sent when the oembed action has to download the image itself
# (stored height/width missing) in order to read its dimensions. The
# User-Agent is built from the shared settings template with purpose="OEmbed".
OEMBED_HEADERS = {
"User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format(purpose="OEmbed"),
}

# Extra actions

@action(
Expand All @@ -79,7 +83,7 @@ def oembed(self, request, *_, **__):
except Image.DoesNotExist:
return get_api_exception("Could not find image.", 404)
if not (image.height and image.width):
image_file = requests.get(image.url)
image_file = requests.get(image.url, headers=self.OEMBED_HEADERS)
width, height = PILImage.open(io.BytesIO(image_file.content)).size
context |= {
"width": width,
Expand Down
20 changes: 16 additions & 4 deletions api/catalog/api/views/media_views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
import logging as log
import logging
from http.client import RemoteDisconnected
from urllib.error import HTTPError
from urllib.parse import urlencode
Expand All @@ -20,6 +20,9 @@
from catalog.custom_auto_schema import CustomAutoSchema


parent_logger = logging.getLogger(__name__)


class MediaViewSet(ReadOnlyModelViewSet):
swagger_schema = CustomAutoSchema

Expand Down Expand Up @@ -163,26 +166,35 @@ def _get_user_ip(request):
ip = request.META.get("REMOTE_ADDR")
return ip

# Base headers for requests to the thumbnail proxy. _thumbnail_proxy_comm
# passes this dict to urllib's Request and then adds any caller-supplied
# headers on top with add_header. The User-Agent is built from the shared
# settings template with purpose="ThumbnailGeneration".
THUMBNAIL_PROXY_COMM_HEADERS = {
"User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format(
purpose="ThumbnailGeneration"
)
}

@staticmethod
def _thumbnail_proxy_comm(
path: str,
params: dict,
headers: tuple[tuple[str, str]] = (),
):
logger = parent_logger.getChild("_thumbnail_proxy_comm")
proxy_url = settings.THUMBNAIL_PROXY_URL
query_string = urlencode(params)
upstream_url = f"{proxy_url}/{path}?{query_string}"
log.debug(f"Image proxy upstream URL: {upstream_url}")
logger.debug(f"Image proxy upstream URL: {upstream_url}")

try:
req = Request(upstream_url)
req = Request(
upstream_url, headers=MediaViewSet.THUMBNAIL_PROXY_COMM_HEADERS
)
for key, val in headers:
req.add_header(key, val)
upstream_response = urlopen(req, timeout=10)

res_status = upstream_response.status
content_type = upstream_response.headers.get("Content-Type")
log.debug(
logger.debug(
"Image proxy response "
f"status: {res_status}, content-type: {content_type}"
)
Expand Down
5 changes: 5 additions & 0 deletions api/catalog/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,11 @@
# The version of the API. We follow the semantic version specification.
API_VERSION = config("SEMANTIC_VERSION", default="Version not specified")

# Template for the User-Agent header sent on outbound HTTP requests.
# Formatting happens in two stages: the f-string below fills in API_VERSION
# immediately, while the doubled braces leave a literal "{purpose}"
# placeholder that each call site fills in later via
# OUTBOUND_USER_AGENT_TEMPLATE.format(purpose=...).
# NOTE(review): API_VERSION is interpolated unescaped, so a version string
# containing "{" or "}" would be interpreted by the later .format() call —
# confirm SEMANTIC_VERSION can never contain braces.
OUTBOUND_USER_AGENT_TEMPLATE = config(
"OUTBOUND_USER_AGENT_TEMPLATE",
default=f"Openverse{{purpose}}/{API_VERSION} (https://wordpress.org/openverse)",
)

# The contact email of the Openverse team
CONTACT_EMAIL = config("CONTACT_EMAIL", default="openverse@wordpress.org")

Expand Down

0 comments on commit c4359de

Please sign in to comment.