Replace retry with backoff, add backoff to all Freesound requests (#4663)

* Replace retry library with backoff

* Back off of all Freesound requests

* Move comments about flaky states to where the values are defined

AetherUnbound authored Aug 15, 2024
1 parent 431d0d7 commit 05f8c55
Showing 4 changed files with 52 additions and 18 deletions.
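At its core, the migration is a one-for-one decorator swap. A minimal sketch of the before/after (the `fetch_size` name is hypothetical; the decorator arguments mirror the ones swapped in the diffs below):

import backoff
from requests.exceptions import ConnectionError, SSLError

flaky_exceptions = (SSLError, ConnectionError)

# Before: the retry library's fixed schedule -- 3 tries, sleeping 1s then
# 2s between attempts:
#   @retry(flaky_exceptions, tries=3, delay=1, backoff=2)
# After: backoff's exponential generator yields comparable waits (1s, 2s),
# with full jitter applied by default.
@backoff.on_exception(backoff.expo, flaky_exceptions, max_tries=3)
def fetch_size(url: str):
    ...  # hypothetical stand-in for a HEAD request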
41 changes: 26 additions & 15 deletions catalog/dags/providers/provider_api_scripts/freesound.py
@@ -17,9 +17,9 @@
 import logging
 from datetime import datetime

+import backoff
 from airflow.models import Variable
 from requests.exceptions import ConnectionError, HTTPError, SSLError
-from retry import retry

 from common.licenses.licenses import get_license_info
 from common.loader import provider_details as prov
@@ -35,7 +35,6 @@ class FreesoundDataIngester(ProviderDataIngester):
     host = "freesound.org"
     endpoint = f"https://{host}/apiv2/search/text"
     providers = {"audio": prov.FREESOUND_DEFAULT_PROVIDER}
-    flaky_exceptions = (SSLError, ConnectionError)
     preferred_preview = "preview-hq-mp3"
     preview_bitrates = {
         "preview-hq-mp3": 128000,
@@ -44,6 +43,20 @@ class FreesoundDataIngester(ProviderDataIngester):
         "preview-lq-ogg": 80000,
     }

+    # Freesound often tends to be flaky when making requests during ingestion.
+    # For the flaky HTTP errors above, we wait some time before trying the request
+    # again in the hopes that the upstream issues resolve in that time:
+    # * 400 - Although framed as bad requests, these appear to succeed later
+    # * 503 - Service unavailable, so we should just try to wait
+    flaky_error_codes = {400, 503}
+    # For certain requests, we want to retry it a few times on these conditions:
+    # * SSLError - 'EOF occurred in violation of protocol (_ssl.c:1129)'
+    # * ConnectionError - '[Errno 113] No route to host'
+    # Both of these seem transient and may be the result of some odd behavior on the
+    # Freesound API end. We have an API key that's supposed to be maxed out, so
+    # I can't imagine it's throttling (aetherunbound).
+    flaky_exceptions = (SSLError, ConnectionError)
+
     def __init__(self, *args, **kwargs):
         self.api_key = Variable.get("API_KEY_FREESOUND")
         self.headers = {
@@ -53,6 +66,15 @@ def __init__(self, *args, **kwargs):

         super().__init__(*args, **kwargs)

+    get_response_json = backoff.on_exception(
+        backoff.expo,
+        HTTPError,
+        max_time=60 * 2,
+        # Raise all other errors
+        giveup=lambda e: e.response.status_code
+        not in FreesoundDataIngester.flaky_error_codes,
+    )(ProviderDataIngester.get_response_json)
+
     def get_next_query_params(self, prev_query_params: dict | None) -> dict:
         if not prev_query_params:
             start_date = "*"
@@ -157,20 +179,9 @@ def _get_audio_set_info(self, media_data):
         else:
             return None, None, None

-    @retry(flaky_exceptions, tries=3, delay=1, backoff=2)
+    @backoff.on_exception(backoff.expo, flaky_exceptions, max_tries=3)
     def _get_audio_file_size(self, url):
-        """
-        Get the content length of a provided URL.
-        Freesound can be finicky, so we want to retry it a few times on
-        these conditions:
-        * SSLError - 'EOF occurred in violation of protocol (_ssl.c:1129)'
-        * ConnectionError - '[Errno 113] No route to host'
-        Both of these seem transient and may be the result of some odd behavior on the
-        Freesound API end. We have an API key that's supposed to be maxed out, so
-        I can't imagine it's throttling (aetherunbound).
-        """
+        """Get the content length of a provided URL."""
         response = self.delayed_requester.head(url)

         if response:
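The get_response_json assignment above uses the less common call form of the decorator: backoff.on_exception(...) is invoked as a plain function on the inherited ProviderDataIngester.get_response_json, so every Freesound request gains retries without redefining the method, and the giveup predicate re-raises any HTTPError whose status code is not in flaky_error_codes. A standalone sketch of the same pattern, with hypothetical class and method names:

import backoff
from requests.exceptions import HTTPError

class Base:
    """Hypothetical stand-in for ProviderDataIngester."""

    def get_json(self, endpoint: str) -> dict:
        ...  # would perform the request and raise HTTPError on a bad status

class Child(Base):
    # Status codes worth retrying; anything else should raise immediately.
    flaky_error_codes = {400, 503}

    # Wrap the inherited method instead of overriding it. The giveup lambda
    # runs per-exception at call time, so referencing Child here is safe even
    # though the name isn't bound yet while the class body executes.
    get_json = backoff.on_exception(
        backoff.expo,
        HTTPError,
        max_time=60 * 2,  # stop retrying after roughly two minutes
        giveup=lambda e: e.response.status_code not in Child.flaky_error_codes,
    )(Base.get_json)

When the wrapped call raises an HTTPError with a flaky status, backoff sleeps and retries until max_time elapses; any other status makes giveup return True, and the error propagates immediately.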
4 changes: 2 additions & 2 deletions catalog/dags/providers/provider_api_scripts/smithsonian.py
@@ -10,9 +10,9 @@

 import logging

+import backoff
 from airflow.exceptions import AirflowException
 from airflow.models import Variable
-from retry import retry

 from common.licenses import get_license_info
 from common.loader import provider_details as prov
@@ -172,7 +172,7 @@ def get_record_data(self, data: dict) -> dict | list[dict] | None:
             images += self._get_associated_images(image_list, shared_image_data)
         return images

-    @retry(ValueError, tries=3, delay=1, backoff=2)
+    @backoff.on_exception(backoff.expo, ValueError, max_tries=3)
     def _get_unit_codes_from_api(self) -> set:
         query_params = {"api_key": self.api_key, "q": "online_media_type:Images"}
         response_json = self.get_response_json(
2 changes: 1 addition & 1 deletion catalog/requirements-prod.txt
@@ -3,10 +3,10 @@
 # Note: Unpinned packages have their versions determined by the Airflow constraints file

 apache-airflow[amazon,postgres,http,elasticsearch]==2.9.3
+backoff==2.2.1
 lxml
 psycopg2-binary
 requests-file
 requests-oauthlib
-retry==0.9.2
 tabulate==0.9.0
 tldextract==5.1.2
23 changes: 23 additions & 0 deletions catalog/tests/dags/providers/provider_api_scripts/test_freesound.py
@@ -37,6 +37,29 @@ def audio_data():
     yield _get_resource_json("audio_data_example.json")


+@pytest.mark.parametrize(
+    "http_error",
+    [
+        # These should be fine
+        *FreesoundDataIngester.flaky_error_codes,
+        # These should raise immediately
+        pytest.param(401, marks=pytest.mark.raises(exception=HTTPError)),
+        pytest.param(500, marks=pytest.mark.raises(exception=HTTPError)),
+    ],
+)
+def test_get_response_json_retries(http_error):
+    valid_response = MagicMock(status_code=200, json=MagicMock())
+    # Patch the sleep function so it doesn't take long
+    with patch.object(fsd.delayed_requester, "get") as get_patch, patch("time.sleep"):
+        # Error out at first, then return a value
+        get_patch.side_effect = [
+            HTTPError(request=MagicMock(), response=MagicMock(status_code=http_error))
+        ] * 5 + [valid_response]
+        result = fsd.get_response_json("endpoint")
+        assert get_patch.call_count == 6
+        assert result == valid_response.json.return_value
+
+
 def test_get_audio_pages_returns_correctly_with_no_data():
     actual_result = fsd.get_batch_data({})
     assert actual_result is None
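The new test works by scripting the mocked requester with a side_effect list (five raises followed by a real response) and patching time.sleep so backoff's exponential waits cost nothing. The same technique in a self-contained form, with hypothetical names:

from unittest.mock import MagicMock, patch

import backoff

@backoff.on_exception(backoff.expo, ValueError, max_tries=6)
def call(requester):
    # Hypothetical wrapper: any ValueError from the requester is retried.
    return requester()

def test_retries_until_success():
    requester = MagicMock()
    # Script the mock: five failures, then a successful return value.
    requester.side_effect = [ValueError("flaky")] * 5 + ["ok"]
    # backoff sleeps via time.sleep; patching it keeps the test instant.
    with patch("time.sleep"):
        assert call(requester) == "ok"
    assert requester.call_count == 6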
