diff --git a/src/navigator_data_ingest/base/api_client.py b/src/navigator_data_ingest/base/api_client.py index c11bfad..a2a2c85 100644 --- a/src/navigator_data_ingest/base/api_client.py +++ b/src/navigator_data_ingest/base/api_client.py @@ -139,13 +139,21 @@ def upload_document( def _download_from_source( session: requests.Session, source_url: str ) -> requests.Response: + # Try the original source url download_response = session.get(source_url, allow_redirects=True, timeout=5) # TODO this is a hack and we should handle source urls upstream in the backend if download_response.status_code == 404: - download_response_altered = session.get(source_url.replace("%", ""), allow_redirects=True, timeout=5) - if download_response_altered.status_code == 200: - download_response = download_response_altered + # mutation 1 - remove % + download_response = session.get( + source_url.replace("%", ""), allow_redirects=True, timeout=5 + ) + + if download_response.status_code == 404: + # mutation 2 - replace % with the encoded version, i.e. %25 + download_response = session.get( + source_url.replace("%", "%25"), allow_redirects=True, timeout=5 + ) if download_response.status_code >= 300: raise Exception(