From 6f92f40dbdfaf82d5f69b359085c8629034c1c8b Mon Sep 17 00:00:00 2001
From: Staci Cooper <63313398+stacimc@users.noreply.github.com>
Date: Wed, 7 Dec 2022 13:03:51 -0800
Subject: [PATCH] Allow overriding the date from the DagRun conf (#880)

---
 .../provider_data_ingester.py                 |  7 ++++++
 .../test_provider_data_ingester.py            | 24 +++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/openverse_catalog/dags/providers/provider_api_scripts/provider_data_ingester.py b/openverse_catalog/dags/providers/provider_api_scripts/provider_data_ingester.py
index 5b356007217..8ee35b3a173 100644
--- a/openverse_catalog/dags/providers/provider_api_scripts/provider_data_ingester.py
+++ b/openverse_catalog/dags/providers/provider_api_scripts/provider_data_ingester.py
@@ -2,6 +2,7 @@
 import logging
 import traceback
 from abc import ABC, abstractmethod
+from datetime import datetime
 
 from airflow.exceptions import AirflowException
 from airflow.models import Variable
@@ -129,6 +130,12 @@ def __init__(self, conf: dict = None, dag_id: str = None, date: str = None):
         # dag_run configuration options
         conf = conf or {}
 
+        # Allow overriding the date with a %Y-%m-%d string from the dagrun conf.
+        date_override = conf.get("date")
+        if date_override and datetime.strptime(date_override, "%Y-%m-%d"):
+            logger.info(f"Using date {date_override} from dagrun conf.")
+            self.date = date_override
+
         # Used to skip over errors and continue ingestion. When enabled, errors
         # are not reported until ingestion has completed.
         self.skip_ingestion_errors = conf.get("skip_ingestion_errors", False)
diff --git a/tests/dags/providers/provider_api_scripts/test_provider_data_ingester.py b/tests/dags/providers/provider_api_scripts/test_provider_data_ingester.py
index d7c2e573aed..44aaf12270d 100644
--- a/tests/dags/providers/provider_api_scripts/test_provider_data_ingester.py
+++ b/tests/dags/providers/provider_api_scripts/test_provider_data_ingester.py
@@ -88,6 +88,30 @@ def test_batch_limit_is_capped_to_ingestion_limit():
     assert ingester.limit == 20
 
 
+@pytest.mark.parametrize(
+    "date, date_override, expected_date",
+    [
+        # No override
+        ("2022-01-01", None, "2022-01-01"),
+        # Simple override
+        ("2022-01-01", "2022-12-12", "2022-12-12"),
+        # Incorrect date format throws error
+        pytest.param(
+            "2022-01-01",
+            "12/12/22",
+            None,
+            marks=pytest.mark.raises(exception=ValueError),
+        ),
+    ],
+)
+def test_date_override(date, date_override, expected_date):
+    ingester = MockProviderDataIngester(
+        conf={"date": date_override},  # DagRun conf object
+        date=date,
+    )
+    assert ingester.date == expected_date
+
+
 def test_get_batch_data():
     response_json = _get_resource_json("complete_response.json")
     batch = ingester.get_batch_data(response_json)