Add DAG for creating staging indices (#3232)
* Optionally override ES environment in ingestion server
* Revert "Optionally override ES environment in ingestion server" (reverts commit 4840acf)
* Add staging ingestion server connection
* Make ingestion server utilities accept optional http_conn_id
* Make shared task for notifying Slack
* Consider an ingestion server task active if it has active workers. This fixes a bug where a task is considered to be in the "errored" state by the TaskStatus when it schedules some indexer workers and then completes (in this state, the task is no longer alive but progress has not yet reached 100%). By checking whether there are active workers associated with the task id, we can correctly determine whether the task is actually in an errored state.
* Add recreate_full_staging_index DAGs
* Add DAG docs
* Test DAG parsing
* Remove unused constant from previous implementation
* Update DAG tags
* Respect the data_refresh_limit in the reindex task as well as ingest_upstream
* Clarify defaults for data_refresh_limit
* Simplify the params by making target_alias customizable
* Make media_type a DAG param, refactor with TaskFlow
* Prevent staging DB restore and index creation from running simultaneously
* Fix imports to prevent errors when filling the DagBag
* Update DAG docs
* Avoid module name conflicts
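The active-worker fix described above can be sketched as follows. This is a minimal illustration of the state check, assuming a simplified TaskStatus shape; the names and fields here are hypothetical stand-ins, not the ingestion server's actual API.

from dataclasses import dataclass


@dataclass
class TaskStatus:
    # Hypothetical, simplified view of an ingestion server task's status.
    task_alive: bool          # is the original task still running?
    progress: float           # percent complete, 0-100
    has_active_workers: bool  # are indexer workers still active for this task id?

    @property
    def errored(self) -> bool:
        # Before the fix: a task that was no longer alive with progress < 100
        # was always treated as errored, even if it had merely handed work off
        # to indexer workers. Checking for active workers avoids that false
        # positive.
        return (
            not self.task_alive
            and self.progress < 100
            and not self.has_active_workers
        )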
Showing 19 changed files with 457 additions and 71 deletions.
catalog/dags/es/recreate_staging_index/recreate_full_staging_index.py
95 additions, 0 deletions

@@ -0,0 +1,95 @@
from datetime import timedelta

from airflow.decorators import task, task_group
from airflow.exceptions import AirflowSensorTimeout
from airflow.sensors.external_task import ExternalTaskSensor
from airflow.utils.state import State

from common import ingestion_server
from common.sensors.utils import get_most_recent_dag_run
from data_refresh.data_refresh_types import DATA_REFRESH_CONFIGS
from database.staging_database_restore.constants import (
    DAG_ID as STAGING_DB_RESTORE_DAG_ID,
)


DAG_ID = "recreate_full_staging_index"


@task(retries=0)
def prevent_concurrency_with_staging_database_restore(**context):
    wait_for_dag = ExternalTaskSensor(
        task_id="check_for_running_staging_db_restore",
        external_dag_id=STAGING_DB_RESTORE_DAG_ID,
        # Set timeout to 0 to prevent retries. If the staging DB restoration
        # is running, immediately fail the staging index creation DAG.
        timeout=0,
        # Wait for the whole DAG, not just a part of it
        external_task_id=None,
        check_existence=False,
        execution_date_fn=lambda _: get_most_recent_dag_run(STAGING_DB_RESTORE_DAG_ID),
        # Any "finished" state is sufficient for us to continue.
        allowed_states=[State.SUCCESS, State.FAILED],
        mode="reschedule",
    )
    try:
        wait_for_dag.execute(context)
    except AirflowSensorTimeout:
        raise ValueError(
            "Concurrency check failed. Staging index creation cannot start"
            " during staging DB restoration."
        )


@task
def get_target_alias(media_type: str, target_alias_override: str):
    return target_alias_override or f"{media_type}-full"


@task.branch
def should_delete_index(should_delete: bool, old_index: str):
    if should_delete and old_index:
        # We should try to delete the old index only if the param is enabled,
        # and we were able to find an index with the target_alias in the
        # preceding task.
        return "trigger_delete_index"
    # Skip straight to notifying Slack.
    return "notify_complete"


@task_group(group_id="create_index")
def create_index(media_type: str, index_suffix: str) -> None:
    """Create the new elasticsearch index on the staging cluster."""

    # Get the DataRefresh config associated with this media type, in order to
    # get the reindexing timeout information.
    config = DATA_REFRESH_CONFIGS.get(media_type)
    data_refresh_timeout = config.data_refresh_timeout if config else timedelta(days=1)

    ingestion_server.trigger_and_wait_for_task(
        action="REINDEX",
        model=media_type,
        data={"index_suffix": index_suffix},
        timeout=data_refresh_timeout,
        http_conn_id="staging_data_refresh",
    )


@task_group(group_id="point_alias")
def point_alias(media_type: str, target_alias: str, index_suffix: str) -> None:
    """
    Alias the index with the given suffix to the target_alias, first removing
    the target_alias from any other indices to which it is linked.
    """
    point_alias_payload = {
        "alias": target_alias,
        "index_suffix": index_suffix,
    }

    ingestion_server.trigger_and_wait_for_task(
        action="POINT_ALIAS",
        model=media_type,
        data=point_alias_payload,
        timeout=timedelta(hours=12),  # matches the ingestion server's wait time
        http_conn_id="staging_data_refresh",
    )
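The concurrency-check task above relies on get_most_recent_dag_run to point the ExternalTaskSensor at the latest run of the staging DB restore DAG. A plausible sketch of that helper, inferred from how it is used here, is shown below; this is an assumption, not the actual code in common/sensors/utils.py.

# Assumed sketch of get_most_recent_dag_run; the real helper in
# common/sensors/utils.py may differ.
from airflow.models import DagRun


def get_most_recent_dag_run(dag_id: str):
    """
    Return the execution date of the most recent run of the given DAG, or an
    empty list if the DAG has never run (so the sensor has nothing to wait on
    and can proceed immediately).
    """
    dag_runs = DagRun.find(dag_id=dag_id)
    if dag_runs:
        # Newest run first; the sensor will check the state of that run.
        return sorted(
            dag_runs, key=lambda run: run.execution_date, reverse=True
        )[0].execution_date
    return []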