From b3e67252502473ee501ce8026975e801cfb65961 Mon Sep 17 00:00:00 2001 From: Michael Peteuil Date: Fri, 6 May 2022 11:30:41 -0400 Subject: [PATCH] Apply specific ID collation to root_dag_id too In certain databases there is a need to set the collation for ID fields like dag_id or task_id to something different from the database default. This is because in MySQL with utf8mb4 the index size exceeds the MySQL index size limits. This was handled in past pull requests ([#7570](https://github.com/apache/airflow/pull/7570), [#17729](https://github.com/apache/airflow/pull/17729)), but the root_dag_id field on the dag model was missed. Since this field is used to join with the dag_id in various other models ([and self-referentially](https://github.com/apache/airflow/blob/451c7cbc42a83a180c4362693508ed33dd1d1dab/airflow/models/dag.py#L2766)), it also needs to have the same collation as other ID fields. This can be seen by running `airflow db reset` before and after applying this change while also specifying `sql_engine_collation_for_ids` in the configuration. Other related PRs: [#19408](https://github.com/apache/airflow/pull/19408) --- .../versions/0045_b3b105409875_add_root_dag_id_to_dag.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/airflow/migrations/versions/0045_b3b105409875_add_root_dag_id_to_dag.py b/airflow/migrations/versions/0045_b3b105409875_add_root_dag_id_to_dag.py index 3d184e6b9bd1d..f879450369ab8 100644 --- a/airflow/migrations/versions/0045_b3b105409875_add_root_dag_id_to_dag.py +++ b/airflow/migrations/versions/0045_b3b105409875_add_root_dag_id_to_dag.py @@ -27,6 +27,8 @@ import sqlalchemy as sa from alembic import op +from airflow.migrations.db_types import StringID + # revision identifiers, used by Alembic. 
revision = 'b3b105409875' down_revision = 'd38e04c12aa2' @@ -37,7 +39,7 @@ def upgrade(): """Apply Add ``root_dag_id`` to ``DAG``""" - op.add_column('dag', sa.Column('root_dag_id', sa.String(length=250), nullable=True)) + op.add_column('dag', sa.Column('root_dag_id', StringID(), nullable=True)) op.create_index('idx_root_dag_id', 'dag', ['root_dag_id'], unique=False)