Skip to content

Commit

Permalink
Increase batch size for image data refresh alter step (#5145)
Browse files (browse the repository at this point in the history)
  • Loading branch information
krysal authored Nov 5, 2024
1 parent 62d1ebf commit c50abaa
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions catalog/dags/data_refresh/data_refresh_types.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,6 +10,8 @@
from datetime import datetime, timedelta
from typing import Optional

+from airflow.models import Variable
+
from common.constants import (
AUDIO,
DATA_REFRESH_ALTER_BATCH_SIZE,
Expand Down Expand Up @@ -137,8 +139,10 @@ def table_mappings(self) -> list[TableMapping]:
copy_data_timeout=timedelta(hours=12),
add_primary_key_timeout=timedelta(hours=12),
# Larger batches for image data refresh to avoid overloading XCOMs
-alter_data_batch_size=int(
-os.getenv("DATA_REFRESH_ALTER_BATCH_SIZE", 1_000_000)
+alter_data_batch_size=Variable.get(
+"IMAGE_DATA_REFRESH_ALTER_BATCH_SIZE",
+default_var=DATA_REFRESH_ALTER_BATCH_SIZE,
+deserialize_json=True,
),
indexer_worker_timeout=timedelta(days=1),
concurrency_check_poke_interval=int(
Expand Down

0 comments on commit c50abaa

Please sign in to comment.