From 6602431ed46af3117439816bd1ee561f2ee0c507 Mon Sep 17 00:00:00 2001 From: Ryan Rymarczyk Date: Tue, 1 Oct 2024 15:23:50 -0400 Subject: [PATCH] FIX: Only load initial snapshot folders from the archive bucket (#24) ODS-QLIK table loading for EDW.PATRON_TRIP is currently not functional. This is because the latest initial snapshot folder (based on timestamp) is currently located in the data-platform error bucket, while the snapshot folder that the cubic qlik process is populating is in the archive bucket. Having the ods-qlik loader only look for snapshot files in the archive bucket will fix this issue for the EDW.PATRON_TRIP table. However, if an initial snapshot folder is moved to the error bucket and remains in use by the cubic ods-qlik process, then this functionality will again be broken. --- src/cubic_loader/qlik/ods_qlik.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/cubic_loader/qlik/ods_qlik.py b/src/cubic_loader/qlik/ods_qlik.py index 4633368..dca5479 100644 --- a/src/cubic_loader/qlik/ods_qlik.py +++ b/src/cubic_loader/qlik/ods_qlik.py @@ -84,13 +84,12 @@ def status_schema_to_df(status: TableStatus) -> pl.DataFrame: def get_snapshot_dfms(table: str) -> List[DFMDetails]: - """find all available snapshot dfm files for a qlik table from Archive and Error buckets""" + """find all available snapshot dfm files for a qlik table from Archive bucket""" prefix = os.path.join(QLIK, f"{table}/") archive_dfms = s3_list_objects(S3_ARCHIVE, prefix, in_filter=".dfm") - error_dfms = s3_list_objects(S3_ERROR, prefix, in_filter=".dfm") found_snapshots = [] - for dfm in archive_dfms + error_dfms: + for dfm in archive_dfms: found_snapshots.append(DFMDetails(path=dfm, ts=re_get_first(dfm, RE_SNAPSHOT_TS))) assert len(found_snapshots) > 0