Skip to content

Commit

Permalink
Performance speedup using Exists
Browse files Browse the repository at this point in the history
  • Loading branch information
slb-halfspace committed Dec 3, 2024
1 parent 4e64fdd commit 62fda34
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,10 @@ def write_calculation(
)

# And since the combination with DH2 calculations requires the identity column to decide the calculation_version, we also
execute_spark_sql_in_retry_loop(
spark,
METADATA_CHANGED_RETRIES,
spark.sql(
f"UPDATE {infrastructure_settings.catalog_name}.{WholesaleInternalDatabase.DATABASE_NAME}.{WholesaleInternalDatabase.CALCULATIONS_TABLE_NAME}"
f"SET {TableColumnNames.calculation_version} = CASE WHEN {TableColumnNames.calculation_version_dh2} IS NOT NONE THEN 0 ELSE {TableColumnNames.calculation_version_dh3} END"
f"WHERE {TableColumnNames.calculation_version} IS NULL",
table_targeted_by_query,
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,20 @@
--

-- STEP 1: Delete existing rows across Wholesale's domain
DELETE FROM {CATALOG_NAME}.{WHOLESALE_RESULTS_INTERNAL_DATABASE_NAME}.energy
WHERE calculation_id in (SELECT calculation_id FROM {CATALOG_NAME}.{WHOLESALE_INTERNAL_DATABASE_NAME}.calculations WHERE calculation_version_dh2 is not null)
DELETE FROM {CATALOG_NAME}.{WHOLESALE_RESULTS_INTERNAL_DATABASE_NAME}.energy as e1
WHERE EXISTS (SELECT calculation_id FROM {CATALOG_NAME}.{WHOLESALE_INTERNAL_DATABASE_NAME}.calculations WHERE e1.calculation_id = calculation_id and calculation_version = 0)
GO

DELETE FROM {CATALOG_NAME}.{WHOLESALE_RESULTS_INTERNAL_DATABASE_NAME}.energy_per_brp
WHERE calculation_id in (SELECT calculation_id FROM {CATALOG_NAME}.{WHOLESALE_INTERNAL_DATABASE_NAME}.calculations WHERE calculation_version_dh2 is not null)
DELETE FROM {CATALOG_NAME}.{WHOLESALE_RESULTS_INTERNAL_DATABASE_NAME}.energy_per_b as e2
WHERE EXISTS (SELECT calculation_id FROM {CATALOG_NAME}.{WHOLESALE_INTERNAL_DATABASE_NAME}.calculations WHERE e2.calculation_id = calculation_id and calculation_version = 0)
GO

DELETE FROM {CATALOG_NAME}.{WHOLESALE_RESULTS_INTERNAL_DATABASE_NAME}.energy_per_es
WHERE calculation_id in (SELECT calculation_id FROM {CATALOG_NAME}.{WHOLESALE_INTERNAL_DATABASE_NAME}.calculations WHERE calculation_version_dh2 is not null)
DELETE FROM {CATALOG_NAME}.{WHOLESALE_RESULTS_INTERNAL_DATABASE_NAME}.energy_per_es as e3
WHERE EXISTS (SELECT calculation_id FROM {CATALOG_NAME}.{WHOLESALE_INTERNAL_DATABASE_NAME}.calculations WHERE e3.calculation_id = calculation_id and calculation_version = 0)
GO

DELETE FROM {CATALOG_NAME}.{WHOLESALE_INTERNAL_DATABASE_NAME}.calculation_grid_areas
WHERE calculation_id in (SELECT calculation_id FROM {CATALOG_NAME}.{WHOLESALE_INTERNAL_DATABASE_NAME}.calculations WHERE calculation_version_dh2 is not null)
DELETE FROM {CATALOG_NAME}.{WHOLESALE_INTERNAL_DATABASE_NAME}.calculation_grid_areas as g1
WHERE EXISTS (SELECT calculation_id FROM {CATALOG_NAME}.{WHOLESALE_INTERNAL_DATABASE_NAME}.calculations WHERE g1.calculation_id = calculation_id and calculation_version = 0)
GO

-- STEP 2: Remove the DH2 calculations from the main table
Expand Down

0 comments on commit 62fda34

Please sign in to comment.