From d212775ecc5356286ffc939cdeb4eb3ad9069b58 Mon Sep 17 00:00:00 2001 From: "Svend (SLB)" Date: Wed, 18 Dec 2024 15:52:32 +0100 Subject: [PATCH] Updated with remaining tables --- .../202412021300_migrate_dh2_calculations.sql | 214 +++++++++++++++--- 1 file changed, 182 insertions(+), 32 deletions(-) diff --git a/source/databricks/calculation_engine/package/datamigration/manual_migration_scripts/202412021300_migrate_dh2_calculations.sql b/source/databricks/calculation_engine/package/datamigration/manual_migration_scripts/202412021300_migrate_dh2_calculations.sql index e5af886ef8..91b27a97c0 100644 --- a/source/databricks/calculation_engine/package/datamigration/manual_migration_scripts/202412021300_migrate_dh2_calculations.sql +++ b/source/databricks/calculation_engine/package/datamigration/manual_migration_scripts/202412021300_migrate_dh2_calculations.sql @@ -15,6 +15,7 @@ -- * calculation_grid_areas -- + -- STEP 0: -- Verify the input CREATE OR REPLACE TEMP VIEW calc_ids_from_dh2_view AS @@ -32,9 +33,9 @@ FROM ( SELECT DISTINCT calculation_id FROM ctl_shres_t_we_001.shared_wholesale_input.calculation_grid_areas_view_v1 ) AS calc_ids -LEFT JOIN ctl_shres_t_we_001.shared_wholesale_input.calculations_view_v1 AS calc_view +FULL OUTER JOIN ctl_shres_t_we_001.shared_wholesale_input.calculations_view_v1 AS calc_view ON calc_ids.calculation_id = calc_view.calculation_id -WHERE calc_view.calculation_id IS NULL; +WHERE calc_view.calculation_id IS NULL or calc_ids.calculation_id IS NULL; -- Check the count and throw an error if the count is greater than 0 WITH no_unparanted_calculations_check AS ( @@ -46,6 +47,8 @@ SELECT CASE END AS UnparentedCalculationIdsCheck FROM no_unparanted_calculations_check; + + -- STEP 1: Delete existing rows across Wholesale's domain MERGE INTO ctl_shres_t_we_001.wholesale_results_internal.energy e1 USING ctl_shres_t_we_001.wholesale_internal.calculations c @@ -67,10 +70,14 @@ USING ctl_shres_t_we_001.wholesale_internal.calculations c ON c.calculation_id <=> g1.calculation_id and c.calculation_version_dh2 is not null WHEN MATCHED THEN DELETE; + + -- STEP 2: Remove the DH2 calculations from the main table DELETE FROM ctl_shres_t_we_001.wholesale_internal.calculations WHERE calculation_version_dh2 is not null; + + -- STEP 3: Re-migrate each of the tables with calculations from DH2. -- TODO: Replace "0" with whatever version is given by VOLT later. INSERT INTO ctl_shres_t_we_001.wholesale_internal.calculations @@ -98,40 +105,76 @@ SELECT FROM ctl_shres_t_we_001.shared_wholesale_input.calculations_view_v1; --- Result ID for the energy-tables should be unique per: --- table_name, calculation_id, grid_area_code, balance_responsible_party_id, energy_supplier_id, time_series_type --- To make it unique per table, each one's result_id follows a different formula: --- [time_series_type's first 8 characters]-[grid_area_code]-Null-Null-[calculation_id's final 10 characters]P1 + +-- Result ID for the tables we are migrating is faking a MD5 hash based on the same group-by columns used for the UUID. +-- For the energy tables it is calculation_id, grid_area_code, from_grid_area_code, balance_responsible_party_id, energy_supplier_id and time_series_type. +-- If a view is missing one of them, we mark it as NULL. + +-- Target table: wholesale_results_internal.energy +with energy_view_with_hash AS ( + select + *, + md5( + CONCAT( + CASE WHEN calculation_id IS NULL THEN 'null' ELSE calculation_id END, + CASE WHEN grid_area_code IS NULL THEN 'null' ELSE grid_area_code END, + CASE WHEN from_grid_area_code IS NULL THEN 'null' ELSE from_grid_area_code END, + CASE WHEN balance_responsible_party_id IS NULL THEN 'null' ELSE balance_responsible_party_id END, + CASE WHEN energy_supplier_id IS NULL THEN 'null' ELSE energy_supplier_id END, + CASE WHEN time_series_type IS NULL THEN 'null' ELSE time_series_type END, + 'energy_per_brp' + ) + ) as md5_hash_of_result_id_group + from + ctl_shres_t_we_001.shared_wholesale_input.calculation_results_energy_per_brp_view_v1 +) INSERT INTO ctl_shres_t_we_001.wholesale_results_internal.energy SELECT calculation_id, CONCAT( - SUBSTRING(time_series_type, 1, 8), '-', - LPAD(grid_area_code, 4, '0'), '-', - 'Null', '-', - 'Null', '-', - SUBSTRING(calculation_id, -10), 'P1' - ) as result_id, + SUBSTRING(md5_hash_of_result_id_group, 1, 8), '-', + SUBSTRING(md5_hash_of_result_id_group, 9, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 13, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 17, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 21, 12) + ) as result_id, grid_area_code, time_series_type, resolution, time, quantity, quantity_qualities -FROM ctl_shres_t_we_001.shared_wholesale_input.calculation_results_energy_view_v1; +FROM energy_view_with_hash; + --- ResultID follows: --- [balance_responsible_party_id's last 8 characters]-[grid_area_code]-[time_series_type's first 4 characters]-[time_series_type's last 4 characters]-[calculation_id's final 10 characters]P2 +-- Target table: wholesale_results_internal.energy_per_brp +with energy_per_brp_view_with_hash AS ( + select + *, + md5( + CONCAT( + CASE WHEN calculation_id IS NULL THEN 'null' ELSE calculation_id END, + CASE WHEN grid_area_code IS NULL THEN 'null' ELSE grid_area_code END, + 'null', -- from_grid_area_code + CASE WHEN balance_responsible_party_id IS NULL THEN 'null' ELSE balance_responsible_party_id END, + 'null', -- energy_supplier_id + CASE WHEN time_series_type IS NULL THEN 'null' ELSE time_series_type END, + 'energy_per_brp' + ) + ) as md5_hash_of_result_id_group + from + ctl_shres_t_we_001.shared_wholesale_input.calculation_results_energy_per_brp_view_v1 +) INSERT INTO ctl_shres_t_we_001.wholesale_results_internal.energy_per_brp SELECT calculation_id, CONCAT( - SUBSTRING(balance_responsible_party_id, -8), '-', - LPAD(grid_area_code, 4, '0'), '-', - SUBSTRING(time_series_type, 1, 4), '-', - SUBSTRING(time_series_type, -4), '-', - SUBSTRING(calculation_id, -8), 'Ver2' - ) as result_id, + SUBSTRING(md5_hash_of_result_id_group, 1, 8), '-', + SUBSTRING(md5_hash_of_result_id_group, 9, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 13, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 17, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 21, 12) + ) as result_id, grid_area_code, balance_responsible_party_id, time_series_type, @@ -139,20 +182,37 @@ SELECT time, quantity, quantity_qualities -FROM ctl_shres_t_we_001.shared_wholesale_input.calculation_results_energy_per_brp_view_v1; +FROM energy_per_brp_view_with_hash; --- ResultID follows: --- [balance_responsible_party_id's last 8 characters]-[grid_area_code]-[time_series_type's first 4 characters]-[energy_supplier's last 4 characters]-[calculation_id's final 10 characters]P2 + +-- Target table: wholesale_results_internal.energy_per_es +with energy_per_es_view_with_hash AS ( + select + *, + md5( + CONCAT( + CASE WHEN calculation_id IS NULL THEN 'null' ELSE calculation_id END, + CASE WHEN grid_area_code IS NULL THEN 'null' ELSE grid_area_code END, + 'null', -- from_grid_area_code + CASE WHEN balance_responsible_party_id IS NULL THEN 'null' ELSE balance_responsible_party_id END, + CASE WHEN energy_supplier_id IS NULL THEN 'null' ELSE energy_supplier_id END, + CASE WHEN time_series_type IS NULL THEN 'null' ELSE time_series_type END, + 'energy_per_es' + ) + ) as md5_hash_of_result_id_group + from + ctl_shres_t_we_001.shared_wholesale_input.calculation_results_energy_per_es_view_v1 +) INSERT INTO ctl_shres_t_we_001.wholesale_results_internal.energy_per_es SELECT calculation_id, CONCAT( - SUBSTRING(balance_responsible_party_id, -8), '-', - LPAD(grid_area_code, 4, '0'), '-', - SUBSTRING(time_series_type, 1, 4), '-', - SUBSTRING(energy_supplier_id, -4), '-', - SUBSTRING(calculation_id, -8), 'Ver2' - ) as result_id, + SUBSTRING(md5_hash_of_result_id_group, 1, 8), '-', + SUBSTRING(md5_hash_of_result_id_group, 9, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 13, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 17, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 21, 12) + ) as result_id, grid_area_code, energy_supplier_id, balance_responsible_party_id, @@ -161,8 +221,98 @@ SELECT time, quantity, quantity_qualities -FROM ctl_shres_t_we_001.shared_wholesale_input.calculation_results_energy_per_es_view_v1; +FROM energy_per_es_view_with_hash; +-- Target table: wholesale_internal.calculation_grid_areas INSERT INTO ctl_shres_t_we_001.wholesale_internal.calculation_grid_areas -SELECT calculation_id, grid_area_code FROM ctl_shres_t_we_001.shared_wholesale_input.calculation_grid_areas_view_v1; \ No newline at end of file +SELECT calculation_id, grid_area_code FROM ctl_shres_t_we_001.shared_wholesale_input.calculation_grid_areas_view_v1; + + +-- Target table: wholesale_results_internal.amounts_per_charge +with amounts_per_charge_view_with_hash AS ( + select + *, + md5(CONCAT(calculation_id,resolution,charge_type,CASE WHEN charge_owner_id IS NULL THEN 'null' ELSE charge_owner_id END,grid_area_code,energy_supplier_id,metering_point_type,CASE WHEN settlement_method IS NULL THEN 'null' ELSE settlement_method END,'amounts_per_charge')) as md5_hash_of_result_id_group + from + ctl_shres_t_we_001.shared_wholesale_input.amounts_per_charge_view_v1 +) +INSERT INTO ctl_shres_t_we_001.wholesale_results_internal.amounts_per_charge +SELECT + calculation_id, + CONCAT( + SUBSTRING(md5_hash_of_result_id_group, 1, 8), '-', + SUBSTRING(md5_hash_of_result_id_group, 9, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 13, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 17, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 21, 12) + ) as result_id, + energy_supplier_id, + quantity_unit, + time, + amount, + is_tax, + charge_code, + charge_type, + charge_owner_id +from + amounts_per_charge_view_with_hash + where md5_hash_of_result_id_group is null; + + +-- Target table: wholesale_results_internal.monthly_amounts_per_charge +with monthly_amounts_per_charge_view_with_hash AS ( + select + *, + md5(CONCAT(calculation_id,charge_type,charge_code, CASE WHEN charge_owner_id IS NULL THEN 'null' ELSE charge_owner_id END, grid_area_code, energy_supplier_id, 'monthly_amounts_per_charge')) as md5_hash_of_result_id_group + from + ctl_shres_t_we_001.shared_wholesale_input.monthly_amounts_per_charge_view_v1 +) +INSERT INTO ctl_shres_t_we_001.wholesale_results_internal.monthly_amounts_per_charge +SELECT + calculation_id, + CONCAT( + SUBSTRING(md5_hash_of_result_id_group, 1, 8), '-', + SUBSTRING(md5_hash_of_result_id_group, 9, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 13, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 17, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 21, 12) + ) as result_id, + grid_area_code, + energy_supplier_id, + quantity_unit, + time, + amount, + is_tax, + charge_code, + charge_type, + charge_owner_id +from + monthly_amounts_per_charge_view_with_hash; + + +-- Target table: wholesale_results_internal.monthly_amounts_per_charge +with total_amounts_per_charge_view_with_hash AS ( + select + *, + md5(CONCAT(calculation_id, CASE WHEN charge_owner_id IS NULL THEN 'null' ELSE charge_owner_id END, grid_area_code, energy_supplier_id, 'total_amounts_per_charge')) as md5_hash_of_result_id_group + from + ctl_shres_t_we_001.shared_wholesale_input.total_amounts_per_charge_view_v1 +) +INSERT INTO ctl_shres_t_we_001.wholesale_results_internal.monthly_amounts_per_charge +SELECT + calculation_id, + CONCAT( + SUBSTRING(md5_hash_of_result_id_group, 1, 8), '-', + SUBSTRING(md5_hash_of_result_id_group, 9, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 13, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 17, 4), '-', + SUBSTRING(md5_hash_of_result_id_group, 21, 12) + ) as result_id, + grid_area_code, + energy_supplier_id, + time, + amount, + charge_owner_id +from + total_amounts_per_charge_view_with_hash; \ No newline at end of file