Skip to content

Commit

Permalink
Merge branch 'main' into trore/remove_unused_folder
Browse files Browse the repository at this point in the history
  • Loading branch information
energinet-ajw authored Jan 24, 2025
2 parents 6541f4d + 655e7d4 commit 3c64601
Show file tree
Hide file tree
Showing 15 changed files with 36 additions and 80 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-orchestrator.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ jobs:
- name: Create release matrix
shell: python
if: ${{ steps.list_contents.outputs.any_artifact == 'true' }}
if: ${{ steps.list_artifacts.outputs.any_artifact == 'true' }}
id: create_matrix
run: |
from pathlib import Path
Expand Down
6 changes: 3 additions & 3 deletions source/electrical_heating/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,6 @@ exclude = [
convention = "google"

[tool.uv.sources]
opengeh-telemetry = { git = "https://github.com/Energinet-DataHub/opengeh-python-packages", subdirectory = "source/telemetry", rev = "3.0.1" }
opengeh-testcommon = { git = "https://github.com/Energinet-DataHub/opengeh-python-packages", subdirectory = "source/testcommon", rev = "3.0.1" }
opengeh-pyspark = { git = "https://github.com/Energinet-DataHub/opengeh-python-packages", subdirectory = "source/pyspark_functions", rev = "3.1.0" }
opengeh-telemetry = { git = "https://github.com/Energinet-DataHub/opengeh-python-packages", subdirectory = "source/telemetry", rev = "3.1.1" }
opengeh-testcommon = { git = "https://github.com/Energinet-DataHub/opengeh-python-packages", subdirectory = "source/testcommon", rev = "3.1.1" }
opengeh-pyspark = { git = "https://github.com/Energinet-DataHub/opengeh-python-packages", subdirectory = "source/pyspark_functions", rev = "3.1.1" }
13 changes: 6 additions & 7 deletions source/electrical_heating/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
from pathlib import Path
from typing import Generator

import pyspark.sql.functions as F
import pytest
from delta import configure_spark_with_delta_pip
from pyspark.sql import SparkSession
from pyspark.sql.types import ArrayType
from telemetry_logging.logging_configuration import configure_logging
from testcommon.delta_lake import create_database, create_table

from electrical_heating.infrastructure.measurements_bronze.database_definitions import (
MeasurementsBronzeDatabase,
Expand All @@ -17,13 +16,11 @@
)
from tests import PROJECT_ROOT
from tests.testsession_configuration import TestSessionConfiguration
from tests.utils.measurements_utils import create_measurements_dataframe
from tests.utils.delta_table_utils import (
create_database,
create_delta_table,
read_from_csv,
write_dataframe_to_table,
)
from tests.utils.measurements_utils import create_measurements_dataframe


@pytest.fixture(scope="module", autouse=True)
Expand Down Expand Up @@ -86,10 +83,12 @@ def test_files_folder_path(tests_path: str) -> str:


@pytest.fixture(scope="session")
def create_measurements_delta_table(spark: SparkSession, test_files_folder_path: str) -> None:
def create_measurements_delta_table(
spark: SparkSession, test_files_folder_path: str
) -> None:
create_database(spark, MeasurementsBronzeDatabase.DATABASE_NAME)

create_delta_table(
create_table(
spark,
database_name=MeasurementsBronzeDatabase.DATABASE_NAME,
table_name=MeasurementsBronzeDatabase.MEASUREMENTS_NAME,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
metering_point_id;has_electrical_heating;net_settlement_group;settlement_month;period_from_date;period_to_date;#comment
170000000000000201;true;;1;2023-11-30T23:00:00Z;;Two periods because crossing years. Period 1: December 2023 (31 days). Period 2: Jan. 2024 until today (full limit)
170000000000000202;true;;1;2023-11-21T23:00:00Z;2024-02-29T23:00:00Z;Two periods because crossing years. Period 1: November 21st to end of year 2023 (9+31 days). Period 2: Jan-feb 2024 (31+29 days)
170000000000000203;true;;1;2024-02-29T23:00:00Z;2024-03-01T23:00:00Z;One period of one day.
170000000000000204;true;;1;2024-02-29T23:00:00Z;;One period of 10 months - Mar-Dec 2024 (306 days)
170000000000000201;true;2;1;2023-11-30T23:00:00Z;;Two periods because crossing years. Period 1: December 2023 (31 days). Period 2: Jan. 2024 until today (full limit)
170000000000000202;true;2;1;2023-11-21T23:00:00Z;2024-02-29T23:00:00Z;Two periods because crossing years. Period 1: November 21st to end of year 2023 (9+31 days). Period 2: Jan-feb 2024 (31+29 days)
170000000000000203;true;2;1;2024-02-29T23:00:00Z;2024-03-01T23:00:00Z;One period of one day.
170000000000000204;true;2;1;2024-02-29T23:00:00Z;;One period of 10 months - Mar-Dec 2024 (306 days)
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
metering_point_id;has_electrical_heating;net_settlement_group;settlement_month;period_from_date;period_to_date;#comment
170000000000000201;true;;1;2023-12-31T23:00:00Z;2024-01-03T23:00:00Z;Previously period_to_date was null here. Now there is a new period: Limit = 3/366*4000 = 32.786
170000000000000201;true;;1;2024-01-03T23:00:00Z;;Limit for the rest of the year: 363/366*4000 = 3967.213
170000000000000201;true;2;1;2023-12-31T23:00:00Z;2024-01-03T23:00:00Z;Previously period_to_date was null here. Now there is a new period: Limit = 3/366*4000 = 32.786
170000000000000201;true;2;1;2024-01-03T23:00:00Z;;Limit for the rest of the year: 363/366*4000 = 3967.213
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
metering_point_id;has_electrical_heating;net_settlement_group;settlement_month;period_from_date;period_to_date;#comment
170000000000000201;true;;1;2023-12-31T23:00:00Z;;
170000000000000201;true;2;1;2023-12-31T23:00:00Z;;
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
metering_point_id;has_electrical_heating;net_settlement_group;settlement_month;period_from_date;period_to_date;#comment
170000000000000201;true;;1;2023-12-31T23:00:00Z;;Generates output because there is consumption data and an electrical heating child metering point
170000000000000201;true;2;1;2023-12-31T23:00:00Z;;Generates output because there is consumption data and an electrical heating child metering point
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
metering_point_id;has_electrical_heating;net_settlement_group;settlement_month;period_from_date;period_to_date;#comment
170000000000000201;true;;1;2023-12-31T23:00:00Z;2024-02-29T23:00:00Z;Case 1: Period 1 (Jan-Feb). Limit: 60/366*4000 = 655.738
170000000000000201;true;;1;2024-04-30T22:00:00Z;2024-07-31T22:00:00Z;Case 1: Period 2 (May-Jul). Limit: 92/366*4000 = 1005.464
170000000000000201;true;;1;2024-10-31T23:00:00Z;;Case 1: Period 3 (Nov-Dec). Limit: 61/366*4000 = 666.667
170000000000000201;true;2;1;2023-12-31T23:00:00Z;2024-02-29T23:00:00Z;Case 1: Period 1 (Jan-Feb). Limit: 60/366*4000 = 655.738
170000000000000201;true;2;1;2024-04-30T22:00:00Z;2024-07-31T22:00:00Z;Case 1: Period 2 (May-Jul). Limit: 92/366*4000 = 1005.464
170000000000000201;true;2;1;2024-10-31T23:00:00Z;;Case 1: Period 3 (Nov-Dec). Limit: 61/366*4000 = 666.667
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
metering_point_id;has_electrical_heating;net_settlement_group;settlement_month;period_from_date;period_to_date;#comment
170000000000000201;true;;1;2023-12-31T23:00:00Z;;
170000000000000202;true;;1;2023-12-31T23:00:00Z;;
170000000000000203;true;;1;2023-12-31T23:00:00Z;;
170000000000000201;true;2;1;2023-12-31T23:00:00Z;;
170000000000000202;true;2;1;2023-12-31T23:00:00Z;;
170000000000000203;true;2;1;2023-12-31T23:00:00Z;;
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
metering_point_id;has_electrical_heating;net_settlement_group;settlement_month;period_from_date;period_to_date;#comment
170000000000000201;true;;1;2023-12-31T23:00:00Z;;Case 1
170000000000000201;true;2;1;2023-12-31T23:00:00Z;;Case 1
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
metering_point_id;has_electrical_heating;net_settlement_group;settlement_month;period_from_date;period_to_date;#comment
170000000000000201;true;;1;2023-12-31T23:00:00Z;;EH consumption MP with no end date for has_electrical_heating
170000000000000202;true;;1;2023-12-31T23:00:00Z;;EH consumption MP without child MP
170000000000000201;true;2;1;2023-12-31T23:00:00Z;;EH consumption MP with no end date for has_electrical_heating
170000000000000202;true;2;1;2023-12-31T23:00:00Z;;EH consumption MP without child MP
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
metering_point_id;has_electrical_heating;net_settlement_group;settlement_month;period_from_date;period_to_date;#comment
170000000000000201;true;;1;2023-12-31T23:00:00Z;;Generates output because there is consumption data and an electrical heating child metering point
170000000000000201;true;2;1;2023-12-31T23:00:00Z;;Generates output because there is consumption data and an electrical heating child metering point
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
metering_point_id;has_electrical_heating;net_settlement_group;settlement_month;period_from_date;period_to_date;#comment
170000000000000201;true;;1;2023-12-31T23:00:00Z;;Generates output because there is consumption data and an electrical heating child metering point
170000000000000201;true;2;1;2023-12-31T23:00:00Z;;Generates output because there is consumption data and an electrical heating child metering point
43 changes: 0 additions & 43 deletions source/electrical_heating/tests/utils/delta_table_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
# TODO AJW: This is a copy of the function from the wholesale codebase.
# This should be moved to a shared location when the time comes.

import os
import shutil

from pyspark.sql import DataFrame, SparkSession
from pyspark.sql.types import StructType


def read_from_csv(
Expand All @@ -20,43 +17,3 @@ def write_dataframe_to_table(
df: DataFrame, database_name: str, table_name: str, mode: str = "overwrite"
) -> None:
df.write.format("delta").mode(mode).saveAsTable(f"{database_name}.{table_name}")


def create_delta_table(
spark: SparkSession,
database_name: str,
table_name: str,
table_location: str,
schema: StructType,
) -> None:
if os.path.exists(table_location) and os.listdir(table_location):
# Clear the location if it is not empty
shutil.rmtree(table_location)

sql_schema = _struct_type_to_sql_schema(schema)
spark.sql(
f"CREATE TABLE IF NOT EXISTS {database_name}.{table_name} ({sql_schema}) USING DELTA LOCATION '{table_location}'"
)

print(f"Created Delta table {database_name}.{table_name} at {table_location}.") # noqa: T201


def create_database(spark: SparkSession, database_name: str) -> None:
spark.sql(f"CREATE DATABASE IF NOT EXISTS {database_name}")
print(f"Created database {database_name}.") # noqa: T201


def _struct_type_to_sql_schema(schema: StructType) -> str:
schema_string = ""
for field in schema.fields:
field_name = field.name
field_type = field.dataType.simpleString()

if not field.nullable:
field_type += " NOT NULL"

schema_string += f"{field_name} {field_type}, "

# Remove the trailing comma and space
schema_string = schema_string.rstrip(", ")
return schema_string
14 changes: 7 additions & 7 deletions source/electrical_heating/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 3c64601

Please sign in to comment.