Commit 7064e93 (1 parent: ed4bc04)
Showing 185 changed files with 17,994 additions and 1 deletion.
@@ -0,0 +1,37 @@
#!/bin/bash -l

# Description
# This script checks that all tests are included in the matrix of the test step in ci-databricks.yml.
# It is used in the pipeline to ensure that all tests are included in the matrix.
# The script must be invoked with a filter matching the paths NOT included in the matrix.

# $@: (Optional) Can be set to specify a filter for running python tests at the specified path.
echo "Filter (paths): '$@'"

# Exit immediately with failure status if any command fails
set -e

cd source/settlement_report_python/tests/
# Enable extended globbing. E.g. see https://stackoverflow.com/questions/8525437/list-files-not-matching-a-pattern
shopt -s extglob

# The following runs pytest with the --collect-only flag to get the number of tests.
# 'grep' filters the output to the line(s) stating the number of tests collected. Multiple lines can be returned.
# 'awk' extracts the second column, which contains the number of tests.
# 'head' keeps only the first line of the output.
# Example output line returned by the grep filter: 'collected 10 items'
executed_test_count=$(coverage run --branch -m pytest $@ --collect-only | grep collected | awk '{print $2}' | head -n 1)

total_test_count=$(coverage run --branch -m pytest --collect-only | grep collected | awk '{print $2}' | head -n 1)

echo "Number of tests being executed: $executed_test_count"
echo "Total number of pytest tests: $total_test_count"

if [ "$total_test_count" == "$executed_test_count" ]; then
    echo "Not missing any tests."
else
    difference=$((total_test_count - executed_test_count))
    echo "Found $difference tests not executed. A folder is missing in the matrix."
    exit 1
fi
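For illustration, a hypothetical invocation of the script above (the script path and folder names are assumptions; the real values live in ci-databricks.yml, which is not shown in this diff):

    # With extglob enabled, '!(folder_a|folder_b)' matches every tests subfolder
    # except folder_a and folder_b (placeholder names for the matrix entries).
    .github/scripts/check_test_matrix.sh '!(folder_a|folder_b)'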
@@ -0,0 +1,55 @@
{
    "name": "Spark Dev",
    "build": {
        "dockerfile": "../.docker/Dockerfile",
        "args": {}
    },
    "customizations": {
        "vscode": {
            "extensions": [
                "matangover.mypy",
                "ms-python.flake8",
                "ms-dotnettools.dotnet-interactive-vscode",
                "ms-python.python",
                "ms-python.black-formatter",
                "littlefoxteam.vscode-python-test-adapter",
                "hbenl.vscode-test-explorer",
                "eamodio.gitlens",
                "ms-python.vscode-pylance",
                "HashiCorp.terraform",
                "christian-kohler.path-intellisense",
                "Gruntfuggly.todo-tree",
                "DavidAnson.vscode-markdownlint",
                "kevinglasson.cornflakes-linter",
                "KevinRose.vsc-python-indent",
                "sonarsource.sonarlint-vscode"
            ],
            // Set *default* container specific settings.json values on container create.
            "settings": {
                "terminal.integrated.shell.linux": "/bin/bash",
                "editor.formatOnSave": false,
                "[python]": {
                    "editor.formatOnSave": true
                },
                "python.formatting.provider": "black",
                "python.defaultInterpreterPath": "/opt/conda/bin/python",
                "python.languageServer": "Pylance",
                "markdownlint.config": {
                    "MD007": {
                        "indent": 4
                    }
                }
            }
        }
    },
    "containerEnv": {
        "GRANT_SUDO": "yes"
    },
    "forwardPorts": [
        5568
    ],
    "appPort": [
        "5568:5050"
    ],
    "containerUser": "root"
}
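As a sketch, the same definition can also be started outside VS Code with the Dev Containers CLI (assuming this file is .devcontainer/devcontainer.json, which the CLI picks up by convention, and that the @devcontainers/cli npm package is installed):

    # Build the image and start the dev container from the repository root.
    devcontainer up --workspace-folder .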
@@ -0,0 +1,15 @@
services:
  python-unit-test:
    image: ghcr.io/energinet-datahub/geh-settlement-report/python-unit-test:${IMAGE_TAG:-latest}
    volumes:
      # Forwards the local Docker socket to the container.
      - /var/run/docker.sock:/var/run/docker-host.sock
      # Update this to wherever you want VS Code to mount the folder of your project.
      - ..:/workspaces/geh-settlement-report:cached
      # Map to the Azure CLI token cache location (on Windows).
      - "${USERPROFILE}/.azure:/home/joyvan/.azure"
    environment:
      # Pass the environment variables from your shell straight through to your containers.
      # No warning is issued if the variable in the shell environment is not set.
      # See https://docs.docker.com/compose/environment-variables/set-environment-variables/#additional-information-1
      - AZURE_KEYVAULT_URL
@@ -0,0 +1,15 @@
services:
  python-unit-test:
    image: ghcr.io/energinet-datahub/geh-settlement-report/python-unit-test:${IMAGE_TAG:-latest}
    volumes:
      # Forwards the local Docker socket to the container.
      - /var/run/docker.sock:/var/run/docker-host.sock
      # Update this to wherever you want VS Code to mount the folder of your project.
      - ..:/workspaces/geh-settlement-report:cached
      # Map to the Azure CLI token cache location (on Linux).
      - "${HOME}/.azure:/home/joyvan/.azure"
    environment:
      # Pass the environment variables from your shell straight through to your containers.
      # No warning is issued if the variable in the shell environment is not set.
      # See https://docs.docker.com/compose/environment-variables/set-environment-variables/#additional-information-1
      - AZURE_KEYVAULT_URL
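The two compose files above differ only in where the Azure CLI token cache is mounted from (USERPROFILE on Windows, HOME on Linux). A minimal sketch of running the unit-test service (the compose file name is an assumption; it is not visible in this diff):

    # AZURE_KEYVAULT_URL is forwarded into the container because it is listed
    # without a value under 'environment:'; IMAGE_TAG falls back to 'latest'.
    export AZURE_KEYVAULT_URL="https://example-vault.vault.azure.net/"  # placeholder
    docker compose -f docker-compose.yml run --rm python-unit-test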
@@ -0,0 +1,38 @@
# This is a pip 'requirements.txt' file
# See https://pip.pypa.io/en/stable/reference/requirements-file-format/

#
# PYTHON TOOLS
#
black
build
coverage-threshold
flake8
mypy
pyspelling
pytest-xdist

#
# CODE DEPENDENCIES
# - Make sure any packages specified in setup.py are pinned to the same version here
#
databricks-cli==0.18
dataclasses-json==0.6.7
delta-spark==3.2.0
pyspark==3.5.1
dependency_injector==4.43.0
azure-identity==1.17.1
azure-keyvault-secrets==4.8.0
azure-monitor-opentelemetry==1.6.4
azure-core==1.32.0
azure-monitor-query==1.4.0
opengeh-spark-sql-migrations @ git+https://[email protected]/Energinet-DataHub/[email protected]#subdirectory=source/spark_sql_migrations
python-dateutil==2.8.2
types-python-dateutil==2.9.0.20241003
opengeh-telemetry @ git+https://[email protected]/Energinet-DataHub/[email protected]#subdirectory=source/telemetry

coverage==7.6.8
pytest==8.3.3
configargparse==1.7.0
pytest-mock==3.14.0
virtualenv==20.24.2
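To reproduce this environment locally, the file installs as usual (a sketch; it assumes the working directory contains the file, whose name is not shown in this diff):

    python -m pip install -r requirements.txt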
@@ -0,0 +1,56 @@
# Copyright 2020 Energinet DataHub A/S
#
# Licensed under the Apache License, Version 2.0 (the "License2");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The Spark version should follow the Spark version in Databricks.
# The Databricks version of Spark is controlled from dh3-infrastructure and uses the latest LTS (ATTOW - Spark v3.5.0).
# The pyspark-slim version should match the pyspark version in requirements.txt.
FROM ghcr.io/energinet-datahub/pyspark-slim:3.5.1-5

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

USER root

RUN apt-get update; \
    # Install git as it is needed by Spark
    apt-get install --no-install-recommends -y git; \
    # curl is temporarily installed in order to download the Azure CLI (consider a multi-stage build instead)
    apt-get install --no-install-recommends -y curl; \
    # Install the Azure CLI, see https://learn.microsoft.com/en-us/cli/azure/install-azure-cli-linux?pivots=apt,
    # as it is needed by integration tests
    curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash; \
    # Clean up the apt cache to reduce image size
    apt-get remove -y curl; \
    rm -rf /var/lib/apt/lists/*

# This replaces the default Spark configuration in the Docker image with the one defined in the sibling file.
COPY spark-defaults.conf $SPARK_HOME/conf/

# Install python packages used in pyspark development (keep Spark-dependent packages aligned).
# The delta-spark version has to be compatible with the Spark version (https://docs.delta.io/latest/releases.html),
# for example: delta 2.2.x = spark 3.3.x.
COPY requirements.txt requirements.txt
RUN pip --no-cache-dir install -r requirements.txt

# Set misc environment variables required to properly run Spark.
# Note: the amount of memory used on the driver is adjusted here.
ENV PATH=$SPARK_HOME/bin:$HADOOP_HOME/bin:$PATH \
    PYTHONPATH="${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip" \
    SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info"

# Dynamically download the Spark dependencies declared in conf/spark-defaults.conf. Doing this at image build
# time saves time in the build pipeline, so that we don't need to download them on every build.
RUN spark-shell

# Make $HOME owned by root, which is the user used in the container.
# This is needed for e.g. commands that create files or folders in $HOME.
RUN sudo chown -R root:users $HOME
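A sketch of building the image locally, assuming the Dockerfile sits in a .docker/ folder alongside requirements.txt and spark-defaults.conf (as the "../.docker/Dockerfile" reference in devcontainer.json suggests; the tag is a placeholder):

    # The build context must contain requirements.txt and spark-defaults.conf,
    # because the Dockerfile COPYs them by relative path.
    docker build -t python-unit-test:local .docker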
@@ -0,0 +1,44 @@
#!/bin/bash -l

# Copyright 2020 Energinet DataHub A/S
#
# Licensed under the Apache License, Version 2.0 (the "License2");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# $1: Mandatory test folder path
# $2: (Optional) Can be set to specify a filter for running python tests by using 'keyword expressions'.
#     See the use of '-k' and 'keyword expressions' here: https://docs.pytest.org/en/7.4.x/how-to/usage.html#specifying-which-tests-to-run
echo "Tests folder path: '$1'"
echo "Filter (keyword expression): '$2'"

# Configure the Azure CLI to use the token cache, which must be mapped as a volume from the host machine.
export AZURE_CONFIG_DIR=/home/joyvan/.azure

# These env vars are important to ensure that the driver and worker nodes in Spark are aligned.
export PYSPARK_PYTHON=/opt/conda/bin/python
export PYSPARK_DRIVER_PYTHON=/opt/conda/bin/python

# Exit immediately with failure status if any command fails
set -e

# Enable extended globbing. E.g. see https://stackoverflow.com/questions/8525437/list-files-not-matching-a-pattern
shopt -s extglob

cd "$1"
coverage run --branch -m pytest -vv --junitxml=pytest-results.xml $2

# Create data for threshold evaluation
coverage json
# Create a human-friendly HTML report
coverage html

coverage-threshold --line-coverage-min 25
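A hypothetical invocation of the script above (the script name is an assumption). Note that $2 is expanded unquoted, so a value such as "-k smoke" word-splits into the two arguments pytest expects:

    # Run only the tests matching the keyword expression 'smoke' in the given folder.
    ./run_tests.sh source/settlement_report_python/tests "-k smoke"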
@@ -0,0 +1,38 @@
# This is a pip 'requirements.txt' file
# See https://pip.pypa.io/en/stable/reference/requirements-file-format/

#
# PYTHON TOOLS
#
black
build
coverage-threshold
flake8
mypy
pyspelling
pytest-xdist

#
# CODE DEPENDENCIES
# - Make sure any packages specified in setup.py are pinned to the same version here
#
databricks-cli==0.18
dataclasses-json==0.6.7
delta-spark==3.2.0
pyspark==3.5.1
dependency_injector==4.43.0
azure-identity==1.17.1
azure-keyvault-secrets==4.8.0
azure-monitor-opentelemetry==1.6.4
azure-core==1.32.0
azure-monitor-query==1.4.0
opengeh-spark-sql-migrations @ git+https://[email protected]/Energinet-DataHub/[email protected]#subdirectory=source/spark_sql_migrations
python-dateutil==2.8.2
types-python-dateutil==2.9.0.20241003
opengeh-telemetry @ git+https://[email protected]/Energinet-DataHub/[email protected]#subdirectory=source/telemetry

coverage==7.6.8
pytest==8.3.3
configargparse==1.7.0
pytest-mock==3.14.0
virtualenv==20.24.2
@@ -0,0 +1,15 @@
# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Example:
# spark.master                     spark://master:7077
# spark.eventLog.enabled           true
# spark.eventLog.dir               hdfs://namenode:8021/directory
# spark.serializer                 org.apache.spark.serializer.KryoSerializer
# spark.driver.memory              16g
# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"

# Must match the delta-spark version pinned in requirements.txt
# (from Delta 3.x the Maven artifact is named delta-spark instead of delta-core).
spark.jars.packages io.delta:delta-spark_2.12:3.2.0

# spark.hadoop.fs.AbstractFileSystem.abfss.impl org.apache.hadoop.fs.azurebfs.Abfss
# spark.hadoop.fs.abfss.impl org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem
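For context, any default in this file can also be overridden per job at submit time (a sketch; job.py is a placeholder script name):

    # --conf overrides a single property from spark-defaults.conf for this run only.
    spark-submit --conf spark.driver.memory=4g job.py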