Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add serialised data to ci #338

Merged
merged 18 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 2 additions & 17 deletions .github/workflows/icon4py-qa.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,6 @@ jobs:
python -m pip install --upgrade pip setuptools wheel
python -m pip install -r ./requirements-dev.txt
python -m pip list
- name: Run checks in icon4pytools
- name: Run checks
run: |
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice...

pre-commit run --config tools/.pre-commit-config.yaml --all-files
- name: Run checks icon4py-model-common
run: |
pre-commit run --config model/common/.pre-commit-config.yaml --all-files
- name: Run checks icon4py-model-driver
run: |
pre-commit run --config model/driver/.pre-commit-config.yaml --all-files
- name: Run checks icon4py-model-atmosphere-dycore
run: |
pre-commit run --config model/atmosphere/dycore/.pre-commit-config.yaml --all-files
- name: Run checks icon4py-model-atmosphere-diffusion
run: |
pre-commit run --config model/atmosphere/diffusion/.pre-commit-config.yaml --all-files
- name: Run checks icon4py-model-atmosphere-advection
run: |
pre-commit run --config model/atmosphere/advection/.pre-commit-config.yaml --all-files
pre-commit run
44 changes: 44 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
repos:
- repo: local
hooks:
- id: run-common-precommit
name: Run Model Common Pre-commit
entry: pre-commit run --config model/common/.pre-commit-config.yaml --all-files
language: system
pass_filenames: false
always_run: true

- id: run-driver-precommit
name: Run Model Driver Pre-commit
entry: pre-commit run --config model/driver/.pre-commit-config.yaml --all-files
language: system
pass_filenames: false
always_run: true

- id: run-atmosphere-advection-precommit
name: Run Model Atmosphere Advection Pre-commit
entry: pre-commit run --config model/atmosphere/advection/.pre-commit-config.yaml --all-files
language: system
pass_filenames: false
always_run: true

- id: run-atmosphere-diffusion-precommit
name: Run Model Atmosphere Diffusion Pre-commit
entry: pre-commit run --config model/atmosphere/diffusion/.pre-commit-config.yaml --all-files
language: system
pass_filenames: false
always_run: true

- id: run-atmosphere-dycore-precommit
name: Run Model Atmosphere Dycore Pre-commit
entry: pre-commit run --config model/atmosphere/dycore/.pre-commit-config.yaml --all-files
language: system
pass_filenames: false
always_run: true

- id: run-tools-precommit
name: Run Tools Pre-commit
entry: pre-commit run --config tools/.pre-commit-config.yaml --all-files
language: system
pass_filenames: false
always_run: true
29 changes: 16 additions & 13 deletions ci/cscs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@ variables:
- pyversion_no_dot="${PYTHON_VERSION//./}"
- pip install tox clang-format
- python -c "import cupy"
- ls ${SERIALIZED_DATA_PATH}
variables:
SLURM_JOB_NUM_NODES: 1
SLURM_NTASKS: 1
SLURM_TIMELIMIT: '06:00:00'
CRAY_CUDA_MPS: 1
NUM_PROCESSES: auto
VIRTUALENV_SYSTEM_SITE_PACKAGES: 1
CSCS_NEEDED_DATA: icon4py
TEST_DATA_PATH: "/apps/daint/UES/jenkssl/ciext/icon4py"

build_job:
extends: .build_template
Expand All @@ -49,14 +52,14 @@ test_model_job_roundtrip_simple_grid:
extends: .test_template
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just leaving my 2 cents here: All of these jobs could be easily expressed using https://docs.gitlab.com/ee/ci/yaml/#needsparallelmatrix

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will try this in a new PR.

stage: test
script:
- tox -r -c model/ --verbose -- --benchmark-skip -n auto
- tox -r -e run_stencil_tests -c model/ --verbose

test_model_job_dace_cpu_simple_grid:
extends: .test_template
stage: test
script:
- pip install dace==$DACE_VERSION
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-skip -n auto --backend=dace_cpu
- tox -r -e run_stencil_tests -c model/ --verbose -- --backend=dace_cpu
only:
- main
allow_failure: true
Expand All @@ -66,7 +69,7 @@ test_model_job_dace_gpu_simple_grid:
stage: test
script:
- pip install dace==$DACE_VERSION
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-skip -n auto --backend=dace_gpu
- tox -r -e run_stencil_tests -c model/ --verbose -- --backend=dace_gpu
only:
- main
allow_failure: true
Expand All @@ -75,48 +78,48 @@ test_model_job_gtfn_cpu_simple_grid:
extends: .test_template
stage: test
script:
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-skip -n auto --backend=gtfn_cpu
- tox -r -e run_stencil_tests -c model/ --verbose -- --backend=gtfn_cpu

test_model_job_gtfn_gpu_simple_grid:
extends: .test_template
stage: test
script:
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-skip -n auto --backend=gtfn_gpu
- tox -r -e run_stencil_tests -c model/ --verbose -- --backend=gtfn_gpu

test_tools_job:
extends: .test_template
stage: test
script:
- tox -r -c tools/ --verbose

benchmark_model_dace_cpu_simple_grid:
benchmark_model_dace_cpu_icon_grid:
extends: .test_template
stage: benchmark
script:
- pip install dace==$DACE_VERSION
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-only --backend=dace_cpu --grid=simple_grid
- tox -r -e run_benchmarks -c model/ -- --backend=dace_cpu --grid=icon_grid
only:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was wondering: does our cscs-ci run count as manual trigger? I guess so... Also what does the only: -main refer to the target branch of the PR or are these options ignored in our setup since we run it from outside gitlab?

(don't know how it works and currentlyit runs always all of the jobs and the benchmarks take quite long... once we add the datatest that will get worse, response time wise...)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure to be honest since @edopao added these dace jobs, maybe he can explain more. I would assume these benchmarks run only on main but have to be manually triggered, how I am not sure. Currently the dace jobs seem to be not run when using cscs-ci run.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As commented in today's standup meeting, the intention of this setting was to run the dace benchmark on main after PR is merged. However, this setting is ignored in our setup, as also noted above. I agree that we could have a separate CI pipeline for benchmarking, automatically triggered after PR is merged or by a daily job.

- main
when: manual

benchmark_model_dace_gpu_simple_grid:
benchmark_model_dace_gpu_icon_grid:
extends: .test_template
stage: benchmark
script:
- pip install dace==$DACE_VERSION
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-only --backend=dace_gpu --grid=simple_grid
- tox -r -e run_benchmarks -c model/ -- --backend=dace_gpu --grid=icon_grid
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you need this double double-dashes? Or did you simply forget to delete?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes the double dashes are used to denote the end of arguments passed to tox itself, and that any subsequent arguments are to be treated as positional arguments passed to whatever command tox invokes, in this case pytest

only:
- main
when: manual

benchmark_model_gtfn_cpu_simple_grid:
benchmark_model_gtfn_cpu_icon_grid:
extends: .test_template
stage: benchmark
script:
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-only --backend=gtfn_cpu --grid=simple_grid
- tox -r -e run_benchmarks -c model/ -- --backend=gtfn_cpu --grid=icon_grid

benchmark_model_gtfn_gpu_simple_grid:
benchmark_model_gtfn_gpu_icon_grid:
extends: .test_template
stage: benchmark
script:
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-only --backend=gtfn_gpu --grid=simple_grid
- tox -r -e run_benchmarks -c model/ -- --backend=gtfn_gpu --grid=icon_grid
1 change: 1 addition & 0 deletions model/atmosphere/diffusion/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@
)
from icon4py.model.common.test_utils.helpers import ( # noqa : F401 # fixtures from test_utils
backend,
uses_icon_grid_with_otf,
)
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,12 @@ def reference(
return dict(theta_v=theta_v, exner=exner)

@pytest.fixture
def input_data(self, grid):
def input_data(self, grid, uses_icon_grid_with_otf):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have merged the verification of the global (EXCLAIM Aquaplanet) run, that means there is an additional serialized dataset (which for the datatest we would need to upload to the server) but it contains a global grid. Maybe using that one instead of the mch_ch_r04b09 local experiment would solve some of these issues. Lets discuss this...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok sounds good, let's discuss it tomorrow, uploading to the server should be relatively straightforward, Andreas can help us.

if uses_icon_grid_with_otf:
pytest.skip(
"Execution domain needs to be restricted or boundary taken into account in stencil."
)

kh_smag_e = random_field(grid, EdgeDim, KDim)
inv_dual_edge_length = random_field(grid, EdgeDim)
theta_v_in = random_field(grid, CellDim, KDim)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,12 @@ def reference(
return dict(vn=vn)

@pytest.fixture
def input_data(self, grid):
def input_data(self, grid, uses_icon_grid_with_otf):
if uses_icon_grid_with_otf:
pytest.skip(
"Execution domain needs to be restricted or boundary taken into account in stencil."
)

edge = indices_field(EdgeDim, grid, is_halfdim=False, dtype=int32)

u_vert = random_field(grid, VertexDim, KDim)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import numpy as np
import pytest
from gt4py.next.ffront.fbuiltins import int32
from gt4py.next.program_processors.otf_compile_executor import OTFCompileExecutor

from icon4py.model.atmosphere.diffusion.stencils.calculate_nabla2_for_z import (
calculate_nabla2_for_z,
Expand Down Expand Up @@ -55,11 +54,12 @@ def reference(
return dict(z_nabla2_e=z_nabla2_e)

@pytest.fixture
def input_data(self, grid, backend):
if isinstance(backend, OTFCompileExecutor):
def input_data(self, grid, uses_icon_grid_with_otf):
if uses_icon_grid_with_otf:
pytest.skip(
"Execution domain needs to be restricted or boundary taken into account in stencil."
)

kh_smag_e = random_field(grid, EdgeDim, KDim, dtype=vpfloat)
inv_dual_edge_length = random_field(grid, EdgeDim, dtype=wpfloat)
theta_v = random_field(grid, CellDim, KDim, dtype=wpfloat)
Expand Down
1 change: 1 addition & 0 deletions model/atmosphere/dycore/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@
)
from icon4py.model.common.test_utils.helpers import ( # noqa : F401 # fixtures from test_utils
backend,
uses_icon_grid_with_otf,
)
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,12 @@ def reference(
return dict(ddt_vn_apc=ddt_vn_apc)

@pytest.fixture
def input_data(self, grid):
def input_data(self, grid, uses_icon_grid_with_otf):
if uses_icon_grid_with_otf:
pytest.skip(
"Execution domain needs to be restricted or boundary taken into account in stencil."
)

z_kin_hor_e = random_field(grid, EdgeDim, KDim)
coeff_gradekin = random_field(grid, EdgeDim, E2CDim)
coeff_gradekin_new = as_1D_sparse_field(coeff_gradekin, ECDim)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,12 @@ def reference(
)

@pytest.fixture
def input_data(self, grid):
def input_data(self, grid, uses_icon_grid_with_otf):
if uses_icon_grid_with_otf:
pytest.skip(
"Execution domain needs to be restricted or boundary taken into account in stencil."
)

c_intp = random_field(grid, VertexDim, V2CDim)
vn = random_field(grid, EdgeDim, KDim)
rbf_vec_coeff_e = random_field(grid, EdgeDim, E2C2EDim)
Expand Down
2 changes: 0 additions & 2 deletions model/common/src/icon4py/model/common/grid/icon.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
C2E2CDim,
C2E2CODim,
C2EDim,
C2VDim,
CECDim,
CEDim,
CellDim,
Expand Down Expand Up @@ -56,7 +55,6 @@ def __init__(self):
"E2C2V": (self._get_offset_provider, E2C2VDim, EdgeDim, VertexDim),
"V2E": (self._get_offset_provider, V2EDim, VertexDim, EdgeDim),
"V2C": (self._get_offset_provider, V2CDim, VertexDim, CellDim),
"C2V": (self._get_offset_provider, C2VDim, CellDim, VertexDim),
"E2ECV": (self._get_offset_provider_for_sparse_fields, E2C2VDim, EdgeDim, ECVDim),
"C2CEC": (self._get_offset_provider_for_sparse_fields, C2E2CDim, CellDim, CECDim),
"C2CE": (self._get_offset_provider_for_sparse_fields, C2EDim, CellDim, CEDim),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
DATA_URIS_APE,
GLOBAL_EXPERIMENT,
REGIONAL_EXPERIMENT,
SER_DATA_BASEPATH,
SERIALIZED_DATA_PATH,
create_icon_serial_data_provider,
get_datapath_for_experiment,
get_processor_properties_for_run,
Expand All @@ -40,7 +40,7 @@ def processor_props(request):

@pytest.fixture(scope="session")
def ranked_data_path(processor_props):
return get_ranked_data_path(SER_DATA_BASEPATH, processor_props)
return get_ranked_data_path(SERIALIZED_DATA_PATH, processor_props)


@pytest.fixture
Expand Down
23 changes: 17 additions & 6 deletions model/common/src/icon4py/model/common/test_utils/datatest_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,29 @@
# distribution for a copy of the license or check <https://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: GPL-3.0-or-later

import os
from pathlib import Path

from icon4py.model.common.decomposition.definitions import get_processor_properties


TEST_UTILS_PATH = Path(__file__).parent
MODEL_PATH = TEST_UTILS_PATH.parent.parent
COMMON_PATH = MODEL_PATH.parent.parent.parent.parent
BASE_PATH = COMMON_PATH.parent.joinpath("testdata")
DEFAULT_TEST_DATA_FOLDER = "testdata"


def get_test_data_root_path() -> Path:
test_utils_path = Path(__file__).parent
model_path = test_utils_path.parent.parent
common_path = model_path.parent.parent.parent.parent
env_base_path = os.getenv("TEST_DATA_PATH")

if env_base_path:
return Path(env_base_path)
else:
return common_path.parent.joinpath(DEFAULT_TEST_DATA_FOLDER)


TEST_DATA_ROOT = get_test_data_root_path()
SERIALIZED_DATA_PATH = TEST_DATA_ROOT.joinpath("ser_icondata")

# TODO: a run that contains all the fields needed for dycore, diffusion, interpolation fields needs to be consolidated
DATA_URIS = {
Expand All @@ -29,7 +41,6 @@
4: "https://polybox.ethz.ch/index.php/s/UIHOVJs6FVPpz9V/download",
}
DATA_URIS_APE = {1: "https://polybox.ethz.ch/index.php/s/SdlHTlsHCwcn5J5/download"}
SER_DATA_BASEPATH = BASE_PATH.joinpath("ser_icondata")

REGIONAL_EXPERIMENT = "mch_ch_r04b09_dsl"
GLOBAL_EXPERIMENT = "exclaim_ape_R02B04"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from icon4py.model.common.decomposition.definitions import SingleNodeRun
from icon4py.model.common.test_utils.datatest_utils import (
SER_DATA_BASEPATH,
SERIALIZED_DATA_PATH,
create_icon_serial_data_provider,
get_datapath_for_experiment,
get_processor_properties_for_run,
Expand All @@ -25,11 +25,11 @@

def get_icon_grid():
processor_properties = get_processor_properties_for_run(SingleNodeRun())
ranked_path = get_ranked_data_path(SER_DATA_BASEPATH, processor_properties)
ranked_path = get_ranked_data_path(SERIALIZED_DATA_PATH, processor_properties)
data_path = get_datapath_for_experiment(ranked_path)
icon_data_provider = create_icon_serial_data_provider(data_path, processor_properties)
grid_savepoint = icon_data_provider.from_savepoint_grid()
return grid_savepoint.construct_icon_grid(limited_area=True)
return grid_savepoint.construct_icon_grid()


@pytest.fixture
Expand Down
Loading