From 81318dedbccf5c039b7bd03f79a850fc24fa1acc Mon Sep 17 00:00:00 2001 From: samkellerhals Date: Wed, 22 Mar 2023 11:14:33 +0100 Subject: [PATCH 01/21] Refactor liskov to accept multiple codegen backends --- liskov/src/icon4py/liskov/codegen/common.py | 78 +++++++++++++++++++ .../liskov/codegen/integration/__init__.py | 12 +++ .../codegen/{ => integration}/exceptions.py | 0 .../codegen/{ => integration}/generate.py | 56 +++---------- .../codegen/{ => integration}/interface.py | 8 +- .../{f90.py => integration/template.py} | 34 +------- .../liskov/codegen/serialisation/__init__.py | 12 +++ .../liskov/codegen/serialisation/generate.py | 14 ++++ .../liskov/codegen/serialisation/interface.py | 14 ++++ .../liskov/codegen/serialisation/template.py | 14 ++++ liskov/src/icon4py/liskov/codegen/types.py | 29 +++++++ liskov/src/icon4py/liskov/codegen/write.py | 4 +- liskov/src/icon4py/liskov/external/gt4py.py | 2 +- .../src/icon4py/liskov/parsing/deserialise.py | 4 +- liskov/src/icon4py/liskov/pipeline.py | 8 +- liskov/tests/test_deserialiser.py | 2 +- liskov/tests/test_external.py | 2 +- liskov/tests/test_generation.py | 4 +- liskov/tests/test_writer.py | 13 ++-- 19 files changed, 207 insertions(+), 103 deletions(-) create mode 100644 liskov/src/icon4py/liskov/codegen/common.py create mode 100644 liskov/src/icon4py/liskov/codegen/integration/__init__.py rename liskov/src/icon4py/liskov/codegen/{ => integration}/exceptions.py (100%) rename liskov/src/icon4py/liskov/codegen/{ => integration}/generate.py (85%) rename liskov/src/icon4py/liskov/codegen/{ => integration}/interface.py (97%) rename liskov/src/icon4py/liskov/codegen/{f90.py => integration/template.py} (90%) create mode 100644 liskov/src/icon4py/liskov/codegen/serialisation/__init__.py create mode 100644 liskov/src/icon4py/liskov/codegen/serialisation/generate.py create mode 100644 liskov/src/icon4py/liskov/codegen/serialisation/interface.py create mode 100644 
liskov/src/icon4py/liskov/codegen/serialisation/template.py create mode 100644 liskov/src/icon4py/liskov/codegen/types.py diff --git a/liskov/src/icon4py/liskov/codegen/common.py b/liskov/src/icon4py/liskov/codegen/common.py new file mode 100644 index 0000000000..d74cb2c4d1 --- /dev/null +++ b/liskov/src/icon4py/liskov/codegen/common.py @@ -0,0 +1,78 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +import abc +from typing import Any, Optional, Sequence, Type + +import gt4py.eve as eve +from gt4py.eve.codegen import TemplatedGenerator + +from icon4py.bindings.utils import format_fortran_code +from icon4py.liskov.codegen.types import CodeGenInput, GeneratedCode +from icon4py.liskov.common import Step + + +class CodeGenerator(Step): + def __init__(self) -> None: + self.generated: list[GeneratedCode] = [] + + @abc.abstractmethod + def __call__(self, data: Any) -> list[GeneratedCode]: + ... + + @staticmethod + def _generate_fortran_code( + parent_node: Type[eve.Node], + code_generator: Type[TemplatedGenerator], + **kwargs: CodeGenInput | Sequence[CodeGenInput] | Optional[bool], + ) -> str: + """ + Generate Fortran code for the given parent node and code generator. + + Args: + parent_node: A subclass of eve.Node that represents the parent node. + code_generator: A subclass of TemplatedGenerator that will be used + to generate the code. + **kwargs: Arguments to be passed to the parent node constructor. 
+ This can be a single CodeGenInput value, a sequence of CodeGenInput + values, or a boolean value, which is required by some parent nodes which + require a profile argument. + + Returns: + A string containing the formatted Fortran code. + """ + parent = parent_node(**kwargs) + source = code_generator.apply(parent) + formatted_source = format_fortran_code(source) + return formatted_source + + def _generate( + self, + parent_node: Type[eve.Node], + code_generator: Type[TemplatedGenerator], + startln: int, + endln: int, + **kwargs: CodeGenInput | Sequence[CodeGenInput] | Optional[bool] | Any, + ) -> None: + """Add a GeneratedCode object to the `generated` attribute with the given source code and line number information. + + Args: + parent_node: The parent node of the code to be generated. + code_generator: The code generator to use for generating the code. + startln: The start line number of the generated code. + endln: The end line number of the generated code. + **kwargs: Additional keyword arguments to be passed to the code generator. + """ + source = self._generate_fortran_code(parent_node, code_generator, **kwargs) + code = GeneratedCode(source=source, startln=startln, endln=endln) + self.generated.append(code) diff --git a/liskov/src/icon4py/liskov/codegen/integration/__init__.py b/liskov/src/icon4py/liskov/codegen/integration/__init__.py new file mode 100644 index 0000000000..15dfdb0098 --- /dev/null +++ b/liskov/src/icon4py/liskov/codegen/integration/__init__.py @@ -0,0 +1,12 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later diff --git a/liskov/src/icon4py/liskov/codegen/exceptions.py b/liskov/src/icon4py/liskov/codegen/integration/exceptions.py similarity index 100% rename from liskov/src/icon4py/liskov/codegen/exceptions.py rename to liskov/src/icon4py/liskov/codegen/integration/exceptions.py diff --git a/liskov/src/icon4py/liskov/codegen/generate.py b/liskov/src/icon4py/liskov/codegen/integration/generate.py similarity index 85% rename from liskov/src/icon4py/liskov/codegen/generate.py rename to liskov/src/icon4py/liskov/codegen/integration/generate.py index 005f947a1d..0341a7a7b3 100644 --- a/liskov/src/icon4py/liskov/codegen/generate.py +++ b/liskov/src/icon4py/liskov/codegen/integration/generate.py @@ -11,14 +11,15 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -from dataclasses import dataclass -from typing import Optional, Sequence, Type - -import gt4py.eve as eve -from gt4py.eve.codegen import TemplatedGenerator from typing_extensions import Any -from icon4py.liskov.codegen.f90 import ( +from icon4py.liskov.codegen.common import CodeGenerator +from icon4py.liskov.codegen.integration.interface import ( + DeserialisedDirectives, + StartStencilData, + UnusedDirective, +) +from icon4py.liskov.codegen.integration.template import ( DeclareStatement, DeclareStatementGenerator, EndCreateStatement, @@ -41,15 +42,8 @@ StartProfileStatementGenerator, StartStencilStatement, StartStencilStatementGenerator, - generate_fortran_code, -) -from icon4py.liskov.codegen.interface import ( - CodeGenInput, - DeserialisedDirectives, - StartStencilData, - UnusedDirective, ) -from icon4py.liskov.common import Step +from icon4py.liskov.codegen.types import GeneratedCode from icon4py.liskov.external.metadata import CodeMetadata from icon4py.liskov.logger import setup_logger @@ -57,25 +51,16 @@ logger = setup_logger(__name__) -@dataclass -class GeneratedCode: - """A class for storing generated f90 code and its line number information.""" - - source: str - 
startln: int - endln: int - - -class IntegrationGenerator(Step): +class IntegrationGenerator(CodeGenerator): def __init__( self, directives: DeserialisedDirectives, profile: bool, metadata_gen: bool, ): + super().__init__() self.profile = profile self.directives = directives - self.generated: list[GeneratedCode] = [] self.metadata_gen = metadata_gen def __call__(self, data: Any = None) -> list[GeneratedCode]: @@ -95,27 +80,6 @@ def __call__(self, data: Any = None) -> list[GeneratedCode]: self._generate_insert() return self.generated - def _generate( - self, - parent_node: Type[eve.Node], - code_generator: Type[TemplatedGenerator], - startln: int, - endln: int, - **kwargs: CodeGenInput | Sequence[CodeGenInput] | Optional[bool] | Any, - ) -> None: - """Add a GeneratedCode object to the `generated` attribute with the given source code and line number information. - - Args: - parent_node: The parent node of the code to be generated. - code_generator: The code generator to use for generating the code. - startln: The start line number of the generated code. - endln: The end line number of the generated code. - **kwargs: Additional keyword arguments to be passed to the code generator. 
- """ - source = generate_fortran_code(parent_node, code_generator, **kwargs) - code = GeneratedCode(source=source, startln=startln, endln=endln) - self.generated.append(code) - def _generate_metadata(self) -> None: """Generate metadata about the current liskov execution.""" if self.metadata_gen: diff --git a/liskov/src/icon4py/liskov/codegen/interface.py b/liskov/src/icon4py/liskov/codegen/integration/interface.py similarity index 97% rename from liskov/src/icon4py/liskov/codegen/interface.py rename to liskov/src/icon4py/liskov/codegen/integration/interface.py index dfa879f0cd..e7f5931854 100644 --- a/liskov/src/icon4py/liskov/codegen/interface.py +++ b/liskov/src/icon4py/liskov/codegen/integration/interface.py @@ -14,17 +14,13 @@ from dataclasses import dataclass from typing import Optional, Sequence +from icon4py.liskov.codegen.types import CodeGenInput + class UnusedDirective: ... -@dataclass -class CodeGenInput: - startln: int - endln: int - - @dataclass class BoundsData: hlower: str diff --git a/liskov/src/icon4py/liskov/codegen/f90.py b/liskov/src/icon4py/liskov/codegen/integration/template.py similarity index 90% rename from liskov/src/icon4py/liskov/codegen/f90.py rename to liskov/src/icon4py/liskov/codegen/integration/template.py index 5e8430d203..a172777510 100644 --- a/liskov/src/icon4py/liskov/codegen/f90.py +++ b/liskov/src/icon4py/liskov/codegen/integration/template.py @@ -13,16 +13,14 @@ import re from dataclasses import asdict -from typing import Optional, Sequence, Type +from typing import Optional import gt4py.eve as eve from gt4py.eve.codegen import JinjaTemplate as as_jinja from gt4py.eve.codegen import TemplatedGenerator -from icon4py.bindings.utils import format_fortran_code -from icon4py.liskov.codegen.exceptions import UndeclaredFieldError -from icon4py.liskov.codegen.interface import ( - CodeGenInput, +from icon4py.liskov.codegen.integration.exceptions import UndeclaredFieldError +from icon4py.liskov.codegen.integration.interface import ( 
DeclareData, StartStencilData, ) @@ -33,32 +31,6 @@ def enclose_in_parentheses(string: str) -> str: return f"({string})" -def generate_fortran_code( - parent_node: Type[eve.Node], - code_generator: Type[TemplatedGenerator], - **kwargs: CodeGenInput | Sequence[CodeGenInput] | Optional[bool], -) -> str: - """ - Generate Fortran code for the given parent node and code generator. - - Args: - parent_node: A subclass of eve.Node that represents the parent node. - code_generator: A subclass of TemplatedGenerator that will be used - to generate the code. - **kwargs: Arguments to be passed to the parent node constructor. - This can be a single CodeGenInput value, a sequence of CodeGenInput - values, or a boolean value, which is required by some parent nodes which - require a profile argument. - - Returns: - A string containing the formatted Fortran code. - """ - parent = parent_node(**kwargs) - source = code_generator.apply(parent) - formatted_source = format_fortran_code(source) - return formatted_source - - class BoundsFields(eve.Node): vlower: str vupper: str diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/__init__.py b/liskov/src/icon4py/liskov/codegen/serialisation/__init__.py new file mode 100644 index 0000000000..15dfdb0098 --- /dev/null +++ b/liskov/src/icon4py/liskov/codegen/serialisation/__init__.py @@ -0,0 +1,12 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/generate.py b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py new file mode 100644 index 0000000000..ba043c55e8 --- /dev/null +++ b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py @@ -0,0 +1,14 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +# todo: add SerialisationGenerator diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py new file mode 100644 index 0000000000..58780979a3 --- /dev/null +++ b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py @@ -0,0 +1,14 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later + +# todo: add serialisation nodes diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/template.py b/liskov/src/icon4py/liskov/codegen/serialisation/template.py new file mode 100644 index 0000000000..4834f3a672 --- /dev/null +++ b/liskov/src/icon4py/liskov/codegen/serialisation/template.py @@ -0,0 +1,14 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +# todo: add serialisation code generation diff --git a/liskov/src/icon4py/liskov/codegen/types.py b/liskov/src/icon4py/liskov/codegen/types.py new file mode 100644 index 0000000000..7b1b36b7d0 --- /dev/null +++ b/liskov/src/icon4py/liskov/codegen/types.py @@ -0,0 +1,29 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later + +from dataclasses import dataclass + + +@dataclass +class GeneratedCode: + """A class for storing generated f90 code and its line number information.""" + + source: str + startln: int + endln: int + + +@dataclass +class CodeGenInput: + startln: int + endln: int diff --git a/liskov/src/icon4py/liskov/codegen/write.py b/liskov/src/icon4py/liskov/codegen/write.py index 6ce3567510..ec2da30dfe 100644 --- a/liskov/src/icon4py/liskov/codegen/write.py +++ b/liskov/src/icon4py/liskov/codegen/write.py @@ -13,7 +13,7 @@ from pathlib import Path from typing import List -from icon4py.liskov.codegen.generate import GeneratedCode +from icon4py.liskov.codegen.types import GeneratedCode from icon4py.liskov.common import Step from icon4py.liskov.logger import setup_logger from icon4py.liskov.parsing.types import DIRECTIVE_IDENT @@ -22,7 +22,7 @@ logger = setup_logger(__name__) -class IntegrationWriter(Step): +class CodegenWriter(Step): def __init__(self, input_filepath: Path, output_filepath: Path) -> None: """Initialize an IntegrationWriter instance with a list of generated code. 
diff --git a/liskov/src/icon4py/liskov/external/gt4py.py b/liskov/src/icon4py/liskov/external/gt4py.py index bc9654692d..95db4e6b6e 100644 --- a/liskov/src/icon4py/liskov/external/gt4py.py +++ b/liskov/src/icon4py/liskov/external/gt4py.py @@ -17,7 +17,7 @@ from gt4py.next.ffront.decorator import Program from typing_extensions import Any -from icon4py.liskov.codegen.interface import DeserialisedDirectives +from icon4py.liskov.codegen.integration.interface import DeserialisedDirectives from icon4py.liskov.common import Step from icon4py.liskov.external.exceptions import ( IncompatibleFieldError, diff --git a/liskov/src/icon4py/liskov/parsing/deserialise.py b/liskov/src/icon4py/liskov/parsing/deserialise.py index 57f20a666f..a4cc0a0b11 100644 --- a/liskov/src/icon4py/liskov/parsing/deserialise.py +++ b/liskov/src/icon4py/liskov/parsing/deserialise.py @@ -15,9 +15,8 @@ from typing import Any, Callable, Optional, Protocol, Type import icon4py.liskov.parsing.types as ts -from icon4py.liskov.codegen.interface import ( +from icon4py.liskov.codegen.integration.interface import ( BoundsData, - CodeGenInput, DeclareData, DeserialisedDirectives, EndCreateData, @@ -32,6 +31,7 @@ StartStencilData, UnusedDirective, ) +from icon4py.liskov.codegen.types import CodeGenInput from icon4py.liskov.common import Step from icon4py.liskov.logger import setup_logger from icon4py.liskov.parsing.exceptions import ( diff --git a/liskov/src/icon4py/liskov/pipeline.py b/liskov/src/icon4py/liskov/pipeline.py index d452ce6f73..f8f43d2745 100644 --- a/liskov/src/icon4py/liskov/pipeline.py +++ b/liskov/src/icon4py/liskov/pipeline.py @@ -13,9 +13,9 @@ from pathlib import Path -from icon4py.liskov.codegen.generate import IntegrationGenerator -from icon4py.liskov.codegen.interface import DeserialisedDirectives -from icon4py.liskov.codegen.write import IntegrationWriter +from icon4py.liskov.codegen.integration.generate import IntegrationGenerator +from icon4py.liskov.codegen.integration.interface import 
DeserialisedDirectives +from icon4py.liskov.codegen.write import CodegenWriter from icon4py.liskov.common import Step, linear_pipeline from icon4py.liskov.external.gt4py import UpdateFieldsWithGt4PyStencils from icon4py.liskov.parsing.deserialise import DirectiveDeserialiser @@ -83,5 +83,5 @@ def run_code_generation( """ return [ IntegrationGenerator(parsed, profile, metadatagen), - IntegrationWriter(input_filepath, output_filepath), + CodegenWriter(input_filepath, output_filepath), ] diff --git a/liskov/tests/test_deserialiser.py b/liskov/tests/test_deserialiser.py index f32c4e2c04..57fa10d3e0 100644 --- a/liskov/tests/test_deserialiser.py +++ b/liskov/tests/test_deserialiser.py @@ -16,7 +16,7 @@ import pytest import icon4py.liskov.parsing.types as ts -from icon4py.liskov.codegen.interface import ( +from icon4py.liskov.codegen.integration.interface import ( BoundsData, DeclareData, EndCreateData, diff --git a/liskov/tests/test_external.py b/liskov/tests/test_external.py index c97afed2d5..12a77fe8f0 100644 --- a/liskov/tests/test_external.py +++ b/liskov/tests/test_external.py @@ -17,7 +17,7 @@ import pytest from gt4py.next.ffront.decorator import Program -from icon4py.liskov.codegen.interface import ( +from icon4py.liskov.codegen.integration.interface import ( DeserialisedDirectives, FieldAssociationData, StartStencilData, diff --git a/liskov/tests/test_generation.py b/liskov/tests/test_generation.py index a83f66b8e5..77efbe1118 100644 --- a/liskov/tests/test_generation.py +++ b/liskov/tests/test_generation.py @@ -13,8 +13,8 @@ import pytest -from icon4py.liskov.codegen.generate import IntegrationGenerator -from icon4py.liskov.codegen.interface import ( +from icon4py.liskov.codegen.integration.generate import IntegrationGenerator +from icon4py.liskov.codegen.integration.interface import ( BoundsData, DeclareData, DeserialisedDirectives, diff --git a/liskov/tests/test_writer.py b/liskov/tests/test_writer.py index 2a91077910..0389250eac 100644 --- 
a/liskov/tests/test_writer.py +++ b/liskov/tests/test_writer.py @@ -14,8 +14,8 @@ from pathlib import Path from tempfile import TemporaryDirectory -from icon4py.liskov.codegen.generate import GeneratedCode -from icon4py.liskov.codegen.write import DIRECTIVE_IDENT, IntegrationWriter +from icon4py.liskov.codegen.types import GeneratedCode +from icon4py.liskov.codegen.write import DIRECTIVE_IDENT, CodegenWriter def test_write_from(): @@ -29,7 +29,7 @@ def test_write_from(): # create an instance of IntegrationWriter and write generated code generated = [GeneratedCode("generated code", 1, 3)] - integration_writer = IntegrationWriter(input_filepath, output_filepath) + integration_writer = CodegenWriter(input_filepath, output_filepath) integration_writer(generated) # check that the generated code was inserted into the file @@ -49,7 +49,7 @@ def test_remove_directives(): "!$DSL another directive", ] expected_output = ["some code", "another line"] - assert IntegrationWriter._remove_directives(current_file) == expected_output + assert CodegenWriter._remove_directives(current_file) == expected_output def test_insert_generated_code(): @@ -65,8 +65,7 @@ def test_insert_generated_code(): "generated code2\n", ] assert ( - IntegrationWriter._insert_generated_code(current_file, generated) - == expected_output + CodegenWriter._insert_generated_code(current_file, generated) == expected_output ) @@ -77,7 +76,7 @@ def test_write_file(): output_filepath = input_filepath.with_suffix(".gen") generated_code = ["some code", "another line"] - writer = IntegrationWriter(input_filepath, output_filepath) + writer = CodegenWriter(input_filepath, output_filepath) writer._write_file(generated_code) # check that the generated code was written to the file From 43c7803fa371cdf0c59b676244c65172a2366068 Mon Sep 17 00:00:00 2001 From: Samuel Date: Mon, 27 Mar 2023 12:59:09 +0200 Subject: [PATCH 02/21] Parse granule using f2py (#176) * Parse granule using f2py * Revert icon4pygen rename * Parse derived 
types (collect type info in dependencies) * Fully parse variables and inject line numbers * Improve parsing tests --- .../liskov/codegen/integration/generate.py | 2 +- liskov/src/icon4py/liskov/codegen/types.py | 15 +- liskov/src/icon4py/liskov/external/gt4py.py | 2 +- .../src/icon4py/liskov/external/metadata.py | 2 +- liskov/src/icon4py/liskov/py.typed | 0 liskov/tests/test_writer.py | 6 +- .../bindings/codegen/render/location.py | 1 - .../src/icon4py/serialisation/__init__.py | 0 pyutils/src/icon4py/serialisation/cli.py | 12 + .../src/icon4py/serialisation/exceptions.py | 20 + .../src/icon4py/serialisation/interface.py | 47 + pyutils/src/icon4py/serialisation/parse.py | 235 ++ .../tests/samples/derived_types_example.f90 | 150 ++ pyutils/tests/samples/granule_example.f90 | 1985 +++++++++++++++++ pyutils/tests/samples/subroutine_example.f90 | 29 + pyutils/tests/test_parsing.py | 76 + 16 files changed, 2567 insertions(+), 15 deletions(-) create mode 100644 liskov/src/icon4py/liskov/py.typed rename liskov/src/icon4py/liskov/py.typed.py => pyutils/src/icon4py/serialisation/__init__.py (100%) create mode 100644 pyutils/src/icon4py/serialisation/cli.py create mode 100644 pyutils/src/icon4py/serialisation/exceptions.py create mode 100644 pyutils/src/icon4py/serialisation/interface.py create mode 100644 pyutils/src/icon4py/serialisation/parse.py create mode 100644 pyutils/tests/samples/derived_types_example.f90 create mode 100644 pyutils/tests/samples/granule_example.f90 create mode 100644 pyutils/tests/samples/subroutine_example.f90 create mode 100644 pyutils/tests/test_parsing.py diff --git a/liskov/src/icon4py/liskov/codegen/integration/generate.py b/liskov/src/icon4py/liskov/codegen/integration/generate.py index 0341a7a7b3..a4ffd3acbf 100644 --- a/liskov/src/icon4py/liskov/codegen/integration/generate.py +++ b/liskov/src/icon4py/liskov/codegen/integration/generate.py @@ -11,7 +11,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -from typing_extensions import Any 
+from typing import Any from icon4py.liskov.codegen.common import CodeGenerator from icon4py.liskov.codegen.integration.interface import ( diff --git a/liskov/src/icon4py/liskov/codegen/types.py b/liskov/src/icon4py/liskov/codegen/types.py index 7b1b36b7d0..0ea604719f 100644 --- a/liskov/src/icon4py/liskov/codegen/types.py +++ b/liskov/src/icon4py/liskov/codegen/types.py @@ -12,18 +12,17 @@ # SPDX-License-Identifier: GPL-3.0-or-later from dataclasses import dataclass +from typing import Optional @dataclass -class GeneratedCode: - """A class for storing generated f90 code and its line number information.""" - - source: str +class CodeGenInput: startln: int - endln: int + endln: Optional[int] @dataclass -class CodeGenInput: - startln: int - endln: int +class GeneratedCode(CodeGenInput): + """A class for storing generated f90 code and its line number information.""" + + source: str diff --git a/liskov/src/icon4py/liskov/external/gt4py.py b/liskov/src/icon4py/liskov/external/gt4py.py index 95db4e6b6e..9636c1dd27 100644 --- a/liskov/src/icon4py/liskov/external/gt4py.py +++ b/liskov/src/icon4py/liskov/external/gt4py.py @@ -13,9 +13,9 @@ import importlib from inspect import getmembers +from typing import Any from gt4py.next.ffront.decorator import Program -from typing_extensions import Any from icon4py.liskov.codegen.integration.interface import DeserialisedDirectives from icon4py.liskov.common import Step diff --git a/liskov/src/icon4py/liskov/external/metadata.py b/liskov/src/icon4py/liskov/external/metadata.py index 6171d38fea..c47fedcc3b 100644 --- a/liskov/src/icon4py/liskov/external/metadata.py +++ b/liskov/src/icon4py/liskov/external/metadata.py @@ -13,9 +13,9 @@ import datetime import subprocess from pathlib import Path +from typing import Any import click -from typing_extensions import Any from icon4py.liskov.external.exceptions import ( MissingClickContextError, diff --git a/liskov/src/icon4py/liskov/py.typed b/liskov/src/icon4py/liskov/py.typed new file mode 
100644 index 0000000000..e69de29bb2 diff --git a/liskov/tests/test_writer.py b/liskov/tests/test_writer.py index 0389250eac..9d8a0fa03a 100644 --- a/liskov/tests/test_writer.py +++ b/liskov/tests/test_writer.py @@ -28,7 +28,7 @@ def test_write_from(): f.write("!$DSL\n some code\n another line") # create an instance of IntegrationWriter and write generated code - generated = [GeneratedCode("generated code", 1, 3)] + generated = [GeneratedCode(1, 3, "generated code")] integration_writer = CodegenWriter(input_filepath, output_filepath) integration_writer(generated) @@ -55,8 +55,8 @@ def test_remove_directives(): def test_insert_generated_code(): current_file = ["some code", "another line"] generated = [ - GeneratedCode("generated code2", 5, 6), - GeneratedCode("generated code1", 1, 3), + GeneratedCode(5, 6, "generated code2"), + GeneratedCode(1, 3, "generated code1"), ] expected_output = [ "some code", diff --git a/pyutils/src/icon4py/bindings/codegen/render/location.py b/pyutils/src/icon4py/bindings/codegen/render/location.py index e75e5fd2da..0c3d720ddb 100644 --- a/pyutils/src/icon4py/bindings/codegen/render/location.py +++ b/pyutils/src/icon4py/bindings/codegen/render/location.py @@ -15,7 +15,6 @@ class LocationRenderer: - type_dispatcher = {"Cell": "Cells", "Edge": "Edges", "Vertex": "Vertices"} @classmethod diff --git a/liskov/src/icon4py/liskov/py.typed.py b/pyutils/src/icon4py/serialisation/__init__.py similarity index 100% rename from liskov/src/icon4py/liskov/py.typed.py rename to pyutils/src/icon4py/serialisation/__init__.py diff --git a/pyutils/src/icon4py/serialisation/cli.py b/pyutils/src/icon4py/serialisation/cli.py new file mode 100644 index 0000000000..15dfdb0098 --- /dev/null +++ b/pyutils/src/icon4py/serialisation/cli.py @@ -0,0 +1,12 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. 
+# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later diff --git a/pyutils/src/icon4py/serialisation/exceptions.py b/pyutils/src/icon4py/serialisation/exceptions.py new file mode 100644 index 0000000000..318edf9dde --- /dev/null +++ b/pyutils/src/icon4py/serialisation/exceptions.py @@ -0,0 +1,20 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + + +class MissingDerivedTypeError(Exception): + ... + + +class ParsingError(Exception): + ... diff --git a/pyutils/src/icon4py/serialisation/interface.py b/pyutils/src/icon4py/serialisation/interface.py new file mode 100644 index 0000000000..cc11e78c2a --- /dev/null +++ b/pyutils/src/icon4py/serialisation/interface.py @@ -0,0 +1,47 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later + +from dataclasses import dataclass +from typing import Optional + +from icon4py.liskov.codegen.types import CodeGenInput + + +@dataclass +class Metadata(CodeGenInput): + key: str + value: str + + +@dataclass +class InitData(CodeGenInput): + directory_path: str + + +@dataclass +class FieldSerializationData(CodeGenInput): + variable: str + association: str + + +@dataclass +class SavepointData(CodeGenInput): + name: str + fields: list[FieldSerializationData] + metadata: Optional[list[Metadata]] + + +@dataclass +class SerialisationInterface: + init: InitData + savepoint: list[SavepointData] diff --git a/pyutils/src/icon4py/serialisation/parse.py b/pyutils/src/icon4py/serialisation/parse.py new file mode 100644 index 0000000000..8216c4eb9e --- /dev/null +++ b/pyutils/src/icon4py/serialisation/parse.py @@ -0,0 +1,235 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
def crack(path: Path) -> dict:
    """Parse a Fortran source file with ``crackfortran`` and return its first block.

    Args:
        path: Path of the Fortran source file to parse.

    Returns:
        The first element of the list returned by ``crackfortran``.
    """
    return crackfortran(path)[0]


class SubroutineType(Enum):
    """Name fragments used to recognise the granule subroutines of interest."""

    RUN = "_run"
    INIT = "_init"


@dataclass
class CodegenContext:
    """1-based source line numbers located for one subroutine of the granule."""

    # Line of the last INTENT declaration found inside the subroutine.
    last_intent_ln: int
    # Line of the subroutine's END SUBROUTINE statement.
    end_subroutine_ln: int


class GranuleParser:
    """Parses a Fortran source file and extracts information about its subroutines and variables.

    Attributes:
        granule (Path): A path to the Fortran source file to be parsed.
        dependencies (Optional[list[Path]]): A list of paths to any additional
            Fortran source files that the input file depends on. ``None`` is
            treated as "no dependency files".

    Methods:
        parse(): Parses the input file and returns a dictionary with information
            about its subroutines and variables.

    Example usage:
        parser = GranuleParser(Path("my_file.f90"), dependencies=[Path("common.f90"), Path("constants.f90")])
        parsed_types = parser.parse()
    """

    def __init__(
        self, granule: Path, dependencies: Optional[list[Path]] = None
    ) -> None:
        self.granule = granule
        self.dependencies = dependencies

    def parse(self) -> dict:
        """Parse the granule and return its variables grouped by subroutine and intent.

        Returns:
            A nested dictionary ``{subroutine: {intent: {variable: spec}}}``.
            Each intent dictionary additionally holds a ``"codegen_lines"``
            entry with the line numbers computed by
            ``_update_with_codegen_lines``.

        Raises:
            ParsingError: If the ``_init``/``_run`` subroutines or their INTENT
                declarations cannot be located.
            MissingDerivedTypeError: If a derived type used by the granule is
                not defined in any dependency file.
        """
        parsed = crack(self.granule)
        subroutines = self._extract_subroutines(parsed)

        variables_grouped_by_intent = {
            name: self._extract_intent_vars(routine)
            for name, routine in subroutines.items()
        }

        intrinsic_type_vars, derived_type_vars = self._parse_types(
            variables_grouped_by_intent
        )
        combined_type_vars = self._combine_types(derived_type_vars, intrinsic_type_vars)

        return self._update_with_codegen_lines(combined_type_vars)

    def _extract_subroutines(self, parsed: dict) -> dict:
        """Select the blocks whose name contains a ``SubroutineType`` fragment.

        Raises:
            ParsingError: If the number of selected subroutines is not exactly two.
        """
        markers = tuple(kind.value for kind in SubroutineType)
        subroutines: dict = {
            elt["name"]: elt
            for elt in parsed["body"]
            if any(marker in elt["name"] for marker in markers)
        }

        if len(subroutines) != 2:
            raise ParsingError(
                f"Did not find _init and _run subroutines in {self.granule}"
            )

        return subroutines

    @staticmethod
    def _extract_intent_vars(subroutine: dict) -> dict:
        """Group the variables of a subroutine by intent (``in``/``inout``/``out``).

        Every entry of ``subroutine["vars"]`` must carry an ``"intent"`` key;
        a variable appears under each recognised intent that it lists.
        """
        intents = {"in", "inout", "out"}
        result: dict = {}
        for var_name, var_spec in subroutine["vars"].items():
            for intent in intents.intersection(var_spec["intent"]):
                result.setdefault(intent, {})[var_name] = var_spec
        return result

    def _parse_types(self, parsed: dict) -> tuple[dict, dict]:
        """Split variables into intrinsic and derived types.

        A variable counts as derived when its ``"typespec"`` equals ``"type"``.

        Returns:
            A tuple ``(intrinsic, derived)`` keyed by subroutine and intent,
            where ``derived`` has already been passed through
            ``_parse_derived_types``.
        """
        intrinsic_types: dict = {}
        derived_types: dict = {}

        for subroutine, subroutine_vars in parsed.items():
            intrinsic_types[subroutine] = {}
            derived_types[subroutine] = {}

            for intent, intent_vars in subroutine_vars.items():
                intrinsic_types[subroutine][intent] = {
                    name: spec
                    for name, spec in intent_vars.items()
                    if spec["typespec"] != "type"
                }
                derived_types[subroutine][intent] = {
                    name: spec
                    for name, spec in intent_vars.items()
                    if spec["typespec"] == "type"
                }

        return intrinsic_types, self._parse_derived_types(derived_types)

    def _parse_derived_types(self, derived_types: dict) -> dict:
        """Attach type definitions to derived-type variables and decompose them.

        Raises:
            MissingDerivedTypeError: If a variable's type name is not defined
                in any of the dependency files.
        """
        # Map each derived type name to its member definitions. A parser built
        # without dependencies has ``self.dependencies is None``; treat that as
        # an empty list instead of failing with a TypeError.
        derived_type_defs = {}
        for dep in self.dependencies or []:
            for block in crack(dep)["body"]:
                if block["block"] == "type":
                    derived_type_defs[block["name"]] = block["vars"]

        # Add the matching typedef to every derived-type variable.
        for subroutine_vars in derived_types.values():
            for intent_vars in subroutine_vars.values():
                for var in intent_vars.values():
                    if var["typespec"] != "type":
                        continue
                    typename = var["typename"]
                    if typename not in derived_type_defs:
                        raise MissingDerivedTypeError(
                            f"Could not find type definition for TYPE: {typename} in dependency files: {self.dependencies}"
                        )
                    var["typedef"] = derived_type_defs[typename]

        return self._decompose_derived_types(derived_types)

    @staticmethod
    def _decompose_derived_types(derived_types: dict) -> dict:
        """Replace each derived-type variable by one entry per type member.

        A member ``m`` of variable ``v`` becomes the entry ``"v_m"``; its spec is
        the variable's spec (without ``"typedef"``) overridden by the member's
        spec. Variables without a ``"typedef"`` are passed through unchanged.
        The ``"typedef"`` key is removed from the input specs in the process.
        """
        decomposed_vars: dict = {}
        for subroutine, subroutine_vars in derived_types.items():
            decomposed_vars[subroutine] = {}
            for intent, intent_vars in subroutine_vars.items():
                target = decomposed_vars[subroutine][intent] = {}
                for var_name, var_dict in intent_vars.items():
                    if "typedef" not in var_dict:
                        target[var_name] = var_dict
                        continue
                    typedef = var_dict.pop("typedef")
                    for subtype_name, subtype_spec in typedef.items():
                        new_var_dict = var_dict.copy()
                        new_var_dict.update(subtype_spec)
                        target[f"{var_name}_{subtype_name}"] = new_var_dict

        return decomposed_vars

    @staticmethod
    def _combine_types(derived_type_vars: dict, intrinsic_type_vars: dict) -> dict:
        """Merge derived-type variables into a deep copy of the intrinsic ones.

        Both inputs must share the same subroutine and intent keys.
        """
        combined = deepcopy(intrinsic_type_vars)
        for subroutine_name, intents in combined.items():
            for intent, intent_vars in intents.items():
                intent_vars.update(derived_type_vars[subroutine_name][intent])
        return combined

    def _update_with_codegen_lines(self, parsed_types: dict) -> dict:
        """Return a deep copy of ``parsed_types`` with ``"codegen_lines"`` per intent.

        ``in`` gets the last INTENT line, ``out`` the END SUBROUTINE line and
        ``inout`` both (in that order).

        Raises:
            ValueError: If an intent other than ``in``/``inout``/``out`` is present.
            ParsingError: Propagated from ``get_line_numbers``.
        """
        with_lines = deepcopy(parsed_types)
        for subroutine in with_lines:
            ctx = self.get_line_numbers(subroutine)
            lines_by_intent = {
                "in": [ctx.last_intent_ln],
                "inout": [ctx.last_intent_ln, ctx.end_subroutine_ln],
                "out": [ctx.end_subroutine_ln],
            }
            for intent in with_lines[subroutine]:
                if intent not in lines_by_intent:
                    raise ValueError(f"Unrecognized intent: {intent}")
                with_lines[subroutine][intent]["codegen_lines"] = lines_by_intent[intent]
        return with_lines

    def get_line_numbers(self, subroutine_name: str) -> CodegenContext:
        """Locate the last INTENT line and the END SUBROUTINE line of a subroutine.

        Args:
            subroutine_name: Name of the subroutine as written in the granule.

        Returns:
            A ``CodegenContext`` holding 1-based line numbers.

        Raises:
            ParsingError: If the subroutine's start or end statement cannot be
                found, or if it contains no INTENT declaration.
        """
        with open(self.granule, "r") as f:
            code = f.read()

        # Escape the name so it is always matched literally, and require a word
        # boundary after it so that e.g. "foo_run" does not match the end
        # statement of "foo_run_extra".
        name = re.escape(subroutine_name)
        start_match = re.search(r"SUBROUTINE\s+" + name + r"\s*\(", code)
        end_match = re.search(r"END\s+SUBROUTINE\s+" + name + r"\b", code)
        if start_match is None or end_match is None:
            # Previously this returned None, which only surfaced later as an
            # AttributeError in the caller.
            raise ParsingError(
                f"Could not find SUBROUTINE {subroutine_name} in {self.granule}"
            )
        start_subroutine_ln = code[: start_match.start()].count("\n") + 1
        end_subroutine_ln = code[: end_match.start()].count("\n") + 1

        # Scan the lines after the SUBROUTINE statement up to and including the
        # END SUBROUTINE statement for INTENT declarations.
        intent_pattern = re.compile(r"\bINTENT\b")
        body_lines = code.splitlines()[start_subroutine_ln:end_subroutine_ln]
        intent_pattern_lines = [
            i for i, line in enumerate(body_lines) if intent_pattern.search(line)
        ]
        if not intent_pattern_lines:
            raise ParsingError(f"No INTENT declarations found in {self.granule}")

        # Convert the 0-based offset within the slice back to a 1-based line number.
        last_intent_ln = intent_pattern_lines[-1] + start_subroutine_ln + 1

        return CodegenContext(last_intent_ln, end_subroutine_ln)
scan a given array AND update the statistics accordingly + PROCEDURE :: addData_s1d => statistics_addData_s1d + PROCEDURE :: addData_d1d => statistics_addData_d1d + PROCEDURE :: addStatistics => statistics_addStatistics ! update the statistics with the contents of another t_Statistics object + END TYPE t_Statistics + +CONTAINS + + SUBROUTINE statistics_reset(me) + CLASS(t_Statistics), INTENT(INOUT) :: me + + me%sampleCount = 0_C_INT64_T + me%MIN = HUGE(me%MIN) + me%mean = 0.0_wp + me%MAX = -HUGE(me%MAX) + END SUBROUTINE statistics_reset + + SUBROUTINE statistics_addData_s1d(me, DATA) + CLASS(t_Statistics), INTENT(INOUT) :: me + REAL(sp), INTENT(IN) :: DATA(:) + INTEGER :: i, icount + REAL(wp) :: data_sum, data_max, data_min, data_wp + + TYPE(t_Statistics) :: newStatistics + + CALL newStatistics%reset() + + icount = 0 + data_sum = 0._wp + data_max = -HUGE(DATA) + data_min = HUGE(DATA) +!$OMP PARALLEL DO PRIVATE(data_wp), REDUCTION(+:data_sum,icount), REDUCTION(MAX:data_max), REDUCTION(MIN:data_min) + DO i = 1, SIZE(DATA) + icount = icount+1 + data_wp = REAL(DATA(i), wp) + data_sum = data_sum + data_wp + data_max = MAX(data_max, data_wp) + data_min = MIN(data_min, data_wp) + ENDDO +!$OMP END PARALLEL DO + newStatistics%sampleCount = icount + newStatistics%MIN = data_min + newStatistics%MAX = data_max + IF (icount > 0) THEN + newStatistics%mean = data_sum / REAL(icount,wp) + ENDIF + CALL me%add(newStatistics) + END SUBROUTINE statistics_addData_s1d + + SUBROUTINE statistics_addData_d1d(me, DATA) + CLASS(t_Statistics), INTENT(INOUT) :: me + REAL(dp), INTENT(IN) :: DATA(:) + INTEGER :: i, icount + REAL(wp) :: data_sum, data_max, data_min + + TYPE(t_Statistics) :: newStatistics + + CALL newStatistics%reset() + + icount = 0 + data_sum = 0._wp + data_max = -HUGE(DATA) + data_min = HUGE(DATA) +!$OMP PARALLEL DO REDUCTION(+:data_sum,icount), REDUCTION(MAX:data_max), REDUCTION(MIN:data_min) + DO i = 1, SIZE(DATA) + icount = icount+1 + data_sum = data_sum + DATA(i) + data_max 
= MAX(data_max, DATA(i)) + data_min = MIN(data_min, DATA(i)) + ENDDO +!$OMP END PARALLEL DO + newStatistics%sampleCount = icount + newStatistics%MIN = data_min + newStatistics%MAX = data_max + IF (icount > 0) THEN + newStatistics%mean = data_sum / REAL(icount,wp) + ENDIF + + CALL me%add(newStatistics) + END SUBROUTINE statistics_addData_d1d + + SUBROUTINE statistics_addStatistics(me, other) + CLASS(t_Statistics), INTENT(INOUT) :: me + CLASS(t_Statistics), INTENT(IN) :: other + + INTEGER(C_INT64_T) :: newSampleCount + + newSampleCount = me%sampleCount + other%sampleCount + me%MIN = MIN(me%MIN, other%MIN) + me%mean = (me%mean*REAL(me%sampleCount, wp) + other%mean*REAL(other%sampleCount, wp))/REAL(newSampleCount, wp) + me%MAX = MAX(me%MAX, other%MAX) + me%sampleCount = newSampleCount + END SUBROUTINE statistics_addStatistics + +END MODULE mo_math_types_base diff --git a/pyutils/tests/samples/granule_example.f90 b/pyutils/tests/samples/granule_example.f90 new file mode 100644 index 0000000000..bfb91fcc18 --- /dev/null +++ b/pyutils/tests/samples/granule_example.f90 @@ -0,0 +1,1985 @@ +!> +!! mo_nh_diffusion_new +!! +!! Diffusion in the nonhydrostatic model +!! +!! @author Almut Gassmann, MPI-M +!! +!! +!! @par Revision History +!! Initial release by Almut Gassmann, MPI-M (2009-08.25) +!! Modification by William Sawyer, CSCS (2015-02-06) +!! - OpenACC implementation +!! Modification by William Sawyer, CSCS (2015-02-06) +!! - Turned into a granule +!! +!! @par Copyright and License +!! +!! This code is subject to the DWD and MPI-M-Software-License-Agreement in +!! its most recent form. +!! Please see the file LICENSE in the root of the source tree for this code. +!! Where software is supplied by third parties, it is indicated in the +!! headers of the routines. +!! + +!---------------------------- +#include "omp_definitions.inc" +!---------------------------- + +MODULE mo_nh_diffusion_new + +#ifdef __SX__ +! 
for strange reasons, this routine is faster without mixed precision on the NEC +#undef __MIXED_PRECISION + USE mo_kind_base, ONLY: wp, vp => wp +#else + USE mo_kind_base, ONLY: wp, vp +#endif + USE mo_math_types, ONLY: t_tangent_vectors ! to maintain compatibility w/ p_patch + + USE mo_model_domain_advanced, ONLY: t_patch ! until GridManager available + USE mo_model_domain, ONLY: p_patch + + USE mo_intp_rbf_math, ONLY: rbf_vec_interpol_vertex, rbf_vec_interpol_cell + USE mo_interpolation_scalar_math, ONLY: cells2verts_scalar + USE mo_interpolation_vector_math, ONLY: edges2cells_vector + USE mo_loopindices_advanced, ONLY: get_indices_e, get_indices_c + USE mo_impl_constants_base , ONLY: min_rledge, min_rlcell, min_rlvert, min_rledge_int, min_rlcell_int, min_rlvert_int + USE mo_impl_constants_grf_base, ONLY: grf_bdywidth_e, grf_bdywidth_c + USE mo_math_types_base, ONLY: t_geographical_coordinates + USE mo_math_laplace_math, ONLY: nabla4_vec + USE mo_math_constants_base, ONLY: dbl_eps + USE mo_sync, ONLY: SYNC_E, SYNC_C, SYNC_V, sync_patch_array, & + sync_patch_array_mult, sync_patch_array_mult_mp + USE mo_timer, ONLY: timer_nh_hdiffusion, timer_start, timer_stop + USE mo_exception_advanced, ONLY: finish, message, message_text + +#ifdef _OPENACC + USE mo_mpi_advanced, ONLY: i_am_accel_node +#endif + + IMPLICIT NONE + + PUBLIC :: t_diffusion, diffusion_alloc, diffusion_dealloc, diffusion_init, diffusion_run, diffusion_finalize + PRIVATE + + TYPE :: t_diffusion + LOGICAL :: lphys ! Is a run with physics? + LOGICAL :: ltimer ! Is the timer on? + LOGICAL :: l_limited_area ! Is a limited area run? 
+ LOGICAL :: ltkeshs + LOGICAL :: lfeedback + LOGICAL :: l_zdiffu_t + LOGICAL :: lsmag_3d + LOGICAL :: lhdiff_rcf + LOGICAL :: lhdiff_w + LOGICAL :: lhdiff_temp + LOGICAL :: lvert_nest + LOGICAL :: p_test_run + LOGICAL :: ddt_vn_hdf_is_associated, ddt_vn_dyn_is_associated + INTEGER :: nproma, nlev, nlevp1, nblks_c, nblks_e, nblks_v, nrdmax, ndyn_substeps + INTEGER :: nshift, nshift_total + + INTEGER :: hdiff_order + INTEGER :: discr_vn, discr_t + INTEGER :: itype_sher + INTEGER :: itype_comm + + REAL(wp) :: grav + REAL(vp) :: cvd_o_rd + REAL(wp) :: nudge_max_coeff + REAL(wp) :: denom_diffu_v + REAL(wp) :: k4, k4w + REAL(wp) :: hdiff_smag_z, hdiff_smag_z2, hdiff_smag_z3, hdiff_smag_z4 + REAL(wp) :: hdiff_smag_fac, hdiff_smag_fac2, hdiff_smag_fac3, hdiff_smag_fac4 + REAL(wp) :: hdiff_efdt_ratio + + REAL(wp), POINTER :: vct_a(:) ! vertical coordinate part A + + REAL(wp), POINTER :: c_lin_e(:,:,:) ! p_int + REAL(wp), POINTER :: e_bln_c_s(:,:,:) ! : + REAL(wp), POINTER :: e_bln_c_u(:,:,:) ! : + REAL(wp), POINTER :: e_bln_c_v(:,:,:) ! : + REAL(wp), POINTER :: cells_aw_verts(:,:,:) + REAL(wp), POINTER :: geofac_div(:,:,:) + REAL(wp), POINTER :: geofac_rot(:,:,:) + REAL(wp), POINTER :: geofac_n2s(:,:,:) + REAL(wp), POINTER :: geofac_grg(:,:,:,:) + REAL(wp), POINTER :: nudgecoeff_e(:,:) + INTEGER, POINTER :: rbf_vec_idx_v(:,:,:) + INTEGER, POINTER :: rbf_vec_blk_v(:,:,:) + REAL(wp), POINTER :: rbf_vec_coeff_v(:,:,:,:) + + REAL(wp), POINTER :: enhfac_diffu(:) ! p_nh_metrics + REAL(wp), POINTER :: zd_intcoef(:,:) ! : + REAL(wp), POINTER :: zd_geofac(:,:) ! : + REAL(wp), POINTER :: zd_diffcoef(:) ! 
: + REAL(vp), POINTER :: ddqz_z_full_e(:,:,:) + REAL(vp), POINTER :: theta_ref_mc(:,:,:) + REAL(vp), POINTER :: wgtfac_c(:,:,:) + REAL(vp), POINTER :: wgtfac_e(:,:,:) + REAL(vp), POINTER :: wgtfacq_e(:,:,:) + REAL(vp), POINTER :: wgtfacq1_e(:,:,:) + INTEGER, POINTER :: zd_indlist(:,:) + INTEGER, POINTER :: zd_blklist(:,:) + INTEGER, POINTER :: zd_vertidx(:,:) + INTEGER :: zd_listdim + + END TYPE t_diffusion + + TYPE (t_diffusion), ALLOCATABLE :: diff_inst(:) + + TYPE (t_patch), ALLOCATABLE :: p_patch_diff(:) + +#ifndef __SX__ +#define __ENABLE_DDT_VN_XYZ__ +#endif + + CONTAINS + + SUBROUTINE diffusion_alloc( n_dom ) + INTEGER, INTENT(IN) :: n_dom + ALLOCATE( diff_inst( n_dom ) ) + ALLOCATE( p_patch_diff( n_dom ) ) + !$ACC ENTER DATA CREATE(diff_inst,p_patch_diff) + END SUBROUTINE diffusion_alloc + + SUBROUTINE diffusion_dealloc( ) + !$ACC EXIT DATA DELETE(diff_inst,p_patch_diff) + IF (ALLOCATED(diff_inst)) DEALLOCATE( diff_inst ) + IF (ALLOCATED(p_patch_diff)) DEALLOCATE( p_patch_diff ) + END SUBROUTINE diffusion_dealloc + + + !> + !! init_diffusion + !! + !! Prepares the horizontal diffusion of velocity and temperature + !! + !! @par Revision History + !! Initial release by William Sawyer, CSCS (2022-11-25) + !! + SUBROUTINE diffusion_init(cvd_o_rd, grav, & + jg, nproma, nlev, nblks_e, nblks_v, nblks_c, nshift, nshift_total, & + nrdmax, ndyn_substeps, nudge_max_coeff, denom_diffu_v, & + hdiff_smag_z, hdiff_smag_z2, hdiff_smag_z3, hdiff_smag_z4, & + hdiff_smag_fac, hdiff_smag_fac2, hdiff_smag_fac3, hdiff_smag_fac4, & + hdiff_order, hdiff_efdt_ratio, & + k4, k4w, itype_comm, itype_sher, itype_vn_diffu, itype_t_diffu, & + p_test_run, lphys, lhdiff_rcf, lhdiff_w, lhdiff_temp, l_limited_area,& + lfeedback, l_zdiffu_t, ltkeshs, lsmag_3d, lvert_nest, ltimer, & + ddt_vn_hdf_is_associated, ddt_vn_dyn_is_associated, & + vct_a, c_lin_e, e_bln_c_s, e_bln_c_u, e_bln_c_v, cells_aw_verts, & ! p_int + geofac_div, geofac_rot, geofac_n2s, geofac_grg, nudgecoeff_e, & ! 
p_int + rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, & ! p_int + enhfac_diffu, zd_intcoef, zd_geofac, zd_diffcoef, & ! p_nh_metrics + wgtfac_c, wgtfac_e, wgtfacq_e, wgtfacq1_e, & ! p_nh_metrics + ddqz_z_full_e, theta_ref_mc, & ! p_nh_metrics + zd_indlist, zd_blklist, zd_vertidx, zd_listdim, & ! p_nh_metrics + edges_start_block, edges_end_block, edges_start_index, edges_end_index,&! p_patch%edges + edges_vertex_idx, edges_vertex_blk, edges_cell_idx, edges_cell_blk, & ! p_patch%edges + edges_tangent_orientation, & ! p_patch%edges + edges_primal_normal_vert, edges_dual_normal_vert, & ! p_patch%edges + edges_primal_normal_cell, edges_dual_normal_cell, & ! p_patch%edges + edges_inv_vert_vert_length, edges_inv_primal_edge_length, & ! p_patch%edges + edges_inv_dual_edge_length, edges_area_edge, & ! p_patch%edges + cells_start_block, cells_end_block, cells_start_index, cells_end_index,&! p_patch%cells + cells_neighbor_idx, cells_neighbor_blk, & ! p_patch%cells + cells_edge_idx, cells_edge_blk, cells_area, & ! p_patch%cells + verts_start_block, verts_end_block, verts_start_index, verts_end_index )! p_patch%verts + REAL(wp), INTENT(IN) :: cvd_o_rd, grav ! Physical constants from central location + INTEGER, INTENT(IN) :: jg + INTEGER, INTENT(IN) :: nproma, nlev, nblks_e, nblks_v, nblks_c, nshift, nshift_total + INTEGER, INTENT(IN) :: nrdmax ! 
= nrdmax(jg) + INTEGER, INTENT(IN) :: ndyn_substeps + INTEGER, INTENT(IN) :: hdiff_order, itype_comm, itype_sher, itype_vn_diffu, itype_t_diffu + REAL(wp), INTENT(IN) :: hdiff_smag_z, hdiff_smag_z2, hdiff_smag_z3, hdiff_smag_z4 + REAL(wp), INTENT(IN) :: hdiff_smag_fac, hdiff_smag_fac2, hdiff_smag_fac3, hdiff_smag_fac4 + REAL(wp), INTENT(IN) :: hdiff_efdt_ratio + REAL(wp), INTENT(IN) :: k4, k4w + REAL(wp), INTENT(IN) :: nudge_max_coeff + REAL(wp), INTENT(IN) :: denom_diffu_v + LOGICAL, INTENT(IN) :: p_test_run + LOGICAL, INTENT(IN) :: lphys !< is a run with physics + LOGICAL, INTENT(IN) :: lhdiff_rcf + LOGICAL, INTENT(IN) :: lhdiff_w + LOGICAL, INTENT(IN) :: lhdiff_temp + LOGICAL, INTENT(IN) :: l_zdiffu_t + LOGICAL, INTENT(IN) :: l_limited_area + LOGICAL, INTENT(IN) :: lfeedback ! = lfeedback(jg) + LOGICAL, INTENT(IN) :: ltkeshs + LOGICAL, INTENT(IN) :: lsmag_3d + LOGICAL, INTENT(IN) :: lvert_nest + LOGICAL, INTENT(IN) :: ltimer + LOGICAL, INTENT(IN) :: ddt_vn_hdf_is_associated + LOGICAL, INTENT(IN) :: ddt_vn_dyn_is_associated + + REAL(wp), TARGET, INTENT(IN) :: vct_a(:) ! param. A of the vertical coordinte + + REAL(wp), TARGET, INTENT(IN) :: c_lin_e(:,:,:) ! p_int + REAL(wp), TARGET, INTENT(IN) :: e_bln_c_s(:,:,:) ! : + REAL(wp), TARGET, INTENT(IN) :: e_bln_c_u(:,:,:) ! : + REAL(wp), TARGET, INTENT(IN) :: e_bln_c_v(:,:,:) ! : + REAL(wp), TARGET, INTENT(IN) :: cells_aw_verts(:,:,:) ! : + REAL(wp), TARGET, INTENT(IN) :: geofac_div(:,:,:) ! : + REAL(wp), TARGET, INTENT(IN) :: geofac_rot(:,:,:) ! : + REAL(wp), TARGET, INTENT(IN) :: geofac_n2s(:,:,:) ! : + REAL(wp), TARGET, INTENT(IN) :: geofac_grg(:,:,:,:) ! : + REAL(wp), TARGET, INTENT(IN) :: nudgecoeff_e(:,:) ! : + INTEGER, TARGET, INTENT(IN) :: rbf_vec_idx_v(:,:,:) + INTEGER, TARGET, INTENT(IN) :: rbf_vec_blk_v(:,:,:) + REAL(wp), TARGET, INTENT(IN) :: rbf_vec_coeff_v(:,:,:,:) + + REAL(wp), TARGET, INTENT(IN) :: enhfac_diffu(:) ! p_nh_metrics + REAL(wp), TARGET, INTENT(IN) :: zd_intcoef(:,:) ! 
: + REAL(wp), TARGET, INTENT(IN) :: zd_geofac(:,:) ! : + REAL(wp), TARGET, INTENT(IN) :: zd_diffcoef(:) ! : + REAL(vp), TARGET, INTENT(IN) :: wgtfac_c(:,:,:) ! : + REAL(vp), TARGET, INTENT(IN) :: wgtfac_e(:,:,:) ! : + REAL(vp), TARGET, INTENT(IN) :: wgtfacq_e(:,:,:) ! : + REAL(vp), TARGET, INTENT(IN) :: wgtfacq1_e(:,:,:) ! : + REAL(vp), TARGET, INTENT(IN) :: ddqz_z_full_e(:,:,:) ! : + REAL(vp), TARGET, INTENT(IN) :: theta_ref_mc(:,:,:) ! : + INTEGER, TARGET, INTENT(IN) :: zd_indlist(:,:) ! : + INTEGER, TARGET, INTENT(IN) :: zd_blklist(:,:) ! : + INTEGER, TARGET, INTENT(IN) :: zd_vertidx(:,:) ! : + INTEGER, INTENT(IN) :: zd_listdim ! : + + INTEGER, TARGET, INTENT(IN) :: edges_start_block(min_rledge:) ! p_patch%edges + INTEGER, TARGET, INTENT(IN) :: edges_end_block(min_rledge:) ! : + INTEGER, TARGET, INTENT(IN) :: edges_start_index(min_rledge:) ! p_patch%edges + INTEGER, TARGET, INTENT(IN) :: edges_end_index(min_rledge:) ! : + INTEGER, TARGET, INTENT(IN) :: edges_vertex_idx(:,:,:) ! : + INTEGER, TARGET, INTENT(IN) :: edges_vertex_blk(:,:,:) ! : + INTEGER, TARGET, INTENT(IN) :: edges_cell_idx(:,:,:) ! : + INTEGER, TARGET, INTENT(IN) :: edges_cell_blk(:,:,:) ! : + REAL(wp), TARGET, INTENT(IN) :: edges_tangent_orientation(:,:) + TYPE(t_tangent_vectors), TARGET, INTENT(IN) :: edges_primal_normal_vert(:,:,:) + TYPE(t_tangent_vectors), TARGET, INTENT(IN) :: edges_dual_normal_vert(:,:,:) + TYPE(t_tangent_vectors), TARGET, INTENT(IN) :: edges_primal_normal_cell(:,:,:) + TYPE(t_tangent_vectors), TARGET, INTENT(IN) :: edges_dual_normal_cell(:,:,:) + REAL(wp), TARGET, INTENT(IN) :: edges_inv_vert_vert_length(:,:) + REAL(wp), TARGET, INTENT(IN) :: edges_inv_primal_edge_length(:,:) + REAL(wp), TARGET, INTENT(IN) :: edges_inv_dual_edge_length(:,:) + REAL(wp), TARGET, INTENT(IN) :: edges_area_edge(:,:) + + INTEGER, TARGET, INTENT(IN) :: cells_start_block(min_rlcell:) ! p_patch%cells + INTEGER, TARGET, INTENT(IN) :: cells_end_block(min_rlcell:) ! 
: + INTEGER, TARGET, INTENT(IN) :: cells_start_index(min_rlcell:) ! p_patch%cells + INTEGER, TARGET, INTENT(IN) :: cells_end_index(min_rlcell:) ! : + INTEGER, TARGET, INTENT(IN) :: cells_neighbor_idx(:,:,:) ! : + INTEGER, TARGET, INTENT(IN) :: cells_neighbor_blk(:,:,:) ! : + INTEGER, TARGET, INTENT(IN) :: cells_edge_idx(:,:,:) ! : + INTEGER, TARGET, INTENT(IN) :: cells_edge_blk(:,:,:) ! : + REAL(wp), TARGET, INTENT(IN) :: cells_area(:,:) + + INTEGER, TARGET, INTENT(IN) :: verts_start_block(min_rlvert:) ! p_patch%verts + INTEGER, TARGET, INTENT(IN) :: verts_end_block(min_rlvert:) ! : + INTEGER, TARGET, INTENT(IN) :: verts_start_index(min_rlvert:) ! p_patch%verts + INTEGER, TARGET, INTENT(IN) :: verts_end_index(min_rlvert:) ! : + !-------------------------------------------------------------------------- + + diff_inst(jg)%vct_a => vct_a + + diff_inst(jg)%c_lin_e => c_lin_e ! p_int + diff_inst(jg)%e_bln_c_s => e_bln_c_s ! : + diff_inst(jg)%e_bln_c_u => e_bln_c_u ! : + diff_inst(jg)%e_bln_c_v => e_bln_c_v ! : + diff_inst(jg)%cells_aw_verts => cells_aw_verts + diff_inst(jg)%geofac_div => geofac_div + diff_inst(jg)%geofac_rot => geofac_rot + diff_inst(jg)%geofac_n2s => geofac_n2s + diff_inst(jg)%geofac_grg => geofac_grg + diff_inst(jg)%nudgecoeff_e => nudgecoeff_e + diff_inst(jg)%rbf_vec_idx_v => rbf_vec_idx_v + diff_inst(jg)%rbf_vec_blk_v => rbf_vec_blk_v + diff_inst(jg)%rbf_vec_coeff_v => rbf_vec_coeff_v + + diff_inst(jg)%enhfac_diffu => enhfac_diffu ! p_nh_metrics + diff_inst(jg)%zd_intcoef => zd_intcoef ! 
: + diff_inst(jg)%zd_geofac => zd_geofac + diff_inst(jg)%zd_diffcoef => zd_diffcoef + diff_inst(jg)%wgtfac_c => wgtfac_c + diff_inst(jg)%wgtfac_e => wgtfac_e + diff_inst(jg)%wgtfacq_e => wgtfacq_e + diff_inst(jg)%wgtfacq1_e => wgtfacq1_e + diff_inst(jg)%ddqz_z_full_e => ddqz_z_full_e + diff_inst(jg)%theta_ref_mc => theta_ref_mc + diff_inst(jg)%zd_indlist => zd_indlist + diff_inst(jg)%zd_blklist => zd_blklist + diff_inst(jg)%zd_vertidx => zd_vertidx + diff_inst(jg)%zd_listdim = zd_listdim + + p_patch_diff(jg)%edges%start_block => edges_start_block ! p_patch%edges + p_patch_diff(jg)%edges%end_block => edges_end_block ! : + p_patch_diff(jg)%edges%start_index => edges_start_index ! p_patch%edges + p_patch_diff(jg)%edges%end_index => edges_end_index ! : + p_patch_diff(jg)%edges%vertex_idx => edges_vertex_idx + p_patch_diff(jg)%edges%vertex_blk => edges_vertex_blk + p_patch_diff(jg)%edges%cell_idx => edges_cell_idx + p_patch_diff(jg)%edges%cell_blk => edges_cell_blk + p_patch_diff(jg)%edges%tangent_orientation => edges_tangent_orientation + p_patch_diff(jg)%edges%primal_normal_vert => edges_primal_normal_vert + p_patch_diff(jg)%edges%dual_normal_vert => edges_dual_normal_vert + p_patch_diff(jg)%edges%primal_normal_cell => edges_primal_normal_cell + p_patch_diff(jg)%edges%dual_normal_cell => edges_dual_normal_cell + p_patch_diff(jg)%edges%inv_vert_vert_length => edges_inv_vert_vert_length + p_patch_diff(jg)%edges%inv_primal_edge_length => edges_inv_primal_edge_length + p_patch_diff(jg)%edges%inv_dual_edge_length => edges_inv_dual_edge_length + p_patch_diff(jg)%edges%area_edge => edges_area_edge + + p_patch_diff(jg)%cells%start_block => cells_start_block ! p_patch%cells + p_patch_diff(jg)%cells%end_block => cells_end_block ! : + p_patch_diff(jg)%cells%start_index => cells_start_index ! p_patch%cells + p_patch_diff(jg)%cells%end_index => cells_end_index ! 
: + p_patch_diff(jg)%cells%neighbor_idx => cells_neighbor_idx + p_patch_diff(jg)%cells%neighbor_blk => cells_neighbor_blk + p_patch_diff(jg)%cells%edge_idx => cells_edge_idx + p_patch_diff(jg)%cells%edge_blk => cells_edge_blk + p_patch_diff(jg)%cells%area => cells_area + + p_patch_diff(jg)%verts%start_block => verts_start_block ! p_patch%cells + p_patch_diff(jg)%verts%end_block => verts_end_block ! : + p_patch_diff(jg)%verts%start_index => verts_start_index ! p_patch%cells + p_patch_diff(jg)%verts%end_index => verts_end_index ! : + + diff_inst(jg)%nrdmax = nrdmax + diff_inst(jg)%grav = grav ! from central location + diff_inst(jg)%cvd_o_rd = cvd_o_rd ! " + diff_inst(jg)%nudge_max_coeff= nudge_max_coeff + diff_inst(jg)%denom_diffu_v = denom_diffu_v + diff_inst(jg)%k4 = k4 + diff_inst(jg)%k4w = k4w + + ! number of vertical levels, blocks for edges, vertices and cells + diff_inst(jg)%nproma = nproma + diff_inst(jg)%nlev = nlev + diff_inst(jg)%nlevp1 = nlev+1 + diff_inst(jg)%nblks_e = nblks_e + diff_inst(jg)%nblks_v = nblks_v + diff_inst(jg)%nblks_c = nblks_c + + diff_inst(jg)%ndyn_substeps = ndyn_substeps + diff_inst(jg)%nshift = nshift ! p_patch%nshift + diff_inst(jg)%nshift_total = nshift_total ! 
p_patch%nshift_total + + diff_inst(jg)%itype_sher = itype_sher + diff_inst(jg)%itype_comm = itype_comm + + diff_inst(jg)%p_test_run = p_test_run + diff_inst(jg)%lphys = lphys + diff_inst(jg)%l_zdiffu_t = l_zdiffu_t + diff_inst(jg)%l_limited_area = l_limited_area + diff_inst(jg)%lfeedback = lfeedback + diff_inst(jg)%ltkeshs = ltkeshs + diff_inst(jg)%lsmag_3d = lsmag_3d + diff_inst(jg)%lhdiff_w = lhdiff_w + diff_inst(jg)%lhdiff_rcf = lhdiff_rcf + diff_inst(jg)%lhdiff_temp = lhdiff_temp + diff_inst(jg)%ltimer = ltimer + diff_inst(jg)%lvert_nest = lvert_nest + diff_inst(jg)%ddt_vn_hdf_is_associated = ddt_vn_hdf_is_associated + diff_inst(jg)%ddt_vn_dyn_is_associated = ddt_vn_dyn_is_associated + + diff_inst(jg)%hdiff_order = hdiff_order + + diff_inst(jg)%discr_vn = itype_vn_diffu + diff_inst(jg)%discr_t = itype_t_diffu + + diff_inst(jg)%hdiff_smag_z = hdiff_smag_z + diff_inst(jg)%hdiff_smag_z2 = hdiff_smag_z2 + diff_inst(jg)%hdiff_smag_z3 = hdiff_smag_z3 + diff_inst(jg)%hdiff_smag_z4 = hdiff_smag_z4 + diff_inst(jg)%hdiff_smag_fac = hdiff_smag_fac + diff_inst(jg)%hdiff_smag_fac2= hdiff_smag_fac2 + diff_inst(jg)%hdiff_smag_fac3= hdiff_smag_fac3 + diff_inst(jg)%hdiff_smag_fac4= hdiff_smag_fac4 + diff_inst(jg)%hdiff_efdt_ratio = hdiff_efdt_ratio + + !$ACC ENTER DATA COPYIN(diff_inst(jg)) + END SUBROUTINE diffusion_init + + !> + !! diffusion + !! + !! Computes the horizontal diffusion of velocity and temperature + !! + !! @par Revision History + !! Initial release by Guenther Zaengl, DWD (2010-10-13), based on an earlier + !! version initially developed by Almut Gassmann, MPI-M + !! + + SUBROUTINE diffusion_run(jg, dtime, linit, & + vn, w, theta_v, exner, & ! p_nh_prog + vt, theta_v_ic, div_ic, hdef_ic, dwdx, dwdy, & ! p_nh_diag + ddt_vn_dyn, ddt_vn_hdf ) ! p_nh_diag optional + + INTEGER, INTENT(IN) :: jg ! patch ID + REAL(wp), INTENT(IN) :: dtime !< time step + LOGICAL, INTENT(IN) :: linit !< initial call or runtime call + + REAL(wp), INTENT(INOUT) :: vn(:,:,:) ! 
orthogonal normal wind (nproma,nlev,nblks_e) [m/s] + REAL(wp), INTENT(INOUT) :: w(:,:,:) ! orthogonal vertical wind (nproma,nlevp1,nblks_c) [m/s] + REAL(wp), INTENT(INOUT) :: theta_v(:,:,:) ! virtual potential temperature (nproma,nlev,nblks_c) [K] + REAL(wp), INTENT(INOUT) :: exner(:,:,:) ! Exner pressure (nproma,nlev,nblks_c) [-] + REAL(wp), INTENT(INOUT) :: theta_v_ic(:,:,:) ! theta_v at half levels (nproma,nlevp1,nblks_c) [K] + REAL(vp), INTENT(IN) :: vt(:,:,:) ! tangential wind (nproma,nlev,nblks_e) [m/s] + REAL(vp), INTENT(OUT) :: div_ic(:,:,:) ! divergence at half levels(nproma,nlevp1,nblks_c) [1/s] + REAL(vp), INTENT(OUT) :: hdef_ic(:,:,:) ! horizontal wind field deformation (nproma,nlevp1,nblks_c) [1/s^2] + REAL(vp), INTENT(OUT) :: dwdx(:,:,:) ! divergence at half levels(nproma,nlevp1,nblks_c) [1/s] + REAL(vp), INTENT(OUT) :: dwdy(:,:,:) ! horizontal wind field deformation (nproma,nlevp1,nblks_c) [1/s^2] + REAL(wp), INTENT(INOUT), OPTIONAL :: ddt_vn_dyn(:,:,:) ! d vn / dt (sum of all contributions) + REAL(wp), INTENT(INOUT), OPTIONAL :: ddt_vn_hdf(:,:,:) ! d vn / dt (horizontal diffusion only) + + ! local variables - vp means variable precision depending on the __MIXED_PRECISION cpp flag + REAL(vp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev,diff_inst(jg)%nblks_c) :: z_temp + REAL(wp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev,diff_inst(jg)%nblks_e) :: z_nabla2_e + REAL(vp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev,diff_inst(jg)%nblks_c) :: z_nabla2_c + REAL(wp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev,diff_inst(jg)%nblks_e) :: z_nabla4_e + REAL(vp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev) :: z_nabla4_e2 + + REAL(wp):: diff_multfac_vn(diff_inst(jg)%nlev), diff_multfac_w, diff_multfac_n2w(diff_inst(jg)%nlev) + INTEGER :: i_startblk, i_endblk, i_startidx, i_endidx + INTEGER :: rl_start, rl_end + INTEGER :: jk, jb, jc, je, ic, ishift, nshift, jk1 + INTEGER :: nlev, nlevp1 !< number of full and half levels + + ! 
start index levels and diffusion coefficient for boundary diffusion + INTEGER, PARAMETER :: start_bdydiff_e = 5 ! refin_ctrl level at which boundary diffusion starts + REAL(wp):: fac_bdydiff_v + + ! For Smagorinsky diffusion - vp means variable precision depending on the __MIXED_PRECISION cpp flag + REAL(vp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev,diff_inst(jg)%nblks_e) :: kh_smag_e + REAL(vp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev,diff_inst(jg)%nblks_e) :: kh_smag_ec + REAL(vp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev,diff_inst(jg)%nblks_v) :: u_vert + REAL(vp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev,diff_inst(jg)%nblks_v) :: v_vert + REAL(wp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev,diff_inst(jg)%nblks_c) :: u_cell + REAL(wp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev,diff_inst(jg)%nblks_c) :: v_cell + REAL(vp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev) :: kh_c, div + + REAL(vp) :: dvt_norm, dvt_tang, vn_vert1, vn_vert2, vn_vert3, vn_vert4, vn_cell1, vn_cell2 + + REAL(vp) :: smag_offset, nabv_tang, nabv_norm, rd_o_cvd, nudgezone_diff, bdy_diff, enh_diffu + REAL(vp), DIMENSION(diff_inst(jg)%nlev) :: smag_limit, diff_multfac_smag, enh_smag_fac + INTEGER :: nblks_zdiffu, nproma_zdiffu, npromz_zdiffu, nlen_zdiffu + + REAL(wp) :: alin, dz32, df32, dz42, df42, bqdr, aqdr, zf, dzlin, dzqdr + + ! Additional variables for 3D Smagorinsky coefficient + REAL(wp):: z_w_v(diff_inst(jg)%nproma,diff_inst(jg)%nlevp1,diff_inst(jg)%nblks_v) + REAL(wp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlevp1) :: z_vn_ie, z_vt_ie + REAL(wp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev) :: dvndz, dvtdz, dwdz, dthvdz, dwdn, dwdt, kh_smag3d_e + + ! 
Variables for provisional fix against runaway cooling in local topography depressions + INTEGER :: icount(diff_inst(jg)%nblks_c), iclist(2*diff_inst(jg)%nproma,diff_inst(jg)%nblks_c), iklist(2*diff_inst(jg)%nproma,diff_inst(jg)%nblks_c) + REAL(wp) :: tdlist(2*diff_inst(jg)%nproma,diff_inst(jg)%nblks_c), tdiff, trefdiff, thresh_tdiff, z_theta, fac2d + + + INTEGER, DIMENSION(:,:,:), POINTER :: icidx, icblk, ieidx, ieblk, ividx, ivblk, iecidx, iecblk + INTEGER, DIMENSION(:,:), POINTER :: icell, ilev, iblk !, iedge, iedblk + REAL(wp), DIMENSION(:,:), POINTER :: vcoef, zd_geofac !, blcoef + LOGICAL :: ltemp_diffu + INTEGER :: diffu_type + +#ifdef _OPENACC + REAL(vp), DIMENSION(diff_inst(jg)%nproma,diff_inst(jg)%nlev-1:diff_inst(jg)%nlev,diff_inst(jg)%nblks_c) :: enh_diffu_3d +#endif + + ! Variables for tendency diagnostics + REAL(wp) :: z_d_vn_hdf + REAL(wp) :: r_dtimensubsteps + + CHARACTER(*), PARAMETER :: routine = "diffusion_run" + + !-------------------------------------------------------------------------- + + ividx => p_patch_diff(jg)%edges%vertex_idx + ivblk => p_patch_diff(jg)%edges%vertex_blk + + iecidx => p_patch_diff(jg)%edges%cell_idx + iecblk => p_patch_diff(jg)%edges%cell_blk + + icidx => p_patch_diff(jg)%cells%neighbor_idx + icblk => p_patch_diff(jg)%cells%neighbor_blk + + ieidx => p_patch_diff(jg)%cells%edge_idx + ieblk => p_patch_diff(jg)%cells%edge_blk + + ! prepare for tendency diagnostics + IF (diff_inst(jg)%lhdiff_rcf) THEN + r_dtimensubsteps = 1._wp/dtime ! without substepping, no averaging is necessary + ELSE + r_dtimensubsteps = 1._wp/(dtime*REAL(diff_inst(jg)%ndyn_substeps,wp)) ! with substepping the tendency is averaged over the substeps + END IF + + ! number of vertical levels + nlev = diff_inst(jg)%nlev + nlevp1 = nlev+1 + + ! 
Normalized diffusion coefficient for boundary diffusion + IF (diff_inst(jg)%lhdiff_rcf) THEN + fac_bdydiff_v = SQRT(REAL(diff_inst(jg)%ndyn_substeps,wp))/diff_inst(jg)%denom_diffu_v + ELSE + fac_bdydiff_v = 1._wp/diff_inst(jg)%denom_diffu_v + ENDIF + + ! scaling factor for enhanced diffusion in nudging zone (if present, i.e. for + ! limited-area runs and one-way nesting) + nudgezone_diff = 0.04_wp/(diff_inst(jg)%nudge_max_coeff + dbl_eps) + + ! scaling factor for enhanced near-boundary diffusion for + ! two-way nesting (used with Smagorinsky diffusion only; not needed otherwise) + bdy_diff = 0.015_wp/(diff_inst(jg)%nudge_max_coeff + dbl_eps) + + ! threshold temperature deviation from neighboring grid points + ! that activates extra diffusion against runaway cooling + thresh_tdiff = - 5._wp + + rd_o_cvd = 1._wp/diff_inst(jg)%cvd_o_rd + diffu_type = diff_inst(jg)%hdiff_order + + + IF (linit) THEN ! enhanced diffusion at all levels for initial velocity filtering call + diff_multfac_vn(:) = diff_inst(jg)%k4/3._wp*diff_inst(jg)%hdiff_efdt_ratio + smag_offset = 0.0_vp + diffu_type = 5 ! always combine nabla4 background diffusion with Smagorinsky diffusion for initial filtering call + smag_limit(:) = 0.125_wp-4._wp*diff_multfac_vn(:) + ELSE IF (diff_inst(jg)%lhdiff_rcf) THEN ! combination with divergence damping inside the dynamical core + IF (diffu_type == 4) THEN + diff_multfac_vn(:) = MIN(1._wp/128._wp,diff_inst(jg)%k4*REAL(diff_inst(jg)%ndyn_substeps,wp)/ & + 3._wp*diff_inst(jg)%enhfac_diffu(:)) + ELSE ! For Smagorinsky diffusion, the Smagorinsky coefficient rather than the background + ! 
diffusion coefficient is enhanced near the model top (see below) + diff_multfac_vn(:) = MIN(1._wp/128._wp,diff_inst(jg)%k4*REAL(diff_inst(jg)%ndyn_substeps,wp)/3._wp) + ENDIF + IF (diffu_type == 3) THEN + smag_offset = 0._vp + smag_limit(:) = 0.125_vp + ELSE + smag_offset = 0.25_wp*diff_inst(jg)%k4*REAL(diff_inst(jg)%ndyn_substeps,wp) + smag_limit(:) = 0.125_wp-4._wp*diff_multfac_vn(:) + ENDIF + ELSE ! enhanced diffusion near model top only + IF (diffu_type == 4) THEN + diff_multfac_vn(:) = diff_inst(jg)%k4/3._wp*diff_inst(jg)%enhfac_diffu(:) + ELSE ! For Smagorinsky diffusion, the Smagorinsky coefficient rather than the background + ! diffusion coefficient is enhanced near the model top (see below) + diff_multfac_vn(:) = diff_inst(jg)%k4/3._wp + ENDIF + smag_offset = 0.25_wp*diff_inst(jg)%k4 + smag_limit(:) = 0.125_wp-4._wp*diff_multfac_vn(:) + ! pure Smagorinsky diffusion does not work without divergence damping + IF (diff_inst(jg)%hdiff_order == 3) diffu_type = 5 + ENDIF + + ! Multiplication factor for nabla4 diffusion on vertical wind speed + diff_multfac_w = MIN(1._wp/48._wp,diff_inst(jg)%k4w*REAL(diff_inst(jg)%ndyn_substeps,wp)) + + ! Factor for additional nabla2 diffusion in upper damping zone + diff_multfac_n2w(:) = 0._wp + IF (diff_inst(jg)%nrdmax > 1) THEN ! seems to be redundant, but the NEC issues invalid operations otherwise + DO jk = 2, diff_inst(jg)%nrdmax + jk1 = jk + diff_inst(jg)%nshift_total + diff_multfac_n2w(jk) = 1._wp/12._wp*((diff_inst(jg)%vct_a(jk1)-diff_inst(jg)%vct_a(diff_inst(jg)%nshift_total+diff_inst(jg)%nrdmax+1))/ & + (diff_inst(jg)%vct_a(2)-diff_inst(jg)%vct_a(diff_inst(jg)%nshift_total+diff_inst(jg)%nrdmax+1)))**4 + ENDDO + ENDIF + + IF (diffu_type == 3 .OR. diffu_type == 5) THEN + + ! temperature diffusion is used only in combination with Smagorinsky diffusion + ltemp_diffu = diff_inst(jg)%lhdiff_temp + + ! The Smagorinsky diffusion factor enh_divdamp_fac is defined as a profile in height z + ! 
above sea level with 4 height sections: + ! + ! enh_smag_fac(z) = hdiff_smag_fac ! z <= hdiff_smag_z + ! enh_smag_fac(z) = hdiff_smag_fac + (z-hdiff_smag_z )* alin ! hdiff_smag_z <= z <= hdiff_smag_z2 + ! enh_smag_fac(z) = hdiff_smag_fac2 + (z-hdiff_smag_z2)*(aqdr+(z-hdiff_smag_z2)*bqdr) ! hdiff_smag_z2 <= z <= hdiff_smag_z4 + ! enh_smag_fac(z) = hdiff_smag_fac4 ! hdiff_smag_z4 <= z + ! + alin = (diff_inst(jg)%hdiff_smag_fac2-diff_inst(jg)%hdiff_smag_fac)/ & + & (diff_inst(jg)%hdiff_smag_z2 -diff_inst(jg)%hdiff_smag_z) + ! + df32 = diff_inst(jg)%hdiff_smag_fac3-diff_inst(jg)%hdiff_smag_fac2 + df42 = diff_inst(jg)%hdiff_smag_fac4-diff_inst(jg)%hdiff_smag_fac2 + ! + dz32 = diff_inst(jg)%hdiff_smag_z3-diff_inst(jg)%hdiff_smag_z2 + dz42 = diff_inst(jg)%hdiff_smag_z4-diff_inst(jg)%hdiff_smag_z2 + ! + bqdr = (df42*dz32-df32*dz42)/(dz32*dz42*(dz42-dz32)) + aqdr = df32/dz32-bqdr*dz32 + ! + DO jk = 1, nlev + jk1 = jk + diff_inst(jg)%nshift_total + ! + zf = 0.5_wp*(diff_inst(jg)%vct_a(jk1)+diff_inst(jg)%vct_a(jk1+1)) + dzlin = MIN( diff_inst(jg)%hdiff_smag_z2-diff_inst(jg)%hdiff_smag_z , & + & MAX( 0._wp, zf-diff_inst(jg)%hdiff_smag_z ) ) + dzqdr = MIN( diff_inst(jg)%hdiff_smag_z4-diff_inst(jg)%hdiff_smag_z2, & + & MAX( 0._wp, zf-diff_inst(jg)%hdiff_smag_z2) ) + ! + enh_smag_fac(jk) = REAL(diff_inst(jg)%hdiff_smag_fac + dzlin*alin + dzqdr*(aqdr+dzqdr*bqdr),vp) + ! + ENDDO + + ! Smagorinsky coefficient is also enhanced in the six model levels beneath a vertical nest interface + IF (diff_inst(jg)%lvert_nest .AND. (diff_inst(jg)%nshift > 0)) THEN + enh_smag_fac(1) = MAX(0.333_vp, enh_smag_fac(1)) + enh_smag_fac(2) = MAX(0.25_vp, enh_smag_fac(2)) + enh_smag_fac(3) = MAX(0.20_vp, enh_smag_fac(3)) + enh_smag_fac(4) = MAX(0.16_vp, enh_smag_fac(4)) + enh_smag_fac(5) = MAX(0.12_vp, enh_smag_fac(5)) + enh_smag_fac(6) = MAX(0.08_vp, enh_smag_fac(6)) + ENDIF + + ! 
empirically determined scaling factor + diff_multfac_smag(:) = enh_smag_fac(:)*REAL(dtime,vp) + + ELSE + ltemp_diffu = .FALSE. + ENDIF + + !$ACC DATA CREATE(div, kh_c, kh_smag_e, kh_smag_ec, u_vert, v_vert, u_cell, v_cell, z_w_v, z_temp) & + !$ACC CREATE(z_nabla4_e, z_nabla4_e2, z_nabla2_e, z_nabla2_c, enh_diffu_3d, icount) & + !$ACC CREATE(z_vn_ie, z_vt_ie, dvndz, dvtdz, dwdz, dthvdz, dwdn, dwdt, kh_smag3d_e) & + !$ACC COPYIN(diff_multfac_vn, diff_multfac_n2w, diff_multfac_smag, smag_limit) & + !$ACC PRESENT(diff_inst, p_patch_diff) & + !$ACC PRESENT(ividx, ivblk, iecidx, iecblk, icidx, icblk, ieidx, ieblk) & + !$ACC IF(i_am_accel_node) + + !!! Following variables may be present in certain situations, but we don't want it to fail in the general case. + !!! Should actually be in a separate data region with correct IF condition. + !!! !$ACC div_ic, dwdx, dwdy, hdef_ic, & + + ! The diffusion is an intrinsic part of the NH solver, thus it is added to the timer + IF (diff_inst(jg)%ltimer) CALL timer_start(timer_nh_hdiffusion) + + IF (diffu_type == 4) THEN + + CALL nabla4_vec( vn, jg, p_patch_diff(jg), diff_inst(jg)%geofac_div, diff_inst(jg)%geofac_rot, & + z_nabla4_e, opt_rlstart=7,opt_nabla2=z_nabla2_e ) + ELSE IF ((diffu_type == 3 .OR. diffu_type == 5) & + .AND. diff_inst(jg)%discr_vn == 1 .AND. .NOT. diff_inst(jg)%lsmag_3d) THEN + + IF (diff_inst(jg)%p_test_run) THEN + !$ACC KERNELS PRESENT(u_vert, v_vert) ASYNC(1) IF(i_am_accel_node) + u_vert = 0._vp + v_vert = 0._vp + !$ACC END KERNELS + ENDIF + + ! RBF reconstruction of velocity at vertices + + CALL rbf_vec_interpol_vertex( vn, p_patch_diff(jg), & + diff_inst(jg)%rbf_vec_idx_v, diff_inst(jg)%rbf_vec_blk_v, & + diff_inst(jg)%rbf_vec_coeff_v, u_vert, v_vert, & + opt_rlend=min_rlvert_int, opt_acc_async=.TRUE. ) + rl_start = start_bdydiff_e + rl_end = min_rledge_int - 2 + + IF (diff_inst(jg)%itype_comm == 1 .OR. 
diff_inst(jg)%itype_comm == 3) THEN +#ifdef __MIXED_PRECISION + CALL sync_patch_array_mult_mp(SYNC_V,p_patch(jg),0,2,f3din1_sp=u_vert,f3din2_sp=v_vert, & + opt_varname="diffusion: u_vert and v_vert") +#else + CALL sync_patch_array_mult(SYNC_V,p_patch(jg),2,u_vert,v_vert, & + opt_varname="diffusion: u_vert and v_vert") +#endif + ENDIF + +!$OMP PARALLEL PRIVATE(i_startblk,i_endblk) + + i_startblk = p_patch_diff(jg)%edges%start_block(rl_start) + i_endblk = p_patch_diff(jg)%edges%end_block(rl_end) + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,vn_vert1,vn_vert2,vn_vert3,vn_vert4, & +!$OMP dvt_norm,dvt_tang), ICON_OMP_RUNTIME_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + ! Computation of wind field deformation + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = 1, nlev +#else +!$NEC outerloop_unroll(4) + DO jk = 1, nlev + DO je = i_startidx, i_endidx +#endif + + vn_vert1 = u_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,1)%v1 + & + v_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,1)%v2 + + vn_vert2 = u_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,2)%v1 + & + v_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,2)%v2 + + dvt_tang = p_patch_diff(jg)%edges%tangent_orientation(je,jb)* ( & + u_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,2)%v1 + & + v_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,2)%v2 - & + (u_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,1)%v1 + & + v_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + 
p_patch_diff(jg)%edges%dual_normal_vert(je,jb,1)%v2) ) + + vn_vert3 = u_vert(ividx(je,jb,3),jk,ivblk(je,jb,3)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,3)%v1 + & + v_vert(ividx(je,jb,3),jk,ivblk(je,jb,3)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,3)%v2 + + vn_vert4 = u_vert(ividx(je,jb,4),jk,ivblk(je,jb,4)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,4)%v1 + & + v_vert(ividx(je,jb,4),jk,ivblk(je,jb,4)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,4)%v2 + + dvt_norm = u_vert(ividx(je,jb,4),jk,ivblk(je,jb,4)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,4)%v1 + & + v_vert(ividx(je,jb,4),jk,ivblk(je,jb,4)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,4)%v2 - & + (u_vert(ividx(je,jb,3),jk,ivblk(je,jb,3)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,3)%v1 + & + v_vert(ividx(je,jb,3),jk,ivblk(je,jb,3)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,3)%v2) + ! Smagorinsky diffusion coefficient + kh_smag_e(je,jk,jb) = diff_multfac_smag(jk)*SQRT( ( & + (vn_vert4-vn_vert3)*p_patch_diff(jg)%edges%inv_vert_vert_length(je,jb)- & + dvt_tang*p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb) )**2 + ( & + (vn_vert2-vn_vert1)*p_patch_diff(jg)%edges%tangent_orientation(je,jb)* & + p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb) + & + dvt_norm*p_patch_diff(jg)%edges%inv_vert_vert_length(je,jb))**2 ) + + ! The factor of 4 comes from dividing by twice the "correct" length + z_nabla2_e(je,jk,jb) = 4._wp * ( & + (vn_vert4 + vn_vert3 - 2._wp*vn(je,jk,jb)) & + *p_patch_diff(jg)%edges%inv_vert_vert_length(je,jb)**2 + & + (vn_vert2 + vn_vert1 - 2._wp*vn(je,jk,jb)) & + *p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb)**2 ) + +#if defined (__LOOP_EXCHANGE) && !defined (_OPENACC) + ENDDO + ENDDO + + DO jk = 1, nlev + DO je = i_startidx, i_endidx +#endif + kh_smag_ec(je,jk,jb) = kh_smag_e(je,jk,jb) + ! 
Subtract part of the fourth-order background diffusion coefficient + kh_smag_e(je,jk,jb) = MAX(0._vp,kh_smag_e(je,jk,jb) - smag_offset) + ! Limit diffusion coefficient to the theoretical CFL stability threshold + kh_smag_e(je,jk,jb) = MIN(kh_smag_e(je,jk,jb),smag_limit(jk)) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDDO ! block jb +!$OMP END DO NOWAIT +!$OMP END PARALLEL + + ELSE IF ((diffu_type == 3 .OR. diffu_type == 5) .AND. diff_inst(jg)%discr_vn == 1) THEN + ! 3D Smagorinsky diffusion + IF (diff_inst(jg)%p_test_run) THEN + !$ACC KERNELS PRESENT(u_vert, v_vert, z_w_v) ASYNC(1) IF(i_am_accel_node) + u_vert = 0._vp + v_vert = 0._vp + z_w_v = 0._wp + !$ACC END KERNELS + ENDIF + + ! RBF reconstruction of velocity at vertices + + CALL rbf_vec_interpol_vertex( vn, p_patch_diff(jg), & + diff_inst(jg)%rbf_vec_idx_v, diff_inst(jg)%rbf_vec_blk_v, & + diff_inst(jg)%rbf_vec_coeff_v, u_vert, v_vert, & + opt_rlend=min_rlvert_int, opt_acc_async=.TRUE. ) + + rl_start = start_bdydiff_e + rl_end = min_rledge_int - 2 + + IF (diff_inst(jg)%itype_comm == 1 .OR. diff_inst(jg)%itype_comm == 3) THEN +#ifdef __MIXED_PRECISION + CALL sync_patch_array_mult_mp(SYNC_V,p_patch(jg),0,2,f3din1_sp=u_vert,f3din2_sp=v_vert, & + opt_varname="diffusion: u_vert and v_vert 2") +#else + CALL sync_patch_array_mult(SYNC_V,p_patch(jg),2,u_vert,v_vert, & + opt_varname="diffusion: u_vert and v_vert 2") +#endif + ENDIF + CALL cells2verts_scalar(w, p_patch_diff(jg), diff_inst(jg)%cells_aw_verts, z_w_v, opt_rlend=min_rlvert_int) + CALL sync_patch_array(SYNC_V,p_patch(jg),z_w_v,opt_varname="diffusion: z_w_v") + CALL sync_patch_array(SYNC_C,p_patch(jg),theta_v_ic,opt_varname="diffusion: theta_v_ic") + + fac2d = 0.0625_wp ! 
Factor of the 2D deformation field which is used as minimum of the 3D def field + +!$OMP PARALLEL PRIVATE(i_startblk,i_endblk) + + i_startblk = p_patch_diff(jg)%edges%start_block(rl_start) + i_endblk = p_patch_diff(jg)%edges%end_block(rl_end) + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,vn_vert1,vn_vert2,vn_vert3,vn_vert4,dvt_norm,dvt_tang, & +!$OMP z_vn_ie,z_vt_ie,dvndz,dvtdz,dwdz,dthvdz,dwdn,dwdt,kh_smag3d_e), ICON_OMP_RUNTIME_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = 2, nlev + DO je = i_startidx, i_endidx + z_vn_ie(je,jk) = diff_inst(jg)%wgtfac_e(je,jk,jb)*vn(je,jk,jb) + & + (1._wp - diff_inst(jg)%wgtfac_e(je,jk,jb))*vn(je,jk-1,jb) + z_vt_ie(je,jk) = diff_inst(jg)%wgtfac_e(je,jk,jb)*vt(je,jk,jb) + & + (1._wp - diff_inst(jg)%wgtfac_e(je,jk,jb))*vt(je,jk-1,jb) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR ASYNC(1) IF(i_am_accel_node) + DO je = i_startidx, i_endidx + z_vn_ie(je,1) = & + diff_inst(jg)%wgtfacq1_e(je,1,jb)*vn(je,1,jb) + & + diff_inst(jg)%wgtfacq1_e(je,2,jb)*vn(je,2,jb) + & + diff_inst(jg)%wgtfacq1_e(je,3,jb)*vn(je,3,jb) + z_vn_ie(je,nlevp1) = & + diff_inst(jg)%wgtfacq_e(je,1,jb)*vn(je,nlev,jb) + & + diff_inst(jg)%wgtfacq_e(je,2,jb)*vn(je,nlev-1,jb) + & + diff_inst(jg)%wgtfacq_e(je,3,jb)*vn(je,nlev-2,jb) + z_vt_ie(je,1) = & + diff_inst(jg)%wgtfacq1_e(je,1,jb)*vt(je,1,jb) + & + diff_inst(jg)%wgtfacq1_e(je,2,jb)*vt(je,2,jb) + & + diff_inst(jg)%wgtfacq1_e(je,3,jb)*vt(je,3,jb) + z_vt_ie(je,nlevp1) = & + diff_inst(jg)%wgtfacq_e(je,1,jb)*vt(je,nlev,jb) + & + diff_inst(jg)%wgtfacq_e(je,2,jb)*vt(je,nlev-1,jb) + & + diff_inst(jg)%wgtfacq_e(je,3,jb)*vt(je,nlev-2,jb) + ENDDO + !$ACC END PARALLEL LOOP + + ! 
Computation of wind field deformation + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = 1, nlev +#else +!$NEC outerloop_unroll(4) + DO jk = 1, nlev + DO je = i_startidx, i_endidx +#endif + + vn_vert1 = u_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,1)%v1 + & + v_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,1)%v2 + + vn_vert2 = u_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,2)%v1 + & + v_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,2)%v2 + + dvt_tang = p_patch_diff(jg)%edges%tangent_orientation(je,jb)* ( & + u_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,2)%v1 + & + v_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,2)%v2 - & + (u_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,1)%v1 + & + v_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,1)%v2) ) + + vn_vert3 = u_vert(ividx(je,jb,3),jk,ivblk(je,jb,3)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,3)%v1 + & + v_vert(ividx(je,jb,3),jk,ivblk(je,jb,3)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,3)%v2 + + vn_vert4 = u_vert(ividx(je,jb,4),jk,ivblk(je,jb,4)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,4)%v1 + & + v_vert(ividx(je,jb,4),jk,ivblk(je,jb,4)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,4)%v2 + + dvt_norm = u_vert(ividx(je,jb,4),jk,ivblk(je,jb,4)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,4)%v1 + & + v_vert(ividx(je,jb,4),jk,ivblk(je,jb,4)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,4)%v2 - & + (u_vert(ividx(je,jb,3),jk,ivblk(je,jb,3)) * & + 
p_patch_diff(jg)%edges%dual_normal_vert(je,jb,3)%v1 + & + v_vert(ividx(je,jb,3),jk,ivblk(je,jb,3)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,3)%v2) + + dvndz(je,jk) = (z_vn_ie(je,jk) - z_vn_ie(je,jk+1)) / diff_inst(jg)%ddqz_z_full_e(je,jk,jb) + dvtdz(je,jk) = (z_vt_ie(je,jk) - z_vt_ie(je,jk+1)) / diff_inst(jg)%ddqz_z_full_e(je,jk,jb) + + dwdz (je,jk) = & + (diff_inst(jg)%c_lin_e(je,1,jb)*(w(iecidx(je,jb,1),jk, iecblk(je,jb,1)) - & + w(iecidx(je,jb,1),jk+1,iecblk(je,jb,1)) ) + & + diff_inst(jg)%c_lin_e(je,2,jb)*(w(iecidx(je,jb,2),jk, iecblk(je,jb,2)) - & + w(iecidx(je,jb,2),jk+1,iecblk(je,jb,2)) ) ) / & + diff_inst(jg)%ddqz_z_full_e(je,jk,jb) + + dthvdz(je,jk) = & + (diff_inst(jg)%c_lin_e(je,1,jb)*(theta_v_ic(iecidx(je,jb,1),jk, iecblk(je,jb,1)) - & + theta_v_ic(iecidx(je,jb,1),jk+1,iecblk(je,jb,1)) ) + & + diff_inst(jg)%c_lin_e(je,2,jb)*(theta_v_ic(iecidx(je,jb,2),jk, iecblk(je,jb,2)) - & + theta_v_ic(iecidx(je,jb,2),jk+1,iecblk(je,jb,2)) ) ) / & + diff_inst(jg)%ddqz_z_full_e(je,jk,jb) + + dwdn (je,jk) = p_patch_diff(jg)%edges%inv_dual_edge_length(je,jb)* ( & + 0.5_wp*(w(iecidx(je,jb,1),jk, iecblk(je,jb,1)) + & + w(iecidx(je,jb,1),jk+1,iecblk(je,jb,1))) - & + 0.5_wp*(w(iecidx(je,jb,2),jk, iecblk(je,jb,2)) + & + w(iecidx(je,jb,2),jk+1,iecblk(je,jb,2))) ) + + dwdt (je,jk) = p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb) * & + p_patch_diff(jg)%edges%tangent_orientation(je,jb) * ( & + 0.5_wp*(z_w_v(ividx(je,jb,1),jk,ivblk(je,jb,1))+z_w_v(ividx(je,jb,1),jk+1,ivblk(je,jb,1))) - & + 0.5_wp*(z_w_v(ividx(je,jb,2),jk,ivblk(je,jb,2))+z_w_v(ividx(je,jb,2),jk+1,ivblk(je,jb,2))) ) + + kh_smag3d_e(je,jk) = 2._wp*( & + ( (vn_vert4-vn_vert3)*p_patch_diff(jg)%edges%inv_vert_vert_length(je,jb) )**2 + & + (dvt_tang*p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb))**2 + dwdz(je,jk)**2) + & + 0.5_wp *( (p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb) * & + p_patch_diff(jg)%edges%tangent_orientation(je,jb)*(vn_vert2-vn_vert1) + & + 
p_patch_diff(jg)%edges%inv_vert_vert_length(je,jb)*dvt_norm )**2 + & + (dvndz(je,jk) + dwdn(je,jk))**2 + (dvtdz(je,jk) + dwdt(je,jk))**2 ) - & + 3._wp*diff_inst(jg)%grav * dthvdz(je,jk) / ( & + diff_inst(jg)%c_lin_e(je,1,jb)*theta_v(iecidx(je,jb,1),jk,iecblk(je,jb,1)) + & + diff_inst(jg)%c_lin_e(je,2,jb)*theta_v(iecidx(je,jb,2),jk,iecblk(je,jb,2)) ) + + ! 2D Smagorinsky diffusion coefficient + kh_smag_e(je,jk,jb) = diff_multfac_smag(jk)*SQRT( MAX(kh_smag3d_e(je,jk), fac2d*( & + ((vn_vert4-vn_vert3)*p_patch_diff(jg)%edges%inv_vert_vert_length(je,jb)- & + dvt_tang*p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb) )**2 + ( & + (vn_vert2-vn_vert1)*p_patch_diff(jg)%edges%tangent_orientation(je,jb)* & + p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb) + & + dvt_norm*p_patch_diff(jg)%edges%inv_vert_vert_length(je,jb) )**2 ) ) ) + + ! The factor of 4 comes from dividing by twice the "correct" length + z_nabla2_e(je,jk,jb) = 4._wp * ( & + (vn_vert4 + vn_vert3 - 2._wp*vn(je,jk,jb)) & + *p_patch_diff(jg)%edges%inv_vert_vert_length(je,jb)**2 + & + (vn_vert2 + vn_vert1 - 2._wp*vn(je,jk,jb)) & + *p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb)**2 ) + + kh_smag_ec(je,jk,jb) = kh_smag_e(je,jk,jb) + ! Subtract part of the fourth-order background diffusion coefficient + kh_smag_e(je,jk,jb) = MAX(0._vp,kh_smag_e(je,jk,jb) - smag_offset) + ! Limit diffusion coefficient to the theoretical CFL stability threshold + kh_smag_e(je,jk,jb) = MIN(kh_smag_e(je,jk,jb),smag_limit(jk)) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + + ENDDO +!$OMP END DO NOWAIT +!$OMP END PARALLEL + + ELSE IF ((diffu_type == 3 .OR. diffu_type == 5) .AND. diff_inst(jg)%discr_vn >= 2) THEN + + ! RBF reconstruction of velocity at vertices and cells + + CALL rbf_vec_interpol_vertex( vn, p_patch_diff(jg), & + diff_inst(jg)%rbf_vec_idx_v, diff_inst(jg)%rbf_vec_blk_v, & + diff_inst(jg)%rbf_vec_coeff_v, u_vert, v_vert, & + opt_rlend=min_rlvert_int-1, opt_acc_async=.TRUE. ) + + ! 
DA: This wait ideally should be removed + !$ACC WAIT + + IF (diff_inst(jg)%discr_vn == 2) THEN + + CALL rbf_vec_interpol_cell( vn, p_patch_diff(jg), & + diff_inst(jg)%rbf_vec_idx_v, diff_inst(jg)%rbf_vec_blk_v, & + diff_inst(jg)%rbf_vec_coeff_v, u_cell, v_cell, & + opt_rlend=min_rlcell_int-1 ) + ELSE + + CALL edges2cells_vector( vn, vt, p_patch_diff(jg), diff_inst(jg)%e_bln_c_u, diff_inst(jg)%e_bln_c_v, & + u_cell, v_cell, opt_rlend=min_rlcell_int-1 ) + ENDIF + + IF (diff_inst(jg)%p_test_run) THEN + !$ACC KERNELS IF(i_am_accel_node) ASYNC(1) + z_nabla2_e = 0._wp + !$ACC END KERNELS + ENDIF + +!$OMP PARALLEL PRIVATE(i_startblk,i_endblk,rl_start,rl_end) + + rl_start = start_bdydiff_e + rl_end = min_rledge_int - 1 + + i_startblk = p_patch_diff(jg)%edges%start_block(rl_start) + i_endblk = p_patch_diff(jg)%edges%end_block(rl_end) + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,vn_vert1,vn_vert2,vn_cell1,vn_cell2,& +!$OMP dvt_norm,dvt_tang), ICON_OMP_RUNTIME_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + ! 
Computation of wind field deformation + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = 1, nlev +#else +!$NEC outerloop_unroll(4) + DO jk = 1, nlev + DO je = i_startidx, i_endidx +#endif + + vn_vert1 = u_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,1)%v1 + & + v_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,1)%v2 + + vn_vert2 = u_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,2)%v1 + & + v_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,2)%v2 + + dvt_tang = p_patch_diff(jg)%edges%tangent_orientation(je,jb)* ( & + u_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,2)%v1 + & + v_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,2)%v2 - & + (u_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,1)%v1 + & + v_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%dual_normal_vert(je,jb,1)%v2) ) + + vn_cell1 = u_cell(iecidx(je,jb,1),jk,iecblk(je,jb,1)) * & + p_patch_diff(jg)%edges%primal_normal_cell(je,jb,1)%v1 + & + v_cell(iecidx(je,jb,1),jk,iecblk(je,jb,1)) * & + p_patch_diff(jg)%edges%primal_normal_cell(je,jb,1)%v2 + + vn_cell2 = u_cell(iecidx(je,jb,2),jk,iecblk(je,jb,2)) * & + p_patch_diff(jg)%edges%primal_normal_cell(je,jb,2)%v1 + & + v_cell(iecidx(je,jb,2),jk,iecblk(je,jb,2)) * & + p_patch_diff(jg)%edges%primal_normal_cell(je,jb,2)%v2 + + dvt_norm = u_cell(iecidx(je,jb,2),jk,iecblk(je,jb,2)) * & + p_patch_diff(jg)%edges%dual_normal_cell(je,jb,2)%v1 + & + v_cell(iecidx(je,jb,2),jk,iecblk(je,jb,2)) * & + p_patch_diff(jg)%edges%dual_normal_cell(je,jb,2)%v2 - & + (u_cell(iecidx(je,jb,1),jk,iecblk(je,jb,1)) * & + 
p_patch_diff(jg)%edges%dual_normal_cell(je,jb,1)%v1 + & + v_cell(iecidx(je,jb,1),jk,iecblk(je,jb,1)) * & + p_patch_diff(jg)%edges%dual_normal_cell(je,jb,1)%v2) + + + ! Smagorinsky diffusion coefficient + kh_smag_e(je,jk,jb) = diff_multfac_smag(jk)*SQRT( ( & + (vn_cell2-vn_cell1)*p_patch_diff(jg)%edges%inv_dual_edge_length(je,jb)- & + dvt_tang*p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb) )**2 + ( & + (vn_vert2-vn_vert1)*p_patch_diff(jg)%edges%tangent_orientation(je,jb)* & + p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb) + & + dvt_norm*p_patch_diff(jg)%edges%inv_dual_edge_length(je,jb))**2 ) + + ! The factor of 4 comes from dividing by twice the "correct" length + z_nabla2_e(je,jk,jb) = 4._wp * ( & + (vn_cell2 + vn_cell1 - 2._wp*vn(je,jk,jb)) & + *p_patch_diff(jg)%edges%inv_dual_edge_length(je,jb)**2 + & + (vn_vert2 + vn_vert1 - 2._wp*vn(je,jk,jb)) & + *p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb)**2 ) + +#ifndef _OPENACC + ENDDO + ENDDO + + DO jk = 1, nlev + DO je = i_startidx, i_endidx +#endif + + kh_smag_ec(je,jk,jb) = kh_smag_e(je,jk,jb) + ! Subtract part of the fourth-order background diffusion coefficient + kh_smag_e(je,jk,jb) = MAX(0._vp,kh_smag_e(je,jk,jb) - smag_offset) + ! Limit diffusion coefficient to the theoretical CFL stability threshold + kh_smag_e(je,jk,jb) = MIN(kh_smag_e(je,jk,jb),smag_limit(jk)) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + + ENDDO +!$OMP END DO NOWAIT +!$OMP END PARALLEL + + ENDIF + + ! Compute input quantities for turbulence scheme + IF ((diffu_type == 3 .OR. diffu_type == 5) .AND. & + (diff_inst(jg)%itype_sher >= 1 .OR. 
diff_inst(jg)%ltkeshs)) THEN + +!$OMP PARALLEL PRIVATE(i_startblk,i_endblk) + rl_start = grf_bdywidth_c+1 + rl_end = min_rlcell_int + + i_startblk = p_patch_diff(jg)%cells%start_block(rl_start) + i_endblk = p_patch_diff(jg)%cells%end_block(rl_end) + +!$OMP DO PRIVATE(jk,jc,jb,i_startidx,i_endidx,kh_c,div), ICON_OMP_RUNTIME_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_c(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) +#ifdef __LOOP_EXCHANGE + DO jc = i_startidx, i_endidx + DO jk = 1, nlev +#else + DO jk = 1, nlev + DO jc = i_startidx, i_endidx +#endif + + kh_c(jc,jk) = (kh_smag_ec(ieidx(jc,jb,1),jk,ieblk(jc,jb,1))*diff_inst(jg)%e_bln_c_s(jc,1,jb) + & + kh_smag_ec(ieidx(jc,jb,2),jk,ieblk(jc,jb,2))*diff_inst(jg)%e_bln_c_s(jc,2,jb) + & + kh_smag_ec(ieidx(jc,jb,3),jk,ieblk(jc,jb,3))*diff_inst(jg)%e_bln_c_s(jc,3,jb))/ & + diff_multfac_smag(jk) + + div(jc,jk) = vn(ieidx(jc,jb,1),jk,ieblk(jc,jb,1))*diff_inst(jg)%geofac_div(jc,1,jb) + & + vn(ieidx(jc,jb,2),jk,ieblk(jc,jb,2))*diff_inst(jg)%geofac_div(jc,2,jb) + & + vn(ieidx(jc,jb,3),jk,ieblk(jc,jb,3))*diff_inst(jg)%geofac_div(jc,3,jb) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = 2, nlev ! levels 1 and nlevp1 are unused +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + + div_ic(jc,jk,jb) = diff_inst(jg)%wgtfac_c(jc,jk,jb)*div(jc,jk) + & + (1._wp-diff_inst(jg)%wgtfac_c(jc,jk,jb))*div(jc,jk-1) + + hdef_ic(jc,jk,jb) = (diff_inst(jg)%wgtfac_c(jc,jk,jb)*kh_c(jc,jk) + & + (1._wp-diff_inst(jg)%wgtfac_c(jc,jk,jb))*kh_c(jc,jk-1))**2 + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDDO +!$OMP END DO +!$OMP END PARALLEL + + ENDIF + + IF (diffu_type == 5) THEN ! 
Add fourth-order background diffusion + + IF (diff_inst(jg)%discr_vn > 1) THEN + CALL sync_patch_array(SYNC_E,p_patch(jg),z_nabla2_e, & + opt_varname="diffusion: nabla2_e") + END IF + + ! Interpolate nabla2(v) to vertices in order to compute nabla2(nabla2(v)) + + IF (diff_inst(jg)%p_test_run) THEN + !$ACC KERNELS IF(i_am_accel_node) + u_vert = 0._wp + v_vert = 0._wp + !$ACC END KERNELS + ENDIF + + CALL rbf_vec_interpol_vertex( z_nabla2_e, p_patch_diff(jg), & + diff_inst(jg)%rbf_vec_idx_v, diff_inst(jg)%rbf_vec_blk_v, & + diff_inst(jg)%rbf_vec_coeff_v, u_vert, v_vert, & + opt_rlstart=4, opt_rlend=min_rlvert_int, opt_acc_async=.TRUE. ) + rl_start = grf_bdywidth_e+1 + rl_end = min_rledge_int + + IF (diff_inst(jg)%itype_comm == 1 .OR. diff_inst(jg)%itype_comm == 3) THEN +#ifdef __MIXED_PRECISION + CALL sync_patch_array_mult_mp(SYNC_V,p_patch(jg),0,2,f3din1_sp=u_vert,f3din2_sp=v_vert, & + opt_varname="diffusion: u_vert and v_vert 3") +#else + CALL sync_patch_array_mult(SYNC_V,p_patch(jg),2,u_vert,v_vert, & + opt_varname="diffusion: u_vert and v_vert 3") +#endif + ENDIF + +!$OMP PARALLEL PRIVATE(i_startblk,i_endblk) + + i_startblk = p_patch_diff(jg)%edges%start_block(rl_start) + i_endblk = p_patch_diff(jg)%edges%end_block(rl_end) + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,nabv_tang,nabv_norm,z_nabla4_e2,z_d_vn_hdf), ICON_OMP_RUNTIME_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + ! 
Compute nabla4(v) + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx + DO jk = 1, nlev +#else +!$NEC outerloop_unroll(4) + DO jk = 1, nlev + DO je = i_startidx, i_endidx +#endif + + nabv_tang = u_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,1)%v1 + & + v_vert(ividx(je,jb,1),jk,ivblk(je,jb,1)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,1)%v2 + & + u_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,2)%v1 + & + v_vert(ividx(je,jb,2),jk,ivblk(je,jb,2)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,2)%v2 + + nabv_norm = u_vert(ividx(je,jb,3),jk,ivblk(je,jb,3)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,3)%v1 + & + v_vert(ividx(je,jb,3),jk,ivblk(je,jb,3)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,3)%v2 + & + u_vert(ividx(je,jb,4),jk,ivblk(je,jb,4)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,4)%v1 + & + v_vert(ividx(je,jb,4),jk,ivblk(je,jb,4)) * & + p_patch_diff(jg)%edges%primal_normal_vert(je,jb,4)%v2 + + ! The factor of 4 comes from dividing by twice the "correct" length + z_nabla4_e2(je,jk) = 4._wp * ( & + (nabv_norm - 2._wp*z_nabla2_e(je,jk,jb)) & + *p_patch_diff(jg)%edges%inv_vert_vert_length(je,jb)**2 + & + (nabv_tang - 2._wp*z_nabla2_e(je,jk,jb)) & + *p_patch_diff(jg)%edges%inv_primal_edge_length(je,jb)**2 ) + + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + + ! Apply diffusion for the case of diffu_type = 5 + IF ( jg == 1 .AND. diff_inst(jg)%l_limited_area .OR. jg > 1 .AND. .NOT. diff_inst(jg)%lfeedback ) THEN + ! + ! Domains with lateral boundary and nests without feedback + ! + !$ACC PARALLEL LOOP DEFAULT(PRESENT) PRIVATE(z_d_vn_hdf) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! 
+ z_d_vn_hdf = p_patch_diff(jg)%edges%area_edge(je,jb) & + & * ( MAX(nudgezone_diff*diff_inst(jg)%nudgecoeff_e(je,jb), & + & REAL(kh_smag_e(je,jk,jb),wp)) * z_nabla2_e(je,jk,jb) & + & - p_patch_diff(jg)%edges%area_edge(je,jb) & + & * diff_multfac_vn(jk) * z_nabla4_e2(je,jk) ) + ! + vn(je,jk,jb) = vn(je,jk,jb) + z_d_vn_hdf + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF ( diff_inst(jg)%ddt_vn_hdf_is_associated) THEN + ddt_vn_hdf(je,jk,jb) = ddt_vn_hdf(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF + ! + IF ( diff_inst(jg)%ddt_vn_dyn_is_associated) THEN + ddt_vn_dyn(je,jk,jb) = ddt_vn_dyn(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF +#endif + ! + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + + ELSE IF (jg > 1) THEN + ! + ! Nests with feedback + ! + !$ACC PARALLEL LOOP DEFAULT(PRESENT) PRIVATE(z_d_vn_hdf) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! + z_d_vn_hdf = p_patch_diff(jg)%edges%area_edge(je,jb) & + & * ( kh_smag_e(je,jk,jb) * z_nabla2_e(je,jk,jb) & + & - p_patch_diff(jg)%edges%area_edge(je,jb) & + & * MAX(diff_multfac_vn(jk),bdy_diff*diff_inst(jg)%nudgecoeff_e(je,jb)) * z_nabla4_e2(je,jk) ) + ! + vn(je,jk,jb) = vn(je,jk,jb) + z_d_vn_hdf + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF ( diff_inst(jg)%ddt_vn_hdf_is_associated) THEN + ddt_vn_hdf(je,jk,jb) = ddt_vn_hdf(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF + ! + IF ( diff_inst(jg)%ddt_vn_dyn_is_associated) THEN + ddt_vn_dyn(je,jk,jb) = ddt_vn_dyn(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF +#endif + ! + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + + ELSE + + ! + ! Global domains + ! + !$ACC PARALLEL LOOP DEFAULT(PRESENT) PRIVATE(z_d_vn_hdf) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! 
+ z_d_vn_hdf = p_patch_diff(jg)%edges%area_edge(je,jb) & + & * ( kh_smag_e(je,jk,jb) * z_nabla2_e(je,jk,jb) & + & - p_patch_diff(jg)%edges%area_edge(je,jb) & + & * diff_multfac_vn(jk) * z_nabla4_e2(je,jk) ) + + ! + vn(je,jk,jb) = vn(je,jk,jb) + z_d_vn_hdf + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF ( diff_inst(jg)%ddt_vn_hdf_is_associated) THEN + ddt_vn_hdf(je,jk,jb) = ddt_vn_hdf(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF + ! + IF ( diff_inst(jg)%ddt_vn_dyn_is_associated) THEN + ddt_vn_dyn(je,jk,jb) = ddt_vn_dyn(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF +#endif + ! + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDIF + + ENDDO +!$OMP END DO NOWAIT +!$OMP END PARALLEL + + ENDIF + + ! Apply diffusion for the cases of diffu_type = 3 or 4 + +!$OMP PARALLEL PRIVATE(i_startblk,i_endblk,rl_start,rl_end) + + rl_start = grf_bdywidth_e+1 + rl_end = min_rledge_int + + i_startblk = p_patch_diff(jg)%edges%start_block(rl_start) + i_endblk = p_patch_diff(jg)%edges%end_block(rl_end) + + IF (diffu_type == 3) THEN ! Only Smagorinsky diffusion + IF ( jg == 1 .AND. diff_inst(jg)%l_limited_area .OR. jg > 1 .AND. .NOT. diff_inst(jg)%lfeedback ) THEN + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,z_d_vn_hdf) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) PRIVATE(z_d_vn_hdf) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! + z_d_vn_hdf = p_patch_diff(jg)%edges%area_edge(je,jb) & + & * MAX(nudgezone_diff*diff_inst(jg)%nudgecoeff_e(je,jb),REAL(kh_smag_e(je,jk,jb),wp)) & + & * z_nabla2_e(je,jk,jb) + ! + vn(je,jk,jb) = vn(je,jk,jb) + z_d_vn_hdf + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF ( diff_inst(jg)%ddt_vn_hdf_is_associated) THEN + ddt_vn_hdf(je,jk,jb) = ddt_vn_hdf(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF + ! 
+ IF ( diff_inst(jg)%ddt_vn_dyn_is_associated) THEN + ddt_vn_dyn(je,jk,jb) = ddt_vn_dyn(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF +#endif + ! + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDDO +!$OMP END DO + + ELSE + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,z_d_vn_hdf) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) PRIVATE(z_d_vn_hdf) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! + z_d_vn_hdf = p_patch_diff(jg)%edges%area_edge(je,jb) * kh_smag_e(je,jk,jb) * z_nabla2_e(je,jk,jb) + ! + vn(je,jk,jb) = vn(je,jk,jb) + z_d_vn_hdf + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF ( diff_inst(jg)%ddt_vn_hdf_is_associated) THEN + ddt_vn_hdf(je,jk,jb) = ddt_vn_hdf(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF + ! + IF ( diff_inst(jg)%ddt_vn_dyn_is_associated) THEN + ddt_vn_dyn(je,jk,jb) = ddt_vn_dyn(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF +#endif + ! + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDDO +!$OMP END DO + + ENDIF + + ELSE IF (diffu_type == 4) THEN + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,z_d_vn_hdf) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) PRIVATE(z_d_vn_hdf) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! + z_d_vn_hdf = - p_patch_diff(jg)%edges%area_edge(je,jb)*p_patch_diff(jg)%edges%area_edge(je,jb) & + & * diff_multfac_vn(jk) * z_nabla4_e(je,jk,jb) + ! + vn(je,jk,jb) = vn(je,jk,jb) + z_d_vn_hdf + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF ( diff_inst(jg)%ddt_vn_hdf_is_associated) THEN + ddt_vn_hdf(je,jk,jb) = ddt_vn_hdf(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF + ! 
+ IF ( diff_inst(jg)%ddt_vn_dyn_is_associated) THEN + ddt_vn_dyn(je,jk,jb) = ddt_vn_dyn(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF +#endif + ! + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDDO +!$OMP END DO + + ENDIF + + IF (diff_inst(jg)%l_limited_area .OR. jg > 1) THEN + + ! Lateral boundary diffusion for vn + i_startblk = p_patch_diff(jg)%edges%start_block(start_bdydiff_e) + i_endblk = p_patch_diff(jg)%edges%end_block(grf_bdywidth_e) + +!$OMP DO PRIVATE(je,jk,jb,i_startidx,i_endidx,z_d_vn_hdf) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, start_bdydiff_e, grf_bdywidth_e) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) PRIVATE(z_d_vn_hdf) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! + z_d_vn_hdf = p_patch_diff(jg)%edges%area_edge(je,jb) * fac_bdydiff_v * z_nabla2_e(je,jk,jb) + ! + vn(je,jk,jb) = vn(je,jk,jb) + z_d_vn_hdf + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF ( diff_inst(jg)%ddt_vn_hdf_is_associated) THEN + ddt_vn_hdf(je,jk,jb) = ddt_vn_hdf(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF + ! + IF ( diff_inst(jg)%ddt_vn_dyn_is_associated) THEN + ddt_vn_dyn(je,jk,jb) = ddt_vn_dyn(je,jk,jb) + z_d_vn_hdf * r_dtimensubsteps + END IF +#endif + ! + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDDO +!$OMP END DO + + ENDIF ! vn boundary diffusion + + IF (diff_inst(jg)%lhdiff_rcf .AND. diff_inst(jg)%lhdiff_w) THEN ! add diffusion on vertical wind speed + ! remark: the surface level (nlevp1) is excluded because w is diagnostic there + + IF (diff_inst(jg)%l_limited_area .AND. 
jg == 1) THEN + rl_start = grf_bdywidth_c+1 + ELSE + rl_start = grf_bdywidth_c + ENDIF + rl_end = min_rlcell_int-1 + + i_startblk = p_patch_diff(jg)%cells%start_block(rl_start) + i_endblk = p_patch_diff(jg)%cells%end_block(rl_end) + +!$OMP DO PRIVATE(jk,jc,jb,i_startidx,i_endidx), ICON_OMP_RUNTIME_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_c(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) +#ifdef __LOOP_EXCHANGE + DO jc = i_startidx, i_endidx +!DIR$ IVDEP +#ifdef _CRAYFTN +!DIR$ PREFERVECTOR +#endif + DO jk = 1, nlev +#else + DO jk = 1, nlev + DO jc = i_startidx, i_endidx +#endif + z_nabla2_c(jc,jk,jb) = & + w(jc,jk,jb) *diff_inst(jg)%geofac_n2s(jc,1,jb) + & + w(icidx(jc,jb,1),jk,icblk(jc,jb,1))*diff_inst(jg)%geofac_n2s(jc,2,jb) + & + w(icidx(jc,jb,2),jk,icblk(jc,jb,2))*diff_inst(jg)%geofac_n2s(jc,3,jb) + & + w(icidx(jc,jb,3),jk,icblk(jc,jb,3))*diff_inst(jg)%geofac_n2s(jc,4,jb) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + + IF (diff_inst(jg)%itype_sher == 2) THEN ! 
compute horizontal gradients of w + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) +#ifdef __LOOP_EXCHANGE + DO jc = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = 2, nlev +#else + DO jk = 2, nlev + DO jc = i_startidx, i_endidx +#endif + dwdx(jc,jk,jb) = diff_inst(jg)%geofac_grg(jc,1,jb,1)*w(jc,jk,jb) + & + diff_inst(jg)%geofac_grg(jc,2,jb,1)*w(icidx(jc,jb,1),jk,icblk(jc,jb,1)) + & + diff_inst(jg)%geofac_grg(jc,3,jb,1)*w(icidx(jc,jb,2),jk,icblk(jc,jb,2)) + & + diff_inst(jg)%geofac_grg(jc,4,jb,1)*w(icidx(jc,jb,3),jk,icblk(jc,jb,3)) + + dwdy(jc,jk,jb) = diff_inst(jg)%geofac_grg(jc,1,jb,2)*w(jc,jk,jb) + & + diff_inst(jg)%geofac_grg(jc,2,jb,2)*w(icidx(jc,jb,1),jk,icblk(jc,jb,1)) + & + diff_inst(jg)%geofac_grg(jc,3,jb,2)*w(icidx(jc,jb,2),jk,icblk(jc,jb,2)) + & + diff_inst(jg)%geofac_grg(jc,4,jb,2)*w(icidx(jc,jb,3),jk,icblk(jc,jb,3)) + + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDIF + + ENDDO +!$OMP END DO + + IF (diff_inst(jg)%l_limited_area .AND. jg == 1) THEN + rl_start = 0 + ELSE + rl_start = grf_bdywidth_c+1 + ENDIF + rl_end = min_rlcell_int + + i_startblk = p_patch_diff(jg)%cells%start_block(rl_start) + i_endblk = p_patch_diff(jg)%cells%end_block(rl_end) + + +!$OMP DO PRIVATE(jk,jc,jb,i_startidx,i_endidx), ICON_OMP_RUNTIME_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_c(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) +#ifdef __LOOP_EXCHANGE + DO jc = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = 1, nlev +#else + DO jk = 1, nlev + DO jc = i_startidx, i_endidx +#endif + w(jc,jk,jb) = w(jc,jk,jb) - diff_multfac_w * p_patch_diff(jg)%cells%area(jc,jb)**2 * & + (z_nabla2_c(jc,jk,jb) *diff_inst(jg)%geofac_n2s(jc,1,jb) + & + z_nabla2_c(icidx(jc,jb,1),jk,icblk(jc,jb,1))*diff_inst(jg)%geofac_n2s(jc,2,jb) + & + z_nabla2_c(icidx(jc,jb,2),jk,icblk(jc,jb,2))*diff_inst(jg)%geofac_n2s(jc,3,jb) + 
& + z_nabla2_c(icidx(jc,jb,3),jk,icblk(jc,jb,3))*diff_inst(jg)%geofac_n2s(jc,4,jb)) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + + ! Add nabla2 diffusion in upper damping layer (if present) + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = 2, diff_inst(jg)%nrdmax +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + w(jc,jk,jb) = w(jc,jk,jb) + & + diff_multfac_n2w(jk) * p_patch_diff(jg)%cells%area(jc,jb) * z_nabla2_c(jc,jk,jb) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + + ENDDO +!$OMP END DO + + ENDIF ! w diffusion + +!$OMP END PARALLEL + + IF (diff_inst(jg)%itype_comm == 1 .OR. diff_inst(jg)%itype_comm == 3) THEN + CALL sync_patch_array(SYNC_E, p_patch(jg), vn,opt_varname="diffusion: vn sync") + ENDIF + + IF (ltemp_diffu) THEN ! Smagorinsky temperature diffusion + + IF (diff_inst(jg)%l_zdiffu_t) THEN + icell => diff_inst(jg)%zd_indlist + iblk => diff_inst(jg)%zd_blklist + ilev => diff_inst(jg)%zd_vertidx + ! iedge => diff_inst(jg)%zd_edgeidx + ! iedblk => diff_inst(jg)%zd_edgeblk + vcoef => diff_inst(jg)%zd_intcoef + ! blcoef => diff_inst(jg)%zd_e2cell + zd_geofac => diff_inst(jg)%zd_geofac + +!!! nproma_zdiffu = cpu_min_nproma(diff_inst(jg)%nproma,256) +#ifdef _OPENACC + nproma_zdiffu = diff_inst(jg)%nproma +#else + nproma_zdiffu = MIN(diff_inst(jg)%nproma,256) +#endif + nblks_zdiffu = INT(diff_inst(jg)%zd_listdim/nproma_zdiffu) + npromz_zdiffu = MOD(diff_inst(jg)%zd_listdim,nproma_zdiffu) + IF (npromz_zdiffu > 0) THEN + nblks_zdiffu = nblks_zdiffu + 1 + ELSE + npromz_zdiffu = nproma_zdiffu + ENDIF + ENDIF + +!$OMP PARALLEL PRIVATE(rl_start,rl_end,i_startblk,i_endblk) + + ! Enhance Smagorinsky diffusion coefficient in the presence of excessive grid-point cold pools + ! This is restricted to the two lowest model levels + ! 
+ rl_start = grf_bdywidth_c + rl_end = min_rlcell_int-1 + + i_startblk = p_patch_diff(jg)%cells%start_block(rl_start) + i_endblk = p_patch_diff(jg)%cells%end_block(rl_end) + +!$OMP DO PRIVATE(jk,jc,jb,i_startidx,i_endidx,ic,tdiff,trefdiff), ICON_OMP_RUNTIME_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_c(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + ic = 0 + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = nlev-1, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + ! Perturbation potential temperature difference between local point and average of the three neighbors + tdiff = theta_v(jc,jk,jb) - & + (theta_v(icidx(jc,jb,1),jk,icblk(jc,jb,1)) + & + theta_v(icidx(jc,jb,2),jk,icblk(jc,jb,2)) + & + theta_v(icidx(jc,jb,3),jk,icblk(jc,jb,3)) ) / 3._wp + trefdiff = diff_inst(jg)%theta_ref_mc(jc,jk,jb) - & + (diff_inst(jg)%theta_ref_mc(icidx(jc,jb,1),jk,icblk(jc,jb,1)) + & + diff_inst(jg)%theta_ref_mc(icidx(jc,jb,2),jk,icblk(jc,jb,2)) + & + diff_inst(jg)%theta_ref_mc(icidx(jc,jb,3),jk,icblk(jc,jb,3)) ) / 3._wp + + ! Enhanced horizontal diffusion is applied if the theta perturbation is either + ! - at least 5 K colder than the average of the neighbor points on valley points (determined by trefdiff < 0.) or + ! - at least 7.5 K colder than the average of the neighbor points otherwise + IF (tdiff-trefdiff < thresh_tdiff .AND. trefdiff < 0._wp .OR. tdiff-trefdiff < 1.5_wp*thresh_tdiff) THEN +#ifndef _OPENACC + ic = ic+1 + iclist(ic,jb) = jc + iklist(ic,jb) = jk + tdlist(ic,jb) = thresh_tdiff - tdiff + trefdiff +#else + ! Enhance Smagorinsky coefficients at the three edges of the cells included in the list +! Attention: this operation is neither vectorizable nor OpenMP-parallelizable (race conditions!) + enh_diffu_3d(jc,jk,jb) = (thresh_tdiff - tdiff + trefdiff)*5.e-4_vp + ELSE + enh_diffu_3d(jc,jk,jb) = -HUGE(0._vp) ! 
In order that this is never taken as the MAX +#endif + ENDIF + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + icount(jb) = ic + + ENDDO +!$OMP END DO + + ! Enhance Smagorinsky coefficients at the three edges of the cells included in the list + ! Attention: this operation is neither vectorizable nor OpenMP-parallelizable (race conditions!) + +#ifndef _OPENACC +!$OMP MASTER + DO jb = i_startblk,i_endblk + + IF (icount(jb) > 0) THEN + DO ic = 1, icount(jb) + jc = iclist(ic,jb) + jk = iklist(ic,jb) + enh_diffu = tdlist(ic,jb)*5.e-4_vp + kh_smag_e(ieidx(jc,jb,1),jk,ieblk(jc,jb,1)) = MAX(enh_diffu,kh_smag_e(ieidx(jc,jb,1),jk,ieblk(jc,jb,1))) + kh_smag_e(ieidx(jc,jb,2),jk,ieblk(jc,jb,2)) = MAX(enh_diffu,kh_smag_e(ieidx(jc,jb,2),jk,ieblk(jc,jb,2))) + kh_smag_e(ieidx(jc,jb,3),jk,ieblk(jc,jb,3)) = MAX(enh_diffu,kh_smag_e(ieidx(jc,jb,3),jk,ieblk(jc,jb,3))) + ENDDO + ENDIF + + ENDDO + +!$OMP END MASTER +!$OMP BARRIER + +#else + + rl_start = grf_bdywidth_e+1 + rl_end = min_rledge_int + + i_startblk = p_patch_diff(jg)%edges%start_block(rl_start) + i_endblk = p_patch_diff(jg)%edges%end_block(rl_end) + + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch_diff(jg), jb, i_startblk, i_endblk, i_startidx, i_endidx, rl_start, rl_end) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = nlev-1, nlev + DO je = i_startidx, i_endidx + kh_smag_e(je,jk,jb) = MAX(kh_smag_e(je,jk,jb), enh_diffu_3d(iecidx(je,jb,1),jk,iecblk(je,jb,1)), & + enh_diffu_3d(iecidx(je,jb,2),jk,iecblk(je,jb,2)) ) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDDO +#endif + + IF (diff_inst(jg)%discr_t == 1) THEN ! 
use discretization K*nabla(theta) + + rl_start = grf_bdywidth_c+1 + rl_end = min_rlcell_int + + i_startblk = p_patch_diff(jg)%cells%start_block(rl_start) + i_endblk = p_patch_diff(jg)%cells%end_block(rl_end) + +!$OMP DO PRIVATE(jk,jc,jb,i_startidx,i_endidx), ICON_OMP_RUNTIME_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_c(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + ! interpolated diffusion coefficient times nabla2(theta) + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) +#ifdef __LOOP_EXCHANGE + DO jc = i_startidx, i_endidx +!DIR$ IVDEP +#ifdef _CRAYFTN +!DIR$ PREFERVECTOR +#endif + DO jk = 1, nlev +#else + DO jk = 1, nlev + DO jc = i_startidx, i_endidx +#endif + z_temp(jc,jk,jb) = & + (kh_smag_e(ieidx(jc,jb,1),jk,ieblk(jc,jb,1))*diff_inst(jg)%e_bln_c_s(jc,1,jb) + & + kh_smag_e(ieidx(jc,jb,2),jk,ieblk(jc,jb,2))*diff_inst(jg)%e_bln_c_s(jc,2,jb) + & + kh_smag_e(ieidx(jc,jb,3),jk,ieblk(jc,jb,3))*diff_inst(jg)%e_bln_c_s(jc,3,jb)) * & + (theta_v(jc,jk,jb) *diff_inst(jg)%geofac_n2s(jc,1,jb) + & + theta_v(icidx(jc,jb,1),jk,icblk(jc,jb,1))*diff_inst(jg)%geofac_n2s(jc,2,jb) + & + theta_v(icidx(jc,jb,2),jk,icblk(jc,jb,2))*diff_inst(jg)%geofac_n2s(jc,3,jb) + & + theta_v(icidx(jc,jb,3),jk,icblk(jc,jb,3))*diff_inst(jg)%geofac_n2s(jc,4,jb)) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDDO +!$OMP END DO + + ELSE IF (diff_inst(jg)%discr_t == 2) THEN ! use conservative discretization div(k*grad(theta)) + + rl_start = grf_bdywidth_e + rl_end = min_rledge_int - 1 + + i_startblk = p_patch_diff(jg)%edges%start_block(rl_start) + i_endblk = p_patch_diff(jg)%edges%end_block(rl_end) + +!$OMP DO PRIVATE(jk,je,jb,i_startidx,i_endidx), ICON_OMP_RUNTIME_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + ! 
compute kh_smag_e * grad(theta) (stored in z_nabla2_e for memory efficiency) + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP +#ifdef _CRAYFTN +!DIR$ PREFERVECTOR +#endif + DO jk = 1, nlev +#else + DO jk = 1, nlev + DO je = i_startidx, i_endidx +#endif + z_nabla2_e(je,jk,jb) = kh_smag_e(je,jk,jb) * & + p_patch_diff(jg)%edges%inv_dual_edge_length(je,jb)* & + (theta_v(iecidx(je,jb,2),jk,iecblk(je,jb,2)) - & + theta_v(iecidx(je,jb,1),jk,iecblk(je,jb,1))) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDDO +!$OMP END DO + + rl_start = grf_bdywidth_c+1 + rl_end = min_rlcell_int + + i_startblk = p_patch_diff(jg)%cells%start_block(rl_start) + i_endblk = p_patch_diff(jg)%cells%end_block(rl_end) + +!$OMP DO PRIVATE(jk,jc,jb,i_startidx,i_endidx), ICON_OMP_RUNTIME_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_c(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + ! now compute the divergence of the quantity above + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) +#ifdef __LOOP_EXCHANGE + DO jc = i_startidx, i_endidx + DO jk = 1, nlev +#else + DO jk = 1, nlev + DO jc = i_startidx, i_endidx +#endif + z_temp(jc,jk,jb) = & + z_nabla2_e(ieidx(jc,jb,1),jk,ieblk(jc,jb,1))*diff_inst(jg)%geofac_div(jc,1,jb) + & + z_nabla2_e(ieidx(jc,jb,2),jk,ieblk(jc,jb,2))*diff_inst(jg)%geofac_div(jc,2,jb) + & + z_nabla2_e(ieidx(jc,jb,3),jk,ieblk(jc,jb,3))*diff_inst(jg)%geofac_div(jc,3,jb) + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + ENDDO +!$OMP END DO + + ENDIF + + IF (diff_inst(jg)%l_zdiffu_t) THEN ! Compute temperature diffusion truly horizontally over steep slopes + ! 
A conservative discretization is not possible here +!$OMP DO PRIVATE(jb,jc,ic,nlen_zdiffu,ishift) ICON_OMP_DEFAULT_SCHEDULE + DO jb = 1, nblks_zdiffu + IF (jb == nblks_zdiffu) THEN + nlen_zdiffu = npromz_zdiffu + ELSE + nlen_zdiffu = nproma_zdiffu + ENDIF + ishift = (jb-1)*nproma_zdiffu + !$ACC PARALLEL LOOP DEFAULT(PRESENT) PRESENT(icell, ilev, iblk, vcoef, zd_geofac) & + !$ACC GANG VECTOR ASYNC(1) IF(i_am_accel_node) +!$NEC ivdep +!DIR$ IVDEP + DO jc = 1, nlen_zdiffu + ic = ishift+jc + z_temp(icell(1,ic),ilev(1,ic),iblk(1,ic)) = & + z_temp(icell(1,ic),ilev(1,ic),iblk(1,ic)) + diff_inst(jg)%zd_diffcoef(ic)* & +! MAX(diff_inst(jg)%zd_diffcoef(ic), & +! kh_smag_e(iedge(1,ic),ilev(1,ic),iedblk(1,ic))* blcoef(1,ic) + & +! kh_smag_e(iedge(2,ic),ilev(1,ic),iedblk(2,ic))* blcoef(2,ic) + & +! kh_smag_e(iedge(3,ic),ilev(1,ic),iedblk(3,ic))* blcoef(3,ic) ) * & + (zd_geofac(1,ic)*theta_v(icell(1,ic),ilev(1,ic),iblk(1,ic)) + & + zd_geofac(2,ic)*(vcoef(1,ic)*theta_v(icell(2,ic),ilev(2,ic),iblk(2,ic))+& + (1._wp-vcoef(1,ic))* theta_v(icell(2,ic),ilev(2,ic)+1,iblk(2,ic))) + & + zd_geofac(3,ic)*(vcoef(2,ic)*theta_v(icell(3,ic),ilev(3,ic),iblk(3,ic))+& + (1._wp-vcoef(2,ic))*theta_v(icell(3,ic),ilev(3,ic)+1,iblk(3,ic))) + & + zd_geofac(4,ic)*(vcoef(3,ic)*theta_v(icell(4,ic),ilev(4,ic),iblk(4,ic))+& + (1._wp-vcoef(3,ic))* theta_v(icell(4,ic),ilev(4,ic)+1,iblk(4,ic))) ) + ENDDO + !$ACC END PARALLEL LOOP + ENDDO +!$OMP END DO + + ENDIF + +!$OMP DO PRIVATE(jk,jc,jb,i_startidx,i_endidx,z_theta) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_c(p_patch_diff(jg), jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) GANG VECTOR COLLAPSE(2) ASYNC(1) IF(i_am_accel_node) + DO jk = 1, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + z_theta = theta_v(jc,jk,jb) + + theta_v(jc,jk,jb) = theta_v(jc,jk,jb) + & + p_patch_diff(jg)%cells%area(jc,jb)*z_temp(jc,jk,jb) + + exner(jc,jk,jb) = exner(jc,jk,jb) * & + 
(1._wp+rd_o_cvd*(theta_v(jc,jk,jb)/z_theta-1._wp)) + + ENDDO + ENDDO + !$ACC END PARALLEL LOOP + + ENDDO +!$OMP END DO NOWAIT +!$OMP END PARALLEL + + ! This could be further optimized, but applications without physics are quite rare; + IF ( .NOT. diff_inst(jg)%lhdiff_rcf .OR. linit .OR. .NOT. diff_inst(jg)%lphys ) THEN + CALL sync_patch_array_mult(SYNC_C,p_patch(jg),2,theta_v,exner, & + opt_varname="diffusion: theta and exner") + ENDIF + + ENDIF ! temperature diffusion + + IF ( .NOT. diff_inst(jg)%lhdiff_rcf .OR. linit .OR. .NOT. diff_inst(jg)%lphys ) THEN + IF (diff_inst(jg)%lhdiff_w) THEN + CALL sync_patch_array(SYNC_C,p_patch(jg),w,"diffusion: w") + END IF + ENDIF + + IF (diff_inst(jg)%ltimer) CALL timer_stop(timer_nh_hdiffusion) + + !$ACC END DATA + + !$ACC WAIT + + END SUBROUTINE diffusion_run + + !> + !! finalize_diffusion + !! + !! Finalizes the horizontal diffusion of velocity and temperature (currently a no-op) + !! + !! @par Revision History + !! Initial release by William Sawyer, CSCS (2022-11-25) + !! + SUBROUTINE diffusion_finalize(jg) + INTEGER, INTENT(IN) :: jg + ! 
Currently nothing to do here + END SUBROUTINE diffusion_finalize + +END MODULE mo_nh_diffusion_new diff --git a/pyutils/tests/samples/subroutine_example.f90 b/pyutils/tests/samples/subroutine_example.f90 new file mode 100644 index 0000000000..bb22e8c041 --- /dev/null +++ b/pyutils/tests/samples/subroutine_example.f90 @@ -0,0 +1,29 @@ +MODULE example_subroutines + USE ISO_C_BINDING, ONLY: C_DOUBLE + IMPLICIT NONE + + PUBLIC :: mysubroutine, foo_type + PRIVATE + + TYPE, BIND(C) :: foo_type + REAL(C_DOUBLE) :: p1 + REAL(C_DOUBLE) :: p2 + END TYPE foo_type + + CONTAINS + + SUBROUTINE mysubroutine(a, b, c) + REAL, INTENT(in) :: a + REAL, INTENT(inout) :: b + REAL, INTENT(out) :: c + c = a + b + b = 2.0 * b + end SUBROUTINE mysubroutine + +END MODULE example_subroutines + + + + + + diff --git a/pyutils/tests/test_parsing.py b/pyutils/tests/test_parsing.py new file mode 100644 index 0000000000..0edcf1823c --- /dev/null +++ b/pyutils/tests/test_parsing.py @@ -0,0 +1,76 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later + +from pathlib import Path + +import pytest + +from icon4py.serialisation.exceptions import MissingDerivedTypeError, ParsingError +from icon4py.serialisation.parse import GranuleParser + + +def root_dir(): + return Path(__file__).parent + + +@pytest.fixture +def granule(): + return Path(f"{root_dir()}/samples/granule_example.f90") + + +def test_granule_parsing(granule): + dependencies = [ + Path(f"{root_dir()}/samples/derived_types_example.f90"), + Path(f"{root_dir()}/samples/subroutine_example.f90"), + ] + parser = GranuleParser(granule, dependencies) + parsed = parser.parse() + + assert list(parsed) == ["diffusion_init", "diffusion_run"] + + assert list(parsed["diffusion_init"]) == ["in"] + assert len(parsed["diffusion_init"]["in"]) == 107 + assert parsed["diffusion_init"]["in"]["codegen_lines"] == [279] + + assert list(parsed["diffusion_run"]) == ["in", "inout", "out"] + assert len(parsed["diffusion_run"]["in"]) == 5 + assert parsed["diffusion_run"]["in"]["codegen_lines"] == [432] + + assert len(parsed["diffusion_run"]["inout"]) == 8 + assert parsed["diffusion_run"]["inout"]["codegen_lines"] == [432, 1970] + + assert len(parsed["diffusion_run"]["out"]) == 5 + assert parsed["diffusion_run"]["out"]["codegen_lines"] == [1970] + + assert isinstance(parsed, dict) + + +@pytest.mark.parametrize( + "dependencies", + [ + [], + [Path(f"{root_dir()}/samples/subroutine_example.f90")], + ], +) +def test_granule_parsing_missing_derived_typedef(granule, dependencies): + parser = GranuleParser(granule, dependencies) + with pytest.raises( + MissingDerivedTypeError, match="Could not find type definition for TYPE" + ): + parser.parse() + + +def test_granule_parsing_no_intent(): + parser = GranuleParser(Path(f"{root_dir()}/samples/subroutine_example.f90"), []) + with pytest.raises(ParsingError): + parser.parse() From 30004e602744f1604ac9031ac7535884fa0f02b7 Mon Sep 17 00:00:00 2001 From: Samuel Date: Thu, 30 Mar 2023 15:04:50 +0200 
Subject: [PATCH 03/21] Add basic deserialisation (#178) * Add basic deserialisation * Add deserialiser and tests * Add basic savepoints * Move f90 sources to testutils * Create maximum two savepoints per subroutine --- liskov/src/icon4py/liskov/codegen/common.py | 4 +- .../liskov/codegen/integration/generate.py | 23 +--- .../liskov/codegen/serialisation/interface.py | 2 +- .../liskov/codegen/serialisation/template.py | 2 +- liskov/src/icon4py/liskov/codegen/types.py | 2 - .../src/icon4py/liskov/parsing/deserialise.py | 25 +--- liskov/tests/conftest.py | 15 --- liskov/tests/test_cli.py | 6 +- ...ser.py => test_directives_deserialiser.py} | 3 +- liskov/tests/test_external.py | 1 - liskov/tests/test_generation.py | 20 ++- liskov/tests/test_parser.py | 15 ++- liskov/tests/test_scanner.py | 5 +- liskov/tests/test_validation.py | 10 +- liskov/tests/test_writer.py | 6 +- .../icon4py/bindings/codegen/render/field.py | 2 +- .../{serialisation => f2ser}/__init__.py | 0 .../icon4py/{serialisation => f2ser}/cli.py | 0 pyutils/src/icon4py/f2ser/deserialise.py | 114 ++++++++++++++++++ .../{serialisation => f2ser}/exceptions.py | 0 .../{serialisation => f2ser}/interface.py | 17 +-- .../icon4py/{serialisation => f2ser}/parse.py | 19 +-- pyutils/tests/f2ser/conftest.py | 33 +++++ .../tests/f2ser/test_granule_deserialiser.py | 96 +++++++++++++++ pyutils/tests/f2ser/test_parsing.py | 54 +++++++++ pyutils/tests/test_parsing.py | 76 ------------ .../fortran}/derived_types_example.f90 | 0 .../testutils/fortran/diffusion_granule.f90 | 0 .../testutils/fortran}/subroutine_example.f90 | 0 .../testutils/liskov_fortran_samples.py | 0 .../icon4py/testutils/liskov_test_utils.py | 29 +++++ 31 files changed, 393 insertions(+), 186 deletions(-) rename liskov/tests/{test_deserialiser.py => test_directives_deserialiser.py} (99%) rename pyutils/src/icon4py/{serialisation => f2ser}/__init__.py (100%) rename pyutils/src/icon4py/{serialisation => f2ser}/cli.py (100%) create mode 100644 
pyutils/src/icon4py/f2ser/deserialise.py rename pyutils/src/icon4py/{serialisation => f2ser}/exceptions.py (100%) rename pyutils/src/icon4py/{serialisation => f2ser}/interface.py (82%) rename pyutils/src/icon4py/{serialisation => f2ser}/parse.py (95%) create mode 100644 pyutils/tests/f2ser/conftest.py create mode 100644 pyutils/tests/f2ser/test_granule_deserialiser.py create mode 100644 pyutils/tests/f2ser/test_parsing.py delete mode 100644 pyutils/tests/test_parsing.py rename {pyutils/tests/samples => testutils/src/icon4py/testutils/fortran}/derived_types_example.f90 (100%) rename pyutils/tests/samples/granule_example.f90 => testutils/src/icon4py/testutils/fortran/diffusion_granule.f90 (100%) rename {pyutils/tests/samples => testutils/src/icon4py/testutils/fortran}/subroutine_example.f90 (100%) rename liskov/tests/samples/fortran_samples.py => testutils/src/icon4py/testutils/liskov_fortran_samples.py (100%) create mode 100644 testutils/src/icon4py/testutils/liskov_test_utils.py diff --git a/liskov/src/icon4py/liskov/codegen/common.py b/liskov/src/icon4py/liskov/codegen/common.py index d74cb2c4d1..dac1655159 100644 --- a/liskov/src/icon4py/liskov/codegen/common.py +++ b/liskov/src/icon4py/liskov/codegen/common.py @@ -61,7 +61,6 @@ def _generate( parent_node: Type[eve.Node], code_generator: Type[TemplatedGenerator], startln: int, - endln: int, **kwargs: CodeGenInput | Sequence[CodeGenInput] | Optional[bool] | Any, ) -> None: """Add a GeneratedCode object to the `generated` attribute with the given source code and line number information. @@ -70,9 +69,8 @@ def _generate( parent_node: The parent node of the code to be generated. code_generator: The code generator to use for generating the code. startln: The start line number of the generated code. - endln: The end line number of the generated code. **kwargs: Additional keyword arguments to be passed to the code generator. 
""" source = self._generate_fortran_code(parent_node, code_generator, **kwargs) - code = GeneratedCode(source=source, startln=startln, endln=endln) + code = GeneratedCode(startln=startln, source=source) self.generated.append(code) diff --git a/liskov/src/icon4py/liskov/codegen/integration/generate.py b/liskov/src/icon4py/liskov/codegen/integration/generate.py index a4ffd3acbf..e08fac5d47 100644 --- a/liskov/src/icon4py/liskov/codegen/integration/generate.py +++ b/liskov/src/icon4py/liskov/codegen/integration/generate.py @@ -88,7 +88,6 @@ def _generate_metadata(self) -> None: MetadataStatement, MetadataStatementGenerator, startln=0, - endln=0, metadata=CodeMetadata(), ) @@ -100,7 +99,6 @@ def _generate_declare(self) -> None: DeclareStatement, DeclareStatementGenerator, self.directives.Declare[i].startln, - self.directives.Declare[i].endln, declare_data=declare, ) @@ -124,7 +122,6 @@ def _generate_start_stencil(self) -> None: if stencil.mergecopy and next_stencil.mergecopy: stencil = StartStencilData( startln=stencil.startln, - endln=next_stencil.endln, name=stencil.name + "_" + next_stencil.name, fields=stencil.fields + next_stencil.fields, bounds=stencil.bounds, @@ -138,7 +135,6 @@ def _generate_start_stencil(self) -> None: StartStencilStatement, StartStencilStatementGenerator, stencil.startln, - next_stencil.endln, stencil_data=stencil, profile=self.profile, ) @@ -147,7 +143,6 @@ def _generate_start_stencil(self) -> None: StartStencilStatement, StartStencilStatementGenerator, self.directives.StartStencil[i].startln, - self.directives.StartStencil[i].endln, stencil_data=stencil, profile=self.profile, ) @@ -165,7 +160,6 @@ def _generate_end_stencil(self) -> None: EndStencilStatement, EndStencilStatementGenerator, self.directives.EndStencil[i].startln, - self.directives.EndStencil[i].endln, stencil_data=stencil, profile=self.profile, noendif=self.directives.EndStencil[i].noendif, @@ -179,7 +173,6 @@ def _generate_imports(self) -> None: ImportsStatement, 
ImportsStatementGenerator, self.directives.Imports.startln, - self.directives.Imports.endln, stencils=self.directives.StartStencil, ) @@ -190,7 +183,6 @@ def _generate_create(self) -> None: StartCreateStatement, StartCreateStatementGenerator, self.directives.StartCreate.startln, - self.directives.StartCreate.endln, stencils=self.directives.StartStencil, extra_fields=self.directives.StartCreate.extra_fields, ) @@ -199,7 +191,6 @@ def _generate_create(self) -> None: EndCreateStatement, EndCreateStatementGenerator, self.directives.EndCreate.startln, - self.directives.EndCreate.endln, ) def _generate_endif(self) -> None: @@ -207,12 +198,7 @@ def _generate_endif(self) -> None: if self.directives.EndIf != UnusedDirective: for endif in self.directives.EndIf: # type: ignore logger.info("Generating ENDIF statement.") - self._generate( - EndIfStatement, - EndIfStatementGenerator, - endif.startln, - endif.endln, - ) + self._generate(EndIfStatement, EndIfStatementGenerator, endif.startln) def _generate_profile(self) -> None: """Generate additional nvtx profiling statements.""" @@ -224,7 +210,6 @@ def _generate_profile(self) -> None: StartProfileStatement, StartProfileStatementGenerator, start.startln, - start.endln, name=start.name, ) @@ -232,10 +217,7 @@ def _generate_profile(self) -> None: for end in self.directives.EndProfile: # type: ignore logger.info("Generating nvtx end statement.") self._generate( - EndProfileStatement, - EndProfileStatementGenerator, - end.startln, - end.endln, + EndProfileStatement, EndProfileStatementGenerator, end.startln ) def _generate_insert(self) -> None: @@ -247,6 +229,5 @@ def _generate_insert(self) -> None: InsertStatement, InsertStatementGenerator, insert.startln, - insert.endln, content=insert.content, ) diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py index 58780979a3..728072d86e 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py 
+++ b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py @@ -11,4 +11,4 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -# todo: add serialisation nodes +# todo: add f2ser nodes diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/template.py b/liskov/src/icon4py/liskov/codegen/serialisation/template.py index 4834f3a672..15d6f65387 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/template.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/template.py @@ -11,4 +11,4 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -# todo: add serialisation code generation +# todo: add f2ser code generation diff --git a/liskov/src/icon4py/liskov/codegen/types.py b/liskov/src/icon4py/liskov/codegen/types.py index 0ea604719f..defc99df44 100644 --- a/liskov/src/icon4py/liskov/codegen/types.py +++ b/liskov/src/icon4py/liskov/codegen/types.py @@ -12,13 +12,11 @@ # SPDX-License-Identifier: GPL-3.0-or-later from dataclasses import dataclass -from typing import Optional @dataclass class CodeGenInput: startln: int - endln: Optional[int] @dataclass diff --git a/liskov/src/icon4py/liskov/parsing/deserialise.py b/liskov/src/icon4py/liskov/parsing/deserialise.py index a4cc0a0b11..e033aa2688 100644 --- a/liskov/src/icon4py/liskov/parsing/deserialise.py +++ b/liskov/src/icon4py/liskov/parsing/deserialise.py @@ -106,11 +106,7 @@ def __call__( else: deserialised = [] for directive in extracted: - deserialised.append( - self.dtype( - startln=directive.startln, endln=directive.endln, **kwargs - ) - ) + deserialised.append(self.dtype(startln=directive.startln, **kwargs)) return deserialised @@ -118,7 +114,7 @@ def __call__( class RequiredSingleUseDataFactory(DataFactoryBase): def __call__(self, parsed: ts.ParsedDict) -> CodeGenInput: extracted = extract_directive(parsed["directives"], self.directive_cls)[0] - return self.dtype(startln=extracted.startln, endln=extracted.endln) + return self.dtype(startln=extracted.startln) @dataclass @@ -159,9 +155,7 @@ def 
__call__(self, parsed: ts.ParsedDict) -> StartCreateData: if named_args: extra_fields = named_args["extra_fields"].split(",") - return self.dtype( - startln=directive.startln, endln=directive.endln, extra_fields=extra_fields - ) + return self.dtype(startln=directive.startln, extra_fields=extra_fields) @dataclass @@ -185,7 +179,6 @@ def __call__(self, parsed: ts.ParsedDict) -> list[DeclareData]: deserialised.append( self.dtype( startln=directive.startln, - endln=directive.endln, declarations=named_args, ident_type=ident_type, suffix=suffix, @@ -206,11 +199,7 @@ def __call__(self, parsed: ts.ParsedDict) -> list[StartProfileData]: named_args = parsed["content"]["StartProfile"][i] stencil_name = _extract_stencil_name(named_args, directive) deserialised.append( - self.dtype( - name=stencil_name, - startln=directive.startln, - endln=directive.endln, - ) + self.dtype(name=stencil_name, startln=directive.startln) ) return deserialised @@ -232,7 +221,6 @@ def __call__(self, parsed: ts.ParsedDict) -> list[EndStencilData]: self.dtype( name=stencil_name, startln=directive.startln, - endln=directive.endln, noendif=noendif, noprofile=noprofile, ) @@ -282,7 +270,6 @@ def __call__(self, parsed: ts.ParsedDict) -> list[StartStencilData]: fields=fields_w_tolerance, bounds=bounds, startln=directive.startln, - endln=directive.endln, acc_present=acc_present, mergecopy=mergecopy, copies=copies, @@ -388,9 +375,7 @@ def __call__(self, parsed: ts.ParsedDict) -> list[InsertData]: for i, directive in enumerate(extracted): content = parsed["content"]["Insert"][i] deserialised.append( - self.dtype( - startln=directive.startln, endln=directive.endln, content=content # type: ignore - ) + self.dtype(startln=directive.startln, content=content) # type: ignore ) return deserialised diff --git a/liskov/tests/conftest.py b/liskov/tests/conftest.py index fd870ff584..81a94b4faf 100644 --- a/liskov/tests/conftest.py +++ b/liskov/tests/conftest.py @@ -16,9 +16,6 @@ import pytest from click.testing import 
CliRunner -import icon4py.liskov.parsing.types as ts -from icon4py.liskov.parsing.scan import DirectivesScanner - @pytest.fixture def make_f90_tmpfile(tmp_path) -> Path: @@ -40,15 +37,3 @@ def _make_f90_tmpfile(content: str): @pytest.fixture def cli(): return CliRunner() - - -def scan_for_directives(fpath: Path) -> list[ts.RawDirective]: - collector = DirectivesScanner(fpath) - return collector() - - -def insert_new_lines(fname: Path, lines: list[str]) -> None: - """Append new lines into file.""" - with open(fname, "a") as f: - for ln in lines: - f.write(f"{ln}\n") diff --git a/liskov/tests/test_cli.py b/liskov/tests/test_cli.py index 75a6114da5..242171fe6d 100644 --- a/liskov/tests/test_cli.py +++ b/liskov/tests/test_cli.py @@ -12,7 +12,9 @@ # SPDX-License-Identifier: GPL-3.0-or-later import pytest -from samples.fortran_samples import ( + +from icon4py.liskov.cli import main +from icon4py.testutils.liskov_fortran_samples import ( CONSECUTIVE_STENCIL, FREE_FORM_STENCIL, MULTIPLE_STENCILS, @@ -20,8 +22,6 @@ SINGLE_STENCIL, ) -from icon4py.liskov.cli import main - @pytest.fixture def outfile(tmp_path): diff --git a/liskov/tests/test_deserialiser.py b/liskov/tests/test_directives_deserialiser.py similarity index 99% rename from liskov/tests/test_deserialiser.py rename to liskov/tests/test_directives_deserialiser.py index 57fa10d3e0..a3287b2c2f 100644 --- a/liskov/tests/test_deserialiser.py +++ b/liskov/tests/test_directives_deserialiser.py @@ -49,7 +49,7 @@ @pytest.mark.parametrize( - "factory_class, directive_type, startln, endln, string, expected", + "factory_class, directive_type, string, startln, endln, expected", [ (EndCreateDataFactory, ts.EndCreate, "END CREATE", 2, 2, EndCreateData), (ImportsDataFactory, ts.Imports, "IMPORTS", 3, 3, ImportsData), @@ -72,7 +72,6 @@ def test_data_factories_no_args( assert isinstance(result, expected) assert result.startln == startln - assert result.endln == endln @pytest.mark.parametrize( diff --git 
a/liskov/tests/test_external.py b/liskov/tests/test_external.py index 12a77fe8f0..4051700f1b 100644 --- a/liskov/tests/test_external.py +++ b/liskov/tests/test_external.py @@ -77,7 +77,6 @@ def test_stencil_collector_invalid_member(): ], bounds=None, startln=None, - endln=None, acc_present=False, mergecopy=False, copies=True, diff --git a/liskov/tests/test_generation.py b/liskov/tests/test_generation.py index 77efbe1118..1413e473cc 100644 --- a/liskov/tests/test_generation.py +++ b/liskov/tests/test_generation.py @@ -65,30 +65,26 @@ def serialised_directives(): ], bounds=BoundsData("1", "10", "-1", "-10"), startln=1, - endln=2, acc_present=False, mergecopy=False, copies=True, ) end_stencil_data = EndStencilData( - name="stencil1", startln=3, endln=4, noendif=False, noprofile=False + name="stencil1", startln=3, noendif=False, noprofile=False ) declare_data = DeclareData( startln=5, - endln=6, declarations={"field2": "(nproma, p_patch%nlev, p_patch%nblks_e)"}, ident_type="REAL(wp)", suffix="before", ) - imports_data = ImportsData(startln=7, endln=8) - start_create_data = StartCreateData( - extra_fields=["foo", "bar"], startln=9, endln=10 - ) - end_create_data = EndCreateData(startln=11, endln=11) - endif_data = EndIfData(startln=12, endln=12) - start_profile_data = StartProfileData(startln=13, endln=13, name="test_stencil") - end_profile_data = EndProfileData(startln=14, endln=14) - insert_data = InsertData(startln=15, endln=15, content="print *, 'Hello, World!'") + imports_data = ImportsData(startln=7) + start_create_data = StartCreateData(extra_fields=["foo", "bar"], startln=9) + end_create_data = EndCreateData(startln=11) + endif_data = EndIfData(startln=12) + start_profile_data = StartProfileData(startln=13, name="test_stencil") + end_profile_data = EndProfileData(startln=14) + insert_data = InsertData(startln=15, content="print *, 'Hello, World!'") return DeserialisedDirectives( StartStencil=[start_stencil_data], diff --git a/liskov/tests/test_parser.py 
b/liskov/tests/test_parser.py index 8101988c51..820651ed0d 100644 --- a/liskov/tests/test_parser.py +++ b/liskov/tests/test_parser.py @@ -15,17 +15,20 @@ from collections import defaultdict import pytest -from conftest import insert_new_lines, scan_for_directives from pytest import mark -from samples.fortran_samples import ( - MULTIPLE_STENCILS, - NO_DIRECTIVES_STENCIL, - SINGLE_STENCIL, -) import icon4py.liskov.parsing.types as ts from icon4py.liskov.parsing.exceptions import UnsupportedDirectiveError from icon4py.liskov.parsing.parse import DirectivesParser +from icon4py.testutils.liskov_fortran_samples import ( + MULTIPLE_STENCILS, + NO_DIRECTIVES_STENCIL, + SINGLE_STENCIL, +) +from icon4py.testutils.liskov_test_utils import ( + insert_new_lines, + scan_for_directives, +) def test_parse_no_input(): diff --git a/liskov/tests/test_scanner.py b/liskov/tests/test_scanner.py index 64dcc4aedf..6e7c5c2252 100644 --- a/liskov/tests/test_scanner.py +++ b/liskov/tests/test_scanner.py @@ -16,11 +16,14 @@ import pytest from pytest import mark -from samples.fortran_samples import DIRECTIVES_SAMPLE, NO_DIRECTIVES_STENCIL from icon4py.liskov.parsing.exceptions import DirectiveSyntaxError from icon4py.liskov.parsing.scan import DirectivesScanner from icon4py.liskov.parsing.types import RawDirective +from icon4py.testutils.liskov_fortran_samples import ( + DIRECTIVES_SAMPLE, + NO_DIRECTIVES_STENCIL, +) ALLOWED_EOL_CHARS = [")", "&"] diff --git a/liskov/tests/test_validation.py b/liskov/tests/test_validation.py index 03bd6e5b53..3f31f570a1 100644 --- a/liskov/tests/test_validation.py +++ b/liskov/tests/test_validation.py @@ -12,9 +12,7 @@ # SPDX-License-Identifier: GPL-3.0-or-later import pytest -from conftest import insert_new_lines, scan_for_directives from pytest import mark -from samples.fortran_samples import MULTIPLE_STENCILS, SINGLE_STENCIL from icon4py.liskov.parsing.exceptions import ( DirectiveSyntaxError, @@ -30,6 +28,14 @@ StartStencil, ) from 
icon4py.liskov.parsing.validation import DirectiveSyntaxValidator +from icon4py.testutils.liskov_fortran_samples import ( + MULTIPLE_STENCILS, + SINGLE_STENCIL, +) +from icon4py.testutils.liskov_test_utils import ( + insert_new_lines, + scan_for_directives, +) @mark.parametrize( diff --git a/liskov/tests/test_writer.py b/liskov/tests/test_writer.py index 9d8a0fa03a..a26c4d3c9d 100644 --- a/liskov/tests/test_writer.py +++ b/liskov/tests/test_writer.py @@ -28,7 +28,7 @@ def test_write_from(): f.write("!$DSL\n some code\n another line") # create an instance of IntegrationWriter and write generated code - generated = [GeneratedCode(1, 3, "generated code")] + generated = [GeneratedCode(1, "generated code")] integration_writer = CodegenWriter(input_filepath, output_filepath) integration_writer(generated) @@ -55,8 +55,8 @@ def test_remove_directives(): def test_insert_generated_code(): current_file = ["some code", "another line"] generated = [ - GeneratedCode(5, 6, "generated code2"), - GeneratedCode(1, 3, "generated code1"), + GeneratedCode(5, "generated code2"), + GeneratedCode(1, "generated code1"), ] expected_output = [ "some code", diff --git a/pyutils/src/icon4py/bindings/codegen/render/field.py b/pyutils/src/icon4py/bindings/codegen/render/field.py index d93f8a2455..89d7634875 100644 --- a/pyutils/src/icon4py/bindings/codegen/render/field.py +++ b/pyutils/src/icon4py/bindings/codegen/render/field.py @@ -68,7 +68,7 @@ def render_ranked_dim_string(self) -> str: ) def render_serialise_func(self) -> str: - """Render c++ serialisation function.""" + """Render c++ f2ser function.""" _serializers = { "E": "serialize_dense_edges", "C": "serialize_dense_cells", diff --git a/pyutils/src/icon4py/serialisation/__init__.py b/pyutils/src/icon4py/f2ser/__init__.py similarity index 100% rename from pyutils/src/icon4py/serialisation/__init__.py rename to pyutils/src/icon4py/f2ser/__init__.py diff --git a/pyutils/src/icon4py/serialisation/cli.py b/pyutils/src/icon4py/f2ser/cli.py 
similarity index 100% rename from pyutils/src/icon4py/serialisation/cli.py rename to pyutils/src/icon4py/f2ser/cli.py diff --git a/pyutils/src/icon4py/f2ser/deserialise.py b/pyutils/src/icon4py/f2ser/deserialise.py new file mode 100644 index 0000000000..250ce1b1dd --- /dev/null +++ b/pyutils/src/icon4py/f2ser/deserialise.py @@ -0,0 +1,114 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from icon4py.f2ser.interface import ( + FieldSerialisationData, + InitData, + SavepointData, + SerialisationInterface, +) +from icon4py.f2ser.parse import ParsedGranule + + +class ParsedGranuleDeserialiser: + def __init__(self, parsed: ParsedGranule, directory: str): + self.parsed = parsed + self.directory = directory + self.data = {"savepoint": [], "init": ...} + + def deserialise(self) -> SerialisationInterface: + """ + Deserialises the parsed granule and returns a serialisation interface. + + Returns: + A `SerialisationInterface` object representing the deserialised data. 
+ """ + self._merge_out_inout_fields() + self._make_savepoints() + self._make_init_data() + return SerialisationInterface(**self.data) + + def _make_savepoints(self) -> None: + """Create savepoints for each subroutine and intent in the parsed granule.""" + for subroutine_name, intent_dict in self.parsed.items(): + for intent, var_dict in intent_dict.items(): + savepoint_name = self._create_savepoint_name(subroutine_name, intent) + self._create_savepoint(savepoint_name, var_dict) + + @staticmethod + def _create_savepoint_name(subroutine_name: str, intent: str) -> str: + return f"{subroutine_name}_{intent}" + + def _create_savepoint(self, savepoint_name: str, var_dict: dict) -> None: + """Create a savepoint for the given variables. + + Args: + savepoint_name: The name of the savepoint. + var_dict: A dictionary representing the variables to be saved. + """ + fields = [] + metadata = None # todo: decide how to handle metadata + field_vals = {k: v for k, v in var_dict.items() if isinstance(v, dict)} + + for var_name, var_data in field_vals.items(): + association = self._get_variable_association(var_data, var_name) + field = FieldSerialisationData(variable=var_name, association=association) + fields.append(field) + + self.data["savepoint"].append( + SavepointData( + name=savepoint_name, + startln=var_dict["codegen_line"], + fields=fields, + metadata=metadata, + ) + ) + + @staticmethod + def _get_variable_association(var_data: dict, var_name: str) -> str: + """ + Generate a string representing the association of a variable with its dimensions. + + Parameters: + var_data (dict): A dictionary containing information about the variable, including its dimensions. + var_name (str): The name of the variable. + + Returns: + str: A string representing the association of the variable with its dimensions, formatted as + "var_name(dim1,dim2,...)" if the variable has dimensions, or simply "var_name" otherwise. + """ + # todo: handle other dimension cases e.g. 
verts_end_index(min_rlvert:) + dimension = var_data.get("dimension", None) + + if dimension is not None: + dim_string = ",".join(dimension) + association = f"{var_name}({dim_string})" + else: + association = var_name + return association + + def _make_init_data(self) -> None: + lns = [] + for _, intent_dict in self.parsed.items(): + for intent, var_dict in intent_dict.items(): + if intent == "in": + lns.append(var_dict["codegen_line"]) + startln = min(lns) + self.data["init"] = InitData(startln=startln, directory=self.directory) + + def _merge_out_inout_fields(self): + for _, intent_dict in self.parsed.items(): + if "inout" in intent_dict: + intent_dict["in"].update(intent_dict["inout"]) + intent_dict["out"].update(intent_dict["inout"]) + del intent_dict["inout"] diff --git a/pyutils/src/icon4py/serialisation/exceptions.py b/pyutils/src/icon4py/f2ser/exceptions.py similarity index 100% rename from pyutils/src/icon4py/serialisation/exceptions.py rename to pyutils/src/icon4py/f2ser/exceptions.py diff --git a/pyutils/src/icon4py/serialisation/interface.py b/pyutils/src/icon4py/f2ser/interface.py similarity index 82% rename from pyutils/src/icon4py/serialisation/interface.py rename to pyutils/src/icon4py/f2ser/interface.py index cc11e78c2a..dd204d3f00 100644 --- a/pyutils/src/icon4py/serialisation/interface.py +++ b/pyutils/src/icon4py/f2ser/interface.py @@ -17,19 +17,22 @@ from icon4py.liskov.codegen.types import CodeGenInput -@dataclass -class Metadata(CodeGenInput): - key: str - value: str +# todo: decomposed fields require extra information so that we can generate corresponding field copies @dataclass class InitData(CodeGenInput): - directory_path: str + directory: str + + +@dataclass +class Metadata: + key: str + value: str @dataclass -class FieldSerializationData(CodeGenInput): +class FieldSerialisationData: variable: str association: str @@ -37,7 +40,7 @@ class FieldSerializationData(CodeGenInput): @dataclass class SavepointData(CodeGenInput): name: str - 
fields: list[FieldSerializationData] + fields: list[FieldSerialisationData] metadata: Optional[list[Metadata]] diff --git a/pyutils/src/icon4py/serialisation/parse.py b/pyutils/src/icon4py/f2ser/parse.py similarity index 95% rename from pyutils/src/icon4py/serialisation/parse.py rename to pyutils/src/icon4py/f2ser/parse.py index 8216c4eb9e..0dacbce30f 100644 --- a/pyutils/src/icon4py/serialisation/parse.py +++ b/pyutils/src/icon4py/f2ser/parse.py @@ -19,7 +19,11 @@ from numpy.f2py.crackfortran import crackfortran -from icon4py.serialisation.exceptions import MissingDerivedTypeError, ParsingError +from icon4py.f2ser.exceptions import MissingDerivedTypeError, ParsingError + + +CodeGenLines = list[int] +ParsedGranule = dict[str, dict[str, dict[str, any] | CodeGenLines]] def crack(path: Path) -> dict: @@ -58,8 +62,7 @@ def __init__( self.granule = granule self.dependencies = dependencies - def parse(self) -> dict: - + def parse(self) -> ParsedGranule: parsed = crack(self.granule) subroutines = self._extract_subroutines(parsed) @@ -192,17 +195,15 @@ def _update_with_codegen_lines(self, parsed_types: dict) -> dict: for subroutine in with_lines: ctx = self.get_line_numbers(subroutine) for intent in with_lines[subroutine]: - lns = [] if intent == "in": - lns.append(ctx.last_intent_ln) + ln = ctx.last_intent_ln elif intent == "inout": - lns.append(ctx.last_intent_ln) - lns.append(ctx.end_subroutine_ln) + continue elif intent == "out": - lns.append(ctx.end_subroutine_ln) + ln = ctx.end_subroutine_ln else: raise ValueError(f"Unrecognized intent: {intent}") - with_lines[subroutine][intent]["codegen_lines"] = lns + with_lines[subroutine][intent]["codegen_line"] = ln return with_lines def get_line_numbers(self, subroutine_name: str) -> CodegenContext: diff --git a/pyutils/tests/f2ser/conftest.py b/pyutils/tests/f2ser/conftest.py new file mode 100644 index 0000000000..3244246c76 --- /dev/null +++ b/pyutils/tests/f2ser/conftest.py @@ -0,0 +1,33 @@ +# ICON4Py - ICON inspired code 
in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from pathlib import Path + +import pytest + +import icon4py.testutils as testutils + + +@pytest.fixture +def samples_path(): + return Path(testutils.__file__).parent / "fortran" + + +@pytest.fixture +def diffusion_granule(samples_path): + return samples_path / "diffusion_granule.f90" + + +@pytest.fixture +def diffusion_granule_deps(samples_path): + return [samples_path / "derived_types_example.f90"] diff --git a/pyutils/tests/f2ser/test_granule_deserialiser.py b/pyutils/tests/f2ser/test_granule_deserialiser.py new file mode 100644 index 0000000000..745bcc750f --- /dev/null +++ b/pyutils/tests/f2ser/test_granule_deserialiser.py @@ -0,0 +1,96 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later +import pytest + +from icon4py.f2ser.deserialise import ParsedGranuleDeserialiser +from icon4py.f2ser.interface import ( + FieldSerialisationData, + SavepointData, + SerialisationInterface, +) +from icon4py.f2ser.parse import GranuleParser + + +@pytest.fixture +def mock_parsed_granule(): + return { + "diffusion_init": { + "in": { + "jg": {"typespec": "integer", "attrspec": [], "intent": ["in"]}, + "vt": { + "typespec": "real", + "kindselector": {"kind": "vp"}, + "attrspec": [], + "intent": ["in"], + "dimension": [":", ":", ":"], + }, + "codegen_line": 432, + } + }, + "diffusion_run": { + "out": { + "vert_idx": { + "typespec": "logical", + "kindselector": {"kind": "vp"}, + "attrspec": [], + "intent": ["in"], + "dimension": [":", ":", ":"], + }, + "codegen_line": 800, + }, + "in": { + "vn": {"typespec": "integer", "attrspec": [], "intent": ["out"]}, + "vert_idx": { + "typespec": "logical", + "kindselector": {"kind": "vp"}, + "attrspec": [], + "intent": ["in"], + "dimension": [":", ":", ":"], + }, + "codegen_line": 600, + }, + "inout": { + "vn": {"typespec": "integer", "attrspec": [], "intent": ["out"]}, + "vert_idx": { + "typespec": "logical", + "kindselector": {"kind": "vp"}, + "attrspec": [], + "intent": ["in"], + "dimension": [":", ":", ":"], + }, + }, + }, + } + + +def test_deserialiser_mock(mock_parsed_granule): + deserialiser = ParsedGranuleDeserialiser(mock_parsed_granule, directory=".") + interface = deserialiser.deserialise() + assert isinstance(interface, SerialisationInterface) + assert len(interface.savepoint) == 3 + assert all([isinstance(s, SavepointData) for s in interface.savepoint]) + assert all( + [ + isinstance(f, FieldSerialisationData) + for s in interface.savepoint + for f in s.fields + ] + ) + + +def test_deserialiser_diffusion_granule(diffusion_granule, diffusion_granule_deps): + parser = GranuleParser(diffusion_granule, diffusion_granule_deps) + parsed = parser.parse() + deserialiser = 
ParsedGranuleDeserialiser(parsed, directory=".") + interface = deserialiser.deserialise() + assert len(interface.savepoint) == 3 diff --git a/pyutils/tests/f2ser/test_parsing.py b/pyutils/tests/f2ser/test_parsing.py new file mode 100644 index 0000000000..de114ee392 --- /dev/null +++ b/pyutils/tests/f2ser/test_parsing.py @@ -0,0 +1,54 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +import pytest + +from icon4py.f2ser.exceptions import MissingDerivedTypeError, ParsingError +from icon4py.f2ser.parse import GranuleParser + + +def test_granule_parsing(diffusion_granule, diffusion_granule_deps): + parser = GranuleParser(diffusion_granule, diffusion_granule_deps) + parsed = parser.parse() + + assert list(parsed) == ["diffusion_init", "diffusion_run"] + + assert list(parsed["diffusion_init"]) == ["in"] + assert len(parsed["diffusion_init"]["in"]) == 107 + assert parsed["diffusion_init"]["in"]["codegen_line"] == 279 + + assert list(parsed["diffusion_run"]) == ["in", "inout", "out"] + assert len(parsed["diffusion_run"]["in"]) == 5 + assert parsed["diffusion_run"]["in"]["codegen_line"] == 432 + + assert len(parsed["diffusion_run"]["inout"]) == 7 + + assert len(parsed["diffusion_run"]["out"]) == 5 + assert parsed["diffusion_run"]["out"]["codegen_line"] == 1970 + + assert isinstance(parsed, dict) + + +def test_granule_parsing_missing_derived_typedef(diffusion_granule, samples_path): + dependencies = [samples_path / "subroutine_example.f90"] + parser = GranuleParser(diffusion_granule, dependencies) + with 
pytest.raises( + MissingDerivedTypeError, match="Could not find type definition for TYPE" + ): + parser.parse() + + +def test_granule_parsing_no_intent(samples_path): + parser = GranuleParser(samples_path / "subroutine_example.f90", []) + with pytest.raises(ParsingError): + parser.parse() diff --git a/pyutils/tests/test_parsing.py b/pyutils/tests/test_parsing.py deleted file mode 100644 index 0edcf1823c..0000000000 --- a/pyutils/tests/test_parsing.py +++ /dev/null @@ -1,76 +0,0 @@ -# ICON4Py - ICON inspired code in Python and GT4Py -# -# Copyright (c) 2022, ETH Zurich and MeteoSwiss -# All rights reserved. -# -# This file is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or any later -# version. See the LICENSE.txt file at the top-level directory of this -# distribution for a copy of the license or check . -# -# SPDX-License-Identifier: GPL-3.0-or-later - -from pathlib import Path - -import pytest - -from icon4py.serialisation.exceptions import MissingDerivedTypeError, ParsingError -from icon4py.serialisation.parse import GranuleParser - - -def root_dir(): - return Path(__file__).parent - - -@pytest.fixture -def granule(): - return Path(f"{root_dir()}/samples/granule_example.f90") - - -def test_granule_parsing(granule): - dependencies = [ - Path(f"{root_dir()}/samples/derived_types_example.f90"), - Path(f"{root_dir()}/samples/subroutine_example.f90"), - ] - parser = GranuleParser(granule, dependencies) - parsed = parser.parse() - - assert list(parsed) == ["diffusion_init", "diffusion_run"] - - assert list(parsed["diffusion_init"]) == ["in"] - assert len(parsed["diffusion_init"]["in"]) == 107 - assert parsed["diffusion_init"]["in"]["codegen_lines"] == [279] - - assert list(parsed["diffusion_run"]) == ["in", "inout", "out"] - assert len(parsed["diffusion_run"]["in"]) == 5 - assert 
parsed["diffusion_run"]["in"]["codegen_lines"] == [432] - - assert len(parsed["diffusion_run"]["inout"]) == 8 - assert parsed["diffusion_run"]["inout"]["codegen_lines"] == [432, 1970] - - assert len(parsed["diffusion_run"]["out"]) == 5 - assert parsed["diffusion_run"]["out"]["codegen_lines"] == [1970] - - assert isinstance(parsed, dict) - - -@pytest.mark.parametrize( - "dependencies", - [ - [], - [Path(f"{root_dir()}/samples/subroutine_example.f90")], - ], -) -def test_granule_parsing_missing_derived_typedef(granule, dependencies): - parser = GranuleParser(granule, dependencies) - with pytest.raises( - MissingDerivedTypeError, match="Could not find type definition for TYPE" - ): - parser.parse() - - -def test_granule_parsing_no_intent(): - parser = GranuleParser(Path(f"{root_dir()}/samples/subroutine_example.f90"), []) - with pytest.raises(ParsingError): - parser.parse() diff --git a/pyutils/tests/samples/derived_types_example.f90 b/testutils/src/icon4py/testutils/fortran/derived_types_example.f90 similarity index 100% rename from pyutils/tests/samples/derived_types_example.f90 rename to testutils/src/icon4py/testutils/fortran/derived_types_example.f90 diff --git a/pyutils/tests/samples/granule_example.f90 b/testutils/src/icon4py/testutils/fortran/diffusion_granule.f90 similarity index 100% rename from pyutils/tests/samples/granule_example.f90 rename to testutils/src/icon4py/testutils/fortran/diffusion_granule.f90 diff --git a/pyutils/tests/samples/subroutine_example.f90 b/testutils/src/icon4py/testutils/fortran/subroutine_example.f90 similarity index 100% rename from pyutils/tests/samples/subroutine_example.f90 rename to testutils/src/icon4py/testutils/fortran/subroutine_example.f90 diff --git a/liskov/tests/samples/fortran_samples.py b/testutils/src/icon4py/testutils/liskov_fortran_samples.py similarity index 100% rename from liskov/tests/samples/fortran_samples.py rename to testutils/src/icon4py/testutils/liskov_fortran_samples.py diff --git 
a/testutils/src/icon4py/testutils/liskov_test_utils.py b/testutils/src/icon4py/testutils/liskov_test_utils.py new file mode 100644 index 0000000000..526dcd39cf --- /dev/null +++ b/testutils/src/icon4py/testutils/liskov_test_utils.py @@ -0,0 +1,29 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from pathlib import Path + +import icon4py.liskov.parsing.types as ts +from icon4py.liskov.parsing.scan import DirectivesScanner + + +def scan_for_directives(fpath: Path) -> list[ts.RawDirective]: + collector = DirectivesScanner(fpath) + return collector() + + +def insert_new_lines(fname: Path, lines: list[str]) -> None: + """Append new lines into file.""" + with open(fname, "a") as f: + for ln in lines: + f.write(f"{ln}\n") From a7be879f6330c91a78755c64d8f93568a4485167 Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 4 Apr 2023 15:20:09 +0200 Subject: [PATCH 04/21] Add serialisation codegen (#186) * Add basic structure * Fix tests * Add codegen templates * Add more codegen * Generate code in correct location * Cleanup and add docstrings * Remove todo * Update fortran granule --- .../liskov/codegen/serialisation/generate.py | 46 ++++++- .../liskov/codegen/serialisation/interface.py | 41 +++++- .../liskov/codegen/serialisation/template.py | 91 ++++++++++++- pyutils/src/icon4py/f2ser/deserialise.py | 123 +++++++++++------- pyutils/src/icon4py/f2ser/interface.py | 50 ------- pyutils/src/icon4py/f2ser/parse.py | 95 +++++++++++--- pyutils/tests/f2ser/test_f2ser_codegen.py | 26 ++++ 
.../tests/f2ser/test_granule_deserialiser.py | 12 +- pyutils/tests/f2ser/test_parsing.py | 2 +- .../testutils/fortran/diffusion_granule.f90 | 12 +- 10 files changed, 369 insertions(+), 129 deletions(-) delete mode 100644 pyutils/src/icon4py/f2ser/interface.py create mode 100644 pyutils/tests/f2ser/test_f2ser_codegen.py diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/generate.py b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py index ba043c55e8..0576ce76d2 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/generate.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py @@ -10,5 +10,49 @@ # distribution for a copy of the license or check . # # SPDX-License-Identifier: GPL-3.0-or-later +from typing import Any -# todo: add SerialisationGenerator +from icon4py.liskov.codegen.common import CodeGenerator +from icon4py.liskov.codegen.serialisation.interface import SerialisationInterface +from icon4py.liskov.codegen.serialisation.template import ( + InitStatement, + InitStatementGenerator, + SavepointStatement, + SavepointStatementGenerator, +) +from icon4py.liskov.codegen.types import GeneratedCode +from icon4py.liskov.logger import setup_logger + + +logger = setup_logger(__name__) + + +class SerialisationGenerator(CodeGenerator): + def __init__(self, ser_iface: SerialisationInterface): + super().__init__() + self.ser_iface = ser_iface + + def __call__(self, data: Any = None) -> list[GeneratedCode]: + """Generate all f90 code for integration.""" + self._generate_init() + self._generate_savepoints() + return self.generated + + def _generate_init(self) -> None: + logger.info("Generating pp_ser initialisation statement.") + self._generate( + InitStatement, + InitStatementGenerator, + self.ser_iface.Init.startln, + directory=self.ser_iface.Init.directory, + ) + + def _generate_savepoints(self) -> None: + for i, savepoint in enumerate(self.ser_iface.Savepoint): + logger.info("Generating pp_ser savepoint statement.") + 
self._generate( + SavepointStatement, + SavepointStatementGenerator, + self.ser_iface.Savepoint[i].startln, + savepoint=savepoint, + ) diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py index 728072d86e..1bf11eb6c8 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py @@ -11,4 +11,43 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -# todo: add f2ser nodes +from dataclasses import dataclass +from typing import Optional + +from icon4py.liskov.codegen.types import CodeGenInput + + +@dataclass +class InitData(CodeGenInput): + directory: str + + +@dataclass +class Metadata: + key: str + value: str + + +@dataclass +class FieldSerialisationData: + variable: str + association: str + decomposed: bool = False + dimension: Optional[list[str]] = None + typespec: Optional[str] = None + typename: Optional[str] = None + ptr_var: Optional[str] = None + + +@dataclass +class SavepointData(CodeGenInput): + subroutine: str + intent: str + fields: list[FieldSerialisationData] + metadata: Optional[list[Metadata]] + + +@dataclass +class SerialisationInterface: + Init: InitData + Savepoint: list[SavepointData] diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/template.py b/liskov/src/icon4py/liskov/codegen/serialisation/template.py index 15d6f65387..d60a71179e 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/template.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/template.py @@ -10,5 +10,94 @@ # distribution for a copy of the license or check . 
# # SPDX-License-Identifier: GPL-3.0-or-later +from dataclasses import asdict +from typing import Optional -# todo: add f2ser code generation +import gt4py.eve as eve +from gt4py.eve.codegen import JinjaTemplate as as_jinja +from gt4py.eve.codegen import TemplatedGenerator + +from icon4py.liskov.codegen.serialisation.interface import SavepointData + + +class InitStatement(eve.Node): + directory: str + + +class InitStatementGenerator(TemplatedGenerator): + InitStatement = as_jinja("!$ser init directory={{directory}}") + + +class Field(eve.Node): + variable: str + association: str + decomposed: bool + dimension: Optional[list[str]] + typespec: Optional[str] + typename: Optional[str] + ptr_var: Optional[str] + + +class StandardFields(eve.Node): + fields: list[Field] + + +class DecomposedFields(StandardFields): + ... + + +class SavepointStatement(eve.Node): + savepoint: SavepointData + standard_fields: StandardFields = eve.datamodels.field(init=False) + decomposed_fields: DecomposedFields = eve.datamodels.field(init=False) + + def __post_init__(self): + self.standard_fields = StandardFields( + fields=[ + Field(**asdict(f)) for f in self.savepoint.fields if not f.decomposed + ] + ) + self.decomposed_fields = DecomposedFields( + fields=[Field(**asdict(f)) for f in self.savepoint.fields if f.decomposed] + ) + + +class SavepointStatementGenerator(TemplatedGenerator): + SavepointStatement = as_jinja( + """ + !$ser savepoint {{ _this_node.savepoint.subroutine }}_{{ _this_node.savepoint.intent }} {% if _this_node.savepoint.metadata %} {%- for m in _this_node.savepoint.metadata -%} {{ m.key }}={{ m.value }} {%- endfor -%} {% endif %} + + {{ standard_fields }} + {{ decomposed_fields }} + """ + ) + + StandardFields = as_jinja( + """ + {% for f in _this_node.fields %} + !$ser data {{ f.variable }}={{ f.association }} + {% endfor %} + """ + ) + + DecomposedFields = as_jinja( + """ + {% for f in _this_node.fields %} + !$ser verbatim {{ f.typespec }}, automatic :: {{ f.variable 
}}_{{ f.ptr_var}}({{ f.alloc_dims }}) + !$ser data {{ f.variable }}={{ f.association }} + {% endfor %} + """ + ) + + def visit_DecomposedFields(self, node: DecomposedFields): + def generate_size_strings(colon_list, var_name): + size_strings = [] + for i in range(len(colon_list)): + size_strings.append(f"size({var_name}, {i + 1})") + return size_strings + + for f in node.fields: + f.variable = f.variable.replace(f"_{f.ptr_var}", "") + f.alloc_dims = ", ".join(generate_size_strings(f.dimension, f.variable)) + + return self.generic_visit(node) diff --git a/pyutils/src/icon4py/f2ser/deserialise.py b/pyutils/src/icon4py/f2ser/deserialise.py index 250ce1b1dd..c074a1817d 100644 --- a/pyutils/src/icon4py/f2ser/deserialise.py +++ b/pyutils/src/icon4py/f2ser/deserialise.py @@ -11,24 +11,23 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -from icon4py.f2ser.interface import ( +from icon4py.f2ser.parse import ParsedGranule +from icon4py.liskov.codegen.serialisation.interface import ( FieldSerialisationData, InitData, SavepointData, SerialisationInterface, ) -from icon4py.f2ser.parse import ParsedGranule class ParsedGranuleDeserialiser: def __init__(self, parsed: ParsedGranule, directory: str): self.parsed = parsed self.directory = directory - self.data = {"savepoint": [], "init": ...} + self.data = {"Savepoint": [], "Init": ...} def deserialise(self) -> SerialisationInterface: - """ - Deserialises the parsed granule and returns a serialisation interface. + """Deserialise the parsed granule and returns a serialisation interface. Returns: A `SerialisationInterface` object representing the deserialised data. @@ -39,74 +38,108 @@ def deserialise(self) -> SerialisationInterface: return SerialisationInterface(**self.data) def _make_savepoints(self) -> None: - """Create savepoints for each subroutine and intent in the parsed granule.""" + """Create savepoints for each subroutine and intent in the parsed granule. + + Returns: + None. 
+ """ for subroutine_name, intent_dict in self.parsed.items(): for intent, var_dict in intent_dict.items(): - savepoint_name = self._create_savepoint_name(subroutine_name, intent) - self._create_savepoint(savepoint_name, var_dict) - - @staticmethod - def _create_savepoint_name(subroutine_name: str, intent: str) -> str: - return f"{subroutine_name}_{intent}" + self._create_savepoint(subroutine_name, intent, var_dict) - def _create_savepoint(self, savepoint_name: str, var_dict: dict) -> None: + def _create_savepoint( + self, subroutine_name: str, intent: str, var_dict: dict + ) -> None: """Create a savepoint for the given variables. Args: - savepoint_name: The name of the savepoint. + subroutine_name: The name of the subroutine. + intent: The intent of the fields to be serialised. var_dict: A dictionary representing the variables to be saved. + + Returns: + None. """ - fields = [] - metadata = None # todo: decide how to handle metadata field_vals = {k: v for k, v in var_dict.items() if isinstance(v, dict)} + fields = [ + FieldSerialisationData( + variable=var_name, + association=self._create_association(var_data, var_name), + decomposed=var_data["decomposed"] + if var_data.get("decomposed") + else False, + dimension=var_data.get("dimension"), + typespec=var_data.get("typespec"), + typename=var_data.get("typename"), + ptr_var=var_data.get("ptr_var"), + ) + for var_name, var_data in field_vals.items() + ] - for var_name, var_data in field_vals.items(): - association = self._get_variable_association(var_data, var_name) - field = FieldSerialisationData(variable=var_name, association=association) - fields.append(field) - - self.data["savepoint"].append( + self.data["Savepoint"].append( SavepointData( - name=savepoint_name, + subroutine=subroutine_name, + intent=intent, startln=var_dict["codegen_line"], fields=fields, - metadata=metadata, + metadata=None, # todo: decide how to handle metadata ) ) @staticmethod - def _get_variable_association(var_data: dict, var_name: str) 
-> str: - """ - Generate a string representing the association of a variable with its dimensions. + def get_slice_expression(var_name: str, dimension: str) -> str: + """Return a string representing a slice expression for a given variable name and dimension. - Parameters: - var_data (dict): A dictionary containing information about the variable, including its dimensions. + Args: var_name (str): The name of the variable. + dimension (str): The dimension of the variable. Returns: - str: A string representing the association of the variable with its dimensions, formatted as - "var_name(dim1,dim2,...)" if the variable has dimensions, or simply "var_name" otherwise. + str: A string representing a slice expression. """ - # todo: handle other dimension cases e.g. verts_end_index(min_rlvert:) - dimension = var_data.get("dimension", None) + idx = dimension.split()[-1].lstrip("-+") + return f"{var_name}({idx}:)" + + def _create_association(self, var_data: dict, var_name: str) -> str: + """Create an association between a variable and its data. + Args: + var_data (dict): A dictionary containing information about the variable. + var_name (str): The name of the variable. + + Returns: + str: A string representing the association between the variable and its data. 
+ """ + dimension = var_data.get("dimension") if dimension is not None: - dim_string = ",".join(dimension) - association = f"{var_name}({dim_string})" - else: - association = var_name - return association + return ( + self.get_slice_expression(var_name, dimension[0]) + if ":" not in dimension + else f"{var_name}({','.join(dimension)})" + ) + return var_name def _make_init_data(self) -> None: - lns = [] - for _, intent_dict in self.parsed.items(): - for intent, var_dict in intent_dict.items(): - if intent == "in": - lns.append(var_dict["codegen_line"]) - startln = min(lns) - self.data["init"] = InitData(startln=startln, directory=self.directory) + """Create an `InitData` object and sets it to the `Init` key in the `data` dictionary. + + Returns: + None. + """ + in_lines = [ + var_dict["codegen_line"] + for intent_dict in self.parsed.values() + for intent, var_dict in intent_dict.items() + if intent == "in" + ] + startln = min(in_lines, default=0) + self.data["Init"] = InitData(startln=startln, directory=self.directory) def _merge_out_inout_fields(self): + """Merge the `inout` fields into the `in` and `out` fields in the `parsed` dictionary. + + Returns: + None. + """ for _, intent_dict in self.parsed.items(): if "inout" in intent_dict: intent_dict["in"].update(intent_dict["inout"]) diff --git a/pyutils/src/icon4py/f2ser/interface.py b/pyutils/src/icon4py/f2ser/interface.py deleted file mode 100644 index dd204d3f00..0000000000 --- a/pyutils/src/icon4py/f2ser/interface.py +++ /dev/null @@ -1,50 +0,0 @@ -# ICON4Py - ICON inspired code in Python and GT4Py -# -# Copyright (c) 2022, ETH Zurich and MeteoSwiss -# All rights reserved. -# -# This file is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or any later -# version. 
See the LICENSE.txt file at the top-level directory of this -# distribution for a copy of the license or check . -# -# SPDX-License-Identifier: GPL-3.0-or-later - -from dataclasses import dataclass -from typing import Optional - -from icon4py.liskov.codegen.types import CodeGenInput - - -# todo: decomposed fields require extra information so that we can generate corresponding field copies - - -@dataclass -class InitData(CodeGenInput): - directory: str - - -@dataclass -class Metadata: - key: str - value: str - - -@dataclass -class FieldSerialisationData: - variable: str - association: str - - -@dataclass -class SavepointData(CodeGenInput): - name: str - fields: list[FieldSerialisationData] - metadata: Optional[list[Metadata]] - - -@dataclass -class SerialisationInterface: - init: InitData - savepoint: list[SavepointData] diff --git a/pyutils/src/icon4py/f2ser/parse.py b/pyutils/src/icon4py/f2ser/parse.py index 0dacbce30f..294db84c04 100644 --- a/pyutils/src/icon4py/f2ser/parse.py +++ b/pyutils/src/icon4py/f2ser/parse.py @@ -63,32 +63,31 @@ def __init__( self.dependencies = dependencies def parse(self) -> ParsedGranule: - parsed = crack(self.granule) - - subroutines = self._extract_subroutines(parsed) - + """Parse the granule and return the parsed data.""" + subroutines = self._extract_subroutines(crack(self.granule)) variables_grouped_by_intent = { name: self._extract_intent_vars(routine) for name, routine in subroutines.items() } - intrinsic_type_vars, derived_type_vars = self._parse_types( variables_grouped_by_intent ) - combined_type_vars = self._combine_types(derived_type_vars, intrinsic_type_vars) + return self._update_with_codegen_lines(combined_type_vars) - vars_with_lines = self._update_with_codegen_lines(combined_type_vars) + def _extract_subroutines(self, parsed: dict[str, any]) -> dict[str, any]: + """Extract the _init and _run subroutines from the parsed granule. - return vars_with_lines + Args: + parsed: A dictionary representing the parsed granule. 
- def _extract_subroutines(self, parsed: dict) -> dict: - subroutines: dict = {} + Returns: + A dictionary containing the extracted _init and _run subroutines. + """ + subroutines = {} for elt in parsed["body"]: name = elt["name"] - if SubroutineType.RUN.value in name: - subroutines[name] = elt - elif SubroutineType.INIT.value in name: + if SubroutineType.RUN.value in name or SubroutineType.INIT.value in name: subroutines[name] = elt if len(subroutines) != 2: @@ -100,6 +99,14 @@ def _extract_subroutines(self, parsed: dict) -> dict: @staticmethod def _extract_intent_vars(subroutine: dict) -> dict: + """Extract variables grouped by their intent. + + Args: + subroutine (dict): A dictionary representing the subroutine. + + Returns: + A dictionary representing variables grouped by their intent. + """ intents = ["in", "inout", "out"] result: dict = {} for var in subroutine["vars"]: @@ -112,6 +119,14 @@ def _extract_intent_vars(subroutine: dict) -> dict: return result def _parse_types(self, parsed: dict) -> tuple[dict, dict]: + """Parse the intrinsic and derived type variables of each subroutine and intent from a parsed granule dictionary. + + Args: + parsed (dict): A dictionary containing the parsed information of a granule. + + Returns: + tuple[dict, dict]: A tuple containing two dictionaries. The first one maps each subroutine and intent to a dictionary of intrinsic type variables. The second one maps each subroutine and intent to a dictionary of derived type variables. + """ intrinsic_types: dict = {} derived_types: dict = {} @@ -135,7 +150,17 @@ def _parse_types(self, parsed: dict) -> tuple[dict, dict]: return intrinsic_types, self._parse_derived_types(derived_types) def _parse_derived_types(self, derived_types: dict) -> dict: - # Create a dictionary that maps the typename to the typedef for each derived type + """Parse the derived types defined in the input dictionary by adding their type definitions. 
+ + Args: + derived_types (dict): A dictionary containing the derived types. + + Returns: + dict: A dictionary containing the parsed derived types with their type definitions. + + Raises: + MissingDerivedTypeError: If the type definition for a derived type could not be found in any of the dependency files. + """ derived_type_defs = {} for dep in self.dependencies: parsed = crack(dep) @@ -143,7 +168,6 @@ def _parse_derived_types(self, derived_types: dict) -> dict: if block["block"] == "type": derived_type_defs[block["name"]] = block["vars"] - # Iterate over the derived types and add the typedef for each derived type for _, subroutine_vars in derived_types.items(): for _, intent_vars in subroutine_vars.items(): for _, var in intent_vars.items(): @@ -151,6 +175,7 @@ def _parse_derived_types(self, derived_types: dict) -> dict: typename = var["typename"] if typename in derived_type_defs: var["typedef"] = derived_type_defs[typename] + var["decomposed"] = True else: raise MissingDerivedTypeError( f"Could not find type definition for TYPE: {typename} in dependency files: {self.dependencies}" @@ -160,6 +185,14 @@ def _parse_derived_types(self, derived_types: dict) -> dict: @staticmethod def _decompose_derived_types(derived_types: dict) -> dict: + """Decompose derived types into individual subtypes. + + Args: + derived_types (dict): A dictionary containing the derived types to be decomposed. + + Returns: + dict: A dictionary containing the decomposed derived types, with each subtype represented by a separate entry. 
+ """ decomposed_vars: dict = {} for subroutine, subroutine_vars in derived_types.items(): decomposed_vars[subroutine] = {} @@ -176,6 +209,7 @@ def _decompose_derived_types(derived_types: dict) -> dict: decomposed_vars[subroutine][intent][ new_type_name ] = new_var_dict + new_var_dict["ptr_var"] = subtype_name else: decomposed_vars[subroutine][intent][var_name] = var_dict @@ -183,6 +217,15 @@ def _decompose_derived_types(derived_types: dict) -> dict: @staticmethod def _combine_types(derived_type_vars: dict, intrinsic_type_vars: dict) -> dict: + """Combine intrinsic and derived type variables and returns a dictionary with the combined result. + + Args: + derived_type_vars (dict): A dictionary with derived type variables. + intrinsic_type_vars (dict): A dictionary with intrinsic type variables. + + Returns: + dict: A dictionary with the combined intrinsic and derived type variables. + """ combined = deepcopy(intrinsic_type_vars) for subroutine_name in combined: for intent in combined[subroutine_name]: @@ -191,6 +234,14 @@ def _combine_types(derived_type_vars: dict, intrinsic_type_vars: dict) -> dict: return combined def _update_with_codegen_lines(self, parsed_types: dict) -> dict: + """Update the parsed_types dictionary with the line numbers for codegen. + + Args: + parsed_types (dict): A dictionary containing the parsed intrinsic and derived types. + + Returns: + dict: A dictionary containing the parsed intrinsic and derived types with line numbers for codegen. + """ with_lines = deepcopy(parsed_types) for subroutine in with_lines: ctx = self.get_line_numbers(subroutine) @@ -207,6 +258,14 @@ def _update_with_codegen_lines(self, parsed_types: dict) -> dict: return with_lines def get_line_numbers(self, subroutine_name: str) -> CodegenContext: + """Return CodegenContext object containing line numbers of the last intent statement and the code before the end of the given subroutine. + + Args: + subroutine_name (str): Name of the subroutine to look for in the code. 
+ + Returns: + CodegenContext: Object containing the line number of the last intent statement and the line number of the last line of the code before the end of the given subroutine. + """ with open(self.granule, "r") as f: code = f.read() @@ -233,4 +292,8 @@ def get_line_numbers(self, subroutine_name: str) -> CodegenContext: raise ParsingError(f"No INTENT declarations found in {self.granule}") last_intent_ln = intent_pattern_lines[-1] + start_subroutine_ln + 1 - return CodegenContext(last_intent_ln, end_subroutine_ln) + pre_end_subroutine_ln = ( + end_subroutine_ln - 1 + ) # we want to generate the code before the end of the subroutine + + return CodegenContext(last_intent_ln, pre_end_subroutine_ln) diff --git a/pyutils/tests/f2ser/test_f2ser_codegen.py b/pyutils/tests/f2ser/test_f2ser_codegen.py new file mode 100644 index 0000000000..e72197ae4b --- /dev/null +++ b/pyutils/tests/f2ser/test_f2ser_codegen.py @@ -0,0 +1,26 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later + +from icon4py.f2ser.deserialise import ParsedGranuleDeserialiser +from icon4py.f2ser.parse import GranuleParser +from icon4py.liskov.codegen.serialisation.generate import SerialisationGenerator + + +def test_deserialiser_diffusion_codegen(diffusion_granule, diffusion_granule_deps): + parser = GranuleParser(diffusion_granule, diffusion_granule_deps) + parsed = parser.parse() + deserialiser = ParsedGranuleDeserialiser(parsed, directory=".") + interface = deserialiser.deserialise() + generator = SerialisationGenerator(interface) + generated = generator() + assert len(generated) == 4 diff --git a/pyutils/tests/f2ser/test_granule_deserialiser.py b/pyutils/tests/f2ser/test_granule_deserialiser.py index 745bcc750f..a6d88d5bee 100644 --- a/pyutils/tests/f2ser/test_granule_deserialiser.py +++ b/pyutils/tests/f2ser/test_granule_deserialiser.py @@ -13,12 +13,12 @@ import pytest from icon4py.f2ser.deserialise import ParsedGranuleDeserialiser -from icon4py.f2ser.interface import ( +from icon4py.f2ser.parse import GranuleParser +from icon4py.liskov.codegen.serialisation.interface import ( FieldSerialisationData, SavepointData, SerialisationInterface, ) -from icon4py.f2ser.parse import GranuleParser @pytest.fixture @@ -77,12 +77,12 @@ def test_deserialiser_mock(mock_parsed_granule): deserialiser = ParsedGranuleDeserialiser(mock_parsed_granule, directory=".") interface = deserialiser.deserialise() assert isinstance(interface, SerialisationInterface) - assert len(interface.savepoint) == 3 - assert all([isinstance(s, SavepointData) for s in interface.savepoint]) + assert len(interface.Savepoint) == 3 + assert all([isinstance(s, SavepointData) for s in interface.Savepoint]) assert all( [ isinstance(f, FieldSerialisationData) - for s in interface.savepoint + for s in interface.Savepoint for f in s.fields ] ) @@ -93,4 +93,4 @@ def test_deserialiser_diffusion_granule(diffusion_granule, diffusion_granule_dep parsed = parser.parse() 
deserialiser = ParsedGranuleDeserialiser(parsed, directory=".") interface = deserialiser.deserialise() - assert len(interface.savepoint) == 3 + assert len(interface.Savepoint) == 3 diff --git a/pyutils/tests/f2ser/test_parsing.py b/pyutils/tests/f2ser/test_parsing.py index de114ee392..7c0a30a068 100644 --- a/pyutils/tests/f2ser/test_parsing.py +++ b/pyutils/tests/f2ser/test_parsing.py @@ -34,7 +34,7 @@ def test_granule_parsing(diffusion_granule, diffusion_granule_deps): assert len(parsed["diffusion_run"]["inout"]) == 7 assert len(parsed["diffusion_run"]["out"]) == 5 - assert parsed["diffusion_run"]["out"]["codegen_line"] == 1970 + assert parsed["diffusion_run"]["out"]["codegen_line"] == 1969 assert isinstance(parsed, dict) diff --git a/testutils/src/icon4py/testutils/fortran/diffusion_granule.f90 b/testutils/src/icon4py/testutils/fortran/diffusion_granule.f90 index bfb91fcc18..84b5768b2c 100644 --- a/testutils/src/icon4py/testutils/fortran/diffusion_granule.f90 +++ b/testutils/src/icon4py/testutils/fortran/diffusion_granule.f90 @@ -36,10 +36,11 @@ MODULE mo_nh_diffusion_new USE mo_kind_base, ONLY: wp, vp #endif USE mo_math_types, ONLY: t_tangent_vectors ! to maintain compatibility w/ p_patch - +#if 0 + USE mo_math_types_base, ONLY: t_tangent_vectors +#endif USE mo_model_domain_advanced, ONLY: t_patch ! until GridManager available USE mo_model_domain, ONLY: p_patch - USE mo_intp_rbf_math, ONLY: rbf_vec_interpol_vertex, rbf_vec_interpol_cell USE mo_interpolation_scalar_math, ONLY: cells2verts_scalar USE mo_interpolation_vector_math, ONLY: edges2cells_vector @@ -397,6 +398,7 @@ SUBROUTINE diffusion_init(cvd_o_rd, grav, diff_inst(jg)%hdiff_efdt_ratio = hdiff_efdt_ratio !$ACC ENTER DATA COPYIN(diff_inst(jg)) + END SUBROUTINE diffusion_init !> @@ -408,7 +410,6 @@ END SUBROUTINE diffusion_init !! Initial release by Guenther Zaengl, DWD (2010-10-13), based on an earlier !! version initially developed by Almut Gassmann, MPI-M !! 
- SUBROUTINE diffusion_run(jg, dtime, linit, & vn, w, theta_v, exner, & ! p_nh_prog vt, theta_v_ic, div_ic, hdef_ic, dwdx, dwdy, & ! p_nh_diag @@ -417,7 +418,6 @@ SUBROUTINE diffusion_run(jg, dtime, linit, & INTEGER, INTENT(IN) :: jg ! patch ID REAL(wp), INTENT(IN) :: dtime !< time step LOGICAL, INTENT(IN) :: linit !< initial call or runtime call - REAL(wp), INTENT(INOUT) :: vn(:,:,:) ! orthogonal normal wind (nproma,nlev,nblks_e) [m/s] REAL(wp), INTENT(INOUT) :: w(:,:,:) ! orthogonal vertical wind (nproma,nlevp1,nblks_c) [m/s] REAL(wp), INTENT(INOUT) :: theta_v(:,:,:) ! virtual potential temperature (nproma,nlev,nblks_c) [K] @@ -670,7 +670,6 @@ SUBROUTINE diffusion_run(jg, dtime, linit, & ENDIF ! RBF reconstruction of velocity at vertices - CALL rbf_vec_interpol_vertex( vn, p_patch_diff(jg), & diff_inst(jg)%rbf_vec_idx_v, diff_inst(jg)%rbf_vec_blk_v, & diff_inst(jg)%rbf_vec_coeff_v, u_vert, v_vert, & @@ -796,7 +795,6 @@ SUBROUTINE diffusion_run(jg, dtime, linit, & ENDIF ! RBF reconstruction of velocity at vertices - CALL rbf_vec_interpol_vertex( vn, p_patch_diff(jg), & diff_inst(jg)%rbf_vec_idx_v, diff_inst(jg)%rbf_vec_blk_v, & diff_inst(jg)%rbf_vec_coeff_v, u_vert, v_vert, & @@ -987,7 +985,6 @@ SUBROUTINE diffusion_run(jg, dtime, linit, & ELSE IF ((diffu_type == 3 .OR. diffu_type == 5) .AND. diff_inst(jg)%discr_vn >= 2) THEN ! 
RBF reconstruction of velocity at vertices and cells - CALL rbf_vec_interpol_vertex( vn, p_patch_diff(jg), & diff_inst(jg)%rbf_vec_idx_v, diff_inst(jg)%rbf_vec_blk_v, & diff_inst(jg)%rbf_vec_coeff_v, u_vert, v_vert, & @@ -1596,7 +1593,6 @@ SUBROUTINE diffusion_run(jg, dtime, linit, & i_startblk = p_patch_diff(jg)%cells%start_block(rl_start) i_endblk = p_patch_diff(jg)%cells%end_block(rl_end) - !$OMP DO PRIVATE(jk,jc,jb,i_startidx,i_endidx), ICON_OMP_RUNTIME_SCHEDULE DO jb = i_startblk,i_endblk From 41e3a9ca768ff9b98be0a557fdf5c64b57c28a96 Mon Sep 17 00:00:00 2001 From: Christopher Bignamini Date: Wed, 5 Apr 2023 09:32:25 +0200 Subject: [PATCH 05/21] CLI created (#187) * CLI created * Input parameter renaming, dependencies made optional * Update cli.py --------- Co-authored-by: Samuel --- pyutils/src/icon4py/f2ser/cli.py | 48 ++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/pyutils/src/icon4py/f2ser/cli.py b/pyutils/src/icon4py/f2ser/cli.py index 15dfdb0098..cbb9c420ae 100644 --- a/pyutils/src/icon4py/f2ser/cli.py +++ b/pyutils/src/icon4py/f2ser/cli.py @@ -10,3 +10,51 @@ # distribution for a copy of the license or check . 
# # SPDX-License-Identifier: GPL-3.0-or-later + +import pathlib + +import click + +from icon4py.f2ser.logger import setup_logger +from icon4py.f2ser.deserialise import ParsedGranuleDeserialiser +from icon4py.f2ser.parse import GranuleParser + +logger = setup_logger(__name__) + +@click.command("icon_f2ser") +@click.argument( + "granule_path", + type=click.Path( + exists=True, dir_okay=False, resolve_path=True, path_type=pathlib.Path + ), +) +@click.argument('dependencies', multiple=True, help='List of dependency file paths.') +@click.argument( + "output_filepath", + type=click.Path(dir_okay=False, resolve_path=True, path_type=pathlib.Path), +) +def main( + granule_path: pathlib.Path, + dependencies: Optional[list[pathlib.Path]] = None, + output_filepath: pathlib.Path, +) -> None: + """Command line interface for interacting with the ICON-f2ser serialization Preprocessor. + + Usage: + icon_f2ser + + Options: + + Arguments: + granule_path (Path): A path to the Fortran source file to be parsed. + dependencies (Optional[list[Path]]): A list of paths to any additional Fortran source files that the input file depends on. + output_filepath (Path): A path to the output Fortran source file to be generated. + """ + + parser = GranuleParser(granule_path, dependencies) + parsed = parser.parse() + deserialiser = ParsedGranuleDeserialiser(parsed, directory=".") + interface = deserialiser.deserialise() + +if __name__ == "__main__": + main() From b48114e7a84cd4f49eb6ce284da8154457f9294b Mon Sep 17 00:00:00 2001 From: Samuel Date: Wed, 12 Apr 2023 12:53:29 +0200 Subject: [PATCH 06/21] Serialisation codegen fixes (#190) Introduces fixes to the f2ser code generation template. 
--- .../liskov/codegen/serialisation/generate.py | 35 +++++++-------- .../liskov/codegen/serialisation/interface.py | 1 + .../liskov/codegen/serialisation/template.py | 43 +++++++++++++----- pyutils/setup.cfg | 1 + pyutils/src/icon4py/f2ser/cli.py | 43 ++++++++++++------ pyutils/src/icon4py/f2ser/deserialise.py | 32 ++++++++++---- pyutils/src/icon4py/f2ser/parse.py | 44 +++++++++---------- pyutils/tests/f2ser/test_f2ser_cli.py | 35 +++++++++++++++ pyutils/tests/f2ser/test_f2ser_codegen.py | 4 +- .../tests/f2ser/test_granule_deserialiser.py | 14 +++--- pyutils/tests/f2ser/test_parsing.py | 16 ++++--- 11 files changed, 179 insertions(+), 89 deletions(-) create mode 100644 pyutils/tests/f2ser/test_f2ser_cli.py diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/generate.py b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py index 0576ce76d2..2a79c664d8 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/generate.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py @@ -15,8 +15,6 @@ from icon4py.liskov.codegen.common import CodeGenerator from icon4py.liskov.codegen.serialisation.interface import SerialisationInterface from icon4py.liskov.codegen.serialisation.template import ( - InitStatement, - InitStatementGenerator, SavepointStatement, SavepointStatementGenerator, ) @@ -31,28 +29,29 @@ class SerialisationGenerator(CodeGenerator): def __init__(self, ser_iface: SerialisationInterface): super().__init__() self.ser_iface = ser_iface + self.ser_init_complete = False def __call__(self, data: Any = None) -> list[GeneratedCode]: """Generate all f90 code for integration.""" - self._generate_init() self._generate_savepoints() return self.generated - def _generate_init(self) -> None: - logger.info("Generating pp_ser initialisation statement.") - self._generate( - InitStatement, - InitStatementGenerator, - self.ser_iface.Init.startln, - directory=self.ser_iface.Init.directory, - ) - def _generate_savepoints(self) -> None: for i, 
savepoint in enumerate(self.ser_iface.Savepoint): logger.info("Generating pp_ser savepoint statement.") - self._generate( - SavepointStatement, - SavepointStatementGenerator, - self.ser_iface.Savepoint[i].startln, - savepoint=savepoint, - ) + if self.ser_init_complete: + self._generate( + SavepointStatement, + SavepointStatementGenerator, + self.ser_iface.Savepoint[i].startln, + savepoint=savepoint, + ) + else: + self._generate( + SavepointStatement, + SavepointStatementGenerator, + self.ser_iface.Savepoint[i].startln, + savepoint=savepoint, + init=self.ser_iface.Init, + ) + self.ser_init_complete = True diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py index 1bf11eb6c8..54dc048fa8 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py @@ -20,6 +20,7 @@ @dataclass class InitData(CodeGenInput): directory: str + prefix: str @dataclass diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/template.py b/liskov/src/icon4py/liskov/codegen/serialisation/template.py index d60a71179e..a2e0e0d860 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/template.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/template.py @@ -17,15 +17,7 @@ from gt4py.eve.codegen import JinjaTemplate as as_jinja from gt4py.eve.codegen import TemplatedGenerator -from icon4py.liskov.codegen.serialisation.interface import SavepointData - - -class InitStatement(eve.Node): - directory: str - - -class InitStatementGenerator(TemplatedGenerator): - InitStatement = as_jinja("!$ser init directory={{directory}}") +from icon4py.liskov.codegen.serialisation.interface import InitData, SavepointData class Field(eve.Node): @@ -46,10 +38,18 @@ class DecomposedFields(StandardFields): ... +class DecomposedFieldDeclarations(DecomposedFields): + ... 
+ + class SavepointStatement(eve.Node): savepoint: SavepointData + init: Optional[InitData] = eve.datamodels.field(default=None) standard_fields: StandardFields = eve.datamodels.field(init=False) decomposed_fields: DecomposedFields = eve.datamodels.field(init=False) + decomposed_field_declarations: DecomposedFieldDeclarations = eve.datamodels.field( + init=False + ) def __post_init__(self): self.standard_fields = StandardFields( @@ -60,15 +60,25 @@ def __post_init__(self): self.decomposed_fields = DecomposedFields( fields=[Field(**asdict(f)) for f in self.savepoint.fields if f.decomposed] ) + self.decomposed_field_declarations = DecomposedFieldDeclarations( + fields=[Field(**asdict(f)) for f in self.savepoint.fields if f.decomposed] + ) class SavepointStatementGenerator(TemplatedGenerator): SavepointStatement = as_jinja( """ + {{ decomposed_field_declarations }} + + {% if _this_node.init %} + !$ser init directory="{{_this_node.init.directory}}" prefix="{{ _this_node.init.prefix }}" + {% endif %} + !$ser savepoint {{ _this_node.savepoint.subroutine }}_{{ _this_node.savepoint.intent }} {% if _this_node.savepoint.metadata %} {%- for m in _this_node.savepoint.metadata -%} {{ m.key }}={{ m.value }} {%- endfor -%} {% endif %} - {{ standard_fields }} {{ decomposed_fields }} + + {{ standard_fields }} """ ) @@ -80,11 +90,20 @@ class SavepointStatementGenerator(TemplatedGenerator): """ ) + DecomposedFieldDeclarations = as_jinja( + """ + {% for f in _this_node.fields %} + !$ser verbatim {{ f.typespec }}, dimension({{ ",".join(f.dimension) }}), allocatable :: {{ f.variable }}_{{ f.ptr_var}} + {% endfor %} + """ + ) + DecomposedFields = as_jinja( """ {% for f in _this_node.fields %} - !$ser verbatim {{ f.typespec }}, automatic :: {{ f.variable }}_{{ f.ptr_var}}({{ f.alloc_dims }}) - !$ser data {{ f.variable }}={{ f.association }} + !$ser verbatim allocate({{ f.variable }}_{{ f.ptr_var}}({{ f.alloc_dims }})) + !$ser data {{ f.variable }}_{{ f.ptr_var}}={{ f.association }} + 
!$ser verbatim deallocate({{ f.variable }}_{{ f.ptr_var}}) {% endfor %} """ ) diff --git a/pyutils/setup.cfg b/pyutils/setup.cfg index 2209a5a16b..28de7ba140 100644 --- a/pyutils/setup.cfg +++ b/pyutils/setup.cfg @@ -54,3 +54,4 @@ exclude = [options.entry_points] console_scripts = icon4pygen = icon4py.pyutils.icon4pygen:main + f2ser = icon4py.f2ser.cli:main diff --git a/pyutils/src/icon4py/f2ser/cli.py b/pyutils/src/icon4py/f2ser/cli.py index cbb9c420ae..598346a61b 100644 --- a/pyutils/src/icon4py/f2ser/cli.py +++ b/pyutils/src/icon4py/f2ser/cli.py @@ -12,14 +12,15 @@ # SPDX-License-Identifier: GPL-3.0-or-later import pathlib +from typing import Optional import click -from icon4py.f2ser.logger import setup_logger from icon4py.f2ser.deserialise import ParsedGranuleDeserialiser from icon4py.f2ser.parse import GranuleParser +from icon4py.liskov.codegen.serialisation.generate import SerialisationGenerator +from icon4py.liskov.codegen.write import CodegenWriter -logger = setup_logger(__name__) @click.command("icon_f2ser") @click.argument( @@ -28,33 +29,47 @@ exists=True, dir_okay=False, resolve_path=True, path_type=pathlib.Path ), ) -@click.argument('dependencies', multiple=True, help='List of dependency file paths.') +@click.option( + "--dependencies", + "-d", + multiple=True, + type=click.Path(exists=True), + help="Optional list of dependency paths.", +) @click.argument( "output_filepath", type=click.Path(dir_okay=False, resolve_path=True, path_type=pathlib.Path), ) +@click.option( + "--directory", type=str, help="Directory to serialise variables to.", default="." +) +@click.option( + "--prefix", type=str, help="Prefix to use for serialised files.", default="f2ser" +) def main( granule_path: pathlib.Path, - dependencies: Optional[list[pathlib.Path]] = None, + dependencies: Optional[list[pathlib.Path]], output_filepath: pathlib.Path, + directory: str, + prefix: str, ) -> None: """Command line interface for interacting with the ICON-f2ser serialization Preprocessor. 
- Usage: - icon_f2ser - - Options: - Arguments: granule_path (Path): A path to the Fortran source file to be parsed. - dependencies (Optional[list[Path]]): A list of paths to any additional Fortran source files that the input file depends on. output_filepath (Path): A path to the output Fortran source file to be generated. + directory (str): The directory to serialise the variables to. + prefix (str): The prefix to use for each serialised variable. """ + parsed = GranuleParser(granule_path, dependencies).parse() + interface = ParsedGranuleDeserialiser( + parsed, directory=directory, prefix=prefix + ).deserialise() + generator = SerialisationGenerator(interface) + generated = generator() + writer = CodegenWriter(granule_path, output_filepath) + writer(generated) - parser = GranuleParser(granule_path, dependencies) - parsed = parser.parse() - deserialiser = ParsedGranuleDeserialiser(parsed, directory=".") - interface = deserialiser.deserialise() if __name__ == "__main__": main() diff --git a/pyutils/src/icon4py/f2ser/deserialise.py b/pyutils/src/icon4py/f2ser/deserialise.py index c074a1817d..ac37c5dd27 100644 --- a/pyutils/src/icon4py/f2ser/deserialise.py +++ b/pyutils/src/icon4py/f2ser/deserialise.py @@ -11,7 +11,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -from icon4py.f2ser.parse import ParsedGranule +from icon4py.f2ser.parse import CodegenContext, ParsedGranule from icon4py.liskov.codegen.serialisation.interface import ( FieldSerialisationData, InitData, @@ -21,9 +21,10 @@ class ParsedGranuleDeserialiser: - def __init__(self, parsed: ParsedGranule, directory: str): + def __init__(self, parsed: ParsedGranule, directory: str, prefix: str): self.parsed = parsed self.directory = directory + self.prefix = prefix self.data = {"Savepoint": [], "Init": ...} def deserialise(self) -> SerialisationInterface: @@ -80,7 +81,7 @@ def _create_savepoint( SavepointData( subroutine=subroutine_name, intent=intent, - startln=var_dict["codegen_line"], + 
startln=self._get_codegen_line(var_dict["codegen_ctx"], intent), fields=fields, metadata=None, # todo: decide how to handle metadata ) @@ -125,14 +126,18 @@ def _make_init_data(self) -> None: Returns: None. """ - in_lines = [ - var_dict["codegen_line"] + first_intent_in_subroutine = [ + var_dict for intent_dict in self.parsed.values() for intent, var_dict in intent_dict.items() if intent == "in" - ] - startln = min(in_lines, default=0) - self.data["Init"] = InitData(startln=startln, directory=self.directory) + ][0] + startln = self._get_codegen_line( + first_intent_in_subroutine["codegen_ctx"], "init" + ) + self.data["Init"] = InitData( + startln=startln, directory=self.directory, prefix=self.prefix + ) def _merge_out_inout_fields(self): """Merge the `inout` fields into the `in` and `out` fields in the `parsed` dictionary. @@ -145,3 +150,14 @@ def _merge_out_inout_fields(self): intent_dict["in"].update(intent_dict["inout"]) intent_dict["out"].update(intent_dict["inout"]) del intent_dict["inout"] + + @staticmethod + def _get_codegen_line(ctx: CodegenContext, intent: str): + if intent == "in": + return ctx.last_declaration_ln + elif intent == "out": + return ctx.end_subroutine_ln + elif intent == "init": + return ctx.first_declaration_ln + else: + raise ValueError(f"Unrecognized intent: {intent}") diff --git a/pyutils/src/icon4py/f2ser/parse.py b/pyutils/src/icon4py/f2ser/parse.py index 294db84c04..af79e52159 100644 --- a/pyutils/src/icon4py/f2ser/parse.py +++ b/pyutils/src/icon4py/f2ser/parse.py @@ -22,10 +22,6 @@ from icon4py.f2ser.exceptions import MissingDerivedTypeError, ParsingError -CodeGenLines = list[int] -ParsedGranule = dict[str, dict[str, dict[str, any] | CodeGenLines]] - - def crack(path: Path) -> dict: return crackfortran(path)[0] @@ -37,10 +33,14 @@ class SubroutineType(Enum): @dataclass class CodegenContext: - last_intent_ln: int + first_declaration_ln: int + last_declaration_ln: int end_subroutine_ln: int +ParsedGranule = dict[str, dict[str, 
dict[str, any] | CodegenContext]] + + class GranuleParser: """Parses a Fortran source file and extracts information about its subroutines and variables. @@ -244,27 +244,20 @@ def _update_with_codegen_lines(self, parsed_types: dict) -> dict: """ with_lines = deepcopy(parsed_types) for subroutine in with_lines: - ctx = self.get_line_numbers(subroutine) for intent in with_lines[subroutine]: - if intent == "in": - ln = ctx.last_intent_ln - elif intent == "inout": - continue - elif intent == "out": - ln = ctx.end_subroutine_ln - else: - raise ValueError(f"Unrecognized intent: {intent}") - with_lines[subroutine][intent]["codegen_line"] = ln + with_lines[subroutine][intent]["codegen_ctx"] = self.get_line_numbers( + subroutine + ) return with_lines def get_line_numbers(self, subroutine_name: str) -> CodegenContext: - """Return CodegenContext object containing line numbers of the last intent statement and the code before the end of the given subroutine. + """Return CodegenContext object containing line numbers of the last declaration statement and the code before the end of the given subroutine. Args: subroutine_name (str): Name of the subroutine to look for in the code. Returns: - CodegenContext: Object containing the line number of the last intent statement and the line number of the last line of the code before the end of the given subroutine. + CodegenContext: Object containing the line number of the last declaration statement and the line number of the last line of the code before the end of the given subroutine. 
""" with open(self.granule, "r") as f: code = f.read() @@ -280,20 +273,23 @@ def get_line_numbers(self, subroutine_name: str) -> CodegenContext: end_subroutine_ln = code[: end_match.start()].count("\n") + 1 # Find the last intent statement line number in the subroutine - intent_pattern = r"\bINTENT\b" - intent_pattern_lines = [ + declaration_pattern = r".*::\s*(\w+\b)" + declaration_pattern_lines = [ i for i, line in enumerate( code.splitlines()[start_subroutine_ln:end_subroutine_ln] ) - if re.search(intent_pattern, line) + if re.search(declaration_pattern, line) ] - if not intent_pattern_lines: - raise ParsingError(f"No INTENT declarations found in {self.granule}") - last_intent_ln = intent_pattern_lines[-1] + start_subroutine_ln + 1 + if not declaration_pattern_lines: + raise ParsingError(f"No declarations found in {self.granule}") + last_declaration_ln = declaration_pattern_lines[-1] + start_subroutine_ln + 1 + first_declaration_ln = declaration_pattern_lines[0] + start_subroutine_ln pre_end_subroutine_ln = ( end_subroutine_ln - 1 ) # we want to generate the code before the end of the subroutine - return CodegenContext(last_intent_ln, pre_end_subroutine_ln) + return CodegenContext( + first_declaration_ln, last_declaration_ln, pre_end_subroutine_ln + ) diff --git a/pyutils/tests/f2ser/test_f2ser_cli.py b/pyutils/tests/f2ser/test_f2ser_cli.py new file mode 100644 index 0000000000..8962ed1c25 --- /dev/null +++ b/pyutils/tests/f2ser/test_f2ser_cli.py @@ -0,0 +1,35 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later + +import pytest +from click.testing import CliRunner + +from icon4py.f2ser.cli import main + + +@pytest.fixture +def outfile(tmp_path): + return str(tmp_path / "gen.f90") + + +@pytest.fixture +def cli(): + return CliRunner() + + +def test_cli(diffusion_granule, diffusion_granule_deps, outfile, cli): + inp = str(diffusion_granule) + deps = [str(p) for p in diffusion_granule_deps] + args = [inp, outfile, "-d", ",".join(deps)] + result = cli.invoke(main, args) + assert result.exit_code == 0 diff --git a/pyutils/tests/f2ser/test_f2ser_codegen.py b/pyutils/tests/f2ser/test_f2ser_codegen.py index e72197ae4b..f00187eb72 100644 --- a/pyutils/tests/f2ser/test_f2ser_codegen.py +++ b/pyutils/tests/f2ser/test_f2ser_codegen.py @@ -19,8 +19,8 @@ def test_deserialiser_diffusion_codegen(diffusion_granule, diffusion_granule_deps): parser = GranuleParser(diffusion_granule, diffusion_granule_deps) parsed = parser.parse() - deserialiser = ParsedGranuleDeserialiser(parsed, directory=".") + deserialiser = ParsedGranuleDeserialiser(parsed, directory=".", prefix="test") interface = deserialiser.deserialise() generator = SerialisationGenerator(interface) generated = generator() - assert len(generated) == 4 + assert len(generated) == 3 diff --git a/pyutils/tests/f2ser/test_granule_deserialiser.py b/pyutils/tests/f2ser/test_granule_deserialiser.py index a6d88d5bee..505bbb650d 100644 --- a/pyutils/tests/f2ser/test_granule_deserialiser.py +++ b/pyutils/tests/f2ser/test_granule_deserialiser.py @@ -13,7 +13,7 @@ import pytest from icon4py.f2ser.deserialise import ParsedGranuleDeserialiser -from icon4py.f2ser.parse import GranuleParser +from icon4py.f2ser.parse import CodegenContext, GranuleParser from icon4py.liskov.codegen.serialisation.interface import ( FieldSerialisationData, SavepointData, @@ -34,7 +34,7 @@ def mock_parsed_granule(): "intent": ["in"], "dimension": [":", ":", ":"], }, - "codegen_line": 432, + "codegen_ctx": CodegenContext(432, 
450, 600), } }, "diffusion_run": { @@ -46,7 +46,7 @@ def mock_parsed_granule(): "intent": ["in"], "dimension": [":", ":", ":"], }, - "codegen_line": 800, + "codegen_ctx": CodegenContext(800, 850, 1000), }, "in": { "vn": {"typespec": "integer", "attrspec": [], "intent": ["out"]}, @@ -57,7 +57,7 @@ def mock_parsed_granule(): "intent": ["in"], "dimension": [":", ":", ":"], }, - "codegen_line": 600, + "codegen_ctx": CodegenContext(600, 690, 750), }, "inout": { "vn": {"typespec": "integer", "attrspec": [], "intent": ["out"]}, @@ -74,7 +74,9 @@ def mock_parsed_granule(): def test_deserialiser_mock(mock_parsed_granule): - deserialiser = ParsedGranuleDeserialiser(mock_parsed_granule, directory=".") + deserialiser = ParsedGranuleDeserialiser( + mock_parsed_granule, directory=".", prefix="f2ser" + ) interface = deserialiser.deserialise() assert isinstance(interface, SerialisationInterface) assert len(interface.Savepoint) == 3 @@ -91,6 +93,6 @@ def test_deserialiser_mock(mock_parsed_granule): def test_deserialiser_diffusion_granule(diffusion_granule, diffusion_granule_deps): parser = GranuleParser(diffusion_granule, diffusion_granule_deps) parsed = parser.parse() - deserialiser = ParsedGranuleDeserialiser(parsed, directory=".") + deserialiser = ParsedGranuleDeserialiser(parsed, directory=".", prefix="f2ser") interface = deserialiser.deserialise() assert len(interface.Savepoint) == 3 diff --git a/pyutils/tests/f2ser/test_parsing.py b/pyutils/tests/f2ser/test_parsing.py index 7c0a30a068..d3e348623a 100644 --- a/pyutils/tests/f2ser/test_parsing.py +++ b/pyutils/tests/f2ser/test_parsing.py @@ -14,7 +14,7 @@ import pytest from icon4py.f2ser.exceptions import MissingDerivedTypeError, ParsingError -from icon4py.f2ser.parse import GranuleParser +from icon4py.f2ser.parse import CodegenContext, GranuleParser def test_granule_parsing(diffusion_granule, diffusion_granule_deps): @@ -25,16 +25,22 @@ def test_granule_parsing(diffusion_granule, diffusion_granule_deps): assert 
list(parsed["diffusion_init"]) == ["in"] assert len(parsed["diffusion_init"]["in"]) == 107 - assert parsed["diffusion_init"]["in"]["codegen_line"] == 279 + assert parsed["diffusion_init"]["in"]["codegen_ctx"] == CodegenContext( + first_declaration_ln=190, last_declaration_ln=280, end_subroutine_ln=401 + ) assert list(parsed["diffusion_run"]) == ["in", "inout", "out"] assert len(parsed["diffusion_run"]["in"]) == 5 - assert parsed["diffusion_run"]["in"]["codegen_line"] == 432 + assert parsed["diffusion_run"]["in"]["codegen_ctx"] == CodegenContext( + first_declaration_ln=417, last_declaration_ln=492, end_subroutine_ln=1965 + ) - assert len(parsed["diffusion_run"]["inout"]) == 7 + assert len(parsed["diffusion_run"]["inout"]) == 8 assert len(parsed["diffusion_run"]["out"]) == 5 - assert parsed["diffusion_run"]["out"]["codegen_line"] == 1969 + assert parsed["diffusion_run"]["out"]["codegen_ctx"] == CodegenContext( + first_declaration_ln=417, last_declaration_ln=492, end_subroutine_ln=1965 + ) assert isinstance(parsed, dict) From 0213e4e3a3a431f207bd078930a81a668c1258c7 Mon Sep 17 00:00:00 2001 From: samkellerhals Date: Wed, 26 Apr 2023 13:12:17 +0200 Subject: [PATCH 07/21] Add zero size array check --- .../src/icon4py/liskov/codegen/serialisation/template.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/template.py b/liskov/src/icon4py/liskov/codegen/serialisation/template.py index a2e0e0d860..501dc65ca9 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/template.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/template.py @@ -85,7 +85,15 @@ class SavepointStatementGenerator(TemplatedGenerator): StandardFields = as_jinja( """ {% for f in _this_node.fields %} + {% if f.dimension %} + IF (SIZE({{ f.variable }}) > 0) THEN !$ser data {{ f.variable }}={{ f.association }} + ELSE + PRINT *, 'Warning: Array {{ f.variable }} has size 0. Not serializing array.' 
+ ENDIF + {% else %} + !$ser data {{ f.variable }}={{ f.association }} + {% endif %} {% endfor %} """ ) From fd8e9b0deb94735919a3701bd1ea607109ed37d5 Mon Sep 17 00:00:00 2001 From: Samuel Date: Wed, 3 May 2023 11:33:25 +0200 Subject: [PATCH 08/21] Small cleanup (#200) * Remove superfluous methods * cleanup --- pyutils/src/icon4py/f2ser/cli.py | 12 ++++-------- pyutils/src/icon4py/f2ser/deserialise.py | 4 ++-- pyutils/src/icon4py/f2ser/parse.py | 7 ++----- pyutils/tests/f2ser/test_f2ser_codegen.py | 4 ++-- pyutils/tests/f2ser/test_granule_deserialiser.py | 6 +++--- pyutils/tests/f2ser/test_parsing.py | 6 +++--- 6 files changed, 16 insertions(+), 23 deletions(-) diff --git a/pyutils/src/icon4py/f2ser/cli.py b/pyutils/src/icon4py/f2ser/cli.py index 598346a61b..4c142fe5ea 100644 --- a/pyutils/src/icon4py/f2ser/cli.py +++ b/pyutils/src/icon4py/f2ser/cli.py @@ -61,14 +61,10 @@ def main( directory (str): The directory to serialise the variables to. prefix (str): The prefix to use for each serialised variable. 
""" - parsed = GranuleParser(granule_path, dependencies).parse() - interface = ParsedGranuleDeserialiser( - parsed, directory=directory, prefix=prefix - ).deserialise() - generator = SerialisationGenerator(interface) - generated = generator() - writer = CodegenWriter(granule_path, output_filepath) - writer(generated) + parsed = GranuleParser(granule_path, dependencies)() + interface = ParsedGranuleDeserialiser(parsed, directory=directory, prefix=prefix)() + generated = SerialisationGenerator(interface)() + CodegenWriter(granule_path, output_filepath)(generated) if __name__ == "__main__": diff --git a/pyutils/src/icon4py/f2ser/deserialise.py b/pyutils/src/icon4py/f2ser/deserialise.py index ac37c5dd27..a4587e07f8 100644 --- a/pyutils/src/icon4py/f2ser/deserialise.py +++ b/pyutils/src/icon4py/f2ser/deserialise.py @@ -27,7 +27,7 @@ def __init__(self, parsed: ParsedGranule, directory: str, prefix: str): self.prefix = prefix self.data = {"Savepoint": [], "Init": ...} - def deserialise(self) -> SerialisationInterface: + def __call__(self) -> SerialisationInterface: """Deserialise the parsed granule and returns a serialisation interface. Returns: @@ -83,7 +83,7 @@ def _create_savepoint( intent=intent, startln=self._get_codegen_line(var_dict["codegen_ctx"], intent), fields=fields, - metadata=None, # todo: decide how to handle metadata + metadata=None, ) ) diff --git a/pyutils/src/icon4py/f2ser/parse.py b/pyutils/src/icon4py/f2ser/parse.py index af79e52159..609859485e 100644 --- a/pyutils/src/icon4py/f2ser/parse.py +++ b/pyutils/src/icon4py/f2ser/parse.py @@ -48,12 +48,9 @@ class GranuleParser: granule (Path): A path to the Fortran source file to be parsed. dependencies (Optional[list[Path]]): A list of paths to any additional Fortran source files that the input file depends on. - Methods: - parse(): Parses the input file and returns a dictionary with information about its subroutines and variables. 
- Example usage: parser = GranuleParser(Path("my_file.f90"), dependencies=[Path("common.f90"), Path("constants.f90")]) - parsed_types = parser.parse() + parsed_types = parser() """ def __init__( @@ -62,7 +59,7 @@ def __init__( self.granule = granule self.dependencies = dependencies - def parse(self) -> ParsedGranule: + def __call__(self) -> ParsedGranule: """Parse the granule and return the parsed data.""" subroutines = self._extract_subroutines(crack(self.granule)) variables_grouped_by_intent = { diff --git a/pyutils/tests/f2ser/test_f2ser_codegen.py b/pyutils/tests/f2ser/test_f2ser_codegen.py index f00187eb72..d9ea58718d 100644 --- a/pyutils/tests/f2ser/test_f2ser_codegen.py +++ b/pyutils/tests/f2ser/test_f2ser_codegen.py @@ -18,9 +18,9 @@ def test_deserialiser_diffusion_codegen(diffusion_granule, diffusion_granule_deps): parser = GranuleParser(diffusion_granule, diffusion_granule_deps) - parsed = parser.parse() + parsed = parser() deserialiser = ParsedGranuleDeserialiser(parsed, directory=".", prefix="test") - interface = deserialiser.deserialise() + interface = deserialiser() generator = SerialisationGenerator(interface) generated = generator() assert len(generated) == 3 diff --git a/pyutils/tests/f2ser/test_granule_deserialiser.py b/pyutils/tests/f2ser/test_granule_deserialiser.py index 505bbb650d..c82d422a37 100644 --- a/pyutils/tests/f2ser/test_granule_deserialiser.py +++ b/pyutils/tests/f2ser/test_granule_deserialiser.py @@ -77,7 +77,7 @@ def test_deserialiser_mock(mock_parsed_granule): deserialiser = ParsedGranuleDeserialiser( mock_parsed_granule, directory=".", prefix="f2ser" ) - interface = deserialiser.deserialise() + interface = deserialiser() assert isinstance(interface, SerialisationInterface) assert len(interface.Savepoint) == 3 assert all([isinstance(s, SavepointData) for s in interface.Savepoint]) @@ -92,7 +92,7 @@ def test_deserialiser_mock(mock_parsed_granule): def test_deserialiser_diffusion_granule(diffusion_granule, diffusion_granule_deps): 
parser = GranuleParser(diffusion_granule, diffusion_granule_deps) - parsed = parser.parse() + parsed = parser() deserialiser = ParsedGranuleDeserialiser(parsed, directory=".", prefix="f2ser") - interface = deserialiser.deserialise() + interface = deserialiser() assert len(interface.Savepoint) == 3 diff --git a/pyutils/tests/f2ser/test_parsing.py b/pyutils/tests/f2ser/test_parsing.py index d3e348623a..78e6985275 100644 --- a/pyutils/tests/f2ser/test_parsing.py +++ b/pyutils/tests/f2ser/test_parsing.py @@ -19,7 +19,7 @@ def test_granule_parsing(diffusion_granule, diffusion_granule_deps): parser = GranuleParser(diffusion_granule, diffusion_granule_deps) - parsed = parser.parse() + parsed = parser() assert list(parsed) == ["diffusion_init", "diffusion_run"] @@ -51,10 +51,10 @@ def test_granule_parsing_missing_derived_typedef(diffusion_granule, samples_path with pytest.raises( MissingDerivedTypeError, match="Could not find type definition for TYPE" ): - parser.parse() + parser() def test_granule_parsing_no_intent(samples_path): parser = GranuleParser(samples_path / "subroutine_example.f90", []) with pytest.raises(ParsingError): - parser.parse() + parser() From 0b2dd72c471cb39d8d5764c9040e9c035c9760eb Mon Sep 17 00:00:00 2001 From: Christopher Bignamini Date: Tue, 9 May 2023 11:44:13 +0200 Subject: [PATCH 09/21] No dependency test created (#193) * No dependency test created * Missing dependency test added * Added test for non existing granule input file * Add small fixes --------- Co-authored-by: samkellerhals --- pyutils/tests/f2ser/conftest.py | 10 ++++++++ pyutils/tests/f2ser/test_f2ser_cli.py | 24 ++++++++++++++++++ .../fortran/no_deps_subroutine_example.f90 | 25 +++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 testutils/src/icon4py/testutils/fortran/no_deps_subroutine_example.f90 diff --git a/pyutils/tests/f2ser/conftest.py b/pyutils/tests/f2ser/conftest.py index 3244246c76..ff73a38d4f 100644 --- a/pyutils/tests/f2ser/conftest.py +++ 
b/pyutils/tests/f2ser/conftest.py @@ -31,3 +31,13 @@ def diffusion_granule(samples_path): @pytest.fixture def diffusion_granule_deps(samples_path): return [samples_path / "derived_types_example.f90"] + + +@pytest.fixture +def no_deps_source_file(samples_path): + return samples_path / "no_deps_subroutine_example.f90" + + +@pytest.fixture +def not_existing_diffusion_granule(samples_path): + return samples_path / "not_existing_file.f90" diff --git a/pyutils/tests/f2ser/test_f2ser_cli.py b/pyutils/tests/f2ser/test_f2ser_cli.py index 8962ed1c25..ac544211ea 100644 --- a/pyutils/tests/f2ser/test_f2ser_cli.py +++ b/pyutils/tests/f2ser/test_f2ser_cli.py @@ -15,6 +15,7 @@ from click.testing import CliRunner from icon4py.f2ser.cli import main +from icon4py.f2ser.exceptions import MissingDerivedTypeError @pytest.fixture @@ -33,3 +34,26 @@ def test_cli(diffusion_granule, diffusion_granule_deps, outfile, cli): args = [inp, outfile, "-d", ",".join(deps)] result = cli.invoke(main, args) assert result.exit_code == 0 + + +def test_cli_no_deps(no_deps_source_file, outfile, cli): + inp = str(no_deps_source_file) + args = [inp, outfile] + result = cli.invoke(main, args) + assert result.exit_code == 0 + + +def test_cli_missing_deps(diffusion_granule, outfile, cli): + inp = str(diffusion_granule) + args = [inp, outfile] + result = cli.invoke(main, args) + assert isinstance(result.exception, MissingDerivedTypeError) + + +def test_cli_missing_source(not_existing_diffusion_granule, outfile, cli): + inp = str(not_existing_diffusion_granule) + args = [inp, outfile] + result = cli.invoke(main, args) + error_search = result.stdout.find("Invalid value for 'GRANULE_PATH'") + assert error_search != -1 + assert isinstance(result.exception, SystemExit) diff --git a/testutils/src/icon4py/testutils/fortran/no_deps_subroutine_example.f90 b/testutils/src/icon4py/testutils/fortran/no_deps_subroutine_example.f90 new file mode 100644 index 0000000000..021cb4aa76 --- /dev/null +++ 
b/testutils/src/icon4py/testutils/fortran/no_deps_subroutine_example.f90 @@ -0,0 +1,25 @@ +MODULE no_deps_example_subroutines + IMPLICIT NONE + + PUBLIC :: no_deps_init, no_deps_run + PRIVATE + + CONTAINS + + SUBROUTINE no_deps_init(a, b, c) + REAL, INTENT(in) :: a + REAL, INTENT(inout) :: b + REAL, INTENT(out) :: c + c = a + b + b = 2.0 * b + END SUBROUTINE no_deps_init + + SUBROUTINE no_deps_run(a, b, c) + REAL, INTENT(in) :: a + REAL, INTENT(inout) :: b + REAL, INTENT(out) :: c + c = a + b + b = 2.0 * b + END SUBROUTINE no_deps_run + +END MODULE no_deps_example_subroutines From e2e819c49acd674583fdae73792e420b13bff6a6 Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 9 May 2023 11:59:45 +0200 Subject: [PATCH 10/21] Add serialisation to liskov (#192) --- .../src/icon4py/common}/logger.py | 0 liskov/src/icon4py/liskov/cli.py | 60 ++++-- .../integration}/deserialise.py | 74 ++----- .../liskov/codegen/integration/generate.py | 70 +++--- .../liskov/codegen/integration/interface.py | 4 +- .../liskov/codegen/integration/template.py | 2 +- .../codegen/serialisation/deserialise.py | 199 ++++++++++++++++++ .../liskov/codegen/serialisation/generate.py | 30 +-- .../liskov/codegen/serialisation/interface.py | 6 +- .../liskov/codegen/serialisation/template.py | 5 +- .../icon4py/liskov/codegen/shared/__init__.py | 12 ++ .../liskov/codegen/shared/deserialiser.py | 52 +++++ .../{common.py => shared/generator.py} | 4 +- .../liskov/codegen/{ => shared}/types.py | 2 - .../codegen/{write.py => shared/writer.py} | 16 +- liskov/src/icon4py/liskov/external/gt4py.py | 10 +- liskov/src/icon4py/liskov/parsing/parse.py | 135 +++++++++++- liskov/src/icon4py/liskov/parsing/scan.py | 4 +- liskov/src/icon4py/liskov/parsing/types.py | 114 +--------- liskov/src/icon4py/liskov/parsing/utils.py | 12 +- .../src/icon4py/liskov/parsing/validation.py | 41 ++-- .../src/icon4py/liskov/pipeline/__init__.py | 12 ++ .../{pipeline.py => pipeline/collection.py} | 72 ++++--- .../{common.py => 
pipeline/definition.py} | 0 liskov/tests/test_cli.py | 48 +++-- liskov/tests/test_directives_deserialiser.py | 114 +++++++--- liskov/tests/test_external.py | 4 +- liskov/tests/test_generation.py | 127 ++++++++++- liskov/tests/test_parser.py | 9 +- .../tests/test_serialisation_deserialiser.py | 133 ++++++++++++ liskov/tests/test_utils.py | 6 +- liskov/tests/test_validation.py | 4 +- liskov/tests/test_writer.py | 4 +- pyutils/src/icon4py/f2ser/cli.py | 8 +- pyutils/src/icon4py/f2ser/deserialise.py | 8 +- pyutils/tests/f2ser/test_f2ser_codegen.py | 6 +- .../tests/f2ser/test_granule_deserialiser.py | 4 +- .../testutils/liskov_fortran_samples.py | 79 ++++++- 38 files changed, 1096 insertions(+), 394 deletions(-) rename {liskov/src/icon4py/liskov => common/src/icon4py/common}/logger.py (100%) rename liskov/src/icon4py/liskov/{parsing => codegen/integration}/deserialise.py (86%) create mode 100644 liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py create mode 100644 liskov/src/icon4py/liskov/codegen/shared/__init__.py create mode 100644 liskov/src/icon4py/liskov/codegen/shared/deserialiser.py rename liskov/src/icon4py/liskov/codegen/{common.py => shared/generator.py} (95%) rename liskov/src/icon4py/liskov/codegen/{ => shared}/types.py (89%) rename liskov/src/icon4py/liskov/codegen/{write.py => shared/writer.py} (89%) create mode 100644 liskov/src/icon4py/liskov/pipeline/__init__.py rename liskov/src/icon4py/liskov/{pipeline.py => pipeline/collection.py} (54%) rename liskov/src/icon4py/liskov/{common.py => pipeline/definition.py} (100%) create mode 100644 liskov/tests/test_serialisation_deserialiser.py diff --git a/liskov/src/icon4py/liskov/logger.py b/common/src/icon4py/common/logger.py similarity index 100% rename from liskov/src/icon4py/liskov/logger.py rename to common/src/icon4py/common/logger.py diff --git a/liskov/src/icon4py/liskov/cli.py b/liskov/src/icon4py/liskov/cli.py index 8e0fe27436..fb67f550ff 100644 --- a/liskov/src/icon4py/liskov/cli.py +++ 
b/liskov/src/icon4py/liskov/cli.py @@ -15,8 +15,8 @@ import click -from icon4py.liskov.logger import setup_logger -from icon4py.liskov.pipeline import ( +from icon4py.common.logger import setup_logger +from icon4py.liskov.pipeline.collection import ( load_gt4py_stencils, parse_fortran_file, run_code_generation, @@ -28,17 +28,26 @@ @click.command("icon_liskov") @click.argument( - "input_filepath", + "input_path", type=click.Path( exists=True, dir_okay=False, resolve_path=True, path_type=pathlib.Path ), ) @click.argument( - "output_filepath", + "output_path", type=click.Path(dir_okay=False, resolve_path=True, path_type=pathlib.Path), ) @click.option( - "--profile", "-p", is_flag=True, help="Add nvtx profile statements to stencils." + "--ppser", + is_flag=True, + type=str, + help="Generate ppser serialization statements instead of integration code.", +) +@click.option( + "--profile", + "-p", + is_flag=True, + help="Add nvtx profile statements to integration code.", ) @click.option( "--metadatagen", @@ -47,29 +56,50 @@ help="Add metadata header with information about program (requires git).", ) def main( - input_filepath: pathlib.Path, - output_filepath: pathlib.Path, + input_path: pathlib.Path, + output_path: pathlib.Path, + ppser: bool, profile: bool, metadatagen: bool, ) -> None: """Command line interface for interacting with the ICON-Liskov DSL Preprocessor. Usage: - icon_liskov <input_filepath> <output_filepath> [-p] [-m] + icon_liskov <input_path> <output_path> [-p] [-m] Options: -p --profile Add nvtx profile statements to stencils. -m --metadatagen Add metadata header with information about program (requires git). + --ppser Generate ppser serialization statements instead of integration code. Arguments: - input_filepath Path to the input file to process. - output_filepath Path to the output file to generate. + input_path: Path to the input file to process. + output_path: Path to the output file to generate. 
""" - parsed = parse_fortran_file(input_filepath, output_filepath) - parsed_checked = load_gt4py_stencils(parsed) - run_code_generation( - parsed_checked, input_filepath, output_filepath, profile, metadatagen - ) + mode = "serialisation" if ppser else "integration" + + def run_serialisation() -> None: + iface = parse_fortran_file(input_path, output_path, mode) + run_code_generation(input_path, output_path, mode, iface) + + def run_integration() -> None: + iface = parse_fortran_file(input_path, output_path, mode) + iface_gt4py = load_gt4py_stencils(iface) + run_code_generation( + input_path, + output_path, + mode, + iface_gt4py, + profile=profile, + metadatagen=metadatagen, + ) + + mode_dispatcher = { + "serialisation": run_serialisation, + "integration": run_integration, + } + + mode_dispatcher[mode]() if __name__ == "__main__": diff --git a/liskov/src/icon4py/liskov/parsing/deserialise.py b/liskov/src/icon4py/liskov/codegen/integration/deserialise.py similarity index 86% rename from liskov/src/icon4py/liskov/parsing/deserialise.py rename to liskov/src/icon4py/liskov/codegen/integration/deserialise.py index 5da834e390..1832e501d6 100644 --- a/liskov/src/icon4py/liskov/parsing/deserialise.py +++ b/liskov/src/icon4py/liskov/codegen/integration/deserialise.py @@ -11,14 +11,14 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -from dataclasses import dataclass -from typing import Any, Callable, Optional, Protocol, Type +from typing import Any, Optional, Protocol, Type +import icon4py.liskov.parsing.parse import icon4py.liskov.parsing.types as ts +from icon4py.common.logger import setup_logger from icon4py.liskov.codegen.integration.interface import ( BoundsData, DeclareData, - DeserialisedDirectives, EndCreateData, EndIfData, EndProfileData, @@ -26,14 +26,14 @@ FieldAssociationData, ImportsData, InsertData, + IntegrationCodeInterface, StartCreateData, StartProfileData, StartStencilData, UnusedDirective, ) -from icon4py.liskov.codegen.types import CodeGenInput -from 
icon4py.liskov.common import Step -from icon4py.liskov.logger import setup_logger +from icon4py.liskov.codegen.shared.deserialiser import Deserialiser +from icon4py.liskov.codegen.shared.types import CodeGenInput from icon4py.liskov.parsing.exceptions import ( DirectiveSyntaxError, MissingBoundsError, @@ -89,13 +89,11 @@ def __call__( ... -@dataclass class DataFactoryBase: directive_cls: Type[ts.ParsedDirective] dtype: Type[CodeGenInput] -@dataclass class OptionalMultiUseDataFactory(DataFactoryBase): def __call__( self, parsed: ts.ParsedDict, **kwargs: Any @@ -110,40 +108,34 @@ def __call__( return deserialised -@dataclass class RequiredSingleUseDataFactory(DataFactoryBase): def __call__(self, parsed: ts.ParsedDict) -> CodeGenInput: extracted = extract_directive(parsed["directives"], self.directive_cls)[0] return self.dtype(startln=extracted.startln) -@dataclass class EndCreateDataFactory(RequiredSingleUseDataFactory): - directive_cls: Type[ts.ParsedDirective] = ts.EndCreate + directive_cls: Type[ts.ParsedDirective] = icon4py.liskov.parsing.parse.EndCreate dtype: Type[EndCreateData] = EndCreateData -@dataclass class ImportsDataFactory(RequiredSingleUseDataFactory): - directive_cls: Type[ts.ParsedDirective] = ts.Imports + directive_cls: Type[ts.ParsedDirective] = icon4py.liskov.parsing.parse.Imports dtype: Type[ImportsData] = ImportsData -@dataclass class EndIfDataFactory(OptionalMultiUseDataFactory): - directive_cls: Type[ts.ParsedDirective] = ts.EndIf + directive_cls: Type[ts.ParsedDirective] = icon4py.liskov.parsing.parse.EndIf dtype: Type[EndIfData] = EndIfData -@dataclass class EndProfileDataFactory(OptionalMultiUseDataFactory): - directive_cls: Type[ts.ParsedDirective] = ts.EndProfile + directive_cls: Type[ts.ParsedDirective] = icon4py.liskov.parsing.parse.EndProfile dtype: Type[EndProfileData] = EndProfileData -@dataclass class StartCreateDataFactory(DataFactoryBase): - directive_cls: Type[ts.ParsedDirective] = ts.StartCreate + directive_cls: 
Type[ts.ParsedDirective] = icon4py.liskov.parsing.parse.StartCreate dtype: Type[StartCreateData] = StartCreateData def __call__(self, parsed: ts.ParsedDict) -> StartCreateData: @@ -158,9 +150,8 @@ def __call__(self, parsed: ts.ParsedDict) -> StartCreateData: return self.dtype(startln=directive.startln, extra_fields=extra_fields) -@dataclass class DeclareDataFactory(DataFactoryBase): - directive_cls: Type[ts.ParsedDirective] = ts.Declare + directive_cls: Type[ts.ParsedDirective] = icon4py.liskov.parsing.parse.Declare dtype: Type[DeclareData] = DeclareData @staticmethod @@ -187,9 +178,8 @@ def __call__(self, parsed: ts.ParsedDict) -> list[DeclareData]: return deserialised -@dataclass class StartProfileDataFactory(DataFactoryBase): - directive_cls: Type[ts.ParsedDirective] = ts.StartProfile + directive_cls: Type[ts.ParsedDirective] = icon4py.liskov.parsing.parse.StartProfile dtype: Type[StartProfileData] = StartProfileData def __call__(self, parsed: ts.ParsedDict) -> list[StartProfileData]: @@ -204,9 +194,8 @@ def __call__(self, parsed: ts.ParsedDict) -> list[StartProfileData]: return deserialised -@dataclass class EndStencilDataFactory(DataFactoryBase): - directive_cls: Type[ts.ParsedDirective] = ts.EndStencil + directive_cls: Type[ts.ParsedDirective] = icon4py.liskov.parsing.parse.EndStencil dtype: Type[EndStencilData] = EndStencilData def __call__(self, parsed: ts.ParsedDict) -> list[EndStencilData]: @@ -228,9 +217,8 @@ def __call__(self, parsed: ts.ParsedDict) -> list[EndStencilData]: return deserialised -@dataclass class StartStencilDataFactory(DataFactoryBase): - directive_cls: Type[ts.ParsedDirective] = ts.StartStencil + directive_cls: Type[ts.ParsedDirective] = icon4py.liskov.parsing.parse.StartStencil dtype: Type[StartStencilData] = StartStencilData def __call__(self, parsed: ts.ParsedDict) -> list[StartStencilData]: @@ -364,9 +352,8 @@ def _update_tolerances( return fields -@dataclass class InsertDataFactory(DataFactoryBase): - directive_cls: 
Type[ts.ParsedDirective] = ts.Insert + directive_cls: Type[ts.ParsedDirective] = icon4py.liskov.parsing.parse.Insert dtype: Type[InsertData] = InsertData def __call__(self, parsed: ts.ParsedDict) -> list[InsertData]: @@ -380,8 +367,8 @@ def __call__(self, parsed: ts.ParsedDict) -> list[InsertData]: return deserialised -class DirectiveDeserialiser(Step): - _FACTORIES: dict[str, Callable] = { +class IntegrationCodeDeserialiser(Deserialiser): + _FACTORIES = { "StartCreate": StartCreateDataFactory(), "EndCreate": EndCreateDataFactory(), "Imports": ImportsDataFactory(), @@ -393,27 +380,4 @@ class DirectiveDeserialiser(Step): "EndProfile": EndProfileDataFactory(), "Insert": InsertDataFactory(), } - - def __call__(self, directives: ts.ParsedDict) -> DeserialisedDirectives: - """Deserialise the provided parsed directives to a DeserialisedDirectives object. - - Args: - directives: The parsed directives to deserialise. - - Returns: - A DeserialisedDirectives object containing the deserialised directives. - - Note: - The method uses the `_FACTORIES` class attribute to create the appropriate - factory object for each directive type, and uses these objects to deserialise - the parsed directives. The DeserialisedDirectives class is a dataclass - containing the deserialised versions of the different directives. 
- """ - logger.info("Deserialising directives ...") - deserialised = dict() - - for key, func in self._FACTORIES.items(): - ser = func(directives) - deserialised[key] = ser - - return DeserialisedDirectives(**deserialised) + _INTERFACE_TYPE = IntegrationCodeInterface diff --git a/liskov/src/icon4py/liskov/codegen/integration/generate.py b/liskov/src/icon4py/liskov/codegen/integration/generate.py index e08fac5d47..c34bc47fef 100644 --- a/liskov/src/icon4py/liskov/codegen/integration/generate.py +++ b/liskov/src/icon4py/liskov/codegen/integration/generate.py @@ -13,9 +13,9 @@ from typing import Any -from icon4py.liskov.codegen.common import CodeGenerator +from icon4py.common.logger import setup_logger from icon4py.liskov.codegen.integration.interface import ( - DeserialisedDirectives, + IntegrationCodeInterface, StartStencilData, UnusedDirective, ) @@ -43,25 +43,25 @@ StartStencilStatement, StartStencilStatementGenerator, ) -from icon4py.liskov.codegen.types import GeneratedCode +from icon4py.liskov.codegen.shared.generator import CodeGenerator +from icon4py.liskov.codegen.shared.types import GeneratedCode from icon4py.liskov.external.metadata import CodeMetadata -from icon4py.liskov.logger import setup_logger logger = setup_logger(__name__) -class IntegrationGenerator(CodeGenerator): +class IntegrationCodeGenerator(CodeGenerator): def __init__( self, - directives: DeserialisedDirectives, - profile: bool, - metadata_gen: bool, + interface: IntegrationCodeInterface, + profile: bool = False, + metadatagen: bool = False, ): super().__init__() self.profile = profile - self.directives = directives - self.metadata_gen = metadata_gen + self.interface = interface + self.metadatagen = metadatagen def __call__(self, data: Any = None) -> list[GeneratedCode]: """Generate all f90 code for integration. 
@@ -82,7 +82,7 @@ def __call__(self, data: Any = None) -> list[GeneratedCode]: def _generate_metadata(self) -> None: """Generate metadata about the current liskov execution.""" - if self.metadata_gen: + if self.metadatagen: logger.info("Generating icon-liskov metadata.") self._generate( MetadataStatement, @@ -93,12 +93,12 @@ def _generate_metadata(self) -> None: def _generate_declare(self) -> None: """Generate f90 code for declaration statements.""" - for i, declare in enumerate(self.directives.Declare): + for i, declare in enumerate(self.interface.Declare): logger.info("Generating DECLARE statement.") self._generate( DeclareStatement, DeclareStatementGenerator, - self.directives.Declare[i].startln, + self.interface.Declare[i].startln, declare_data=declare, ) @@ -110,12 +110,12 @@ def _generate_start_stencil(self) -> None: """ i = 0 - while i < len(self.directives.StartStencil): - stencil = self.directives.StartStencil[i] + while i < len(self.interface.StartStencil): + stencil = self.interface.StartStencil[i] logger.info(f"Generating START statement for {stencil.name}") try: - next_stencil = self.directives.StartStencil[i + 1] + next_stencil = self.interface.StartStencil[i + 1] except IndexError: pass @@ -142,7 +142,7 @@ def _generate_start_stencil(self) -> None: self._generate( StartStencilStatement, StartStencilStatementGenerator, - self.directives.StartStencil[i].startln, + self.interface.StartStencil[i].startln, stencil_data=stencil, profile=self.profile, ) @@ -154,16 +154,16 @@ def _generate_end_stencil(self) -> None: Args: profile: A boolean indicating whether to include profiling calls in the generated code. 
""" - for i, stencil in enumerate(self.directives.StartStencil): + for i, stencil in enumerate(self.interface.StartStencil): logger.info(f"Generating END statement for {stencil.name}") self._generate( EndStencilStatement, EndStencilStatementGenerator, - self.directives.EndStencil[i].startln, + self.interface.EndStencil[i].startln, stencil_data=stencil, profile=self.profile, - noendif=self.directives.EndStencil[i].noendif, - noprofile=self.directives.EndStencil[i].noprofile, + noendif=self.interface.EndStencil[i].noendif, + noprofile=self.interface.EndStencil[i].noprofile, ) def _generate_imports(self) -> None: @@ -172,8 +172,8 @@ def _generate_imports(self) -> None: self._generate( ImportsStatement, ImportsStatementGenerator, - self.directives.Imports.startln, - stencils=self.directives.StartStencil, + self.interface.Imports.startln, + stencils=self.interface.StartStencil, ) def _generate_create(self) -> None: @@ -182,29 +182,29 @@ def _generate_create(self) -> None: self._generate( StartCreateStatement, StartCreateStatementGenerator, - self.directives.StartCreate.startln, - stencils=self.directives.StartStencil, - extra_fields=self.directives.StartCreate.extra_fields, + self.interface.StartCreate.startln, + stencils=self.interface.StartStencil, + extra_fields=self.interface.StartCreate.extra_fields, ) self._generate( EndCreateStatement, EndCreateStatementGenerator, - self.directives.EndCreate.startln, + self.interface.EndCreate.startln, ) def _generate_endif(self) -> None: """Generate f90 code for endif statements.""" - if self.directives.EndIf != UnusedDirective: - for endif in self.directives.EndIf: # type: ignore + if self.interface.EndIf != UnusedDirective: + for endif in self.interface.EndIf: # type: ignore logger.info("Generating ENDIF statement.") self._generate(EndIfStatement, EndIfStatementGenerator, endif.startln) def _generate_profile(self) -> None: """Generate additional nvtx profiling statements.""" if self.profile: - if self.directives.StartProfile 
!= UnusedDirective: - for start in self.directives.StartProfile: # type: ignore + if self.interface.StartProfile != UnusedDirective: + for start in self.interface.StartProfile: # type: ignore logger.info("Generating nvtx start statement.") self._generate( StartProfileStatement, @@ -213,8 +213,8 @@ def _generate_profile(self) -> None: name=start.name, ) - if self.directives.EndProfile != UnusedDirective: - for end in self.directives.EndProfile: # type: ignore + if self.interface.EndProfile != UnusedDirective: + for end in self.interface.EndProfile: # type: ignore logger.info("Generating nvtx end statement.") self._generate( EndProfileStatement, EndProfileStatementGenerator, end.startln @@ -222,8 +222,8 @@ def _generate_profile(self) -> None: def _generate_insert(self) -> None: """Generate free form statement from insert directive.""" - if self.directives.Insert != UnusedDirective: - for insert in self.directives.Insert: # type: ignore + if self.interface.Insert != UnusedDirective: + for insert in self.interface.Insert: # type: ignore logger.info("Generating free form statement.") self._generate( InsertStatement, diff --git a/liskov/src/icon4py/liskov/codegen/integration/interface.py b/liskov/src/icon4py/liskov/codegen/integration/interface.py index e7f5931854..e7fa5385e1 100644 --- a/liskov/src/icon4py/liskov/codegen/integration/interface.py +++ b/liskov/src/icon4py/liskov/codegen/integration/interface.py @@ -14,7 +14,7 @@ from dataclasses import dataclass from typing import Optional, Sequence -from icon4py.liskov.codegen.types import CodeGenInput +from icon4py.liskov.codegen.shared.types import CodeGenInput class UnusedDirective: @@ -100,7 +100,7 @@ class InsertData(CodeGenInput): @dataclass -class DeserialisedDirectives: +class IntegrationCodeInterface: StartStencil: Sequence[StartStencilData] EndStencil: Sequence[EndStencilData] Declare: Sequence[DeclareData] diff --git a/liskov/src/icon4py/liskov/codegen/integration/template.py 
b/liskov/src/icon4py/liskov/codegen/integration/template.py index a172777510..a29e89864d 100644 --- a/liskov/src/icon4py/liskov/codegen/integration/template.py +++ b/liskov/src/icon4py/liskov/codegen/integration/template.py @@ -92,7 +92,7 @@ class MetadataStatementGenerator(TemplatedGenerator): ! GENERATED WITH ICON-LISKOV !+-+-+-+-+-+-+-+-+-+ +-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+ ! Generated on: {{ _this_node.metadata.generated_on }} - ! Input filepath: {{ _this_node.metadata.cli_params['input_filepath'] }} + ! Input filepath: {{ _this_node.metadata.cli_params['input_path'] }} ! Profiling active: {{ _this_node.metadata.cli_params['profile'] }} ! Git version tag: {{ _this_node.metadata.tag }} ! Git commit hash: {{ _this_node.metadata.commit_hash }} diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py b/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py new file mode 100644 index 0000000000..9254a19ef1 --- /dev/null +++ b/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py @@ -0,0 +1,199 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later +import uuid + +import icon4py.liskov.parsing.parse +import icon4py.liskov.parsing.types as ts +from icon4py.common.logger import setup_logger +from icon4py.liskov.codegen.integration.deserialise import ( + TOLERANCE_ARGS, + _extract_stencil_name, + pop_item_from_dict, +) +from icon4py.liskov.codegen.serialisation.interface import ( + FieldSerialisationData, + InitData, + Metadata, + SavepointData, + SerialisationCodeInterface, +) +from icon4py.liskov.codegen.shared.deserialiser import Deserialiser +from icon4py.liskov.parsing.utils import extract_directive + + +logger = setup_logger(__name__) + +KEYS_TO_REMOVE = [ + "accpresent", + "mergecopy", + "copies", + "horizontal_lower", + "horizontal_upper", + "vertical_lower", + "vertical_upper", + "name", +] + +SKIP_VARS = ["ikoffset", "pos_on_tplane_e_1", "pos_on_tplane_e_2"] + + +class InitDataFactory: + dtype = InitData + + def __call__(self, parsed: ts.ParsedDict) -> InitData: + return self.dtype(startln=0, directory=".", prefix="liskov-serialisation") + + +class SavepointDataFactory: + def __call__(self, parsed: ts.ParsedDict) -> list[SavepointData]: + """Create a list of Start and End Savepoints for each Start and End Stencil directive.""" + start_stencil = extract_directive( + parsed["directives"], icon4py.liskov.parsing.parse.StartStencil + ) + end_stencil = extract_directive( + parsed["directives"], icon4py.liskov.parsing.parse.EndStencil + ) + + repeated = self._find_repeated_stencils(parsed["content"]) + + deserialised = [] + + for i, (start, end) in enumerate(zip(start_stencil, end_stencil)): + named_args = parsed["content"]["StartStencil"][i] + stencil_name = _extract_stencil_name(named_args, start) + + if stencil_name in repeated: + stencil_name = f"{stencil_name}_{str(uuid.uuid4())}" + + field_names = self._remove_unnecessary_keys(named_args) + + metadata = [ + Metadata(key=k, value=v) + for k, v in self._get_timestep_variables(stencil_name).items() + ] + + 
fields = self._make_fields(field_names) + + for intent, ln in [("start", start.startln), ("end", end.startln)]: + savepoint = SavepointData( + subroutine=f"{stencil_name}", + intent=intent, + startln=ln, + fields=fields, + metadata=metadata, + ) + deserialised.append(savepoint) + + return deserialised + + @staticmethod + def _remove_unnecessary_keys(named_args: dict) -> dict: + """Remove unnecessary keys from named_args, and only return field names.""" + copy = named_args.copy() + [pop_item_from_dict(copy, k, None) for k in KEYS_TO_REMOVE] + for tol in TOLERANCE_ARGS: + for k in named_args.copy().keys(): + if k.endswith(tol): + pop_item_from_dict(named_args, k, None) + return copy + + @staticmethod + def _make_fields(named_args: dict) -> list[FieldSerialisationData]: + """Create a list of FieldSerialisationData objects based on named arguments.""" + fields = [ + FieldSerialisationData( + variable=variable, + association="z_hydro_corr(:,:,1)" # special case + if association == "z_hydro_corr(:,nlev,1)" + else association, + decomposed=False, + dimension=None, + typespec=None, + typename=None, + ptr_var=None, + ) + for variable, association in named_args.items() + if variable not in SKIP_VARS + ] + return fields + + @staticmethod + def _get_timestep_variables(stencil_name: str) -> dict: + """Get the corresponding timestep metadata variables for the stencil.""" + timestep_variables = {} + + diffusion_stencil_names = [ + "apply", + "calculate", + "enhance", + "update", + "temporary", + "diffusion", + ] + if any(name in stencil_name for name in diffusion_stencil_names): + timestep_variables["jstep"] = "jstep_ptr" + timestep_variables["diffctr"] = "diffctr" + + if "mo_velocity_advection" in stencil_name: + timestep_variables["jstep"] = "jstep_ptr" + timestep_variables["nstep"] = "nstep_ptr" + timestep_variables["istep"] = "istep" + + if "mo_intp_rbf" in stencil_name: + timestep_variables["jstep"] = "jstep_ptr" + timestep_variables["mo_intp_rbf_ctr"] = "mo_intp_rbf_ctr" + 
+ if "mo_math_divrot" in stencil_name: + timestep_variables["jstep"] = "jstep_ptr" + timestep_variables["mo_math_divrot_ctr"] = "mo_math_divrot_ctr" + + if "grad_green_gauss" in stencil_name: + timestep_variables["jstep"] = "jstep_ptr" + timestep_variables["grad_green_gauss_ctr"] = "grad_green_gauss_ctr" + + if "mo_icon_interpolation_scalar" in stencil_name: + timestep_variables["jstep"] = "jstep_ptr" + timestep_variables[ + "mo_icon_interpolation_ctr" + ] = "mo_icon_interpolation_ctr" + + if "mo_advection_traj" in stencil_name: + timestep_variables["jstep"] = "jstep_ptr" + timestep_variables["mo_advection_traj_ctr"] = "mo_advection_traj_ctr" + + if "mo_solve_nonhydro" in stencil_name: + timestep_variables["istep"] = "istep" + timestep_variables["mo_solve_nonhydro_ctr"] = "mo_solve_nonhydro_ctr" + + return timestep_variables + + def _find_repeated_stencils(self, content): + stencil_names = {} + repeated_names = [] + for stencil in content["StartStencil"]: + name = stencil["name"] + if name in stencil_names: + if stencil_names[name] not in repeated_names: + repeated_names.append(stencil_names[name]) + repeated_names.append(name) + else: + stencil_names[name] = name + return set(repeated_names) + + +class SerialisationCodeDeserialiser(Deserialiser): + _FACTORIES = { + "Init": InitDataFactory(), + "Savepoint": SavepointDataFactory(), + } + _INTERFACE_TYPE = SerialisationCodeInterface diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/generate.py b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py index 2a79c664d8..25d42c7f4a 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/generate.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py @@ -12,24 +12,25 @@ # SPDX-License-Identifier: GPL-3.0-or-later from typing import Any -from icon4py.liskov.codegen.common import CodeGenerator -from icon4py.liskov.codegen.serialisation.interface import SerialisationInterface +from icon4py.common.logger import setup_logger +from 
icon4py.liskov.codegen.serialisation.interface import ( + SerialisationCodeInterface, +) from icon4py.liskov.codegen.serialisation.template import ( SavepointStatement, SavepointStatementGenerator, ) -from icon4py.liskov.codegen.types import GeneratedCode -from icon4py.liskov.logger import setup_logger +from icon4py.liskov.codegen.shared.generator import CodeGenerator +from icon4py.liskov.codegen.shared.types import GeneratedCode logger = setup_logger(__name__) -class SerialisationGenerator(CodeGenerator): - def __init__(self, ser_iface: SerialisationInterface): +class SerialisationCodeGenerator(CodeGenerator): + def __init__(self, interface: SerialisationCodeInterface): super().__init__() - self.ser_iface = ser_iface - self.ser_init_complete = False + self.interface = interface def __call__(self, data: Any = None) -> list[GeneratedCode]: """Generate all f90 code for integration.""" @@ -37,21 +38,22 @@ def __call__(self, data: Any = None) -> list[GeneratedCode]: return self.generated def _generate_savepoints(self) -> None: - for i, savepoint in enumerate(self.ser_iface.Savepoint): + init_complete = False + for i, savepoint in enumerate(self.interface.Savepoint): logger.info("Generating pp_ser savepoint statement.") - if self.ser_init_complete: + if init_complete: self._generate( SavepointStatement, SavepointStatementGenerator, - self.ser_iface.Savepoint[i].startln, + self.interface.Savepoint[i].startln, savepoint=savepoint, ) else: self._generate( SavepointStatement, SavepointStatementGenerator, - self.ser_iface.Savepoint[i].startln, + self.interface.Savepoint[i].startln, savepoint=savepoint, - init=self.ser_iface.Init, + init=self.interface.Init, ) - self.ser_init_complete = True + init_complete = True diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py index 54dc048fa8..364fc65f50 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py +++ 
b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py @@ -14,7 +14,7 @@ from dataclasses import dataclass from typing import Optional -from icon4py.liskov.codegen.types import CodeGenInput +from icon4py.liskov.codegen.shared.types import CodeGenInput @dataclass @@ -42,13 +42,13 @@ class FieldSerialisationData: @dataclass class SavepointData(CodeGenInput): - subroutine: str + subroutine: str # todo: change to name intent: str fields: list[FieldSerialisationData] metadata: Optional[list[Metadata]] @dataclass -class SerialisationInterface: +class SerialisationCodeInterface: Init: InitData Savepoint: list[SavepointData] diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/template.py b/liskov/src/icon4py/liskov/codegen/serialisation/template.py index 501dc65ca9..6603624797 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/template.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/template.py @@ -74,7 +74,7 @@ class SavepointStatementGenerator(TemplatedGenerator): !$ser init directory="{{_this_node.init.directory}}" prefix="{{ _this_node.init.prefix }}" {% endif %} - !$ser savepoint {{ _this_node.savepoint.subroutine }}_{{ _this_node.savepoint.intent }} {% if _this_node.savepoint.metadata %} {%- for m in _this_node.savepoint.metadata -%} {{ m.key }}={{ m.value }} {%- endfor -%} {% endif %} + !$ser savepoint {{ _this_node.savepoint.subroutine }}_{{ _this_node.savepoint.intent }} {% if _this_node.savepoint.metadata %} {%- for m in _this_node.savepoint.metadata -%} {{ m.key }}={{ m.value }} {% endfor -%} {% endif %} {{ decomposed_fields }} @@ -85,6 +85,7 @@ class SavepointStatementGenerator(TemplatedGenerator): StandardFields = as_jinja( """ {% for f in _this_node.fields %} + PRINT *, 'Serializing {{ f.variable }}={{ f.association }}' {% if f.dimension %} IF (SIZE({{ f.variable }}) > 0) THEN !$ser data {{ f.variable }}={{ f.association }} @@ -125,6 +126,6 @@ def generate_size_strings(colon_list, var_name): for f in node.fields: f.variable 
= f.variable.replace(f"_{f.ptr_var}", "") - f.alloc_dims = ", ".join(generate_size_strings(f.dimension, f.variable)) + f.alloc_dims = ",".join(generate_size_strings(f.dimension, f.variable)) return self.generic_visit(node) diff --git a/liskov/src/icon4py/liskov/codegen/shared/__init__.py b/liskov/src/icon4py/liskov/codegen/shared/__init__.py new file mode 100644 index 0000000000..15dfdb0098 --- /dev/null +++ b/liskov/src/icon4py/liskov/codegen/shared/__init__.py @@ -0,0 +1,12 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later diff --git a/liskov/src/icon4py/liskov/codegen/shared/deserialiser.py b/liskov/src/icon4py/liskov/codegen/shared/deserialiser.py new file mode 100644 index 0000000000..6cb02811ab --- /dev/null +++ b/liskov/src/icon4py/liskov/codegen/shared/deserialiser.py @@ -0,0 +1,52 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later + +from typing import Callable + +import icon4py.liskov.parsing.types as ts +from icon4py.common.logger import setup_logger +from icon4py.liskov.codegen.integration.interface import IntegrationCodeInterface +from icon4py.liskov.codegen.serialisation.interface import ( + SerialisationCodeInterface, +) +from icon4py.liskov.pipeline.definition import Step + + +logger = setup_logger(__name__) + + +class Deserialiser(Step): + _FACTORIES: dict[str, Callable] = {} + _INTERFACE_TYPE: SerialisationCodeInterface | IntegrationCodeInterface + + def __call__(self, directives: ts.ParsedDict): + """Deserialises parsed directives into an Interface object. + + Args: + directives: A dictionary of parsed directives. + + Returns: + An Interface object containing the deserialised directives. + + This method is responsible for deserialising parsed directives into an Interface object of the given _INTERFACE_TYPE. + It uses the `_FACTORIES` dictionary of factory functions to create the appropriate factory object for each directive type. + The resulting deserialised objects are then used to create an Interface object which can be used for code generation. 
+ """ + logger.info("Deserialising directives ...") + deserialised = dict() + + for key, factory in self._FACTORIES.items(): + obj = factory(directives) + deserialised[key] = obj + + return self._INTERFACE_TYPE(**deserialised) diff --git a/liskov/src/icon4py/liskov/codegen/common.py b/liskov/src/icon4py/liskov/codegen/shared/generator.py similarity index 95% rename from liskov/src/icon4py/liskov/codegen/common.py rename to liskov/src/icon4py/liskov/codegen/shared/generator.py index 91484cc8c9..47931e3af4 100644 --- a/liskov/src/icon4py/liskov/codegen/common.py +++ b/liskov/src/icon4py/liskov/codegen/shared/generator.py @@ -18,8 +18,8 @@ from gt4py.eve.codegen import TemplatedGenerator from icon4py.icon4pygen.bindings.utils import format_fortran_code -from icon4py.liskov.codegen.types import CodeGenInput, GeneratedCode -from icon4py.liskov.common import Step +from icon4py.liskov.codegen.shared.types import CodeGenInput, GeneratedCode +from icon4py.liskov.pipeline.definition import Step class CodeGenerator(Step): diff --git a/liskov/src/icon4py/liskov/codegen/types.py b/liskov/src/icon4py/liskov/codegen/shared/types.py similarity index 89% rename from liskov/src/icon4py/liskov/codegen/types.py rename to liskov/src/icon4py/liskov/codegen/shared/types.py index defc99df44..7420b6f532 100644 --- a/liskov/src/icon4py/liskov/codegen/types.py +++ b/liskov/src/icon4py/liskov/codegen/shared/types.py @@ -21,6 +21,4 @@ class CodeGenInput: @dataclass class GeneratedCode(CodeGenInput): - """A class for storing generated f90 code and its line number information.""" - source: str diff --git a/liskov/src/icon4py/liskov/codegen/write.py b/liskov/src/icon4py/liskov/codegen/shared/writer.py similarity index 89% rename from liskov/src/icon4py/liskov/codegen/write.py rename to liskov/src/icon4py/liskov/codegen/shared/writer.py index ec2da30dfe..588d90e6d3 100644 --- a/liskov/src/icon4py/liskov/codegen/write.py +++ b/liskov/src/icon4py/liskov/codegen/shared/writer.py @@ -13,10 +13,10 @@ 
from pathlib import Path from typing import List -from icon4py.liskov.codegen.types import GeneratedCode -from icon4py.liskov.common import Step -from icon4py.liskov.logger import setup_logger +from icon4py.common.logger import setup_logger +from icon4py.liskov.codegen.shared.types import GeneratedCode from icon4py.liskov.parsing.types import DIRECTIVE_IDENT +from icon4py.liskov.pipeline.definition import Step logger = setup_logger(__name__) @@ -24,7 +24,7 @@ class CodegenWriter(Step): def __init__(self, input_filepath: Path, output_filepath: Path) -> None: - """Initialize an IntegrationWriter instance with a list of generated code. + """Initialize an CodegenWriter instance. Args: input_filepath: Path to file containing directives. @@ -34,10 +34,12 @@ def __init__(self, input_filepath: Path, output_filepath: Path) -> None: self.output_filepath = output_filepath def __call__(self, generated: List[GeneratedCode]) -> None: - """Write a file containing generated code, with the DSL directives removed in the same directory as filepath using a new suffix. + """Write a new file containing the generated code. + + Any !$DSL directives are removed from the file. Args: - generated: A list of GeneratedCode instances representing the generated code that will be written to a file. + generated: A list of GeneratedCode instances representing the generated code that will be written to the file. """ current_file = self._read_file() with_generated_code = self._insert_generated_code(current_file, generated) @@ -45,7 +47,7 @@ def __call__(self, generated: List[GeneratedCode]) -> None: self._write_file(without_directives) def _read_file(self) -> List[str]: - """Read the lines of a file into a list. + """Read the lines of the input file into a list. Returns: A list of strings representing the lines of the file. 
diff --git a/liskov/src/icon4py/liskov/external/gt4py.py b/liskov/src/icon4py/liskov/external/gt4py.py index a7ddcf888e..d02397b1db 100644 --- a/liskov/src/icon4py/liskov/external/gt4py.py +++ b/liskov/src/icon4py/liskov/external/gt4py.py @@ -17,14 +17,14 @@ from gt4py.next.ffront.decorator import Program +from icon4py.common.logger import setup_logger from icon4py.icon4pygen.metadata import get_stencil_info -from icon4py.liskov.codegen.integration.interface import DeserialisedDirectives -from icon4py.liskov.common import Step +from icon4py.liskov.codegen.integration.interface import IntegrationCodeInterface from icon4py.liskov.external.exceptions import ( IncompatibleFieldError, UnknownStencilError, ) -from icon4py.liskov.logger import setup_logger +from icon4py.liskov.pipeline.definition import Step logger = setup_logger(__name__) @@ -33,10 +33,10 @@ class UpdateFieldsWithGt4PyStencils(Step): _STENCIL_PACKAGES = ["atm_dyn_iconam", "advection"] - def __init__(self, parsed: DeserialisedDirectives): + def __init__(self, parsed: IntegrationCodeInterface): self.parsed = parsed - def __call__(self, data: Any = None) -> DeserialisedDirectives: + def __call__(self, data: Any = None) -> IntegrationCodeInterface: logger.info("Updating parsed fields with data from icon4py stencils...") for s in self.parsed.StartStencil: diff --git a/liskov/src/icon4py/liskov/parsing/parse.py b/liskov/src/icon4py/liskov/parsing/parse.py index 93b42d0cb0..3543fb7c37 100644 --- a/liskov/src/icon4py/liskov/parsing/parse.py +++ b/liskov/src/icon4py/liskov/parsing/parse.py @@ -13,14 +13,16 @@ import collections import shutil import sys +from dataclasses import dataclass, field from pathlib import Path -from typing import Sequence +from typing import Optional, Sequence, Type import icon4py.liskov.parsing.types as ts -from icon4py.liskov.common import Step -from icon4py.liskov.logger import setup_logger +from icon4py.common.logger import setup_logger from icon4py.liskov.parsing.exceptions import 
UnsupportedDirectiveError +from icon4py.liskov.parsing.types import ParsedDirective, RawDirective from icon4py.liskov.parsing.validation import VALIDATORS +from icon4py.liskov.pipeline.definition import Step REPLACE_CHARS = [ts.DIRECTIVE_IDENT, "&", "\n"] @@ -68,7 +70,7 @@ def _determine_type( typed = [] for raw in raw_directives: found = False - for directive in ts.SUPPORTED_DIRECTIVES: + for directive in SUPPORTED_DIRECTIVES: if directive.pattern in raw.string: typed.append(directive(raw.string, raw.startln, raw.endln)) # type: ignore found = True @@ -108,3 +110,128 @@ def _parse(directives: Sequence[ts.ParsedDirective]) -> ts.ParsedContent: content = d.get_content() parsed_content[d.type_name].append(content) return parsed_content + + +class TypedDirective(RawDirective): + pattern: str + + @property + def type_name(self) -> str: + return self.__class__.__name__ + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TypedDirective): + raise NotImplementedError + return self.string == other.string + + +@dataclass(eq=False) +class WithArguments(TypedDirective): + regex: str = field(default=r"(.+?)=(.+?)", init=False) + + def get_content(self) -> dict[str, str]: + args = self.string.replace(f"{self.pattern}", "") + delimited = args[1:-1].split(";") + content = { + strip_whitespace(a.split("=")[0].strip()): strip_whitespace(a.split("=")[1]) + for a in delimited + } + return content + + +@dataclass(eq=False) +class WithOptionalArguments(TypedDirective): + regex: str = field(default=r"(?:.+?=.+?|)", init=False) + + def get_content(self) -> Optional[dict[str, str]]: + args = self.string.replace(f"{self.pattern}", "")[1:-1] + if len(args) > 0: + content = dict([args.split("=")]) + return content + return None + + +@dataclass(eq=False) +class WithoutArguments(TypedDirective): + # matches an empty string at the beginning of a line + regex: str = field(default=r"^(?![\s\S])", init=False) + + def get_content(self) -> dict: + return {} + + 
+@dataclass(eq=False) +class FreeForm(TypedDirective): + # matches any string inside brackets + regex: str = field(default=r"(.+?)", init=False) + + def get_content(self) -> str: + args = self.string.replace(f"{self.pattern}", "") + return args[1:-1] + + +def strip_whitespace(string: str) -> str: + """ + Remove all whitespace characters from the given string. + + Args: + string: The string to remove whitespace from. + + Returns: + The input string with all whitespace removed. + """ + return "".join(string.split()) + + +class StartStencil(WithArguments): + pattern = "START STENCIL" + + +class EndStencil(WithArguments): + pattern = "END STENCIL" + + +class Declare(WithArguments): + pattern = "DECLARE" + + +class Imports(WithoutArguments): + pattern = "IMPORTS" + + +class StartCreate(WithOptionalArguments): + pattern = "START CREATE" + + +class EndCreate(WithoutArguments): + pattern = "END CREATE" + + +class EndIf(WithoutArguments): + pattern = "ENDIF" + + +class StartProfile(WithArguments): + pattern = "START PROFILE" + + +class EndProfile(WithoutArguments): + pattern = "END PROFILE" + + +class Insert(FreeForm): + pattern = "INSERT" + + +SUPPORTED_DIRECTIVES: Sequence[Type[ParsedDirective]] = [ + StartStencil, + EndStencil, + Imports, + Declare, + StartCreate, + EndCreate, + EndIf, + StartProfile, + EndProfile, + Insert, +] diff --git a/liskov/src/icon4py/liskov/parsing/scan.py b/liskov/src/icon4py/liskov/parsing/scan.py index 84c44955eb..f13c1611aa 100644 --- a/liskov/src/icon4py/liskov/parsing/scan.py +++ b/liskov/src/icon4py/liskov/parsing/scan.py @@ -15,9 +15,9 @@ from typing import Any import icon4py.liskov.parsing.types as ts -from icon4py.liskov.common import Step -from icon4py.liskov.logger import setup_logger +from icon4py.common.logger import setup_logger from icon4py.liskov.parsing.exceptions import DirectiveSyntaxError +from icon4py.liskov.pipeline.definition import Step logger = setup_logger(__name__) diff --git 
a/liskov/src/icon4py/liskov/parsing/types.py b/liskov/src/icon4py/liskov/parsing/types.py index 9fb5ee29da..f24028ec01 100644 --- a/liskov/src/icon4py/liskov/parsing/types.py +++ b/liskov/src/icon4py/liskov/parsing/types.py @@ -10,13 +10,11 @@ # distribution for a copy of the license or check . # # SPDX-License-Identifier: GPL-3.0-or-later -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import ( Any, - Optional, Protocol, Sequence, - Type, TypeAlias, TypedDict, runtime_checkable, @@ -55,113 +53,3 @@ class RawDirective: string: str startln: int endln: int - - -class TypedDirective(RawDirective): - pattern: str - - @property - def type_name(self) -> str: - return self.__class__.__name__ - - def __eq__(self, other: object) -> bool: - if not isinstance(other, TypedDirective): - raise NotImplementedError - return self.string == other.string - - -@dataclass(eq=False) -class WithArguments(TypedDirective): - regex: str = field(default=r"(.+?)=(.+?)", init=False) - - def get_content(self) -> dict[str, str]: - args = self.string.replace(f"{self.pattern}", "") - delimited = args[1:-1].split(";") - content = {a.split("=")[0].strip(): a.split("=")[1] for a in delimited} - return content - - -@dataclass(eq=False) -class WithOptionalArguments(TypedDirective): - regex: str = field(default=r"(?:.+?=.+?|)", init=False) - - def get_content(self) -> Optional[dict[str, str]]: - args = self.string.replace(f"{self.pattern}", "")[1:-1] - if len(args) > 0: - content = dict([args.split("=")]) - return content - return None - - -@dataclass(eq=False) -class WithoutArguments(TypedDirective): - # matches an empty string at the beginning of a line - regex: str = field(default=r"^(?![\s\S])", init=False) - - def get_content(self) -> dict: - return {} - - -@dataclass(eq=False) -class FreeForm(TypedDirective): - # matches any string inside brackets - regex: str = field(default=r"(.+?)", init=False) - - def get_content(self) -> str: - args = 
self.string.replace(f"{self.pattern}", "") - return args[1:-1] - - -class StartStencil(WithArguments): - pattern = "START STENCIL" - - -class EndStencil(WithArguments): - pattern = "END STENCIL" - - -class Declare(WithArguments): - pattern = "DECLARE" - - -class Imports(WithoutArguments): - pattern = "IMPORTS" - - -class StartCreate(WithOptionalArguments): - pattern = "START CREATE" - - -class EndCreate(WithoutArguments): - pattern = "END CREATE" - - -class EndIf(WithoutArguments): - pattern = "ENDIF" - - -class StartProfile(WithArguments): - pattern = "START PROFILE" - - -class EndProfile(WithoutArguments): - pattern = "END PROFILE" - - -class Insert(FreeForm): - pattern = "INSERT" - - -# When adding a new directive this list must be updated. -SUPPORTED_DIRECTIVES: Sequence[Type[ParsedDirective]] = [ - StartStencil, - EndStencil, - Imports, - Declare, - StartCreate, - EndCreate, - EndIf, - StartProfile, - EndProfile, - Insert, -] diff --git a/liskov/src/icon4py/liskov/parsing/utils.py b/liskov/src/icon4py/liskov/parsing/utils.py index 8a6f1d743b..35d4a1a4ed 100644 --- a/liskov/src/icon4py/liskov/parsing/utils.py +++ b/liskov/src/icon4py/liskov/parsing/utils.py @@ -12,7 +12,7 @@ # SPDX-License-Identifier: GPL-3.0-or-later from typing import Sequence, Type -import icon4py.liskov.parsing.types as ts +from icon4py.liskov.parsing import types as ts def flatten_list_of_dicts(list_of_dicts: list[dict]) -> dict: @@ -36,11 +36,6 @@ def string_to_bool(string: str) -> bool: raise ValueError(f"Cannot convert '{string}' to a boolean.") -def print_parsed_directive(directive: ts.ParsedDirective) -> str: - """Print a parsed directive, including its contents, and start and end line numbers.""" - return f"Directive: {directive.string}, start line: {directive.startln}, end line: {directive.endln}\n" - - def extract_directive( directives: Sequence[ts.ParsedDirective], required_type: Type[ts.ParsedDirective], @@ -50,6 +45,11 @@ def extract_directive( return directives +def 
print_parsed_directive(directive: ts.ParsedDirective) -> str: + """Print a parsed directive, including its contents, and start and end line numbers.""" + return f"Directive: {directive.string}, start line: {directive.startln}, end line: {directive.endln}\n" + + def remove_directive_types( directives: Sequence[ts.ParsedDirective], exclude_types: Sequence[Type[ts.ParsedDirective]], diff --git a/liskov/src/icon4py/liskov/parsing/validation.py b/liskov/src/icon4py/liskov/parsing/validation.py index 616803e709..70e4e4cde8 100644 --- a/liskov/src/icon4py/liskov/parsing/validation.py +++ b/liskov/src/icon4py/liskov/parsing/validation.py @@ -16,8 +16,9 @@ from pathlib import Path from typing import Match, Optional, Protocol +import icon4py.liskov.parsing.parse import icon4py.liskov.parsing.types as ts -from icon4py.liskov.logger import setup_logger +from icon4py.common.logger import setup_logger from icon4py.liskov.parsing.exceptions import ( DirectiveSyntaxError, RepeatedDirectiveError, @@ -117,12 +118,12 @@ def _validate_directive_uniqueness( repeated = remove_directive_types( [d for d in directives if directives.count(d) > 1], [ - ts.StartStencil, - ts.EndStencil, - ts.EndIf, - ts.EndProfile, - ts.StartProfile, - ts.Insert, + icon4py.liskov.parsing.parse.StartStencil, + icon4py.liskov.parsing.parse.EndStencil, + icon4py.liskov.parsing.parse.EndIf, + icon4py.liskov.parsing.parse.EndProfile, + icon4py.liskov.parsing.parse.StartProfile, + icon4py.liskov.parsing.parse.Insert, ], ) if repeated: @@ -136,12 +137,12 @@ def _validate_required_directives( ) -> None: """Check that all required directives are used at least once.""" expected = [ - ts.Declare, - ts.Imports, - ts.StartCreate, - ts.EndCreate, - ts.StartStencil, - ts.EndStencil, + icon4py.liskov.parsing.parse.Declare, + icon4py.liskov.parsing.parse.Imports, + icon4py.liskov.parsing.parse.StartCreate, + icon4py.liskov.parsing.parse.EndCreate, + icon4py.liskov.parsing.parse.StartStencil, + 
icon4py.liskov.parsing.parse.EndStencil, ] for expected_type in expected: if not any([isinstance(d, expected_type) for d in directives]): @@ -171,13 +172,23 @@ def _validate_stencil_directives( directives (list[ts.ParsedDirective]): List of stencil directives to validate. """ stencil_directives = [ - d for d in directives if isinstance(d, (ts.StartStencil, ts.EndStencil)) + d + for d in directives + if isinstance( + d, + ( + icon4py.liskov.parsing.parse.StartStencil, + icon4py.liskov.parsing.parse.EndStencil, + ), + ) ] stencil_counts: dict = {} for directive in stencil_directives: stencil_name = self.extract_arg_from_directive(directive.string, "name") stencil_counts[stencil_name] = stencil_counts.get(stencil_name, 0) + ( - 1 if isinstance(directive, ts.StartStencil) else -1 + 1 + if isinstance(directive, icon4py.liskov.parsing.parse.StartStencil) + else -1 ) unbalanced_stencils = [ diff --git a/liskov/src/icon4py/liskov/pipeline/__init__.py b/liskov/src/icon4py/liskov/pipeline/__init__.py new file mode 100644 index 0000000000..15dfdb0098 --- /dev/null +++ b/liskov/src/icon4py/liskov/pipeline/__init__.py @@ -0,0 +1,12 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
+# +# SPDX-License-Identifier: GPL-3.0-or-later diff --git a/liskov/src/icon4py/liskov/pipeline.py b/liskov/src/icon4py/liskov/pipeline/collection.py similarity index 54% rename from liskov/src/icon4py/liskov/pipeline.py rename to liskov/src/icon4py/liskov/pipeline/collection.py index f8f43d2745..5994cd28fd 100644 --- a/liskov/src/icon4py/liskov/pipeline.py +++ b/liskov/src/icon4py/liskov/pipeline/collection.py @@ -10,49 +10,72 @@ # distribution for a copy of the license or check . # # SPDX-License-Identifier: GPL-3.0-or-later - from pathlib import Path -from icon4py.liskov.codegen.integration.generate import IntegrationGenerator -from icon4py.liskov.codegen.integration.interface import DeserialisedDirectives -from icon4py.liskov.codegen.write import CodegenWriter -from icon4py.liskov.common import Step, linear_pipeline +from icon4py.liskov.codegen.integration.deserialise import ( + IntegrationCodeDeserialiser, +) +from icon4py.liskov.codegen.integration.generate import IntegrationCodeGenerator +from icon4py.liskov.codegen.integration.interface import IntegrationCodeInterface +from icon4py.liskov.codegen.serialisation.deserialise import ( + SerialisationCodeDeserialiser, +) +from icon4py.liskov.codegen.serialisation.generate import ( + SerialisationCodeGenerator, +) +from icon4py.liskov.codegen.shared.writer import CodegenWriter from icon4py.liskov.external.gt4py import UpdateFieldsWithGt4PyStencils -from icon4py.liskov.parsing.deserialise import DirectiveDeserialiser from icon4py.liskov.parsing.parse import DirectivesParser from icon4py.liskov.parsing.scan import DirectivesScanner +from icon4py.liskov.pipeline.definition import Step, linear_pipeline + + +DESERIALISERS = { + "integration": IntegrationCodeDeserialiser, + "serialisation": SerialisationCodeDeserialiser, +} + +CODEGENS = { + "integration": IntegrationCodeGenerator, + "serialisation": SerialisationCodeGenerator, +} @linear_pipeline -def parse_fortran_file(input_filepath: Path, output_filepath: Path) -> 
list[Step]: +def parse_fortran_file( + input_filepath: Path, output_filepath: Path, deserialiser_type: str +) -> list[Step]: """Execute a pipeline to parse and deserialize directives from a file. The pipeline consists of three steps: DirectivesScanner, DirectivesParser, and DirectiveDeserialiser. The DirectivesScanner scans the file for directives, the DirectivesParser parses the directives into a dictionary, and the DirectiveDeserialiser deserializes the dictionary into a - DeserialisedDirectives object. + its corresponding Interface object. Args: input_filepath: Path to the input file to process. output_filepath: Path to the output file to generate. + deserialiser_type: What deserialiser to use. Returns: - DeserialisedDirectives: The deserialized directives object. + IntegrationCodeInterface | SerialisationCodeInterface: The interface object. """ + deserialiser = DESERIALISERS[deserialiser_type] + return [ DirectivesScanner(input_filepath), DirectivesParser(input_filepath, output_filepath), - DirectiveDeserialiser(), + deserialiser(), ] @linear_pipeline -def load_gt4py_stencils(parsed: DeserialisedDirectives) -> list[Step]: - """Execute a pipeline to update fields of a DeserialisedDirectives object with GT4Py stencils. +def load_gt4py_stencils(parsed: IntegrationCodeInterface) -> list[Step]: + """Execute a pipeline to update fields of a IntegrationCodeInterface object with GT4Py stencils. Args: - parsed: The input DeserialisedDirectives object. + parsed: The input IntegrationCodeInterface object. Returns: The updated object with fields containing information from GT4Py stencils. @@ -62,26 +85,25 @@ def load_gt4py_stencils(parsed: DeserialisedDirectives) -> list[Step]: @linear_pipeline def run_code_generation( - parsed: DeserialisedDirectives, input_filepath: Path, output_filepath: Path, - profile: bool, - metadatagen: bool, + codegen_type: str, + *args, + **kwargs, ) -> list[Step]: - """Execute a pipeline to generate and write code for a set of directives. 
- - The pipeline consists of two steps: IntegrationGenerator and IntegrationWriter. The IntegrationGenerator generates - code based on the parsed directives and profile flag. The IntegrationWriter writes the generated code to the - specified filepath. + """Execute a pipeline to generate and write code. Args: - parsed: The deserialized directives object. input_filepath: The original file containing the DSL preprocessor directives. output_filepath: The file path to write the generated code to. - profile: A flag to indicate if profiling information should be included in the generated code. - metadatagen: A flag to indicate if a metadata header should be included in the generated code. + codegen_type: Which type of code generator to use. + + Note: + Additional positional and keyword arguments are passed to the code generator. """ + code_generator = CODEGENS[codegen_type] + return [ - IntegrationGenerator(parsed, profile, metadatagen), + code_generator(*args, **kwargs), CodegenWriter(input_filepath, output_filepath), ] diff --git a/liskov/src/icon4py/liskov/common.py b/liskov/src/icon4py/liskov/pipeline/definition.py similarity index 100% rename from liskov/src/icon4py/liskov/common.py rename to liskov/src/icon4py/liskov/pipeline/definition.py diff --git a/liskov/tests/test_cli.py b/liskov/tests/test_cli.py index 242171fe6d..9aa9c47f1e 100644 --- a/liskov/tests/test_cli.py +++ b/liskov/tests/test_cli.py @@ -19,6 +19,7 @@ FREE_FORM_STENCIL, MULTIPLE_STENCILS, NO_DIRECTIVES_STENCIL, + REPEATED_STENCILS, SINGLE_STENCIL, ) @@ -28,31 +29,36 @@ def outfile(tmp_path): return str(tmp_path / "gen.f90") -@pytest.mark.parametrize("file", [NO_DIRECTIVES_STENCIL]) -def test_cli_no_directives(make_f90_tmpfile, cli, file, outfile): - fpath = str(make_f90_tmpfile(content=file)) - result = cli.invoke(main, [fpath, outfile]) - assert result.exit_code == 0 - - @pytest.mark.parametrize( - "file, profile", + "file, options", [ - (NO_DIRECTIVES_STENCIL, False), - (SINGLE_STENCIL, False), - 
(CONSECUTIVE_STENCIL, False), - (FREE_FORM_STENCIL, False), - (MULTIPLE_STENCILS, False), - (SINGLE_STENCIL, True), - (CONSECUTIVE_STENCIL, True), - (FREE_FORM_STENCIL, True), - (MULTIPLE_STENCILS, True), + (NO_DIRECTIVES_STENCIL, ["--ppser"]), + (NO_DIRECTIVES_STENCIL, []), + (SINGLE_STENCIL, ["--ppser"]), + (SINGLE_STENCIL, []), + (CONSECUTIVE_STENCIL, ["--ppser"]), + (CONSECUTIVE_STENCIL, []), + (FREE_FORM_STENCIL, ["--ppser"]), + (FREE_FORM_STENCIL, []), + (MULTIPLE_STENCILS, ["--ppser"]), + (MULTIPLE_STENCILS, []), + (SINGLE_STENCIL, ["--ppser"]), + (SINGLE_STENCIL, ["--profile"]), + (CONSECUTIVE_STENCIL, ["--ppser", "--profile"]), + (CONSECUTIVE_STENCIL, ["--profile"]), + (FREE_FORM_STENCIL, ["--ppser", "--profile"]), + (FREE_FORM_STENCIL, ["--profile"]), + (MULTIPLE_STENCILS, ["--ppser", "--profile"]), + (MULTIPLE_STENCILS, ["--profile"]), + (REPEATED_STENCILS, ["--ppser", "--profile"]), + (REPEATED_STENCILS, ["--profile"]), ], ) -def test_cli(make_f90_tmpfile, cli, file, outfile, profile): +def test_cli(make_f90_tmpfile, cli, file, outfile, options): fpath = str(make_f90_tmpfile(content=file)) - args = [fpath, outfile] - if profile: - args.append("--profile") + args = [fpath, outfile, *options, "-m"] result = cli.invoke(main, args) assert result.exit_code == 0 + + +# todo: add test for wrong arguments diff --git a/liskov/tests/test_directives_deserialiser.py b/liskov/tests/test_directives_deserialiser.py index a3287b2c2f..bb10c24a1b 100644 --- a/liskov/tests/test_directives_deserialiser.py +++ b/liskov/tests/test_directives_deserialiser.py @@ -15,7 +15,19 @@ import pytest -import icon4py.liskov.parsing.types as ts +import icon4py.liskov.parsing.parse +from icon4py.liskov.codegen.integration.deserialise import ( + DeclareDataFactory, + EndCreateDataFactory, + EndIfDataFactory, + EndProfileDataFactory, + EndStencilDataFactory, + ImportsDataFactory, + InsertDataFactory, + StartCreateDataFactory, + StartProfileDataFactory, + StartStencilDataFactory, +) from 
icon4py.liskov.codegen.integration.interface import ( BoundsData, DeclareData, @@ -29,18 +41,6 @@ StartCreateData, StartProfileData, ) -from icon4py.liskov.parsing.deserialise import ( - DeclareDataFactory, - EndCreateDataFactory, - EndIfDataFactory, - EndProfileDataFactory, - EndStencilDataFactory, - ImportsDataFactory, - InsertDataFactory, - StartCreateDataFactory, - StartProfileDataFactory, - StartStencilDataFactory, -) from icon4py.liskov.parsing.exceptions import ( DirectiveSyntaxError, MissingBoundsError, @@ -51,10 +51,38 @@ @pytest.mark.parametrize( "factory_class, directive_type, string, startln, endln, expected", [ - (EndCreateDataFactory, ts.EndCreate, "END CREATE", 2, 2, EndCreateData), - (ImportsDataFactory, ts.Imports, "IMPORTS", 3, 3, ImportsData), - (EndIfDataFactory, ts.EndIf, "ENDIF", 4, 4, EndIfData), - (EndProfileDataFactory, ts.EndProfile, "END PROFILE", 5, 5, EndProfileData), + ( + EndCreateDataFactory, + icon4py.liskov.parsing.parse.EndCreate, + "END CREATE", + 2, + 2, + EndCreateData, + ), + ( + ImportsDataFactory, + icon4py.liskov.parsing.parse.Imports, + "IMPORTS", + 3, + 3, + ImportsData, + ), + ( + EndIfDataFactory, + icon4py.liskov.parsing.parse.EndIf, + "ENDIF", + 4, + 4, + EndIfData, + ), + ( + EndProfileDataFactory, + icon4py.liskov.parsing.parse.EndProfile, + "END PROFILE", + 5, + 5, + EndProfileData, + ), ], ) def test_data_factories_no_args( @@ -82,8 +110,10 @@ def test_data_factories_no_args( EndStencilData, { "directives": [ - ts.EndStencil("END STENCIL(name=foo)", 5, 5), - ts.EndStencil( + icon4py.liskov.parsing.parse.EndStencil( + "END STENCIL(name=foo)", 5, 5 + ), + icon4py.liskov.parsing.parse.EndStencil( "END STENCIL(name=bar; noendif=true; noprofile=true)", 20, 20 ), ], @@ -100,7 +130,9 @@ def test_data_factories_no_args( EndStencilData, { "directives": [ - ts.EndStencil("END STENCIL(name=foo; noprofile=true)", 5, 5) + icon4py.liskov.parsing.parse.EndStencil( + "END STENCIL(name=foo; noprofile=true)", 5, 5 + ) ], "content": 
{"EndStencil": [{"name": "foo"}]}, }, @@ -110,8 +142,12 @@ def test_data_factories_no_args( StartProfileData, { "directives": [ - ts.StartProfile("START PROFILE(name=foo)", 5, 5), - ts.StartProfile("START PROFILE(name=bar)", 20, 20), + icon4py.liskov.parsing.parse.StartProfile( + "START PROFILE(name=foo)", 5, 5 + ), + icon4py.liskov.parsing.parse.StartProfile( + "START PROFILE(name=bar)", 20, 20 + ), ], "content": {"StartProfile": [{"name": "foo"}, {"name": "bar"}]}, }, @@ -120,7 +156,11 @@ def test_data_factories_no_args( StartProfileDataFactory, StartProfileData, { - "directives": [ts.StartProfile("START PROFILE(name=foo)", 5, 5)], + "directives": [ + icon4py.liskov.parsing.parse.StartProfile( + "START PROFILE(name=foo)", 5, 5 + ) + ], "content": {"StartProfile": [{"name": "foo"}]}, }, ), @@ -129,7 +169,7 @@ def test_data_factories_no_args( DeclareData, { "directives": [ - ts.Declare( + icon4py.liskov.parsing.parse.Declare( "DECLARE(vn=nlev,nblks_c; w=nlevp1,nblks_e; suffix=dsl; type=LOGICAL)", 5, 5, @@ -151,7 +191,9 @@ def test_data_factories_no_args( InsertDataFactory, InsertData, { - "directives": [ts.Insert("INSERT(content=foo)", 5, 5)], + "directives": [ + icon4py.liskov.parsing.parse.Insert("INSERT(content=foo)", 5, 5) + ], "content": {"Insert": ["foo"]}, }, ), @@ -168,7 +210,11 @@ def test_data_factories_with_args(factory, target, mock_data): [ ( { - "directives": [ts.StartCreate("START CREATE(extra_fields=foo)", 5, 5)], + "directives": [ + icon4py.liskov.parsing.parse.StartCreate( + "START CREATE(extra_fields=foo)", 5, 5 + ) + ], "content": {"StartCreate": [{"extra_fields": "foo"}]}, }, ["foo"], @@ -176,7 +222,9 @@ def test_data_factories_with_args(factory, target, mock_data): ( { "directives": [ - ts.StartCreate("START CREATE(extra_fields=foo,xyz)", 5, 5) + icon4py.liskov.parsing.parse.StartCreate( + "START CREATE(extra_fields=foo,xyz)", 5, 5 + ) ], "content": {"StartCreate": [{"extra_fields": "foo,xyz"}]}, }, @@ -184,7 +232,9 @@ def 
test_data_factories_with_args(factory, target, mock_data): ), ( { - "directives": [ts.StartCreate("START CREATE()", 5, 5)], + "directives": [ + icon4py.liskov.parsing.parse.StartCreate("START CREATE()", 5, 5) + ], "content": {"StartCreate": [None]}, }, None, @@ -206,8 +256,12 @@ def test_start_create_factory(mock_data, extra_fields): EndStencilData, { "directives": [ - ts.EndStencil("END STENCIL(name=foo)", 5, 5), - ts.EndStencil("END STENCIL(name=bar; noendif=foo)", 20, 20), + icon4py.liskov.parsing.parse.EndStencil( + "END STENCIL(name=foo)", 5, 5 + ), + icon4py.liskov.parsing.parse.EndStencil( + "END STENCIL(name=bar; noendif=foo)", 20, 20 + ), ], "content": { "EndStencil": [{"name": "foo"}, {"name": "bar", "noendif": "foo"}] diff --git a/liskov/tests/test_external.py b/liskov/tests/test_external.py index 4051700f1b..84a9acf2f3 100644 --- a/liskov/tests/test_external.py +++ b/liskov/tests/test_external.py @@ -18,8 +18,8 @@ from gt4py.next.ffront.decorator import Program from icon4py.liskov.codegen.integration.interface import ( - DeserialisedDirectives, FieldAssociationData, + IntegrationCodeInterface, StartStencilData, ) from icon4py.liskov.external.exceptions import ( @@ -60,7 +60,7 @@ def test_stencil_collector_invalid_member(): os.remove(path) -mock_deserialised_directives = DeserialisedDirectives( +mock_deserialised_directives = IntegrationCodeInterface( StartStencil=[ StartStencilData( name="apply_nabla2_to_w", diff --git a/liskov/tests/test_generation.py b/liskov/tests/test_generation.py index 1413e473cc..7c82076bf6 100644 --- a/liskov/tests/test_generation.py +++ b/liskov/tests/test_generation.py @@ -13,11 +13,10 @@ import pytest -from icon4py.liskov.codegen.integration.generate import IntegrationGenerator +from icon4py.liskov.codegen.integration.generate import IntegrationCodeGenerator from icon4py.liskov.codegen.integration.interface import ( BoundsData, DeclareData, - DeserialisedDirectives, EndCreateData, EndIfData, EndProfileData, @@ -25,15 +24,27 
@@ FieldAssociationData, ImportsData, InsertData, + IntegrationCodeInterface, StartCreateData, StartProfileData, StartStencilData, ) - # TODO: fix tests to adapt to new custom output fields +from icon4py.liskov.codegen.serialisation.generate import ( + SerialisationCodeGenerator, +) +from icon4py.liskov.codegen.serialisation.interface import ( + FieldSerialisationData, + InitData, + Metadata, + SavepointData, + SerialisationCodeInterface, +) + + @pytest.fixture -def serialised_directives(): +def integration_code_interface(): start_stencil_data = StartStencilData( name="stencil1", fields=[ @@ -86,7 +97,7 @@ def serialised_directives(): end_profile_data = EndProfileData(startln=14) insert_data = InsertData(startln=15, content="print *, 'Hello, World!'") - return DeserialisedDirectives( + return IntegrationCodeInterface( StartStencil=[start_stencil_data], EndStencil=[end_stencil_data], Declare=[declare_data], @@ -197,12 +208,14 @@ def expected_insert_source(): @pytest.fixture -def generator(serialised_directives): - return IntegrationGenerator(serialised_directives, profile=True, metadata_gen=False) +def integration_code_generator(integration_code_interface): + return IntegrationCodeGenerator( + integration_code_interface, profile=True, metadatagen=False + ) -def test_generate( - generator, +def test_integration_code_generation( + integration_code_generator, expected_start_create_source, expected_end_create_source, expected_imports_source, @@ -215,7 +228,7 @@ def test_generate( expected_insert_source, ): # Check that the generated code snippets are as expected - generated = generator() + generated = integration_code_generator() assert len(generated) == 10 assert generated[0].source == expected_start_create_source assert generated[1].source == expected_end_create_source @@ -227,3 +240,97 @@ def test_generate( assert generated[7].source == expected_start_profile_source assert generated[8].source == expected_end_profile_source assert generated[9].source == 
expected_insert_source + + +# TODO: fix tests to adapt to new custom output fields +@pytest.fixture +def serialisation_code_interface(): + interface = { + "Init": InitData(startln=0, directory=".", prefix="liskov-serialisation"), + "Savepoint": [ + SavepointData( + startln=9, + subroutine="apply_nabla2_to_vn_in_lateral_boundary", + intent="start", + fields=[ + FieldSerialisationData( + variable="z_nabla2_e", + association="z_nabla2_e(:,:,1)", + decomposed=False, + dimension=None, + typespec=None, + typename=None, + ptr_var=None, + ), + ], + metadata=[ + Metadata(key="jstep", value="jstep_ptr"), + Metadata(key="diffctr", value="diffctr"), + ], + ), + SavepointData( + startln=38, + subroutine="apply_nabla2_to_vn_in_lateral_boundary", + intent="end", + fields=[ + FieldSerialisationData( + variable="z_nabla2_e", + association="z_nabla2_e(:,:,1)", + decomposed=False, + dimension=None, + typespec=None, + typename=None, + ptr_var=None, + ), + FieldSerialisationData( + variable="vn", + association="p_nh_prog%vn(:,:,1)", + decomposed=False, + dimension=None, + typespec=None, + typename=None, + ptr_var=None, + ), + ], + metadata=[ + Metadata(key="jstep", value="jstep_ptr"), + Metadata(key="diffctr", value="diffctr"), + ], + ), + ], + } + + return SerialisationCodeInterface(**interface) + + +@pytest.fixture +def expected_savepoints(): + return [ + """ + !$ser init directory="." 
prefix="liskov-serialisation" + + !$ser savepoint apply_nabla2_to_vn_in_lateral_boundary_start jstep=jstep_ptr diffctr=diffctr + + PRINT *, 'Serializing z_nabla2_e=z_nabla2_e(:,:,1)' + + !$ser data z_nabla2_e=z_nabla2_e(:,:,1)""", + """ + !$ser savepoint apply_nabla2_to_vn_in_lateral_boundary_end jstep=jstep_ptr diffctr=diffctr + + PRINT *, 'Serializing z_nabla2_e=z_nabla2_e(:,:,1)' + + !$ser data z_nabla2_e=z_nabla2_e(:,:,1) + + PRINT *, 'Serializing vn=p_nh_prog%vn(:,:,1)' + + !$ser data vn=p_nh_prog%vn(:,:,1)""", + ] + + +def test_serialisation_code_generation( + serialisation_code_interface, expected_savepoints +): + generated = SerialisationCodeGenerator(serialisation_code_interface)() + assert len(generated) == 2 + assert generated[0].source == expected_savepoints[0] + assert generated[1].source == expected_savepoints[1] diff --git a/liskov/tests/test_parser.py b/liskov/tests/test_parser.py index 820651ed0d..6fa5095de9 100644 --- a/liskov/tests/test_parser.py +++ b/liskov/tests/test_parser.py @@ -17,6 +17,7 @@ import pytest from pytest import mark +import icon4py.liskov.parsing.parse import icon4py.liskov.parsing.types as ts from icon4py.liskov.parsing.exceptions import UnsupportedDirectiveError from icon4py.liskov.parsing.parse import DirectivesParser @@ -40,21 +41,23 @@ def test_parse_no_input(): "directive, string, startln, endln, expected_content", [ ( - ts.Imports("IMPORTS()", 1, 1), + icon4py.liskov.parsing.parse.Imports("IMPORTS()", 1, 1), "IMPORTS()", 1, 1, defaultdict(list, {"Imports": [{}]}), ), ( - ts.StartCreate("START CREATE(extra_fields=foo)", 2, 2), + icon4py.liskov.parsing.parse.StartCreate( + "START CREATE(extra_fields=foo)", 2, 2 + ), "START CREATE()", 2, 2, defaultdict(list, {"StartCreate": [{"extra_fields": "foo"}]}), ), ( - ts.StartStencil( + icon4py.liskov.parsing.parse.StartStencil( "START STENCIL(name=mo_nh_diffusion_06; vn=p_patch%p%vn; foo=abc)", 3, 4 ), "START STENCIL(name=mo_nh_diffusion_06; vn=p_patch%p%vn; foo=abc)", diff --git 
a/liskov/tests/test_serialisation_deserialiser.py b/liskov/tests/test_serialisation_deserialiser.py new file mode 100644 index 0000000000..5fe597de65 --- /dev/null +++ b/liskov/tests/test_serialisation_deserialiser.py @@ -0,0 +1,133 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +import pytest + +from icon4py.liskov.codegen.serialisation.deserialise import ( + InitDataFactory, + SavepointDataFactory, +) +from icon4py.liskov.codegen.serialisation.interface import ( + FieldSerialisationData, + InitData, + Metadata, + SavepointData, +) +from icon4py.liskov.parsing.parse import ( + Declare, + EndCreate, + EndProfile, + EndStencil, + Imports, + StartCreate, + StartProfile, + StartStencil, +) + + +@pytest.fixture +def parsed_dict(): + parsed = { + "directives": [ + Imports(string="IMPORTS()", startln=0, endln=0), + StartCreate( + string="START CREATE()", + startln=2, + endln=2, + ), + Declare( + string="DECLARE(vn=nproma,p_patch%nlev,p_patch%nblks_e; suffix=dsl)", + startln=4, + endln=4, + ), + Declare( + string="DECLARE(vn= nproma,p_patch%nlev,p_patch%nblks_e; a=nproma,p_patch%nlev,p_patch%nblks_e; b=nproma,p_patch%nlev,p_patch%nblks_e; type=REAL(vp))", + startln=6, + endln=7, + ), + StartStencil( + string="START STENCIL(name=apply_nabla2_to_vn_in_lateral_boundary; z_nabla2_e=z_nabla2_e(:, :, 1); area_edge=p_patch%edges%area_edge(:,1); fac_bdydiff_v=fac_bdydiff_v; vn=p_nh_prog%vn(:,:,1); vertical_lower=1; vertical_upper=nlev; horizontal_lower=i_startidx; horizontal_upper=i_endidx; 
accpresent=True)", + startln=9, + endln=14, + ), + StartProfile( + string="START PROFILE(name=apply_nabla2_to_vn_in_lateral_boundary)", + startln=35, + endln=35, + ), + EndProfile( + string="END PROFILE()", + startln=37, + endln=37, + ), + EndStencil( + string="END STENCIL(name=apply_nabla2_to_vn_in_lateral_boundary; noprofile=True)", + startln=38, + endln=38, + ), + EndCreate( + string="END CREATE()", + startln=39, + endln=39, + ), + ], + "content": { + "Imports": [{}], + "StartCreate": [None], + "Declare": [ + {"vn": "nproma,p_patch%nlev,p_patch%nblks_e", "suffix": "dsl"}, + { + "vn": "nproma,p_patch%nlev,p_patch%nblks_e", + "a": "nproma,p_patch%nlev,p_patch%nblks_e", + "b": "nproma,p_patch%nlev,p_patch%nblks_e", + "type": "REAL(vp)", + }, + ], + "StartStencil": [ + { + "name": "apply_nabla2_to_vn_in_lateral_boundary", + "z_nabla2_e": "z_nabla2_e(:,:,1)", + "area_edge": "p_patch%edges%area_edge(:,1)", + "fac_bdydiff_v": "fac_bdydiff_v", + "vn": "p_nh_prog%vn(:,:,1)", + "vertical_lower": "1", + "vertical_upper": "nlev", + "horizontal_lower": "i_startidx", + "horizontal_upper": "i_endidx", + "accpresent": "True", + } + ], + "StartProfile": [{"name": "apply_nabla2_to_vn_in_lateral_boundary"}], + "EndProfile": [{}], + "EndStencil": [ + {"name": "apply_nabla2_to_vn_in_lateral_boundary", "noprofile": "True"} + ], + "EndCreate": [{}], + }, + } + return parsed + + +def test_init_data_factory(parsed_dict): + init = InitDataFactory()(parsed_dict) + assert isinstance(init, InitData) + assert init.directory == "." 
+ assert init.prefix == "liskov-serialisation" + + +def test_savepoint_data_factory(parsed_dict): + savepoints = SavepointDataFactory()(parsed_dict) + assert len(savepoints) == 2 + assert any([isinstance(sp, SavepointData) for sp in savepoints]) + assert any([isinstance(f, FieldSerialisationData) for f in savepoints[0].fields]) + assert any([isinstance(m, Metadata) for m in savepoints[0].metadata]) diff --git a/liskov/tests/test_utils.py b/liskov/tests/test_utils.py index 1a327af09d..bd3d3313ae 100644 --- a/liskov/tests/test_utils.py +++ b/liskov/tests/test_utils.py @@ -14,8 +14,8 @@ import pytest -import icon4py.liskov.parsing.types as ts -from icon4py.liskov.parsing.types import Imports, StartCreate +import icon4py.liskov.parsing.parse +from icon4py.liskov.parsing.parse import Imports, StartCreate from icon4py.liskov.parsing.utils import ( extract_directive, print_parsed_directive, @@ -64,6 +64,6 @@ def test_string_to_bool(string, expected): def test_print_parsed_directive(): - directive = ts.Imports("IMPORTS()", 1, 1) + directive = icon4py.liskov.parsing.parse.Imports("IMPORTS()", 1, 1) expected_output = "Directive: IMPORTS(), start line: 1, end line: 1\n" assert print_parsed_directive(directive) == expected_output diff --git a/liskov/tests/test_validation.py b/liskov/tests/test_validation.py index 3f31f570a1..ba6488cfa4 100644 --- a/liskov/tests/test_validation.py +++ b/liskov/tests/test_validation.py @@ -20,9 +20,9 @@ RequiredDirectivesError, UnbalancedStencilDirectiveError, ) -from icon4py.liskov.parsing.parse import DirectivesParser -from icon4py.liskov.parsing.types import ( +from icon4py.liskov.parsing.parse import ( Declare, + DirectivesParser, Imports, StartCreate, StartStencil, diff --git a/liskov/tests/test_writer.py b/liskov/tests/test_writer.py index a26c4d3c9d..5b7866d5d9 100644 --- a/liskov/tests/test_writer.py +++ b/liskov/tests/test_writer.py @@ -14,8 +14,8 @@ from pathlib import Path from tempfile import TemporaryDirectory -from 
icon4py.liskov.codegen.types import GeneratedCode -from icon4py.liskov.codegen.write import DIRECTIVE_IDENT, CodegenWriter +from icon4py.liskov.codegen.shared.types import GeneratedCode +from icon4py.liskov.codegen.shared.writer import DIRECTIVE_IDENT, CodegenWriter def test_write_from(): diff --git a/pyutils/src/icon4py/f2ser/cli.py b/pyutils/src/icon4py/f2ser/cli.py index 4c142fe5ea..0dfeba93a3 100644 --- a/pyutils/src/icon4py/f2ser/cli.py +++ b/pyutils/src/icon4py/f2ser/cli.py @@ -18,8 +18,10 @@ from icon4py.f2ser.deserialise import ParsedGranuleDeserialiser from icon4py.f2ser.parse import GranuleParser -from icon4py.liskov.codegen.serialisation.generate import SerialisationGenerator -from icon4py.liskov.codegen.write import CodegenWriter +from icon4py.liskov.codegen.serialisation.generate import ( + SerialisationCodeGenerator, +) +from icon4py.liskov.codegen.shared.writer import CodegenWriter @click.command("icon_f2ser") @@ -63,7 +65,7 @@ def main( """ parsed = GranuleParser(granule_path, dependencies)() interface = ParsedGranuleDeserialiser(parsed, directory=directory, prefix=prefix)() - generated = SerialisationGenerator(interface)() + generated = SerialisationCodeGenerator(interface)() CodegenWriter(granule_path, output_filepath)(generated) diff --git a/pyutils/src/icon4py/f2ser/deserialise.py b/pyutils/src/icon4py/f2ser/deserialise.py index a4587e07f8..2b32240f23 100644 --- a/pyutils/src/icon4py/f2ser/deserialise.py +++ b/pyutils/src/icon4py/f2ser/deserialise.py @@ -16,7 +16,7 @@ FieldSerialisationData, InitData, SavepointData, - SerialisationInterface, + SerialisationCodeInterface, ) @@ -27,7 +27,7 @@ def __init__(self, parsed: ParsedGranule, directory: str, prefix: str): self.prefix = prefix self.data = {"Savepoint": [], "Init": ...} - def __call__(self) -> SerialisationInterface: + def __call__(self) -> SerialisationCodeInterface: """Deserialise the parsed granule and returns a serialisation interface. 
Returns: @@ -36,7 +36,7 @@ def __call__(self) -> SerialisationInterface: self._merge_out_inout_fields() self._make_savepoints() self._make_init_data() - return SerialisationInterface(**self.data) + return SerialisationCodeInterface(**self.data) def _make_savepoints(self) -> None: """Create savepoints for each subroutine and intent in the parsed granule. @@ -83,7 +83,7 @@ def _create_savepoint( intent=intent, startln=self._get_codegen_line(var_dict["codegen_ctx"], intent), fields=fields, - metadata=None, + metadata=None, # todo: currently not using metadata ) ) diff --git a/pyutils/tests/f2ser/test_f2ser_codegen.py b/pyutils/tests/f2ser/test_f2ser_codegen.py index d9ea58718d..9f5368373b 100644 --- a/pyutils/tests/f2ser/test_f2ser_codegen.py +++ b/pyutils/tests/f2ser/test_f2ser_codegen.py @@ -13,7 +13,9 @@ from icon4py.f2ser.deserialise import ParsedGranuleDeserialiser from icon4py.f2ser.parse import GranuleParser -from icon4py.liskov.codegen.serialisation.generate import SerialisationGenerator +from icon4py.liskov.codegen.serialisation.generate import ( + SerialisationCodeGenerator, +) def test_deserialiser_diffusion_codegen(diffusion_granule, diffusion_granule_deps): @@ -21,6 +23,6 @@ def test_deserialiser_diffusion_codegen(diffusion_granule, diffusion_granule_dep parsed = parser() deserialiser = ParsedGranuleDeserialiser(parsed, directory=".", prefix="test") interface = deserialiser() - generator = SerialisationGenerator(interface) + generator = SerialisationCodeGenerator(interface) generated = generator() assert len(generated) == 3 diff --git a/pyutils/tests/f2ser/test_granule_deserialiser.py b/pyutils/tests/f2ser/test_granule_deserialiser.py index c82d422a37..cbe16a325d 100644 --- a/pyutils/tests/f2ser/test_granule_deserialiser.py +++ b/pyutils/tests/f2ser/test_granule_deserialiser.py @@ -17,7 +17,7 @@ from icon4py.liskov.codegen.serialisation.interface import ( FieldSerialisationData, SavepointData, - SerialisationInterface, + SerialisationCodeInterface, ) @@ 
-78,7 +78,7 @@ def test_deserialiser_mock(mock_parsed_granule): mock_parsed_granule, directory=".", prefix="f2ser" ) interface = deserialiser() - assert isinstance(interface, SerialisationInterface) + assert isinstance(interface, SerialisationCodeInterface) assert len(interface.Savepoint) == 3 assert all([isinstance(s, SavepointData) for s in interface.Savepoint]) assert all( diff --git a/testutils/src/icon4py/testutils/liskov_fortran_samples.py b/testutils/src/icon4py/testutils/liskov_fortran_samples.py index 858b3a2089..8231dbe038 100644 --- a/testutils/src/icon4py/testutils/liskov_fortran_samples.py +++ b/testutils/src/icon4py/testutils/liskov_fortran_samples.py @@ -33,11 +33,11 @@ !$DSL DECLARE(vn=nproma,p_patch%nlev,p_patch%nblks_e; suffix=dsl) - !$DSL DECLARE(vn=nproma,p_patch%nlev,p_patch%nblks_e; a=nproma,p_patch%nlev,p_patch%nblks_e; & + !$DSL DECLARE(vn= nproma,p_patch%nlev,p_patch%nblks_e; a=nproma,p_patch%nlev,p_patch%nblks_e; & !$DSL b=nproma,p_patch%nlev,p_patch%nblks_e; type=REAL(vp)) !$DSL START STENCIL(name=apply_nabla2_to_vn_in_lateral_boundary; & - !$DSL z_nabla2_e=z_nabla2_e(:,:,1); area_edge=p_patch%edges%area_edge(:,1); & + !$DSL z_nabla2_e=z_nabla2_e(:, :, 1); area_edge=p_patch%edges%area_edge(:,1); & !$DSL fac_bdydiff_v=fac_bdydiff_v; vn=p_nh_prog%vn(:,:,1); & !$DSL vertical_lower=1; vertical_upper=nlev; & !$DSL horizontal_lower=i_startidx; horizontal_upper=i_endidx; & @@ -242,3 +242,78 @@ !$DSL END CREATE() """ + + +REPEATED_STENCILS = """\ + !$DSL IMPORTS() + + !$DSL START CREATE() + + !$DSL DECLARE(vn=nproma,p_patch%nlev,p_patch%nblks_e; suffix=dsl) + + !$DSL DECLARE(vn= nproma,p_patch%nlev,p_patch%nblks_e; a=nproma,p_patch%nlev,p_patch%nblks_e; & + !$DSL b=nproma,p_patch%nlev,p_patch%nblks_e; type=REAL(vp)) + + !$DSL START STENCIL(name=apply_nabla2_to_vn_in_lateral_boundary; & + !$DSL z_nabla2_e=z_nabla2_e(:, :, 1); area_edge=p_patch%edges%area_edge(:,1); & + !$DSL fac_bdydiff_v=fac_bdydiff_v; vn=p_nh_prog%vn(:,:,1); & + !$DSL 
vertical_lower=1; vertical_upper=nlev; & + !$DSL horizontal_lower=i_startidx; horizontal_upper=i_endidx; & + !$DSL accpresent=True) + !$OMP DO PRIVATE(je,jk,jb,i_startidx,i_endidx) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, start_bdydiff_e, grf_bdywidth_e) + + !$ACC PARALLEL IF( i_am_accel_node .AND. acc_on ) DEFAULT(NONE) ASYNC(1) + vn_before(:,:,:) = p_nh_prog%vn(:,:,:) + !$ACC END PARALLEL + + !$ACC PARALLEL LOOP DEFAULT(NONE) GANG VECTOR COLLAPSE(2) ASYNC(1) IF( i_am_accel_node .AND. acc_on ) + DO jk = 1, nlev + !DIR$ IVDEP + DO je = i_startidx, i_endidx + p_nh_prog%vn(je,jk,jb) = & + p_nh_prog%vn(je,jk,jb) + & + z_nabla2_e(je,jk,jb) * & + p_patch%edges%area_edge(je,jb)*fac_bdydiff_v + ENDDO + ENDDO + !$DSL START PROFILE(name=apply_nabla2_to_vn_in_lateral_boundary) + !$ACC END PARALLEL LOOP + !$DSL END PROFILE() + !$DSL END STENCIL(name=apply_nabla2_to_vn_in_lateral_boundary; noprofile=True) + + !$DSL START STENCIL(name=apply_nabla2_to_vn_in_lateral_boundary; & + !$DSL z_nabla2_e=z_nabla2_e(:, :, 1); area_edge=p_patch%edges%area_edge(:,1); & + !$DSL fac_bdydiff_v=fac_bdydiff_v; vn=p_nh_prog%vn(:,:,1); & + !$DSL vertical_lower=1; vertical_upper=nlev; & + !$DSL horizontal_lower=i_startidx; horizontal_upper=i_endidx; & + !$DSL accpresent=True) + !$OMP DO PRIVATE(je,jk,jb,i_startidx,i_endidx) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk,i_endblk + + CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, start_bdydiff_e, grf_bdywidth_e) + + !$ACC PARALLEL IF( i_am_accel_node .AND. acc_on ) DEFAULT(NONE) ASYNC(1) + vn_before(:,:,:) = p_nh_prog%vn(:,:,:) + !$ACC END PARALLEL + + !$ACC PARALLEL LOOP DEFAULT(NONE) GANG VECTOR COLLAPSE(2) ASYNC(1) IF( i_am_accel_node .AND. 
acc_on ) + DO jk = 1, nlev + !DIR$ IVDEP + DO je = i_startidx, i_endidx + p_nh_prog%vn(je,jk,jb) = & + p_nh_prog%vn(je,jk,jb) + & + z_nabla2_e(je,jk,jb) * & + p_patch%edges%area_edge(je,jb)*fac_bdydiff_v + ENDDO + ENDDO + !$DSL START PROFILE(name=apply_nabla2_to_vn_in_lateral_boundary) + !$ACC END PARALLEL LOOP + !$DSL END PROFILE() + !$DSL END STENCIL(name=apply_nabla2_to_vn_in_lateral_boundary; noprofile=True) + !$DSL END CREATE() + """ From c5e2710d790595a9437e67374bef2f7daff524c7 Mon Sep 17 00:00:00 2001 From: samkellerhals Date: Tue, 9 May 2023 14:22:57 +0200 Subject: [PATCH 11/21] Do not use metadata in tests --- liskov/tests/test_cli.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/liskov/tests/test_cli.py b/liskov/tests/test_cli.py index 9aa9c47f1e..efbc68b47d 100644 --- a/liskov/tests/test_cli.py +++ b/liskov/tests/test_cli.py @@ -56,9 +56,6 @@ def outfile(tmp_path): ) def test_cli(make_f90_tmpfile, cli, file, outfile, options): fpath = str(make_f90_tmpfile(content=file)) - args = [fpath, outfile, *options, "-m"] + args = [fpath, outfile, *options] result = cli.invoke(main, args) assert result.exit_code == 0 - - -# todo: add test for wrong arguments From a7b165ea62ea4e0940d394028fca89a995f0ed51 Mon Sep 17 00:00:00 2001 From: samkellerhals Date: Tue, 9 May 2023 14:42:21 +0200 Subject: [PATCH 12/21] Do not serialise tolerance data --- .../codegen/serialisation/deserialise.py | 4 +- test.f90 | 7020 +++++++++++++++++ 2 files changed, 7022 insertions(+), 2 deletions(-) create mode 100644 test.f90 diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py b/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py index 9254a19ef1..3ce3c81ef1 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py @@ -102,9 +102,9 @@ def _remove_unnecessary_keys(named_args: dict) -> dict: copy = named_args.copy() [pop_item_from_dict(copy, k, 
None) for k in KEYS_TO_REMOVE] for tol in TOLERANCE_ARGS: - for k in named_args.copy().keys(): + for k in copy.copy().keys(): if k.endswith(tol): - pop_item_from_dict(named_args, k, None) + pop_item_from_dict(copy, k, None) return copy @staticmethod diff --git a/test.f90 b/test.f90 new file mode 100644 index 0000000000..7a5a424067 --- /dev/null +++ b/test.f90 @@ -0,0 +1,7020 @@ +!> +!! mo_solve_nonhydro +!! +!! This module contains the nonhydrostatic dynamical core for the triangular version +!! Its routines were previously contained in mo_divergent_modes and mo_vector_operations +!! but have been extracted for better memory efficiency +!! +!! @author Guenther Zaengl, DWD +!! +!! @par Revision History +!! Initial release by Guenther Zaengl (2010-10-13) based on earlier work +!! by Almut Gassmann, MPI-M +!! Modification by William Sawyer, CSCS (2015-02-06) +!! - OpenACC implementation +!! +!! @par Copyright and License +!! +!! This code is subject to the DWD and MPI-M-Software-License-Agreement in +!! its most recent form. +!! Please see the file LICENSE in the root of the source tree for this code. +!! Where software is supplied by third parties, it is indicated in the +!! headers of the routines. +!! 
+ +!---------------------------- +#include "omp_definitions.inc" +!---------------------------- + +MODULE mo_solve_nonhydro + + USE mo_kind, ONLY: wp, vp + USE mo_nonhydrostatic_config,ONLY: itime_scheme,iadv_rhotheta, igradp_method, l_open_ubc, & + kstart_moist, lhdiff_rcf, divdamp_order, & + divdamp_fac, divdamp_fac2, divdamp_fac3, divdamp_fac4, & + divdamp_z, divdamp_z2, divdamp_z3, divdamp_z4, & + divdamp_type, rayleigh_type, rhotheta_offctr, & + veladv_offctr, divdamp_fac_o2, kstart_dd3d, ndyn_substeps_var + USE mo_dynamics_config, ONLY: idiv_method + USE mo_parallel_config, ONLY: nproma, p_test_run, itype_comm, use_dycore_barrier, & + & cpu_min_nproma + USE mo_run_config, ONLY: ltimer, timers_level, lvert_nest + USE mo_model_domain, ONLY: t_patch + USE mo_grid_config, ONLY: l_limited_area + USE mo_gridref_config, ONLY: grf_intmethod_e + USE mo_interpol_config, ONLY: nudge_max_coeff + USE mo_intp_data_strc, ONLY: t_int_state + USE mo_intp, ONLY: cells2verts_scalar + USE mo_nonhydro_types, ONLY: t_nh_state + USE mo_physical_constants,ONLY: cpd, rd, cvd, cvd_o_rd, grav, rd_o_cpd, p0ref + USE mo_math_gradients, ONLY: grad_green_gauss_cell + USE mo_velocity_advection,ONLY: velocity_tendencies + USE mo_math_constants, ONLY: dbl_eps + USE mo_math_divrot, ONLY: div_avg + USE mo_vertical_grid, ONLY: nrdmax, nflat_gradp + USE mo_init_vgrid, ONLY: nflatlev + USE mo_loopindices, ONLY: get_indices_c, get_indices_e + USE mo_impl_constants, ONLY: min_rlcell_int, min_rledge_int, min_rlvert_int, & + & min_rlcell, min_rledge, RAYLEIGH_CLASSIC, RAYLEIGH_KLEMP + USE mo_impl_constants_grf,ONLY: grf_bdywidth_c, grf_bdywidth_e + USE mo_advection_hflux, ONLY: upwind_hflux_miura3 + USE mo_advection_traj, ONLY: t_back_traj, btraj_compute_o1 + USE mo_sync, ONLY: SYNC_E, SYNC_C, sync_patch_array, & + sync_patch_array_mult, sync_patch_array_mult_mp + USE mo_mpi, ONLY: my_process_is_mpi_all_seq, work_mpi_barrier, i_am_accel_node + USE mo_timer, ONLY: timer_solve_nh, timer_barrier, 
timer_start, timer_stop, & + timer_solve_nh_cellcomp, timer_solve_nh_edgecomp, & + timer_solve_nh_vnupd, timer_solve_nh_vimpl, timer_solve_nh_exch + USE mo_exception, ONLY: message + USE mo_icon_comm_lib, ONLY: icon_comm_sync + USE mo_vertical_coord_table,ONLY: vct_a + USE mo_prepadv_types, ONLY: t_prepare_adv + USE mo_initicon_config, ONLY: is_iau_active, iau_wgt_dyn + USE mo_fortran_tools, ONLY: init_zero_contiguous_dp, init_zero_contiguous_sp ! Import both for mixed prec. + !$ser verbatim USE mo_nonhydro_state, ONLY: jstep_ptr, nstep_ptr, mo_solve_nonhydro_ctr +#ifdef _OPENACC + USE mo_mpi, ONLY: my_process_is_work +#endif + + + USE cudafor + USE nvtx + + IMPLICIT NONE + + PRIVATE + + + REAL(wp), PARAMETER :: rd_o_cvd = 1._wp / cvd_o_rd + REAL(wp), PARAMETER :: cpd_o_rd = 1._wp / rd_o_cpd + REAL(wp), PARAMETER :: rd_o_p0ref = rd / p0ref + REAL(wp), PARAMETER :: grav_o_cpd = grav / cpd + + PUBLIC :: solve_nh + +#ifdef _CRAYFTN +#define __CRAY_FTN_VERSION (_RELEASE_MAJOR * 100 + _RELEASE_MINOR) +#endif + + ! On the vectorizing DWD-NEC the diagnostics for the tendencies of the normal wind + ! from terms xyz, ddt_vn_xyz, is disabled by default due to the fear that the + ! conditional storage in conditionally allocated global fields is attempted even if + ! the condition is not given and therefore the global field not allocated. If this + ! happens, this would results in a corrupted memory. + ! (Requested by G. Zaengl based on earlier problems with similar constructs.) +#ifndef __SX__ +#define __ENABLE_DDT_VN_XYZ__ +#endif + + CONTAINS + + + !> + !! solve_nh + !! + !! Main solver routine for nonhydrostatic dynamical core + !! + !! @par Revision History + !! Development started by Guenther Zaengl on 2010-02-03 + !! Modification by Sebastian Borchert, DWD (2017-07-07) + !! (Dear developer, for computational efficiency reasons, a copy of this subroutine + !! exists in 'src/atm_dyn_iconam/mo_nh_deepatmo_solve'. If you would change something here, + !! 
please consider to apply your development there, too, in order to help preventing + !! the copy from diverging and becoming a code corpse sooner or later. Thank you!) + !! + SUBROUTINE solve_nh (p_nh, p_patch, p_int, prep_adv, nnow, nnew, l_init, l_recompute, lsave_mflx, & + lprep_adv, lclean_mflx, idyn_timestep, jstep, dtime) + + TYPE(t_nh_state), TARGET, INTENT(INOUT) :: p_nh + TYPE(t_int_state), TARGET, INTENT(IN) :: p_int + TYPE(t_patch), TARGET, INTENT(INOUT) :: p_patch + TYPE(t_prepare_adv), TARGET, INTENT(INOUT) :: prep_adv + + ! Initialization switch that has to be .TRUE. at the initial time step only (not for restart) + LOGICAL, INTENT(IN) :: l_init + ! Switch to recompute velocity tendencies after a physics call irrespective of the time scheme option + LOGICAL, INTENT(IN) :: l_recompute + ! Switch if mass flux needs to be saved for nest boundary interpolation tendency computation + LOGICAL, INTENT(IN) :: lsave_mflx + ! Switch if preparations for tracer advection shall be computed + LOGICAL, INTENT(IN) :: lprep_adv + ! Switch if mass fluxes computed for tracer advection need to be reinitialized + LOGICAL, INTENT(IN) :: lclean_mflx + ! Counter of dynamics time step within a large time step (ranges from 1 to ndyn_substeps) + INTEGER, INTENT(IN) :: idyn_timestep + ! Time step count since last boundary interpolation (ranges from 0 to 2*ndyn_substeps-1) + INTEGER, INTENT(IN) :: jstep + ! Time levels + INTEGER, INTENT(IN) :: nnow, nnew + ! Dynamics time step + REAL(wp), INTENT(IN) :: dtime + + ! 
Local variables + INTEGER :: jb, jk, jc, je, jks, jg + INTEGER :: nlev, nlevp1 !< number of full levels + INTEGER :: i_startblk, i_endblk, i_startidx, i_endidx, ishift + INTEGER :: rl_start, rl_end, istep, ntl1, ntl2, nvar, nshift, nshift_total + INTEGER :: i_startblk_2, i_endblk_2, i_startidx_2, i_endidx_2, rl_start_2, rl_end_2 + INTEGER :: ic, ie, ilc0, ibc0, ikp1, ikp2 + + REAL(wp) :: z_theta_v_fl_e (nproma,p_patch%nlev ,p_patch%nblks_e), & + z_theta_v_e (nproma,p_patch%nlev ,p_patch%nblks_e), & + z_rho_e (nproma,p_patch%nlev ,p_patch%nblks_e), & + z_mass_fl_div (nproma,p_patch%nlev ,p_patch%nblks_c), & ! used for idiv_method=2 only + z_theta_v_fl_div(nproma,p_patch%nlev ,p_patch%nblks_c), & ! used for idiv_method=2 only + z_theta_v_v (nproma,p_patch%nlev ,p_patch%nblks_v), & ! used for iadv_rhotheta=1 only + z_rho_v (nproma,p_patch%nlev ,p_patch%nblks_v) ! used for iadv_rhotheta=1 only + +#if !defined (__LOOP_EXCHANGE) && !defined (__SX__) + TYPE(t_back_traj), SAVE :: btraj +#endif + + ! The data type vp (variable precision) is by default the same as wp but reduces + ! 
to single precision when the __MIXED_PRECISION cpp flag is set at compile time +#ifdef __SWAPDIM + REAL(vp) :: z_th_ddz_exner_c(nproma,p_patch%nlev ,p_patch%nblks_c), & + z_dexner_dz_c (nproma,p_patch%nlev ,p_patch%nblks_c,2), & + z_vt_ie (nproma,p_patch%nlev ,p_patch%nblks_e), & + z_kin_hor_e (nproma,p_patch%nlev ,p_patch%nblks_e), & + z_exner_ex_pr (nproma,p_patch%nlevp1,p_patch%nblks_c), & + z_gradh_exner (nproma,p_patch%nlev ,p_patch%nblks_e), & + z_rth_pr (nproma,p_patch%nlev ,p_patch%nblks_c,2), & + z_grad_rth (nproma,p_patch%nlev ,p_patch%nblks_c,4), & + z_w_concorr_me (nproma,p_patch%nlev ,p_patch%nblks_e) +#else + REAL(vp) :: z_th_ddz_exner_c(nproma,p_patch%nlev,p_patch%nblks_c), & + z_dexner_dz_c (2,nproma,p_patch%nlev,p_patch%nblks_c), & + z_vt_ie (nproma,p_patch%nlev,p_patch%nblks_e), & + z_kin_hor_e (nproma,p_patch%nlev,p_patch%nblks_e), & + z_exner_ex_pr (nproma,p_patch%nlevp1,p_patch%nblks_c), & ! nlevp1 is intended here + z_gradh_exner (nproma,p_patch%nlev,p_patch%nblks_e), & + z_rth_pr (2,nproma,p_patch%nlev,p_patch%nblks_c), & + z_grad_rth (4,nproma,p_patch%nlev,p_patch%nblks_c), & + z_w_concorr_me (nproma,p_patch%nlev,p_patch%nblks_e) +#endif + ! This field in addition has reversed index order (vertical first) for optimization +#ifdef __LOOP_EXCHANGE + REAL(vp) :: z_graddiv_vn (p_patch%nlev,nproma,p_patch%nblks_e) +#else + REAL(vp) :: z_graddiv_vn (nproma,p_patch%nlev,p_patch%nblks_e) +#endif + + REAL(wp) :: z_w_expl (nproma,p_patch%nlevp1), & + z_thermal_exp (nproma,p_patch%nblks_c), & + z_vn_avg (nproma,p_patch%nlev ), & + z_mflx_top (nproma,p_patch%nblks_c), & + z_contr_w_fl_l (nproma,p_patch%nlevp1), & + z_rho_expl (nproma,p_patch%nlev ), & + z_exner_expl (nproma,p_patch%nlev ) + REAL(wp) :: z_theta_tavg_m1, z_theta_tavg, z_rho_tavg_m1, z_rho_tavg + REAL(wp) :: z_thermal_exp_local ! local variable to use in OpenACC loop + + + + ! The data type vp (variable precision) is by default the same as wp but reduces + ! 
to single precision when the __MIXED_PRECISION cpp flag is set at compile time + + ! TODO : of these, fairly easy to scalarize: z_theta_v_pr_ic + REAL(vp) :: z_alpha (nproma,p_patch%nlevp1), & + z_beta (nproma,p_patch%nlev ), & + z_q (nproma,p_patch%nlev ), & + z_graddiv2_vn (nproma,p_patch%nlev ), & + z_theta_v_pr_ic (nproma,p_patch%nlevp1), & + z_exner_ic (nproma,p_patch%nlevp1), & + z_w_concorr_mc (nproma,p_patch%nlev ), & + z_flxdiv_mass (nproma,p_patch%nlev ), & + z_flxdiv_theta (nproma,p_patch%nlev ), & + z_hydro_corr (nproma,p_patch%nlev,p_patch%nblks_e) + + REAL(vp) :: z_a, z_b, z_c, z_g, z_gamma, & + z_w_backtraj, z_theta_v_pr_mc_m1, z_theta_v_pr_mc + +#ifdef _OPENACC + REAL(vp) :: z_w_concorr_mc_m0, z_w_concorr_mc_m1, z_w_concorr_mc_m2 +#endif + + REAL(wp) :: z_theta1, z_theta2, wgt_nnow_vel, wgt_nnew_vel, & + dt_shift, wgt_nnow_rth, wgt_nnew_rth, dthalf, & + r_nsubsteps, r_dtimensubsteps, scal_divdamp_o2, & + alin, dz32, df32, dz42, df42, bqdr, aqdr, & + zf, dzlin, dzqdr + ! time shifts for linear interpolation of nest UBC + REAL(wp) :: dt_linintp_ubc, dt_linintp_ubc_nnow, dt_linintp_ubc_nnew + REAL(wp) :: z_raylfac(nrdmax(p_patch%id)) + REAL(wp) :: z_ntdistv_bary_1, distv_bary_1, z_ntdistv_bary_2, distv_bary_2 + + REAL(wp), DIMENSION(p_patch%nlev) :: scal_divdamp, bdy_divdamp, enh_divdamp_fac + REAL(vp) :: z_dwdz_dd(nproma,kstart_dd3d(p_patch%id):p_patch%nlev,p_patch%nblks_c) + + ! Local variables for normal wind tendencies and differentials + REAL(wp) :: z_ddt_vn_dyn, z_ddt_vn_apc, z_ddt_vn_cor, & + & z_ddt_vn_pgr, z_ddt_vn_ray, & + & z_d_vn_dmp, z_d_vn_iau + + REAL(wp), DIMENSION(nproma, p_patch%nblks_c) :: w_1 + !-------------------------------------------------------------------------- + ! OUT/INOUT FIELDS DSL + ! + + + + INTEGER, DIMENSION(:,:,:,:), POINTER :: ikoffset_dsl + + ! + ! 
OUT/INOUT FIELDS DSL + !-------------------------------------------------------------------------- + +#ifdef __INTEL_COMPILER +!DIR$ ATTRIBUTES ALIGN : 64 :: z_theta_v_fl_e,z_theta_v_e,z_rho_e,z_mass_fl_div +!DIR$ ATTRIBUTES ALIGN : 64 :: z_theta_v_fl_div,z_theta_v_v,z_rho_v,z_dwdz_dd +!DIR$ ATTRIBUTES ALIGN : 64 :: z_th_ddz_exner_c,z_dexner_dz_c,z_vt_ie,z_kin_hor_e +!DIR$ ATTRIBUTES ALIGN : 64 :: z_exner_ex_pr,z_gradh_exner,z_rth_pr,z_grad_rth +!DIR$ ATTRIBUTES ALIGN : 64 :: z_w_concorr_me,z_graddiv_vn,z_w_expl,z_thermal_exp +!DIR$ ATTRIBUTES ALIGN : 64 :: z_vn_avg,z_mflx_top,z_contr_w_fl_l,z_rho_expl +!DIR$ ATTRIBUTES ALIGN : 64 :: z_exner_expl,z_alpha,z_beta,z_q,z_graddiv2_vn +!DIR$ ATTRIBUTES ALIGN : 64 :: z_theta_v_pr_ic,z_exner_ic,z_w_concorr_mc +!DIR$ ATTRIBUTES ALIGN : 64 :: z_flxdiv_mass,z_flxdiv_theta,z_hydro_corr +!DIR$ ATTRIBUTES ALIGN : 64 :: z_raylfac,scal_divdamp,bdy_divdamp,enh_divdamp_fac +#endif + + INTEGER :: nproma_gradp, nblks_gradp, npromz_gradp, nlen_gradp, jk_start + LOGICAL :: lcompute, lcleanup, lvn_only, lvn_pos + + ! Local variables to control vertical nesting + LOGICAL :: l_vert_nested, l_child_vertnest + + ! Pointers + INTEGER, POINTER, CONTIGUOUS :: & + ! to cell indices + icidx(:,:,:), icblk(:,:,:), & + ! to edge indices + ieidx(:,:,:), ieblk(:,:,:), & + ! to vertex indices + ividx(:,:,:), ivblk(:,:,:), & + ! to vertical neighbor indices for pressure gradient computation + ikidx(:,:,:,:), & + ! to quad edge indices + iqidx(:,:,:), iqblk(:,:,:), & + ! for igradp_method = 3 + iplev(:), ipeidx(:), ipeblk(:) +#if !defined (__LOOP_EXCHANGE) && !defined (__SX__) +! 
These convenience pointers are needed to avoid PGI trying to copy derived type instance btraj back from device to host + INTEGER, POINTER :: p_cell_idx(:,:,:), p_cell_blk(:,:,:) + REAL(vp), POINTER :: p_distv_bary(:,:,:,:) +#endif +#ifdef __SX__ + REAL(wp) :: z_rho_tavg_m1_v(nproma), z_theta_tavg_m1_v(nproma) + REAL(vp) :: z_theta_v_pr_mc_m1_v(nproma) +#endif + !------------------------------------------------------------------- + IF (use_dycore_barrier) THEN + CALL timer_start(timer_barrier) + CALL work_mpi_barrier() + CALL timer_stop(timer_barrier) + ENDIF + !------------------------------------------------------------------- + +#if !defined (__LOOP_EXCHANGE) && !defined (__SX__) + CALL btraj%construct(nproma,p_patch%nlev,p_patch%nblks_e,2) +! These convenience pointers are needed to avoid PGI trying to copy derived type instance btraj back from device to host + p_cell_idx => btraj%cell_idx + p_cell_blk => btraj%cell_blk + p_distv_bary => btraj%distv_bary +#endif + + jg = p_patch%id + + IF (lvert_nest .AND. (p_patch%nshift_total > 0)) THEN + l_vert_nested = .TRUE. + nshift_total = p_patch%nshift_total + ELSE + l_vert_nested = .FALSE. + nshift_total = 0 + ENDIF + IF (lvert_nest .AND. p_patch%n_childdom > 0 .AND. & + (p_patch%nshift_child > 0 .OR. p_patch%nshift_total > 0)) THEN + l_child_vertnest = .TRUE. + nshift = p_patch%nshift_child + 1 + ELSE + l_child_vertnest = .FALSE. + nshift = 0 + ENDIF + dthalf = 0.5_wp*dtime + + CALL message('DSL', 'start running dycore kernels') + IF (ltimer) CALL timer_start(timer_solve_nh) + + ! Inverse value of ndyn_substeps for tracer advection precomputations + r_nsubsteps = 1._wp/REAL(ndyn_substeps_var(jg),wp) + + ! Inverse value of dtime * ndyn_substeps_var + r_dtimensubsteps = 1._wp/(dtime*REAL(ndyn_substeps_var(jg),wp)) + + ! number of vertical levels + nlev = p_patch%nlev + nlevp1 = p_patch%nlevp1 + + ! Set pointers to neighbor cells + icidx => p_patch%edges%cell_idx + icblk => p_patch%edges%cell_blk + + ! 
Set pointers to neighbor edges + ieidx => p_patch%cells%edge_idx + ieblk => p_patch%cells%edge_blk + + ! Set pointers to vertices of an edge + ividx => p_patch%edges%vertex_idx + ivblk => p_patch%edges%vertex_blk + + ! Set pointer to vertical neighbor indices for pressure gradient + ikidx => p_nh%metrics%vertidx_gradp + + ! Set pointers to quad edges + iqidx => p_patch%edges%quad_idx + iqblk => p_patch%edges%quad_blk + + ! DA: moved from below to here to get into the same ACC data section + iplev => p_nh%metrics%pg_vertidx + ipeidx => p_nh%metrics%pg_edgeidx + ipeblk => p_nh%metrics%pg_edgeblk + + !$ser verbatim mo_solve_nonhydro_ctr = mo_solve_nonhydro_ctr + 1 + + ! Precompute Rayleigh damping factor + DO jk = 2, nrdmax(jg) + z_raylfac(jk) = 1.0_wp/(1.0_wp+dtime*p_nh%metrics%rayleigh_w(jk)) + ENDDO + + ! Fourth-order divergence damping + ! + ! The divergence damping factor enh_divdamp_fac is defined as a profile in height z + ! above sea level with 4 height sections: + ! + ! enh_divdamp_fac(z) = divdamp_fac ! z <= divdamp_z + ! enh_divdamp_fac(z) = divdamp_fac + (z-divdamp_z )* alin ! divdamp_z <= z <= divdamp_z2 + ! enh_divdamp_fac(z) = divdamp_fac2 + (z-divdamp_z2)*(aqdr+(z-divdamp_z2)*bqdr) ! divdamp_z2 <= z <= divdamp_z4 + ! enh_divdamp_fac(z) = divdamp_fac4 ! divdamp_z4 <= z + ! + alin = (divdamp_fac2-divdamp_fac)/(divdamp_z2-divdamp_z) + ! + df32 = divdamp_fac3-divdamp_fac2; dz32 = divdamp_z3-divdamp_z2 + df42 = divdamp_fac4-divdamp_fac2; dz42 = divdamp_z4-divdamp_z2 + ! + bqdr = (df42*dz32-df32*dz42)/(dz32*dz42*(dz42-dz32)) + aqdr = df32/dz32-bqdr*dz32 + ! + DO jk = 1, nlev + jks = jk + nshift_total + zf = 0.5_wp*(vct_a(jks)+vct_a(jks+1)) + dzlin = MIN(divdamp_z2-divdamp_z ,MAX(0._wp,zf-divdamp_z )) + dzqdr = MIN(divdamp_z4-divdamp_z2,MAX(0._wp,zf-divdamp_z2)) + ! 
+ IF (divdamp_order == 24) THEN + enh_divdamp_fac(jk) = MAX( 0._wp, divdamp_fac + dzlin*alin + dzqdr*(aqdr+dzqdr*bqdr) - 0.25_wp*divdamp_fac_o2 ) + ELSE + enh_divdamp_fac(jk) = divdamp_fac + dzlin*alin + dzqdr*(aqdr+dzqdr*bqdr) + ENDIF + ENDDO + + scal_divdamp(:) = - enh_divdamp_fac(:) * p_patch%geometry_info%mean_cell_area**2 + + ! Time increment for backward-shifting of lateral boundary mass flux + dt_shift = dtime*REAL(2*ndyn_substeps_var(jg)-1,wp)/2._wp ! == dt_phy - 0.5*dtime + + ! Time increment for linear interpolation of nest UBC. + ! The linear interpolation is of the form + ! \phi(t) = \phi0 + (t-t0)*dphi/dt, with t=(jstep+0.5)*dtime, and t0=dt_phy + ! + ! dt_linintp_ubc == (t-t0) + dt_linintp_ubc = jstep*dtime - dt_shift ! valid for center of current time step + dt_linintp_ubc_nnow = dt_linintp_ubc - 0.5_wp*dtime + dt_linintp_ubc_nnew = dt_linintp_ubc + 0.5_wp*dtime + + ! Coefficient for reduced fourth-order divergence damping along nest boundaries + bdy_divdamp(:) = 0.75_wp/(nudge_max_coeff + dbl_eps)*ABS(scal_divdamp(:)) + + !$ACC DATA CREATE(z_kin_hor_e, z_vt_ie, z_w_concorr_me, z_mass_fl_div, z_theta_v_fl_e, z_theta_v_fl_div) & + !$ACC CREATE(z_dexner_dz_c, z_exner_ex_pr, z_gradh_exner, z_rth_pr, z_grad_rth) & + !$ACC CREATE(z_theta_v_pr_ic, z_th_ddz_exner_c, z_w_concorr_mc) & + !$ACC CREATE(z_vn_avg, z_rho_e, z_theta_v_e, z_dwdz_dd, z_thermal_exp, z_mflx_top) & + !$ACC CREATE(z_exner_ic, z_alpha, z_beta, z_q, z_contr_w_fl_l, z_exner_expl) & + !$ACC CREATE(z_flxdiv_mass, z_flxdiv_theta, z_rho_expl, z_w_expl) & + !$ACC CREATE(z_rho_v, z_theta_v_v, z_graddiv_vn, z_hydro_corr, z_graddiv2_vn) & + !$ACC CREATE(w_1) & + !$ACC COPYIN(nflatlev, nflat_gradp, kstart_dd3d, kstart_moist, nrdmax) & + !$ACC COPYIN(z_raylfac, ndyn_substeps_var, scal_divdamp, bdy_divdamp) & +#ifndef __LOOP_EXCHANGE + !$ACC PRESENT(p_cell_idx, p_cell_blk, p_distv_bary) & +#endif + !$ACC PRESENT(prep_adv, p_int, p_patch, p_nh) & + !$ACC PRESENT(icidx, icblk, ividx, ivblk, ieidx, 
ieblk, ikidx, iqidx, iqblk) & + !$ACC PRESENT(ipeidx, ipeblk, iplev) & + !$ACC IF(i_am_accel_node) + + + ! scaling factor for second-order divergence damping: divdamp_fac_o2*delta_x**2 + ! delta_x**2 is approximated by the mean cell area + scal_divdamp_o2 = divdamp_fac_o2 * p_patch%geometry_info%mean_cell_area + + + IF (p_test_run) THEN + !$ACC KERNELS IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + z_rho_e = 0._wp + z_theta_v_e = 0._wp + z_dwdz_dd = 0._wp + z_graddiv_vn= 0._wp + !$ACC END KERNELS + ENDIF + + ! Set time levels of ddt_adv fields for call to velocity_tendencies + IF (itime_scheme >= 4) THEN ! Velocity advection averaging nnow and nnew tendencies + ntl1 = nnow + ntl2 = nnew + ELSE ! Velocity advection is taken at nnew only + ntl1 = 1 + ntl2 = 1 + ENDIF + + ! Weighting coefficients for velocity advection if tendency averaging is used + ! The off-centering specified here turned out to be beneficial to numerical + ! stability in extreme situations + wgt_nnow_vel = 0.5_wp - veladv_offctr ! default value for veladv_offctr is 0.25 + wgt_nnew_vel = 0.5_wp + veladv_offctr + + ! Weighting coefficients for rho and theta at interface levels in the corrector step + ! This empirically determined weighting minimizes the vertical wind off-centering + ! needed for numerical stability of vertical sound wave propagation + wgt_nnew_rth = 0.5_wp + rhotheta_offctr ! default value for rhotheta_offctr is -0.1 + wgt_nnow_rth = 1._wp - wgt_nnew_rth + +!$NEC sparse + DO istep = 1, 2 + + IF (istep == 1) THEN ! predictor step + IF (itime_scheme >= 6 .OR. l_init .OR. l_recompute) THEN + IF (itime_scheme < 6 .AND. .NOT. l_init) THEN + lvn_only = .TRUE. ! Recompute only vn tendency + ELSE + lvn_only = .FALSE. + ENDIF + CALL velocity_tendencies(p_nh%prog(nnow),p_patch,p_int,p_nh%metrics,p_nh%diag,z_w_concorr_me, & + z_kin_hor_e,z_vt_ie,ntl1,istep,lvn_only,dtime,dt_linintp_ubc_nnow) + ENDIF + nvar = nnow + ELSE ! corrector step + lvn_only = .FALSE. 
+ CALL velocity_tendencies(p_nh%prog(nnew),p_patch,p_int,p_nh%metrics,p_nh%diag,z_w_concorr_me, & + z_kin_hor_e,z_vt_ie,ntl2,istep,lvn_only,dtime,dt_linintp_ubc_nnew) + nvar = nnew + ENDIF + + + ! Preparations for igradp_method = 3/5 (reformulated extrapolation below the ground) + IF (istep == 1 .AND. (igradp_method == 3 .OR. igradp_method == 5)) THEN + + nproma_gradp = cpu_min_nproma(nproma,256) + nblks_gradp = INT(p_nh%metrics%pg_listdim/nproma_gradp) + npromz_gradp = MOD(p_nh%metrics%pg_listdim,nproma_gradp) + IF (npromz_gradp > 0) THEN + nblks_gradp = nblks_gradp + 1 + ELSE + npromz_gradp = nproma_gradp + ENDIF + + ENDIF + + IF (timers_level > 5) CALL timer_start(timer_solve_nh_cellcomp) + + ! Computations on mass points +!$OMP PARALLEL PRIVATE (rl_start,rl_end,i_startblk,i_endblk) + + rl_start = 3 + IF (istep == 1) THEN + rl_end = min_rlcell_int - 1 + ELSE ! halo points are not needed in step 2 + rl_end = min_rlcell_int + ENDIF + + i_startblk = p_patch%cells%start_block(rl_start) + i_endblk = p_patch%cells%end_block(rl_end) + + ! DSL: Instead of calling init_zero_contiguous_dp to set z_rth_pr to zero, + ! introduce a stencil that does the same thing, but does not touch the + ! padding, so it can be verified. + + rl_start_2 = 1 + rl_end_2 = min_rlcell + + i_startblk_2 = p_patch%cells%start_block(rl_start_2) + i_endblk_2 = p_patch%cells%end_block(rl_end_2) + + ! initialize nest boundary points of z_rth_pr with zero + IF (istep == 1 .AND. (jg > 1 .OR. l_limited_area)) THEN + + CALL get_indices_c(p_patch, 1, i_startblk_2, i_endblk_2, & + i_startidx_2, i_endidx_2, rl_start_2, rl_end_2) + + + !$ser init directory="." 
prefix="liskov-serialisation" + + !$ser savepoint mo_solve_nonhydro_stencil_01_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' + + !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) +!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jk = 1, nlev + DO jc = i_startidx_2, i_endidx_2 + z_rth_pr(jc,jk,1,1) = 0._wp + z_rth_pr(jc,jk,1,2) = 0._wp + ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_01_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' + + !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) +!$OMP BARRIER + ENDIF + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc,z_exner_ic,z_theta_v_pr_ic,z_w_backtraj,& +!$OMP z_theta_v_pr_mc_m1,z_theta_v_pr_mc,z_rho_tavg_m1,z_rho_tavg, & +#ifdef __SX__ +!$OMP z_rho_tavg_m1_v,z_theta_tavg_m1_v,z_theta_v_pr_mc_m1_v, & +#endif +!$OMP z_theta_tavg_m1,z_theta_tavg,z_thermal_exp_local) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk, i_endblk + + CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + IF (istep == 1) THEN ! 
to be executed in predictor step only + + + !$ser savepoint mo_solve_nonhydro_stencil_02_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing exner_exfac=p_nh%metrics%exner_exfac(:,:,1)' + + !$ser data exner_exfac=p_nh%metrics%exner_exfac(:,:,1) + + PRINT *, 'Serializing exner=p_nh%prog(nnow)%exner(:,:,1)' + + !$ser data exner=p_nh%prog(nnow)%exner(:,:,1) + + PRINT *, 'Serializing exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1)' + + !$ser data exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1) + + PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' + + !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 1, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + ! temporally extrapolated perturbation Exner pressure (used for horizontal gradients only) + z_exner_ex_pr(jc,jk,jb) = (1._wp + p_nh%metrics%exner_exfac(jc,jk,jb)) * & + (p_nh%prog(nnow)%exner(jc,jk,jb) - p_nh%metrics%exner_ref_mc(jc,jk,jb)) - & + p_nh%metrics%exner_exfac(jc,jk,jb) * p_nh%diag%exner_pr(jc,jk,jb) + + ! 
non-extrapolated perturbation Exner pressure, saved in exner_pr for the next time step + p_nh%diag%exner_pr(jc,jk,jb) = p_nh%prog(nnow)%exner(jc,jk,jb) - & + p_nh%metrics%exner_ref_mc(jc,jk,jb) + + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_02_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing exner_exfac=p_nh%metrics%exner_exfac(:,:,1)' + + !$ser data exner_exfac=p_nh%metrics%exner_exfac(:,:,1) + + PRINT *, 'Serializing exner=p_nh%prog(nnow)%exner(:,:,1)' + + !$ser data exner=p_nh%prog(nnow)%exner(:,:,1) + + PRINT *, 'Serializing exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1)' + + !$ser data exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1) + + PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' + + !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + ! The purpose of the extra level of exner_pr is to simplify coding for + ! igradp_method=4/5. It is multiplied with zero and thus actually not used + + + !$ser savepoint mo_solve_nonhydro_stencil_03_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + !$ACC KERNELS IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + z_exner_ex_pr(i_startidx:i_endidx,nlevp1,jb) = 0._wp + !$ACC END KERNELS + + !$ser savepoint mo_solve_nonhydro_stencil_03_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + IF (l_open_ubc .AND. .NOT. l_vert_nested) THEN + ! Compute contribution of thermal expansion to vertical wind at model top + ! Isothermal expansion is assumed + +#ifdef _OPENACC +! Exchanging loop order to remove data dep +! 
TODO: evaluate if this makes sense + !$ACC PARALLEL IF(i_am_accel_node) PRIVATE(z_thermal_exp_local) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + z_thermal_exp_local = 0._wp + DO jk = 1, nlev + z_thermal_exp_local= z_thermal_exp_local + cvd_o_rd & + * p_nh%diag%ddt_exner_phy(jc,jk,jb) & + / (p_nh%prog(nnow)%exner(jc,jk,jb)*p_nh%metrics%inv_ddqz_z_full(jc,jk,jb)) + ENDDO + z_thermal_exp(jc,jb) = z_thermal_exp_local + ENDDO + !$ACC END PARALLEL + +#else + z_thermal_exp(:,jb) = 0._wp + DO jk = 1, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + z_thermal_exp(jc,jb) = z_thermal_exp(jc,jb) + cvd_o_rd & + * p_nh%diag%ddt_exner_phy(jc,jk,jb) & + / (p_nh%prog(nnow)%exner(jc,jk,jb)*p_nh%metrics%inv_ddqz_z_full(jc,jk,jb)) + ENDDO + ENDDO +#endif + + ENDIF + + IF (igradp_method <= 3) THEN + ! Perturbation Exner pressure on bottom half level +!DIR$ IVDEP + + + !$ser savepoint mo_solve_nonhydro_stencil_04_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' + + !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' + + !$ser data z_exner_ic=z_exner_ic(:,:) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + z_exner_ic(jc,nlevp1) = & + p_nh%metrics%wgtfacq_c(jc,1,jb)*z_exner_ex_pr(jc,nlev ,jb) + & + p_nh%metrics%wgtfacq_c(jc,2,jb)*z_exner_ex_pr(jc,nlev-1,jb) + & + p_nh%metrics%wgtfacq_c(jc,3,jb)*z_exner_ex_pr(jc,nlev-2,jb) + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_04_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' + + !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) + + PRINT *, 'Serializing 
z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' + + !$ser data z_exner_ic=z_exner_ic(:,:) + +! WS: moved full z_exner_ic calculation here to avoid OpenACC dependency on jk+1 below +! possibly GZ will want to consider the cache ramifications of this change for CPU + + !$ser savepoint mo_solve_nonhydro_stencil_05_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' + + !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' + + !$ser data z_exner_ic=z_exner_ic(:,:) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(32, 4) + DO jk = nlev, MAX(2,nflatlev(jg)), -1 +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + ! Exner pressure on remaining half levels for metric correction term + z_exner_ic(jc,jk) = & + p_nh%metrics%wgtfac_c(jc,jk,jb) *z_exner_ex_pr(jc,jk ,jb) + & + (1._vp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_exner_ex_pr(jc,jk-1,jb) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_05_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' + + !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' + + !$ser data z_exner_ic=z_exner_ic(:,:) + + + + !$ser savepoint mo_solve_nonhydro_stencil_06_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' + + !$ser data z_exner_ic=z_exner_ic(:,:) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' + + !$ser data 
inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) + + PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' + + !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(32, 4) + DO jk = nlev, MAX(2,nflatlev(jg)), -1 +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + + ! First vertical derivative of perturbation Exner pressure +#ifdef __SWAPDIM + z_dexner_dz_c(jc,jk,jb,1) = & +#else + z_dexner_dz_c(1,jc,jk,jb) = & +#endif + (z_exner_ic(jc,jk) - z_exner_ic(jc,jk+1)) * & + p_nh%metrics%inv_ddqz_z_full(jc,jk,jb) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_06_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' + + !$ser data z_exner_ic=z_exner_ic(:,:) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) + + PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' + + !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) + + IF (nflatlev(jg) == 1) THEN + ! Perturbation Exner pressure on top half level +!DIR$ IVDEP + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + z_exner_ic(jc,1) = & + p_nh%metrics%wgtfacq1_c(jc,1,jb)*z_exner_ex_pr(jc,1,jb) + & + p_nh%metrics%wgtfacq1_c(jc,2,jb)*z_exner_ex_pr(jc,2,jb) + & + p_nh%metrics%wgtfacq1_c(jc,3,jb)*z_exner_ex_pr(jc,3,jb) + + ! 
First vertical derivative of perturbation Exner pressure +#ifdef __SWAPDIM + z_dexner_dz_c(jc,1,jb,1) = & +#else + z_dexner_dz_c(1,jc,1,jb) = & +#endif + (z_exner_ic(jc,1) - z_exner_ic(jc,2)) * & + p_nh%metrics%inv_ddqz_z_full(jc,1,jb) + ENDDO + !$ACC END PARALLEL + ENDIF + + ENDIF + + + !$ser savepoint mo_solve_nonhydro_stencil_07_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing rho=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' + + !$ser data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) + + PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' + + !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) + + PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' + + !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) +#ifdef __SWAPDIM + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + z_rth_pr(jc,1,jb,1) = p_nh%prog(nnow)%rho(jc,1,jb) - & + p_nh%metrics%rho_ref_mc(jc,1,jb) + z_rth_pr(jc,1,jb,2) = p_nh%prog(nnow)%theta_v(jc,1,jb) - & + p_nh%metrics%theta_ref_mc(jc,1,jb) + ENDDO +#else + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + z_rth_pr(1,jc,1,jb) = p_nh%prog(nnow)%rho(jc,1,jb) - & + p_nh%metrics%rho_ref_mc(jc,1,jb) + z_rth_pr(2,jc,1,jb) = p_nh%prog(nnow)%theta_v(jc,1,jb) - & + p_nh%metrics%theta_ref_mc(jc,1,jb) + ENDDO +#endif + !$ACC END PARALLEL + + + !$ser savepoint mo_solve_nonhydro_stencil_07_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing rho=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' + + !$ser 
data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) + + PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' + + !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) + + PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' + + !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) + + + !$ser savepoint mo_solve_nonhydro_stencil_08_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' + + !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) + + PRINT *, 'Serializing rho=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' + + !$ser data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) + + PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' + + !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) + + PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' + + !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(32, 4) + DO jk = 2, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + ! density at interface levels for vertical flux divergence computation + p_nh%diag%rho_ic(jc,jk,jb) = p_nh%metrics%wgtfac_c(jc,jk,jb) *p_nh%prog(nnow)%rho(jc,jk ,jb) + & + (1._wp-p_nh%metrics%wgtfac_c(jc,jk,jb))*p_nh%prog(nnow)%rho(jc,jk-1,jb) + + ! 
perturbation density and virtual potential temperature at main levels for horizontal flux divergence term + ! (needed in the predictor step only) +#ifdef __SWAPDIM + z_rth_pr(jc,jk,jb,1) = p_nh%prog(nnow)%rho(jc,jk,jb) - p_nh%metrics%rho_ref_mc(jc,jk,jb) + z_rth_pr(jc,jk,jb,2) = p_nh%prog(nnow)%theta_v(jc,jk,jb) - p_nh%metrics%theta_ref_mc(jc,jk,jb) +#else + z_rth_pr(1,jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb) - p_nh%metrics%rho_ref_mc(jc,jk,jb) + z_rth_pr(2,jc,jk,jb) = p_nh%prog(nnow)%theta_v(jc,jk,jb) - p_nh%metrics%theta_ref_mc(jc,jk,jb) +#endif +#ifdef _OPENACC + ENDDO + ENDDO + !$ACC END PARALLEL +#endif + + + !$ser savepoint mo_solve_nonhydro_stencil_08_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' + + !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) + + PRINT *, 'Serializing rho=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' + + !$ser data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) + + PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' + + !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) + + PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' + + !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) + + + !$ser savepoint mo_solve_nonhydro_stencil_09_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' + + !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) + + PRINT *, 'Serializing 
theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' + + !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) + + PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' + + !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) + + PRINT *, 'Serializing d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1)' + + !$ser data d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1) + + PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' + + !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) + + PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' + + !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) + + PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1)' + + !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1) +#ifdef _OPENACC + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 2, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx +#endif + + ! perturbation virtual potential temperature at interface levels +#ifdef __SWAPDIM + z_theta_v_pr_ic(jc,jk) = & + p_nh%metrics%wgtfac_c(jc,jk,jb) *z_rth_pr(jc,jk ,jb,2) + & + (1._vp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_rth_pr(jc,jk-1,jb,2) +#else + z_theta_v_pr_ic(jc,jk) = & + p_nh%metrics%wgtfac_c(jc,jk,jb) *z_rth_pr(2,jc,jk ,jb) + & + (1._vp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_rth_pr(2,jc,jk-1,jb) +#endif + ! virtual potential temperature at interface levels + p_nh%diag%theta_v_ic(jc,jk,jb) = & + p_nh%metrics%wgtfac_c(jc,jk,jb) *p_nh%prog(nnow)%theta_v(jc,jk ,jb) + & + (1._wp-p_nh%metrics%wgtfac_c(jc,jk,jb))*p_nh%prog(nnow)%theta_v(jc,jk-1,jb) + + ! 
vertical pressure gradient * theta_v + z_th_ddz_exner_c(jc,jk,jb) = p_nh%metrics%vwind_expl_wgt(jc,jb)* & + p_nh%diag%theta_v_ic(jc,jk,jb) * (p_nh%diag%exner_pr(jc,jk-1,jb)- & + p_nh%diag%exner_pr(jc,jk,jb)) / p_nh%metrics%ddqz_z_half(jc,jk,jb) + & + z_theta_v_pr_ic(jc,jk)*p_nh%metrics%d_exner_dz_ref_ic(jc,jk,jb) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_09_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' + + !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) + + PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' + + !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) + + PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' + + !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) + + PRINT *, 'Serializing d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1)' + + !$ser data d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1) + + PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' + + !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) + + PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' + + !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) + + PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1)' + + !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1) + + + ELSE ! istep = 2 - in this step, an upwind-biased discretization is used for rho_ic and theta_v_ic + ! in order to reduce the numerical dispersion errors +#ifdef __SX__ + ! 
precompute values for jk = 1 which are previous values in first iteration of jk compute loop + jk = 2 + DO jc = i_startidx, i_endidx + z_rho_tavg_m1_v(jc) = wgt_nnow_rth*p_nh%prog(nnow)%rho(jc,jk-1,jb) + & + wgt_nnew_rth*p_nh%prog(nvar)%rho(jc,jk-1,jb) + z_theta_tavg_m1_v(jc) = wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,jk-1,jb) + & + wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,jk-1,jb) + z_theta_v_pr_mc_m1_v(jc) = z_theta_tavg_m1_v(jc) - p_nh%metrics%theta_ref_mc(jc,jk-1,jb) + ENDDO +#endif + + + !$ser savepoint mo_solve_nonhydro_stencil_10_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing wgt_nnew_rth=wgt_nnew_rth' + + !$ser data wgt_nnew_rth=wgt_nnew_rth + + PRINT *, 'Serializing wgt_nnow_rth=wgt_nnow_rth' + + !$ser data wgt_nnow_rth=wgt_nnow_rth + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + + PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' + + !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) + + PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing rho_var=p_nh%prog(nvar)%rho(:,:,1)' + + !$ser data rho_var=p_nh%prog(nvar)%rho(:,:,1) + + PRINT *, 'Serializing theta_now=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_now=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing theta_var=p_nh%prog(nvar)%theta_v(:,:,1)' + + !$ser data theta_var=p_nh%prog(nvar)%theta_v(:,:,1) + + PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' + + !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) + + PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' + + !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) + + PRINT *, 'Serializing 
vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' + + !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) + + PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' + + !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) + + PRINT *, 'Serializing d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1)' + + !$ser data d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) + + PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' + + !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) + + PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1)' + + !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(128, *) & + !$ACC PRIVATE(z_w_backtraj, z_rho_tavg_m1, z_theta_tavg_m1, z_rho_tavg) & + !$ACC PRIVATE(z_theta_tavg, z_theta_v_pr_mc_m1, z_theta_v_pr_mc) + DO jk = 2, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + ! backward trajectory - use w(nnew) in order to be at the same time level as w_concorr + z_w_backtraj = - (p_nh%prog(nnew)%w(jc,jk,jb) - p_nh%diag%w_concorr_c(jc,jk,jb)) * & + dtime*0.5_wp/p_nh%metrics%ddqz_z_half(jc,jk,jb) + + ! temporally averaged density and virtual potential temperature depending on rhotheta_offctr + ! 
(see pre-computation above) +#ifndef __SX__ + z_rho_tavg_m1 = wgt_nnow_rth*p_nh%prog(nnow)%rho(jc,jk-1,jb) + & + wgt_nnew_rth*p_nh%prog(nvar)%rho(jc,jk-1,jb) + z_theta_tavg_m1 = wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,jk-1,jb) + & + wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,jk-1,jb) +#else + z_rho_tavg_m1 = z_rho_tavg_m1_v(jc) + z_theta_tavg_m1 = z_theta_tavg_m1_v(jc) +#endif + + z_rho_tavg = wgt_nnow_rth*p_nh%prog(nnow)%rho(jc,jk,jb) + & + wgt_nnew_rth*p_nh%prog(nvar)%rho(jc,jk,jb) + z_theta_tavg = wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,jk,jb) + & + wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,jk,jb) + + ! density at interface levels for vertical flux divergence computation + p_nh%diag%rho_ic(jc,jk,jb) = p_nh%metrics%wgtfac_c(jc,jk,jb) *z_rho_tavg + & + (1._wp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_rho_tavg_m1 + & + z_w_backtraj*(z_rho_tavg_m1-z_rho_tavg) + + ! perturbation virtual potential temperature at main levels +#ifndef __SX__ + z_theta_v_pr_mc_m1 = z_theta_tavg_m1 - p_nh%metrics%theta_ref_mc(jc,jk-1,jb) +#else + z_theta_v_pr_mc_m1 = z_theta_v_pr_mc_m1_v(jc) +#endif + z_theta_v_pr_mc = z_theta_tavg - p_nh%metrics%theta_ref_mc(jc,jk,jb) + + ! perturbation virtual potential temperature at interface levels + z_theta_v_pr_ic(jc,jk) = & + p_nh%metrics%wgtfac_c(jc,jk,jb) *z_theta_v_pr_mc + & + (1._vp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_theta_v_pr_mc_m1 + + ! virtual potential temperature at interface levels + p_nh%diag%theta_v_ic(jc,jk,jb) = p_nh%metrics%wgtfac_c(jc,jk,jb) *z_theta_tavg + & + (1._wp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_theta_tavg_m1 + & + z_w_backtraj*(z_theta_tavg_m1-z_theta_tavg) + + ! vertical pressure gradient * theta_v + z_th_ddz_exner_c(jc,jk,jb) = p_nh%metrics%vwind_expl_wgt(jc,jb)* & + p_nh%diag%theta_v_ic(jc,jk,jb) * (p_nh%diag%exner_pr(jc,jk-1,jb)- & + p_nh%diag%exner_pr(jc,jk,jb)) / p_nh%metrics%ddqz_z_half(jc,jk,jb) + & + z_theta_v_pr_ic(jc,jk)*p_nh%metrics%d_exner_dz_ref_ic(jc,jk,jb) + +#ifdef __SX__ + ! 
save current values as previous values for next iteration + z_rho_tavg_m1_v(jc) = z_rho_tavg + z_theta_tavg_m1_v(jc) = z_theta_tavg + z_theta_v_pr_mc_m1_v(jc) = z_theta_v_pr_mc +#endif + + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_10_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing wgt_nnew_rth=wgt_nnew_rth' + + !$ser data wgt_nnew_rth=wgt_nnew_rth + + PRINT *, 'Serializing wgt_nnow_rth=wgt_nnow_rth' + + !$ser data wgt_nnow_rth=wgt_nnow_rth + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + + PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' + + !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) + + PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing rho_var=p_nh%prog(nvar)%rho(:,:,1)' + + !$ser data rho_var=p_nh%prog(nvar)%rho(:,:,1) + + PRINT *, 'Serializing theta_now=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_now=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing theta_var=p_nh%prog(nvar)%theta_v(:,:,1)' + + !$ser data theta_var=p_nh%prog(nvar)%theta_v(:,:,1) + + PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' + + !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) + + PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' + + !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) + + PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' + + !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) + + PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' + + !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) + + PRINT *, 'Serializing d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1)' + + 
!$ser data d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) + + PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' + + !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) + + PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1)' + + !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) + + ENDIF ! istep = 1/2 + + ! rho and theta at top level (in case of vertical nesting, upper boundary conditions + ! are set in the vertical solver loop) + IF (l_open_ubc .AND. .NOT. l_vert_nested) THEN + IF ( istep == 1 ) THEN + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) +!DIR$ IVDEP + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + p_nh%diag%theta_v_ic(jc,1,jb) = & + p_nh%metrics%theta_ref_ic(jc,1,jb) + & +#ifdef __SWAPDIM + p_nh%metrics%wgtfacq1_c(jc,1,jb)*z_rth_pr(jc,1,jb,2) + & + p_nh%metrics%wgtfacq1_c(jc,2,jb)*z_rth_pr(jc,2,jb,2) + & + p_nh%metrics%wgtfacq1_c(jc,3,jb)*z_rth_pr(jc,3,jb,2) +#else + p_nh%metrics%wgtfacq1_c(jc,1,jb)*z_rth_pr(2,jc,1,jb) + & + p_nh%metrics%wgtfacq1_c(jc,2,jb)*z_rth_pr(2,jc,2,jb) + & + p_nh%metrics%wgtfacq1_c(jc,3,jb)*z_rth_pr(2,jc,3,jb) +#endif + ENDDO + !$ACC END PARALLEL + ELSE ! 
ISTEP == 2 + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) +!DIR$ IVDEP + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + p_nh%diag%theta_v_ic(jc,1,jb) = p_nh%metrics%theta_ref_ic(jc,1,jb) + & + p_nh%metrics%wgtfacq1_c(jc,1,jb)* ( wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,1,jb) + & + wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,1,jb) - p_nh%metrics%theta_ref_mc(jc,1,jb) ) + & + p_nh%metrics%wgtfacq1_c(jc,2,jb)*( wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,2,jb) + & + wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,2,jb) - p_nh%metrics%theta_ref_mc(jc,2,jb) ) + & + p_nh%metrics%wgtfacq1_c(jc,3,jb)*( wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,3,jb) + & + wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,3,jb) - p_nh%metrics%theta_ref_mc(jc,3,jb) ) + ENDDO + !$ACC END PARALLEL + ENDIF + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) +!DIR$ IVDEP + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + p_nh%diag%rho_ic(jc,1,jb) = wgt_nnow_rth*( & + p_nh%metrics%wgtfacq1_c(jc,1,jb)*p_nh%prog(nnow)%rho(jc,1,jb) + & + p_nh%metrics%wgtfacq1_c(jc,2,jb)*p_nh%prog(nnow)%rho(jc,2,jb) + & + p_nh%metrics%wgtfacq1_c(jc,3,jb)*p_nh%prog(nnow)%rho(jc,3,jb))+ & + wgt_nnew_rth * ( & + p_nh%metrics%wgtfacq1_c(jc,1,jb)*p_nh%prog(nvar)%rho(jc,1,jb) + & + p_nh%metrics%wgtfacq1_c(jc,2,jb)*p_nh%prog(nvar)%rho(jc,2,jb) + & + p_nh%metrics%wgtfacq1_c(jc,3,jb)*p_nh%prog(nvar)%rho(jc,3,jb) ) + ENDDO + !$ACC END PARALLEL + ENDIF + + IF (istep == 1) THEN + + ! 
Perturbation theta at top and surface levels + + !$ser savepoint mo_solve_nonhydro_stencil_11_lower_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' + + !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) +!DIR$ IVDEP + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + z_theta_v_pr_ic(jc,1) = 0._wp + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_11_lower_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' + + !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) + + + + !$ser savepoint mo_solve_nonhydro_stencil_11_upper_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' + + !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) + + PRINT *, 'Serializing z_rth_pr=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr=z_rth_pr(:,:,1,2) + + PRINT *, 'Serializing theta_ref_ic=p_nh%metrics%theta_ref_ic(:,:,1)' + + !$ser data theta_ref_ic=p_nh%metrics%theta_ref_ic(:,:,1) + + PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' + + !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) +!$ACC PARALLEL IF( i_am_accel_node ) DEFAULT(PRESENT) ASYNC(1) +!DIR$ IVDEP + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + z_theta_v_pr_ic(jc,nlevp1) = & +#ifdef __SWAPDIM + p_nh%metrics%wgtfacq_c(jc,1,jb)*z_rth_pr(jc,nlev ,jb,2) + & + p_nh%metrics%wgtfacq_c(jc,2,jb)*z_rth_pr(jc,nlev-1,jb,2) + & + p_nh%metrics%wgtfacq_c(jc,3,jb)*z_rth_pr(jc,nlev-2,jb,2) +#else + p_nh%metrics%wgtfacq_c(jc,1,jb)*z_rth_pr(2,jc,nlev ,jb) + & + p_nh%metrics%wgtfacq_c(jc,2,jb)*z_rth_pr(2,jc,nlev-1,jb) + & + p_nh%metrics%wgtfacq_c(jc,3,jb)*z_rth_pr(2,jc,nlev-2,jb) +#endif + 
p_nh%diag%theta_v_ic(jc,nlevp1,jb) = & + p_nh%metrics%theta_ref_ic(jc,nlevp1,jb) + z_theta_v_pr_ic(jc,nlevp1) + ENDDO + !$ACC END PARALLEL + + + !$ser savepoint mo_solve_nonhydro_stencil_11_upper_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' + + !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) + + PRINT *, 'Serializing z_rth_pr=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr=z_rth_pr(:,:,1,2) + + PRINT *, 'Serializing theta_ref_ic=p_nh%metrics%theta_ref_ic(:,:,1)' + + !$ser data theta_ref_ic=p_nh%metrics%theta_ref_ic(:,:,1) + + PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' + + !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) + + IF (igradp_method <= 3) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_12_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' + + !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) + + PRINT *, 'Serializing d2dexdz2_fac1_mc=p_nh%metrics%d2dexdz2_fac1_mc(:,:,1)' + + !$ser data d2dexdz2_fac1_mc=p_nh%metrics%d2dexdz2_fac1_mc(:,:,1) + + PRINT *, 'Serializing d2dexdz2_fac2_mc=p_nh%metrics%d2dexdz2_fac2_mc(:,:,1)' + + !$ser data d2dexdz2_fac2_mc=p_nh%metrics%d2dexdz2_fac2_mc(:,:,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) + + PRINT *, 'Serializing z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2)' + + !$ser data z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(32, 4) + DO jk = nflat_gradp(jg), nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + ! 
Second vertical derivative of perturbation Exner pressure (hydrostatic approximation) +#ifdef __SWAPDIM + z_dexner_dz_c(jc,jk,jb,2) = -0.5_vp * & + ((z_theta_v_pr_ic(jc,jk) - z_theta_v_pr_ic(jc,jk+1)) * & + p_nh%metrics%d2dexdz2_fac1_mc(jc,jk,jb) + z_rth_pr(jc,jk,jb,2) * & +#else + z_dexner_dz_c(2,jc,jk,jb) = -0.5_vp * & + ((z_theta_v_pr_ic(jc,jk) - z_theta_v_pr_ic(jc,jk+1)) * & + p_nh%metrics%d2dexdz2_fac1_mc(jc,jk,jb) + z_rth_pr(2,jc,jk,jb) * & +#endif + p_nh%metrics%d2dexdz2_fac2_mc(jc,jk,jb)) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_12_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' + + !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) + + PRINT *, 'Serializing d2dexdz2_fac1_mc=p_nh%metrics%d2dexdz2_fac1_mc(:,:,1)' + + !$ser data d2dexdz2_fac1_mc=p_nh%metrics%d2dexdz2_fac1_mc(:,:,1) + + PRINT *, 'Serializing d2dexdz2_fac2_mc=p_nh%metrics%d2dexdz2_fac2_mc(:,:,1)' + + !$ser data d2dexdz2_fac2_mc=p_nh%metrics%d2dexdz2_fac2_mc(:,:,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) + + PRINT *, 'Serializing z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2)' + + !$ser data z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2) + ENDIF + + ENDIF ! istep == 1 + + ENDDO +!$OMP END DO NOWAIT + + IF (istep == 1) THEN + ! Add computation of z_grad_rth (perturbation density and virtual potential temperature at main levels) + ! 
at outer halo points: needed for correct calculation of the upwind gradients for Miura scheme + rl_start = min_rlcell_int - 2 + rl_end = min_rlcell_int - 2 + + i_startblk = p_patch%cells%start_block(rl_start) + i_endblk = p_patch%cells%end_block(rl_end) + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk, i_endblk + + CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, i_startidx, i_endidx, rl_start, rl_end) + + + !$ser savepoint mo_solve_nonhydro_stencil_13_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing rho=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' + + !$ser data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) + + PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' + + !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) + + PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' + + !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 1, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx +#ifdef __SWAPDIM + z_rth_pr(jc,jk,jb,1) = p_nh%prog(nnow)%rho(jc,jk,jb) - p_nh%metrics%rho_ref_mc(jc,jk,jb) + z_rth_pr(jc,jk,jb,2) = p_nh%prog(nnow)%theta_v(jc,jk,jb) - p_nh%metrics%theta_ref_mc(jc,jk,jb) +#else + z_rth_pr(1,jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb) - p_nh%metrics%rho_ref_mc(jc,jk,jb) + z_rth_pr(2,jc,jk,jb) = p_nh%prog(nnow)%theta_v(jc,jk,jb) - p_nh%metrics%theta_ref_mc(jc,jk,jb) +#endif + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_13_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing 
rho=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' + + !$ser data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) + + PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' + + !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) + + PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' + + !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) + + ENDDO +!$OMP END DO NOWAIT + + ENDIF +!$OMP END PARALLEL + + IF (timers_level > 5) THEN + CALL timer_stop(timer_solve_nh_cellcomp) + CALL timer_start(timer_solve_nh_vnupd) + ENDIF + + ! Compute rho and theta at edges for horizontal flux divergence term + IF (istep == 1) THEN + IF (iadv_rhotheta == 1) THEN ! Simplified Miura scheme + !DA: TODO: remove the wait after everything is async + !$ACC WAIT + ! Compute density and potential temperature at vertices + CALL cells2verts_scalar(p_nh%prog(nnow)%rho,p_patch, p_int%cells_aw_verts, & + z_rho_v, opt_rlend=min_rlvert_int-1) + CALL cells2verts_scalar(p_nh%prog(nnow)%theta_v,p_patch, p_int%cells_aw_verts, & + z_theta_v_v, opt_rlend=min_rlvert_int-1) + + ELSE IF (iadv_rhotheta == 2) THEN ! Miura second-order upwind scheme + +#if !defined (__LOOP_EXCHANGE) && !defined (__SX__) && !defined (_OPENACC) + ! Compute backward trajectory - code is inlined for cache-based machines (see below) + CALL btraj_compute_o1( btraj = btraj, & !inout + & ptr_p = p_patch, & !in + & ptr_int = p_int, & !in + & p_vn = p_nh%prog(nnow)%vn, & !in +#ifdef __MIXED_PRECISION + & p_vt = REAL(p_nh%diag%vt,wp), & !in ! this results in differences in distv_bary, not sure why... 
+#else + & p_vt = p_nh%diag%vt, & !in +#endif + & p_dthalf = 0.5_wp*dtime, & !in + & opt_rlstart = 7, & !in + & opt_rlend = min_rledge_int-1, & !in + & opt_acc_async = .TRUE. ) !in +#endif + + ! Compute Green-Gauss gradients for rho and theta +!TODO: grad_green_gauss_cell adjust... + CALL grad_green_gauss_cell(z_rth_pr, p_patch, p_int, z_grad_rth, & + opt_rlstart=3, opt_rlend=min_rlcell_int-1, opt_acc_async=.TRUE.) + + ELSE IF (iadv_rhotheta == 3) THEN ! Third-order Miura scheme (does not perform well yet) + + !DA: TODO: remove the wait after everything is async + !$ACC WAIT + + lcompute =.TRUE. + lcleanup =.FALSE. + ! First call: compute backward trajectory with wind at time level nnow + + CALL upwind_hflux_miura3(p_patch, p_nh%prog(nnow)%rho, p_nh%prog(nnow)%vn, & + p_nh%prog(nnow)%vn, REAL(p_nh%diag%vt,wp), dtime, p_int, & + lcompute, lcleanup, 0, z_rho_e, & + opt_rlstart=7, opt_lout_edge=.TRUE. ) + + ! Second call: compute only reconstructed value for flux divergence + lcompute =.FALSE. + lcleanup =.TRUE. + CALL upwind_hflux_miura3(p_patch, p_nh%prog(nnow)%theta_v, p_nh%prog(nnow)%vn, & + p_nh%prog(nnow)%vn, REAL(p_nh%diag%vt,wp), dtime, p_int, & + lcompute, lcleanup, 0, z_theta_v_e, & + opt_rlstart=7, opt_lout_edge=.TRUE. ) + + ENDIF + ENDIF ! istep = 1 + +!$OMP PARALLEL PRIVATE (rl_start,rl_end,i_startblk,i_endblk) + IF (istep == 1) THEN + ! Compute 'edge values' of density and virtual potential temperature for horizontal + ! flux divergence term; this is included in upwind_hflux_miura3 for option 3 + IF (iadv_rhotheta <= 2) THEN + + rl_start = min_rledge_int-2 + ! Initialize halo edges with zero in order to avoid access of uninitialized array elements + i_startblk = p_patch%edges%start_block(rl_start) + IF (idiv_method == 1) THEN + rl_end = min_rledge_int-2 + i_endblk = p_patch%edges%end_block(rl_end) + ELSE + rl_end = min_rledge_int-3 + i_endblk = p_patch%edges%end_block(rl_end) + ENDIF + + IF (i_endblk >= i_startblk) THEN + ! 
DSL: Instead of calling init_zero_contiguous_dp to set z_rho_e and + ! z_theta_v_e to zero, introduce a stencil that does the same thing, + ! but does not touch the padding, so it can be verified. + + CALL get_indices_e(p_patch, 1, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + + !$ser savepoint mo_solve_nonhydro_stencil_14_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' + + !$ser data z_rho_e=z_rho_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data z_theta_v_e=z_theta_v_e(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jk = 1, nlev + DO jc = i_startidx, i_endidx + z_rho_e(jc,jk,1) = 0._wp + z_theta_v_e(jc,jk,1) = 0._wp + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_14_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' + + !$ser data z_rho_e=z_rho_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data z_theta_v_e=z_theta_v_e(:,:,1) + ENDIF +!$OMP BARRIER + + rl_start = 7 + rl_end = min_rledge_int-1 + + i_startblk = p_patch%edges%start_block(rl_start) + i_endblk = p_patch%edges%end_block (rl_end) + + ! initialize also nest boundary points with zero + IF (jg > 1 .OR. l_limited_area) THEN + ! DSL: Instead of calling init_zero_contiguous_dp to set z_rho_e and + ! z_theta_v_e to zero, introduce a stencil that does the same thing, + ! but does not touch the padding, so it can be verified. 
+ + rl_start_2 = 1 + rl_end_2 = min_rledge_int-1 + + i_startblk_2 = p_patch%edges%start_block(rl_start_2) + i_endblk_2 = p_patch%edges%end_block (rl_end_2) + + CALL get_indices_e(p_patch, 1, i_startblk_2, i_endblk_2, & + i_startidx_2, i_endidx_2, rl_start_2, rl_end_2) + + + !$ser savepoint mo_solve_nonhydro_stencil_15_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' + + !$ser data z_rho_e=z_rho_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data z_theta_v_e=z_theta_v_e(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jk = 1, nlev + DO jc = i_startidx_2, i_endidx_2 + z_rho_e(jc,jk,1) = 0._wp + z_theta_v_e(jc,jk,1) = 0._wp + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_15_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' + + !$ser data z_rho_e=z_rho_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data z_theta_v_e=z_theta_v_e(:,:,1) +!$OMP BARRIER + ENDIF + +!$OMP DO PRIVATE(jb,jk,je,i_startidx,i_endidx,ilc0,ibc0,lvn_pos,& +!$OMP z_ntdistv_bary_1,z_ntdistv_bary_2,distv_bary_1,distv_bary_2) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk, i_endblk + + CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + IF (iadv_rhotheta == 2) THEN + ! Operations from upwind_hflux_miura are inlined in order to process both + ! 
fields in one step + + + !$ser savepoint mo_solve_nonhydro_stencil_16_fused_btraj_traj_o1_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing p_dthalf=0.5_wp*dtime' + + !$ser data p_dthalf=0.5_wp*dtime + + PRINT *, 'Serializing p_vn=p_nh%prog(nnow)%vn(:,:,1)' + + !$ser data p_vn=p_nh%prog(nnow)%vn(:,:,1) + + PRINT *, 'Serializing p_vt=p_nh%diag%vt(:,:,1)' + + !$ser data p_vt=p_nh%diag%vt(:,:,1) + + PRINT *, 'Serializing primal_normal_cell_1=p_patch%edges%primal_normal_cell_x(:,:,1)' + + !$ser data primal_normal_cell_1=p_patch%edges%primal_normal_cell_x(:,:,1) + + PRINT *, 'Serializing dual_normal_cell_1=p_patch%edges%dual_normal_cell_x(:,:,1)' + + !$ser data dual_normal_cell_1=p_patch%edges%dual_normal_cell_x(:,:,1) + + PRINT *, 'Serializing primal_normal_cell_2=p_patch%edges%primal_normal_cell_y(:,:,1)' + + !$ser data primal_normal_cell_2=p_patch%edges%primal_normal_cell_y(:,:,1) + + PRINT *, 'Serializing dual_normal_cell_2=p_patch%edges%dual_normal_cell_y(:,:,1)' + + !$ser data dual_normal_cell_2=p_patch%edges%dual_normal_cell_y(:,:,1) + + PRINT *, 'Serializing rho_ref_me=p_nh%metrics%rho_ref_me(:,:,1)' + + !$ser data rho_ref_me=p_nh%metrics%rho_ref_me(:,:,1) + + PRINT *, 'Serializing theta_ref_me=p_nh%metrics%theta_ref_me(:,:,1)' + + !$ser data theta_ref_me=p_nh%metrics%theta_ref_me(:,:,1) + + PRINT *, 'Serializing z_grad_rth_1=z_grad_rth(:,:,1,1)' + + !$ser data z_grad_rth_1=z_grad_rth(:,:,1,1) + + PRINT *, 'Serializing z_grad_rth_2=z_grad_rth(:,:,1,2)' + + !$ser data z_grad_rth_2=z_grad_rth(:,:,1,2) + + PRINT *, 'Serializing z_grad_rth_3=z_grad_rth(:,:,1,3)' + + !$ser data z_grad_rth_3=z_grad_rth(:,:,1,3) + + PRINT *, 'Serializing z_grad_rth_4=z_grad_rth(:,:,1,4)' + + !$ser data z_grad_rth_4=z_grad_rth(:,:,1,4) + + PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' + + !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) + + PRINT *, 
'Serializing z_rho_e=z_rho_e(:,:,1)' + + !$ser data z_rho_e=z_rho_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data z_theta_v_e=z_theta_v_e(:,:,1) + + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) +#if defined (__LOOP_EXCHANGE) || defined (__SX__) || defined (_OPENACC) + ! For cache-based machines, also the back-trajectory computation is inlined to improve efficiency + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) & + !$ACC PRIVATE(lvn_pos, ilc0, ibc0, z_ntdistv_bary_1, z_ntdistv_bary_2, distv_bary_1, distv_bary_2) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP, PREFERVECTOR + DO jk = 1, nlev +#else + DO jk = 1, nlev + DO je = i_startidx, i_endidx +#endif + lvn_pos = p_nh%prog(nnow)%vn(je,jk,jb) >= 0._wp + + ! line and block indices of upwind neighbor cell + ilc0 = MERGE(p_patch%edges%cell_idx(je,jb,1),p_patch%edges%cell_idx(je,jb,2),lvn_pos) + ibc0 = MERGE(p_patch%edges%cell_blk(je,jb,1),p_patch%edges%cell_blk(je,jb,2),lvn_pos) + + ! distances from upwind mass point to the end point of the backward trajectory + ! in edge-normal and tangential directions + z_ntdistv_bary_1 = - ( p_nh%prog(nnow)%vn(je,jk,jb) * dthalf + & + MERGE(p_int%pos_on_tplane_e(je,jb,1,1), p_int%pos_on_tplane_e(je,jb,2,1),lvn_pos)) + + z_ntdistv_bary_2 = - ( p_nh%diag%vt(je,jk,jb) * dthalf + & + MERGE(p_int%pos_on_tplane_e(je,jb,1,2), p_int%pos_on_tplane_e(je,jb,2,2),lvn_pos)) + + ! rotate distance vectors into local lat-lon coordinates: + ! + ! component in longitudinal direction + distv_bary_1 = & + z_ntdistv_bary_1*MERGE(p_patch%edges%primal_normal_cell(je,jb,1)%v1, & + p_patch%edges%primal_normal_cell(je,jb,2)%v1,lvn_pos) & + + z_ntdistv_bary_2*MERGE(p_patch%edges%dual_normal_cell(je,jb,1)%v1, & + p_patch%edges%dual_normal_cell(je,jb,2)%v1,lvn_pos) + + ! 
component in latitudinal direction + distv_bary_2 = & + z_ntdistv_bary_1*MERGE(p_patch%edges%primal_normal_cell(je,jb,1)%v2, & + p_patch%edges%primal_normal_cell(je,jb,2)%v2,lvn_pos) & + + z_ntdistv_bary_2*MERGE(p_patch%edges%dual_normal_cell(je,jb,1)%v2, & + p_patch%edges%dual_normal_cell(je,jb,2)%v2,lvn_pos) + + + ! Calculate "edge values" of rho and theta_v + ! Note: z_rth_pr contains the perturbation values of rho and theta_v, + ! and the corresponding gradients are stored in z_grad_rth. +#ifdef __SWAPDIM + z_rho_e(je,jk,jb) = & + REAL(p_nh%metrics%rho_ref_me(je,jk,jb),wp) + z_rth_pr(ilc0,jk,ibc0,1) & + + distv_bary_1 * z_grad_rth(ilc0,jk,ibc0,1) & + + distv_bary_2 * z_grad_rth(ilc0,jk,ibc0,2) + z_theta_v_e(je,jk,jb) = & + REAL(p_nh%metrics%theta_ref_me(je,jk,jb),wp) + z_rth_pr(ilc0,jk,ibc0,2) & + + distv_bary_1 * z_grad_rth(ilc0,jk,ibc0,3) & + + distv_bary_2 * z_grad_rth(ilc0,jk,ibc0,4) +#else + z_rho_e(je,jk,jb) = REAL(p_nh%metrics%rho_ref_me(je,jk,jb),wp) & + + z_rth_pr(1,ilc0,jk,ibc0) & + + distv_bary_1 * z_grad_rth(1,ilc0,jk,ibc0) & + + distv_bary_2 * z_grad_rth(2,ilc0,jk,ibc0) + + z_theta_v_e(je,jk,jb) = REAL(p_nh%metrics%theta_ref_me(je,jk,jb),wp) & + + z_rth_pr(2,ilc0,jk,ibc0) & + + distv_bary_1 * z_grad_rth(3,ilc0,jk,ibc0) & + + distv_bary_2 * z_grad_rth(4,ilc0,jk,ibc0) +#endif + ENDDO ! loop over vertical levels + ENDDO ! loop over edges +#else + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) PRIVATE(ilc0, ibc0) + DO jk = 1, nlev + DO je = i_startidx, i_endidx + + ilc0 = p_cell_idx(je,jk,jb) + ibc0 = p_cell_blk(je,jk,jb) + + ! Calculate "edge values" of rho and theta_v + ! Note: z_rth_pr contains the perturbation values of rho and theta_v, + ! and the corresponding gradients are stored in z_grad_rth. 
+#ifdef __SWAPDIM + z_rho_e(je,jk,jb) = & + REAL(p_nh%metrics%rho_ref_me(je,jk,jb),wp) + z_rth_pr(ilc0,jk,ibc0,1) & + + p_distv_bary(je,jk,jb,1) * z_grad_rth(ilc0,jk,ibc0,1) & + + p_distv_bary(je,jk,jb,2) * z_grad_rth(ilc0,jk,ibc0,2) + z_theta_v_e(je,jk,jb) = & + REAL(p_nh%metrics%theta_ref_me(je,jk,jb),wp) + z_rth_pr(ilc0,jk,ibc0,2) & + + p_distv_bary(je,jk,jb,1) * z_grad_rth(ilc0,jk,ibc0,3) & + + p_distv_bary(je,jk,jb,2) * z_grad_rth(ilc0,jk,ibc0,4) +#else + z_rho_e(je,jk,jb) = REAL(p_nh%metrics%rho_ref_me(je,jk,jb),wp) & + + z_rth_pr(1,ilc0,jk,ibc0) & + + p_distv_bary(je,jk,jb,1) * z_grad_rth(1,ilc0,jk,ibc0) & + + p_distv_bary(je,jk,jb,2) * z_grad_rth(2,ilc0,jk,ibc0) + z_theta_v_e(je,jk,jb) = REAL(p_nh%metrics%theta_ref_me(je,jk,jb),wp) & + + z_rth_pr(2,ilc0,jk,ibc0) & + + p_distv_bary(je,jk,jb,1) * z_grad_rth(3,ilc0,jk,ibc0) & + + p_distv_bary(je,jk,jb,2) * z_grad_rth(4,ilc0,jk,ibc0) +#endif + + ENDDO ! loop over edges + ENDDO ! loop over vertical levels +#endif + !$ACC END PARALLEL + + ELSE ! iadv_rhotheta = 1 + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(32, 4) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = 1, nlev +#else + DO jk = 1, nlev + DO je = i_startidx, i_endidx +#endif + + ! Compute upwind-biased values for rho and theta starting from centered differences + ! Note: the length of the backward trajectory should be 0.5*dtime*(vn,vt) in order to arrive + ! at a second-order accurate FV discretization, but twice the length is needed for numerical + ! 
stability + z_rho_e(je,jk,jb) = & + p_int%c_lin_e(je,1,jb)*p_nh%prog(nnow)%rho(icidx(je,jb,1),jk,icblk(je,jb,1)) + & + p_int%c_lin_e(je,2,jb)*p_nh%prog(nnow)%rho(icidx(je,jb,2),jk,icblk(je,jb,2)) - & + dtime * (p_nh%prog(nnow)%vn(je,jk,jb)*p_patch%edges%inv_dual_edge_length(je,jb)* & + (p_nh%prog(nnow)%rho(icidx(je,jb,2),jk,icblk(je,jb,2)) - & + p_nh%prog(nnow)%rho(icidx(je,jb,1),jk,icblk(je,jb,1)) ) + p_nh%diag%vt(je,jk,jb) * & + p_patch%edges%inv_primal_edge_length(je,jb) * p_patch%edges%tangent_orientation(je,jb) * & + (z_rho_v(ividx(je,jb,2),jk,ivblk(je,jb,2)) - z_rho_v(ividx(je,jb,1),jk,ivblk(je,jb,1)) ) ) + + z_theta_v_e(je,jk,jb) = & + p_int%c_lin_e(je,1,jb)*p_nh%prog(nnow)%theta_v(icidx(je,jb,1),jk,icblk(je,jb,1)) + & + p_int%c_lin_e(je,2,jb)*p_nh%prog(nnow)%theta_v(icidx(je,jb,2),jk,icblk(je,jb,2)) - & + dtime * (p_nh%prog(nnow)%vn(je,jk,jb)*p_patch%edges%inv_dual_edge_length(je,jb)* & + (p_nh%prog(nnow)%theta_v(icidx(je,jb,2),jk,icblk(je,jb,2)) - & + p_nh%prog(nnow)%theta_v(icidx(je,jb,1),jk,icblk(je,jb,1)) ) + p_nh%diag%vt(je,jk,jb) * & + p_patch%edges%inv_primal_edge_length(je,jb) * p_patch%edges%tangent_orientation(je,jb) * & + (z_theta_v_v(ividx(je,jb,2),jk,ivblk(je,jb,2)) - z_theta_v_v(ividx(je,jb,1),jk,ivblk(je,jb,1)) )) + + ENDDO ! loop over edges + ENDDO ! 
loop over vertical levels + !$ACC END PARALLEL + ENDIF + + + !$ser savepoint mo_solve_nonhydro_stencil_16_fused_btraj_traj_o1_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing p_dthalf=0.5_wp*dtime' + + !$ser data p_dthalf=0.5_wp*dtime + + PRINT *, 'Serializing p_vn=p_nh%prog(nnow)%vn(:,:,1)' + + !$ser data p_vn=p_nh%prog(nnow)%vn(:,:,1) + + PRINT *, 'Serializing p_vt=p_nh%diag%vt(:,:,1)' + + !$ser data p_vt=p_nh%diag%vt(:,:,1) + + PRINT *, 'Serializing primal_normal_cell_1=p_patch%edges%primal_normal_cell_x(:,:,1)' + + !$ser data primal_normal_cell_1=p_patch%edges%primal_normal_cell_x(:,:,1) + + PRINT *, 'Serializing dual_normal_cell_1=p_patch%edges%dual_normal_cell_x(:,:,1)' + + !$ser data dual_normal_cell_1=p_patch%edges%dual_normal_cell_x(:,:,1) + + PRINT *, 'Serializing primal_normal_cell_2=p_patch%edges%primal_normal_cell_y(:,:,1)' + + !$ser data primal_normal_cell_2=p_patch%edges%primal_normal_cell_y(:,:,1) + + PRINT *, 'Serializing dual_normal_cell_2=p_patch%edges%dual_normal_cell_y(:,:,1)' + + !$ser data dual_normal_cell_2=p_patch%edges%dual_normal_cell_y(:,:,1) + + PRINT *, 'Serializing rho_ref_me=p_nh%metrics%rho_ref_me(:,:,1)' + + !$ser data rho_ref_me=p_nh%metrics%rho_ref_me(:,:,1) + + PRINT *, 'Serializing theta_ref_me=p_nh%metrics%theta_ref_me(:,:,1)' + + !$ser data theta_ref_me=p_nh%metrics%theta_ref_me(:,:,1) + + PRINT *, 'Serializing z_grad_rth_1=z_grad_rth(:,:,1,1)' + + !$ser data z_grad_rth_1=z_grad_rth(:,:,1,1) + + PRINT *, 'Serializing z_grad_rth_2=z_grad_rth(:,:,1,2)' + + !$ser data z_grad_rth_2=z_grad_rth(:,:,1,2) + + PRINT *, 'Serializing z_grad_rth_3=z_grad_rth(:,:,1,3)' + + !$ser data z_grad_rth_3=z_grad_rth(:,:,1,3) + + PRINT *, 'Serializing z_grad_rth_4=z_grad_rth(:,:,1,4)' + + !$ser data z_grad_rth_4=z_grad_rth(:,:,1,4) + + PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' + + !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) + + PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' + + !$ser data 
z_rth_pr_2=z_rth_pr(:,:,1,2) + + PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' + + !$ser data z_rho_e=z_rho_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data z_theta_v_e=z_theta_v_e(:,:,1) + + ENDDO +!$OMP END DO + + ENDIF + + ELSE IF (istep == 2 .AND. lhdiff_rcf .AND. divdamp_type >= 3) THEN ! apply div damping on 3D divergence + + ! add dw/dz contribution to divergence damping term + + rl_start = 7 + rl_end = min_rledge_int-2 + + i_startblk = p_patch%edges%start_block(rl_start) + i_endblk = p_patch%edges%end_block (rl_end) + +!$OMP DO PRIVATE(jb,jk,je,i_startidx,i_endidx) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk, i_endblk + + CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + + !$ser savepoint mo_solve_nonhydro_stencil_17_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing hmask_dd3d=p_nh%metrics%hmask_dd3d(:,1)' + + !$ser data hmask_dd3d=p_nh%metrics%hmask_dd3d(:,1) + + PRINT *, 'Serializing scalfac_dd3d=p_nh%metrics%scalfac_dd3d(:)' + + !$ser data scalfac_dd3d=p_nh%metrics%scalfac_dd3d(:) + + PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' + + !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) + + PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' + + !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) + + PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' + + !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP, PREFERVECTOR + DO jk = kstart_dd3d(jg), nlev + z_graddiv_vn(jk,je,jb) = z_graddiv_vn(jk,je,jb) + p_nh%metrics%hmask_dd3d(je,jb)* & + p_nh%metrics%scalfac_dd3d(jk) * p_patch%edges%inv_dual_edge_length(je,jb)* & + ( z_dwdz_dd(icidx(je,jb,2),jk,icblk(je,jb,2)) - z_dwdz_dd(icidx(je,jb,1),jk,icblk(je,jb,1)) ) +#else + DO jk = 
kstart_dd3d(jg), nlev + DO je = i_startidx, i_endidx + z_graddiv_vn(je,jk,jb) = z_graddiv_vn(je,jk,jb) + p_nh%metrics%hmask_dd3d(je,jb)* & + p_nh%metrics%scalfac_dd3d(jk) * p_patch%edges%inv_dual_edge_length(je,jb)* & + ( z_dwdz_dd(icidx(je,jb,2),jk,icblk(je,jb,2)) - z_dwdz_dd(icidx(je,jb,1),jk,icblk(je,jb,1)) ) +#endif + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_17_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing hmask_dd3d=p_nh%metrics%hmask_dd3d(:,1)' + + !$ser data hmask_dd3d=p_nh%metrics%hmask_dd3d(:,1) + + PRINT *, 'Serializing scalfac_dd3d=p_nh%metrics%scalfac_dd3d(:)' + + !$ser data scalfac_dd3d=p_nh%metrics%scalfac_dd3d(:) + + PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' + + !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) + + PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' + + !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) + + PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' + + !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) + + ENDDO +!$OMP END DO + + ENDIF ! istep = 1/2 + + ! Remaining computations at edge points + + rl_start = grf_bdywidth_e + 1 ! boundary update follows below + rl_end = min_rledge_int + + i_startblk = p_patch%edges%start_block(rl_start) + i_endblk = p_patch%edges%end_block(rl_end) + + IF (istep == 1) THEN + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,z_theta1,z_theta2,ikp1,ikp2) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk, i_endblk + + CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + ! Store values at nest interface levels; this is done here for the first sub-time step, + ! the final averaging is done in mo_nh_nest_utilities:compute_tendencies + IF (idyn_timestep == 1 .AND. 
l_child_vertnest) THEN + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO je = i_startidx, i_endidx + p_nh%diag%vn_ie_int(je,1,jb) = p_nh%diag%vn_ie(je,nshift,jb) + ENDDO + !$ACC END PARALLEL + ENDIF + + + !$ser savepoint mo_solve_nonhydro_stencil_18_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' + + !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx + DO jk = 1, nflatlev(jg)-1 +#else + DO jk = 1, nflatlev(jg)-1 + DO je = i_startidx, i_endidx +#endif + ! 
horizontal gradient of Exner pressure where coordinate surfaces are flat + z_gradh_exner(je,jk,jb) = p_patch%edges%inv_dual_edge_length(je,jb)* & + (z_exner_ex_pr(icidx(je,jb,2),jk,icblk(je,jb,2)) - & + z_exner_ex_pr(icidx(je,jb,1),jk,icblk(je,jb,1)) ) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_18_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' + + !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) + + IF (igradp_method <= 3) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_19_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' + + !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + PRINT *, 'Serializing ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1)' + + !$ser data ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1) + + PRINT *, 'Serializing c_lin_e=p_int%c_lin_e(:,:,1)' + + !$ser data c_lin_e=p_int%c_lin_e(:,:,1) + + PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' + + !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = nflatlev(jg), nflat_gradp(jg) +#else +!$NEC outerloop_unroll(8) + DO jk = nflatlev(jg), nflat_gradp(jg) + DO je = i_startidx, i_endidx +#endif + ! 
horizontal gradient of Exner pressure, including metric correction + z_gradh_exner(je,jk,jb) = p_patch%edges%inv_dual_edge_length(je,jb)* & + (z_exner_ex_pr(icidx(je,jb,2),jk,icblk(je,jb,2)) - & + z_exner_ex_pr(icidx(je,jb,1),jk,icblk(je,jb,1)) ) - & + p_nh%metrics%ddxn_z_full(je,jk,jb) * & +#ifdef __SWAPDIM + (p_int%c_lin_e(je,1,jb)*z_dexner_dz_c(icidx(je,jb,1),jk,icblk(je,jb,1),1) + & + p_int%c_lin_e(je,2,jb)*z_dexner_dz_c(icidx(je,jb,2),jk,icblk(je,jb,2),1)) +#else + (p_int%c_lin_e(je,1,jb)*z_dexner_dz_c(1,icidx(je,jb,1),jk,icblk(je,jb,1)) + & + p_int%c_lin_e(je,2,jb)*z_dexner_dz_c(1,icidx(je,jb,2),jk,icblk(je,jb,2))) +#endif + ENDDO + ENDDO + !$ACC END PARALLEL + + + !$ser savepoint mo_solve_nonhydro_stencil_19_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' + + !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + PRINT *, 'Serializing ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1)' + + !$ser data ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1) + + PRINT *, 'Serializing c_lin_e=p_int%c_lin_e(:,:,1)' + + !$ser data c_lin_e=p_int%c_lin_e(:,:,1) + + PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' + + !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) + + + !$ser savepoint mo_solve_nonhydro_stencil_20_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' + + !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + PRINT *, 'Serializing zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1)' + + !$ser 
data zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1) + + PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' + + !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) + + PRINT *, 'Serializing z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2)' + + !$ser data z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(32, 4) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP, PREFERVECTOR + DO jk = nflat_gradp(jg)+1, nlev +#else +!$NEC outerloop_unroll(8) + DO jk = nflat_gradp(jg)+1, nlev + DO je = i_startidx, i_endidx +#endif + ! horizontal gradient of Exner pressure, Taylor-expansion-based reconstruction + z_gradh_exner(je,jk,jb) = p_patch%edges%inv_dual_edge_length(je,jb)* & + (z_exner_ex_pr(icidx(je,jb,2),ikidx(2,je,jk,jb),icblk(je,jb,2)) + & + p_nh%metrics%zdiff_gradp(2,je,jk,jb)* & +#ifdef __SWAPDIM + (z_dexner_dz_c(icidx(je,jb,2),ikidx(2,je,jk,jb),icblk(je,jb,2),1) + & + p_nh%metrics%zdiff_gradp(2,je,jk,jb)* & + z_dexner_dz_c(icidx(je,jb,2),ikidx(2,je,jk,jb),icblk(je,jb,2),2)) - & + (z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1)) + & + p_nh%metrics%zdiff_gradp(1,je,jk,jb)* & + (z_dexner_dz_c(icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1),1) + & + p_nh%metrics%zdiff_gradp(1,je,jk,jb)* & + z_dexner_dz_c(icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1),2)))) +#else + (z_dexner_dz_c(1,icidx(je,jb,2),ikidx(2,je,jk,jb),icblk(je,jb,2)) + & + p_nh%metrics%zdiff_gradp(2,je,jk,jb)* & + z_dexner_dz_c(2,icidx(je,jb,2),ikidx(2,je,jk,jb),icblk(je,jb,2))) - & + (z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1)) + & + p_nh%metrics%zdiff_gradp(1,je,jk,jb)* & + (z_dexner_dz_c(1,icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1)) + & + p_nh%metrics%zdiff_gradp(1,je,jk,jb)* & + z_dexner_dz_c(2,icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1))))) +#endif + 
ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_20_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' + + !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) + + PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' + + !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) + + PRINT *, 'Serializing zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1)' + + !$ser data zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1) + + PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' + + !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) + + PRINT *, 'Serializing z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2)' + + !$ser data z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) + + ELSE IF (igradp_method == 4 .OR. igradp_method == 5) THEN + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(32, 4) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx + DO jk = nflatlev(jg), nlev +#else + DO jk = nflatlev(jg), nlev + DO je = i_startidx, i_endidx +#endif + ! 
horizontal gradient of Exner pressure, cubic/quadratic interpolation + z_gradh_exner(je,jk,jb) = p_patch%edges%inv_dual_edge_length(je,jb)* & + (z_exner_ex_pr(icidx(je,jb,2),ikidx(2,je,jk,jb)-1,icblk(je,jb,2)) * & + p_nh%metrics%coeff_gradp(5,je,jk,jb) + & + z_exner_ex_pr(icidx(je,jb,2),ikidx(2,je,jk,jb) ,icblk(je,jb,2)) * & + p_nh%metrics%coeff_gradp(6,je,jk,jb) + & + z_exner_ex_pr(icidx(je,jb,2),ikidx(2,je,jk,jb)+1,icblk(je,jb,2)) * & + p_nh%metrics%coeff_gradp(7,je,jk,jb) + & + z_exner_ex_pr(icidx(je,jb,2),ikidx(2,je,jk,jb)+2,icblk(je,jb,2)) * & + p_nh%metrics%coeff_gradp(8,je,jk,jb) - & + (z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb)-1,icblk(je,jb,1)) * & + p_nh%metrics%coeff_gradp(1,je,jk,jb) + & + z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb) ,icblk(je,jb,1)) * & + p_nh%metrics%coeff_gradp(2,je,jk,jb) + & + z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb)+1,icblk(je,jb,1)) * & + p_nh%metrics%coeff_gradp(3,je,jk,jb) + & + z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb)+2,icblk(je,jb,1)) * & + p_nh%metrics%coeff_gradp(4,je,jk,jb)) ) + + ENDDO + ENDDO + !$ACC END PARALLEL + ENDIF + + ! 
compute hydrostatically approximated correction term that replaces downward extrapolation + IF (igradp_method == 3) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_21_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing grav_o_cpd=grav_o_cpd' + + !$ser data grav_o_cpd=grav_o_cpd + + PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1)' + + !$ser data zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) + + PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' + + !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) + + PRINT *, 'Serializing z_hydro_corr=z_hydro_corr(:,:,1)' + + !$ser data z_hydro_corr=z_hydro_corr(:,:,1) + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR PRIVATE(z_theta1, z_theta2) + DO je = i_startidx, i_endidx + + z_theta1 = & + p_nh%prog(nnow)%theta_v(icidx(je,jb,1),ikidx(1,je,nlev,jb),icblk(je,jb,1)) + & + p_nh%metrics%zdiff_gradp(1,je,nlev,jb)* & + (p_nh%diag%theta_v_ic(icidx(je,jb,1),ikidx(1,je,nlev,jb), icblk(je,jb,1)) - & + p_nh%diag%theta_v_ic(icidx(je,jb,1),ikidx(1,je,nlev,jb)+1,icblk(je,jb,1))) * & + p_nh%metrics%inv_ddqz_z_full(icidx(je,jb,1),ikidx(1,je,nlev,jb),icblk(je,jb,1)) + + z_theta2 = & + p_nh%prog(nnow)%theta_v(icidx(je,jb,2),ikidx(2,je,nlev,jb),icblk(je,jb,2)) + & + p_nh%metrics%zdiff_gradp(2,je,nlev,jb)* & + (p_nh%diag%theta_v_ic(icidx(je,jb,2),ikidx(2,je,nlev,jb), icblk(je,jb,2)) - & + p_nh%diag%theta_v_ic(icidx(je,jb,2),ikidx(2,je,nlev,jb)+1,icblk(je,jb,2))) * & + 
p_nh%metrics%inv_ddqz_z_full(icidx(je,jb,2),ikidx(2,je,nlev,jb),icblk(je,jb,2)) + + z_hydro_corr(je,nlev,jb) = grav_o_cpd*p_patch%edges%inv_dual_edge_length(je,jb)* & + (z_theta2-z_theta1)*4._wp/(z_theta1+z_theta2)**2 + + ENDDO + !$ACC END PARALLEL + + + !$ser savepoint mo_solve_nonhydro_stencil_21_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing grav_o_cpd=grav_o_cpd' + + !$ser data grav_o_cpd=grav_o_cpd + + PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1)' + + !$ser data zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) + + PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' + + !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) + + PRINT *, 'Serializing z_hydro_corr=z_hydro_corr(:,:,1)' + + !$ser data z_hydro_corr=z_hydro_corr(:,:,1) + + ELSE IF (igradp_method == 5) THEN + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR PRIVATE(ikp1, ikp2, z_theta1, z_theta2) + DO je = i_startidx, i_endidx + + ikp1 = MIN(nlev,ikidx(1,je,nlev,jb)+2) + ikp2 = MIN(nlev,ikidx(2,je,nlev,jb)+2) + + z_theta1 = & + p_nh%prog(nnow)%theta_v(icidx(je,jb,1),ikidx(1,je,nlev,jb)-1,icblk(je,jb,1)) * & + p_nh%metrics%coeff_gradp(1,je,nlev,jb) + & + p_nh%prog(nnow)%theta_v(icidx(je,jb,1),ikidx(1,je,nlev,jb) ,icblk(je,jb,1)) * & + p_nh%metrics%coeff_gradp(2,je,nlev,jb) + & + p_nh%prog(nnow)%theta_v(icidx(je,jb,1),ikidx(1,je,nlev,jb)+1,icblk(je,jb,1)) * & + p_nh%metrics%coeff_gradp(3,je,nlev,jb) + & + p_nh%prog(nnow)%theta_v(icidx(je,jb,1),ikp1 ,icblk(je,jb,1)) * & + 
p_nh%metrics%coeff_gradp(4,je,nlev,jb) + + z_theta2 = & + p_nh%prog(nnow)%theta_v(icidx(je,jb,2),ikidx(2,je,nlev,jb)-1,icblk(je,jb,2)) * & + p_nh%metrics%coeff_gradp(5,je,nlev,jb) + & + p_nh%prog(nnow)%theta_v(icidx(je,jb,2),ikidx(2,je,nlev,jb) ,icblk(je,jb,2)) * & + p_nh%metrics%coeff_gradp(6,je,nlev,jb) + & + p_nh%prog(nnow)%theta_v(icidx(je,jb,2),ikidx(2,je,nlev,jb)+1,icblk(je,jb,2)) * & + p_nh%metrics%coeff_gradp(7,je,nlev,jb) + & + p_nh%prog(nnow)%theta_v(icidx(je,jb,2),ikp2 ,icblk(je,jb,2)) * & + p_nh%metrics%coeff_gradp(8,je,nlev,jb) + + z_hydro_corr(je,nlev,jb) = grav_o_cpd*p_patch%edges%inv_dual_edge_length(je,jb)* & + (z_theta2-z_theta1)*4._wp/(z_theta1+z_theta2)**2 + + ENDDO + !$ACC END PARALLEL + ENDIF + + ENDDO +!$OMP END DO + + ENDIF ! istep = 1 + + + IF (istep == 1 .AND. (igradp_method == 3 .OR. igradp_method == 5)) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_22_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing ipeidx_dsl=p_nh%metrics%pg_edgeidx_dsl(:,:,1)' + + !$ser data ipeidx_dsl=p_nh%metrics%pg_edgeidx_dsl(:,:,1) + + PRINT *, 'Serializing pg_exdist=p_nh%metrics%pg_exdist_dsl(:,:,1)' + + !$ser data pg_exdist=p_nh%metrics%pg_exdist_dsl(:,:,1) + + PRINT *, 'Serializing z_hydro_corr=z_hydro_corr(:,:,1)' + + !$ser data z_hydro_corr=z_hydro_corr(:,:,1) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) +!$OMP DO PRIVATE(jb,je,ie,nlen_gradp,ishift) ICON_OMP_DEFAULT_SCHEDULE + DO jb = 1, nblks_gradp + IF (jb == nblks_gradp) THEN + nlen_gradp = npromz_gradp + ELSE + nlen_gradp = nproma_gradp + ENDIF + ishift = (jb-1)*nproma_gradp + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) +!$NEC ivdep + !$ACC LOOP GANG VECTOR + DO je = 1, nlen_gradp + ie = ishift+je + + z_gradh_exner(ipeidx(ie),iplev(ie),ipeblk(ie)) = & + z_gradh_exner(ipeidx(ie),iplev(ie),ipeblk(ie)) + & + p_nh%metrics%pg_exdist(ie)*z_hydro_corr(ipeidx(ie),nlev,ipeblk(ie)) + + 
ENDDO + !$ACC END PARALLEL + ENDDO +!$OMP END DO + + rl_start_2 = grf_bdywidth_e+1 + rl_end_2 = min_rledge + + i_startblk_2 = p_patch%edges%start_block(rl_start_2) + i_endblk_2 = p_patch%edges%end_block(rl_end_2) + + CALL get_indices_e(p_patch, 1, i_startblk_2, i_endblk_2, & + i_startidx_2, i_endidx_2, rl_start_2, rl_end_2) + + + !$ser savepoint mo_solve_nonhydro_stencil_22_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing ipeidx_dsl=p_nh%metrics%pg_edgeidx_dsl(:,:,1)' + + !$ser data ipeidx_dsl=p_nh%metrics%pg_edgeidx_dsl(:,:,1) + + PRINT *, 'Serializing pg_exdist=p_nh%metrics%pg_exdist_dsl(:,:,1)' + + !$ser data pg_exdist=p_nh%metrics%pg_exdist_dsl(:,:,1) + + PRINT *, 'Serializing z_hydro_corr=z_hydro_corr(:,:,1)' + + !$ser data z_hydro_corr=z_hydro_corr(:,:,1) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) + + ENDIF + + + ! Update horizontal velocity field: advection, Coriolis force, pressure-gradient term, and physics + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,z_graddiv2_vn, & +!$OMP z_ddt_vn_dyn, z_ddt_vn_apc, z_ddt_vn_cor, z_ddt_vn_pgr, z_ddt_vn_ray, z_d_vn_dmp, z_d_vn_iau & +!$OMP ) ICON_OMP_DEFAULT_SCHEDULE + + DO jb = i_startblk, i_endblk + + CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + IF ((itime_scheme >= 4) .AND. istep == 2) THEN ! 
use temporally averaged velocity advection terms + + + !$ser savepoint mo_solve_nonhydro_stencil_23_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cpd=cpd' + + !$ser data cpd=cpd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing wgt_nnew_vel=wgt_nnew_vel' + + !$ser data wgt_nnew_vel=wgt_nnew_vel + + PRINT *, 'Serializing wgt_nnow_vel=wgt_nnow_vel' + + !$ser data wgt_nnow_vel=wgt_nnow_vel + + PRINT *, 'Serializing vn_nnow=p_nh%prog(nnow)%vn(:,:,1)' + + !$ser data vn_nnow=p_nh%prog(nnow)%vn(:,:,1) + + PRINT *, 'Serializing ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1)' + + !$ser data ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1) + + PRINT *, 'Serializing ddt_vn_adv_ntl2=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl2)' + + !$ser data ddt_vn_adv_ntl2=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl2) + + PRINT *, 'Serializing ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1)' + + !$ser data ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data z_theta_v_e=z_theta_v_e(:,:,1) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) + + PRINT *, 'Serializing vn_nnew=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn_nnew=p_nh%prog(nnew)%vn(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + + !$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(z_ddt_vn_dyn, z_ddt_vn_apc, z_ddt_vn_cor, z_ddt_vn_pgr) TILE(32, 4) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! + z_ddt_vn_apc = p_nh%diag%ddt_vn_apc_pc(je,jk,jb,ntl1)*wgt_nnow_vel & + & +p_nh%diag%ddt_vn_apc_pc(je,jk,jb,ntl2)*wgt_nnew_vel + z_ddt_vn_pgr = -cpd*z_theta_v_e(je,jk,jb)*z_gradh_exner(je,jk,jb) + ! + z_ddt_vn_dyn = z_ddt_vn_apc & ! advection plus Coriolis + & +z_ddt_vn_pgr & ! pressure gradient + & +p_nh%diag%ddt_vn_phy(je,jk,jb) ! physics applied in dynamics + ! 
+ p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnow)%vn(je,jk,jb) + dtime * z_ddt_vn_dyn + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF (p_nh%diag%ddt_vn_adv_is_associated .OR. p_nh%diag%ddt_vn_cor_is_associated) THEN + z_ddt_vn_cor = p_nh%diag%ddt_vn_cor_pc(je,jk,jb,ntl1)*wgt_nnow_vel & + & +p_nh%diag%ddt_vn_cor_pc(je,jk,jb,ntl2)*wgt_nnew_vel + ! + IF (p_nh%diag%ddt_vn_adv_is_associated) THEN + p_nh%diag%ddt_vn_adv(je,jk,jb)= p_nh%diag%ddt_vn_adv(je,jk,jb) + r_nsubsteps *(z_ddt_vn_apc-z_ddt_vn_cor) + END IF + ! + IF (p_nh%diag%ddt_vn_cor_is_associated) THEN + p_nh%diag%ddt_vn_cor(je,jk,jb)= p_nh%diag%ddt_vn_cor(je,jk,jb) + r_nsubsteps * z_ddt_vn_cor + END IF + ! + END IF + ! + IF (p_nh%diag%ddt_vn_pgr_is_associated) THEN + p_nh%diag%ddt_vn_pgr(je,jk,jb) = p_nh%diag%ddt_vn_pgr(je,jk,jb) + r_nsubsteps * z_ddt_vn_pgr + END IF + ! + IF (p_nh%diag%ddt_vn_phd_is_associated) THEN + p_nh%diag%ddt_vn_phd(je,jk,jb) = p_nh%diag%ddt_vn_phd(je,jk,jb) + r_nsubsteps * p_nh%diag%ddt_vn_phy(je,jk,jb) + END IF + ! + IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN + p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + r_nsubsteps * z_ddt_vn_dyn + END IF +#endif + ! 
+ ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_23_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cpd=cpd' + + !$ser data cpd=cpd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing wgt_nnew_vel=wgt_nnew_vel' + + !$ser data wgt_nnew_vel=wgt_nnew_vel + + PRINT *, 'Serializing wgt_nnow_vel=wgt_nnow_vel' + + !$ser data wgt_nnow_vel=wgt_nnow_vel + + PRINT *, 'Serializing vn_nnow=p_nh%prog(nnow)%vn(:,:,1)' + + !$ser data vn_nnow=p_nh%prog(nnow)%vn(:,:,1) + + PRINT *, 'Serializing ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1)' + + !$ser data ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1) + + PRINT *, 'Serializing ddt_vn_adv_ntl2=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl2)' + + !$ser data ddt_vn_adv_ntl2=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl2) + + PRINT *, 'Serializing ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1)' + + !$ser data ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data z_theta_v_e=z_theta_v_e(:,:,1) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) + + PRINT *, 'Serializing vn_nnew=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn_nnew=p_nh%prog(nnew)%vn(:,:,1) + + ELSE + + + !$ser savepoint mo_solve_nonhydro_stencil_24_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cpd=cpd' + + !$ser data cpd=cpd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing vn_nnow=p_nh%prog(nnow)%vn(:,:,1)' + + !$ser data vn_nnow=p_nh%prog(nnow)%vn(:,:,1) + + PRINT *, 'Serializing ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1)' + + !$ser data ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1) + + PRINT *, 'Serializing ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1)' + + !$ser data ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data 
z_theta_v_e=z_theta_v_e(:,:,1) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) + + PRINT *, 'Serializing vn_nnew=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn_nnew=p_nh%prog(nnew)%vn(:,:,1) +!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! + p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnow)%vn(je,jk,jb) + dtime * & + & ( p_nh%diag%ddt_vn_apc_pc(je,jk,jb,ntl1) & + & -cpd*z_theta_v_e(je,jk,jb)*z_gradh_exner(je,jk,jb) & + & +p_nh%diag%ddt_vn_phy(je,jk,jb) ) + ! + ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_24_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cpd=cpd' + + !$ser data cpd=cpd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing vn_nnow=p_nh%prog(nnow)%vn(:,:,1)' + + !$ser data vn_nnow=p_nh%prog(nnow)%vn(:,:,1) + + PRINT *, 'Serializing ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1)' + + !$ser data ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1) + + PRINT *, 'Serializing ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1)' + + !$ser data ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data z_theta_v_e=z_theta_v_e(:,:,1) + + PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' + + !$ser data z_gradh_exner=z_gradh_exner(:,:,1) + + PRINT *, 'Serializing vn_nnew=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn_nnew=p_nh%prog(nnew)%vn(:,:,1) + ENDIF + + IF (lhdiff_rcf .AND. istep == 2 .AND. (divdamp_order == 4 .OR. divdamp_order == 24)) THEN ! fourth-order divergence damping + ! 
Compute gradient of divergence of gradient of divergence for fourth-order divergence damping + + + !$ser savepoint mo_solve_nonhydro_stencil_25_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing geofac_grdiv=p_int%geofac_grdiv(:,:,1)' + + !$ser data geofac_grdiv=p_int%geofac_grdiv(:,:,1) + + PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' + + !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) + + PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' + + !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) +!$ACC PARALLEL IF( i_am_accel_node ) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = 1, nlev + z_graddiv2_vn(je,jk) = p_int%geofac_grdiv(je,1,jb)*z_graddiv_vn(jk,je,jb) & + + p_int%geofac_grdiv(je,2,jb)*z_graddiv_vn(jk,iqidx(je,jb,1),iqblk(je,jb,1)) & + + p_int%geofac_grdiv(je,3,jb)*z_graddiv_vn(jk,iqidx(je,jb,2),iqblk(je,jb,2)) & + + p_int%geofac_grdiv(je,4,jb)*z_graddiv_vn(jk,iqidx(je,jb,3),iqblk(je,jb,3)) & + + p_int%geofac_grdiv(je,5,jb)*z_graddiv_vn(jk,iqidx(je,jb,4),iqblk(je,jb,4)) +#else +!$NEC outerloop_unroll(6) + DO jk = 1, nlev + DO je = i_startidx, i_endidx + z_graddiv2_vn(je,jk) = p_int%geofac_grdiv(je,1,jb)*z_graddiv_vn(je,jk,jb) & + + p_int%geofac_grdiv(je,2,jb)*z_graddiv_vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & + + p_int%geofac_grdiv(je,3,jb)*z_graddiv_vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & + + p_int%geofac_grdiv(je,4,jb)*z_graddiv_vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & + + p_int%geofac_grdiv(je,5,jb)*z_graddiv_vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) +#endif + + ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_25_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing geofac_grdiv=p_int%geofac_grdiv(:,:,1)' + + !$ser data geofac_grdiv=p_int%geofac_grdiv(:,:,1) + + PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' + + !$ser data 
z_graddiv_vn=z_graddiv_vn(:,:,1) + + PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' + + !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) + + ENDIF + + IF (lhdiff_rcf .AND. istep == 2) THEN + ! apply divergence damping if diffusion is not called every sound-wave time step + IF (divdamp_order == 2 .OR. (divdamp_order == 24 .AND. scal_divdamp_o2 > 1.e-6_wp) ) THEN ! 2nd-order divergence damping + + + !$ser savepoint mo_solve_nonhydro_stencil_26_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing scal_divdamp_o2=scal_divdamp_o2' + + !$ser data scal_divdamp_o2=scal_divdamp_o2 + + PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' + + !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) +!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) PRIVATE(z_d_vn_dmp) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! +#ifdef __LOOP_EXCHANGE + z_d_vn_dmp = scal_divdamp_o2*z_graddiv_vn(jk,je,jb) +#else + z_d_vn_dmp = scal_divdamp_o2*z_graddiv_vn(je,jk,jb) +#endif + ! + p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnew)%vn(je,jk,jb) + z_d_vn_dmp + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF (p_nh%diag%ddt_vn_dmp_is_associated) THEN + p_nh%diag%ddt_vn_dmp(je,jk,jb) = p_nh%diag%ddt_vn_dmp(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps + END IF + ! + IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN + p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps + END IF +#endif + ! 
+ ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_26_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing scal_divdamp_o2=scal_divdamp_o2' + + !$ser data scal_divdamp_o2=scal_divdamp_o2 + + PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' + + !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + ENDIF + IF (divdamp_order == 4 .OR. (divdamp_order == 24 .AND. divdamp_fac_o2 <= 4._wp*divdamp_fac) ) THEN + IF (l_limited_area .OR. jg > 1) THEN + ! fourth-order divergence damping with reduced damping coefficient along nest boundary + ! (scal_divdamp is negative whereas bdy_divdamp is positive; decreasing the divergence + ! damping along nest boundaries is beneficial because this reduces the interference + ! with the increased diffusion applied in nh_diffusion) + + + !$ser savepoint mo_solve_nonhydro_stencil_27_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing scal_divdamp=scal_divdamp(:)' + + !$ser data scal_divdamp=scal_divdamp(:) + + PRINT *, 'Serializing bdy_divdamp=bdy_divdamp(:)' + + !$ser data bdy_divdamp=bdy_divdamp(:) + + PRINT *, 'Serializing nudgecoeff_e=p_int%nudgecoeff_e(:,1)' + + !$ser data nudgecoeff_e=p_int%nudgecoeff_e(:,1) + + PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' + + !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) +!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) PRIVATE(z_d_vn_dmp) + DO jk = 1, nlev +!DIR$ IVDEP +!$NEC ivdep + DO je = i_startidx, i_endidx + ! + z_d_vn_dmp = (scal_divdamp(jk)+bdy_divdamp(jk)*p_int%nudgecoeff_e(je,jb))*z_graddiv2_vn(je,jk) + ! + p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnew)%vn(je,jk,jb) + z_d_vn_dmp + ! 
+#ifdef __ENABLE_DDT_VN_XYZ__ + IF (p_nh%diag%ddt_vn_dmp_is_associated) THEN + p_nh%diag%ddt_vn_dmp(je,jk,jb) = p_nh%diag%ddt_vn_dmp(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps + END IF + ! + IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN + p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps + END IF +#endif + ! + ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_27_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing scal_divdamp=scal_divdamp(:)' + + !$ser data scal_divdamp=scal_divdamp(:) + + PRINT *, 'Serializing bdy_divdamp=bdy_divdamp(:)' + + !$ser data bdy_divdamp=bdy_divdamp(:) + + PRINT *, 'Serializing nudgecoeff_e=p_int%nudgecoeff_e(:,1)' + + !$ser data nudgecoeff_e=p_int%nudgecoeff_e(:,1) + + PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' + + !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + ELSE ! fourth-order divergence damping + + + !$ser savepoint mo_solve_nonhydro_4th_order_divdamp_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing scal_divdamp=scal_divdamp(:)' + + !$ser data scal_divdamp=scal_divdamp(:) + + PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' + + !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + +!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) PRIVATE(z_d_vn_dmp) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! + z_d_vn_dmp = scal_divdamp(jk)*z_graddiv2_vn(je,jk) + ! + p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnew)%vn(je,jk,jb) + z_d_vn_dmp + ! 
+#ifdef __ENABLE_DDT_VN_XYZ__ + IF (p_nh%diag%ddt_vn_dmp_is_associated) THEN + p_nh%diag%ddt_vn_dmp(je,jk,jb) = p_nh%diag%ddt_vn_dmp(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps + END IF + ! + IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN + p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps + END IF +#endif + ! + ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_4th_order_divdamp_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing scal_divdamp=scal_divdamp(:)' + + !$ser data scal_divdamp=scal_divdamp(:) + + PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' + + !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + ENDIF + ENDIF + ENDIF + + IF (is_iau_active) THEN ! add analysis increment from data assimilation + + + !$ser savepoint mo_solve_nonhydro_stencil_28_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing iau_wgt_dyn=iau_wgt_dyn' + + !$ser data iau_wgt_dyn=iau_wgt_dyn + + PRINT *, 'Serializing vn_incr=p_nh%diag%vn_incr(:,:,1)' + + !$ser data vn_incr=p_nh%diag%vn_incr(:,:,1) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) PRIVATE(z_d_vn_iau) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! + z_d_vn_iau = iau_wgt_dyn*p_nh%diag%vn_incr(je,jk,jb) + ! + p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnew)%vn(je,jk,jb) + z_d_vn_iau + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF (istep == 2) THEN + IF (p_nh%diag%ddt_vn_iau_is_associated) THEN + p_nh%diag%ddt_vn_iau(je,jk,jb) = p_nh%diag%ddt_vn_iau(je,jk,jb) + z_d_vn_iau * r_dtimensubsteps + END IF + ! 
+ IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN + p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + z_d_vn_iau * r_dtimensubsteps + END IF + END IF +#endif + ! + ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_28_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing iau_wgt_dyn=iau_wgt_dyn' + + !$ser data iau_wgt_dyn=iau_wgt_dyn + + PRINT *, 'Serializing vn_incr=p_nh%diag%vn_incr(:,:,1)' + + !$ser data vn_incr=p_nh%diag%vn_incr(:,:,1) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + ENDIF + + ! Classic Rayleigh damping mechanism for vn (requires reference state !!) + ! + IF ( rayleigh_type == RAYLEIGH_CLASSIC ) THEN + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) PRIVATE(z_ddt_vn_ray) + DO jk = 1, nrdmax(jg) +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! + z_ddt_vn_ray = -p_nh%metrics%rayleigh_vn(jk) * (p_nh%prog(nnew)%vn(je,jk,jb) - p_nh%ref%vn_ref(je,jk,jb)) + ! + p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnew)%vn(je,jk,jb) + z_ddt_vn_ray * dtime + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF (istep == 2) THEN + IF (p_nh%diag%ddt_vn_ray_is_associated) THEN + p_nh%diag%ddt_vn_ray(je,jk,jb) = p_nh%diag%ddt_vn_ray(je,jk,jb) + z_ddt_vn_ray * r_nsubsteps + END IF + ! + IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN + p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + z_ddt_vn_ray * r_nsubsteps + END IF + END IF +#endif + ! + ENDDO + ENDDO + !$ACC END PARALLEL + ENDIF + ENDDO +!$OMP END DO + + ! Boundary update of horizontal velocity + IF (istep == 1 .AND. (l_limited_area .OR. 
jg > 1)) THEN + rl_start = 1 + rl_end = grf_bdywidth_e + + i_startblk = p_patch%edges%start_block(rl_start) + i_endblk = p_patch%edges%end_block(rl_end) + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk, i_endblk + + CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + + !$ser savepoint mo_solve_nonhydro_stencil_29_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing grf_tend_vn=p_nh%diag%grf_tend_vn(:,:,1)' + + !$ser data grf_tend_vn=p_nh%diag%grf_tend_vn(:,:,1) + + PRINT *, 'Serializing vn_now=p_nh%prog(nnow)%vn(:,:,1)' + + !$ser data vn_now=p_nh%prog(nnow)%vn(:,:,1) + + PRINT *, 'Serializing vn_new=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn_new=p_nh%prog(nnew)%vn(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 1, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + ! + p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnow)%vn(je,jk,jb) + p_nh%diag%grf_tend_vn(je,jk,jb) * dtime + ! +#ifdef __ENABLE_DDT_VN_XYZ__ + IF (p_nh%diag%ddt_vn_grf_is_associated) THEN + p_nh%diag%ddt_vn_grf(je,jk,jb) = p_nh%diag%ddt_vn_grf(je,jk,jb) + p_nh%diag%grf_tend_vn(je,jk,jb) * r_nsubsteps + END IF + ! + IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN + p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + p_nh%diag%grf_tend_vn(je,jk,jb) * r_nsubsteps + END IF +#endif + ! 
+ ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_29_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing grf_tend_vn=p_nh%diag%grf_tend_vn(:,:,1)' + + !$ser data grf_tend_vn=p_nh%diag%grf_tend_vn(:,:,1) + + PRINT *, 'Serializing vn_now=p_nh%prog(nnow)%vn(:,:,1)' + + !$ser data vn_now=p_nh%prog(nnow)%vn(:,:,1) + + PRINT *, 'Serializing vn_new=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn_new=p_nh%prog(nnew)%vn(:,:,1) + + ENDDO +!$OMP END DO + + ENDIF + + ! Preparations for nest boundary interpolation of mass fluxes from parent domain + IF (jg > 1 .AND. grf_intmethod_e >= 5 .AND. idiv_method == 1 .AND. jstep == 0 .AND. istep == 1) THEN + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG + +!$OMP DO PRIVATE(ic,je,jb,jk) ICON_OMP_DEFAULT_SCHEDULE + DO ic = 1, p_nh%metrics%bdy_mflx_e_dim + je = p_nh%metrics%bdy_mflx_e_idx(ic) + jb = p_nh%metrics%bdy_mflx_e_blk(ic) +!DIR$ IVDEP + !$ACC LOOP VECTOR + DO jk = 1, nlev + p_nh%diag%grf_bdy_mflx(jk,ic,2) = p_nh%diag%grf_tend_mflx(je,jk,jb) + p_nh%diag%grf_bdy_mflx(jk,ic,1) = prep_adv%mass_flx_me(je,jk,jb) - dt_shift*p_nh%diag%grf_bdy_mflx(jk,ic,2) + ENDDO + + ENDDO +!$OMP END DO + + !$ACC END PARALLEL + + ENDIF + +!$OMP END PARALLEL + + + !------------------------- + ! communication phase + IF (timers_level > 5) THEN + CALL timer_stop(timer_solve_nh_vnupd) + CALL timer_start(timer_solve_nh_exch) + ENDIF + + IF (itype_comm == 1) THEN + IF (istep == 1) THEN + CALL sync_patch_array_mult(SYNC_E,p_patch,2,p_nh%prog(nnew)%vn,z_rho_e,opt_varname="vn_nnew and z_rho_e") + ELSE + CALL sync_patch_array(SYNC_E,p_patch,p_nh%prog(nnew)%vn,opt_varname="vn_nnew") + ENDIF + ENDIF + + IF (idiv_method == 2 .AND. 
istep == 1) THEN + CALL sync_patch_array(SYNC_E,p_patch,z_theta_v_e,opt_varname="z_theta_v_e") + END IF + + IF (timers_level > 5) THEN + CALL timer_stop(timer_solve_nh_exch) + CALL timer_start(timer_solve_nh_edgecomp) + ENDIF + ! end communication phase + !------------------------- + +!$OMP PARALLEL PRIVATE (rl_start,rl_end,i_startblk,i_endblk) + rl_start = 5 + rl_end = min_rledge_int - 2 + + i_startblk = p_patch%edges%start_block(rl_start) + i_endblk = p_patch%edges%end_block(rl_end) + + rl_start_2 = 1 + rl_end_2 = min_rledge + + i_startblk_2 = p_patch%edges%start_block(rl_start_2) + i_endblk_2 = p_patch%edges%end_block(rl_end_2) + + CALL get_indices_e(p_patch, 1, i_startblk_2, i_endblk_2, & + i_startidx_2, i_endidx_2, rl_start_2, rl_end_2) + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,z_vn_avg) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk, i_endblk + + CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + IF (istep == 1) THEN + + + + !$ser savepoint mo_solve_nonhydro_stencil_30_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing e_flx_avg=p_int%e_flx_avg(:,:,1)' + + !$ser data e_flx_avg=p_int%e_flx_avg(:,:,1) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing geofac_grdiv=p_int%geofac_grdiv(:,:,1)' + + !$ser data geofac_grdiv=p_int%geofac_grdiv(:,:,1) + + PRINT *, 'Serializing rbf_vec_coeff_e=p_int%rbf_vec_coeff_e_dsl(:,:,1)' + + !$ser data rbf_vec_coeff_e=p_int%rbf_vec_coeff_e_dsl(:,:,1) + + PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' + + !$ser data z_vn_avg=z_vn_avg(:,:) + + PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' + + !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) + + PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' + + !$ser data vt=p_nh%diag%vt(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) +#ifdef __LOOP_EXCHANGE 
+ DO je = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = 1, nlev +#else +!$NEC outerloop_unroll(8) + DO jk = 1, nlev +!$NEC vovertake + DO je = i_startidx, i_endidx +#endif + ! Average normal wind components in order to get nearly second-order accurate divergence + z_vn_avg(je,jk) = p_int%e_flx_avg(je,1,jb)*p_nh%prog(nnew)%vn(je,jk,jb) & + + p_int%e_flx_avg(je,2,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & + + p_int%e_flx_avg(je,3,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & + + p_int%e_flx_avg(je,4,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & + + p_int%e_flx_avg(je,5,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) + + ! Compute gradient of divergence of vn for divergence damping +#ifdef __LOOP_EXCHANGE + z_graddiv_vn(jk,je,jb) = p_int%geofac_grdiv(je,1,jb)*p_nh%prog(nnew)%vn(je,jk,jb) & +#else + z_graddiv_vn(je,jk,jb) = p_int%geofac_grdiv(je,1,jb)*p_nh%prog(nnew)%vn(je,jk,jb) & +#endif + + p_int%geofac_grdiv(je,2,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & + + p_int%geofac_grdiv(je,3,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & + + p_int%geofac_grdiv(je,4,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & + + p_int%geofac_grdiv(je,5,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) + + ! 
RBF reconstruction of tangential wind component + p_nh%diag%vt(je,jk,jb) = p_int%rbf_vec_coeff_e(1,je,jb) & + * p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & + + p_int%rbf_vec_coeff_e(2,je,jb) & + * p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & + + p_int%rbf_vec_coeff_e(3,je,jb) & + * p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & + + p_int%rbf_vec_coeff_e(4,je,jb) & + * p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) + ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_30_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing e_flx_avg=p_int%e_flx_avg(:,:,1)' + + !$ser data e_flx_avg=p_int%e_flx_avg(:,:,1) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing geofac_grdiv=p_int%geofac_grdiv(:,:,1)' + + !$ser data geofac_grdiv=p_int%geofac_grdiv(:,:,1) + + PRINT *, 'Serializing rbf_vec_coeff_e=p_int%rbf_vec_coeff_e_dsl(:,:,1)' + + !$ser data rbf_vec_coeff_e=p_int%rbf_vec_coeff_e_dsl(:,:,1) + + PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' + + !$ser data z_vn_avg=z_vn_avg(:,:) + + PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' + + !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) + + PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' + + !$ser data vt=p_nh%diag%vt(:,:,1) + + ELSE IF (itime_scheme >= 5) THEN + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = 1, nlev +#else + DO jk = 1, nlev + DO je = i_startidx, i_endidx +#endif + ! 
Average normal wind components in order to get nearly second-order accurate divergence + z_vn_avg(je,jk) = p_int%e_flx_avg(je,1,jb)*p_nh%prog(nnew)%vn(je,jk,jb) & + + p_int%e_flx_avg(je,2,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & + + p_int%e_flx_avg(je,3,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & + + p_int%e_flx_avg(je,4,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & + + p_int%e_flx_avg(je,5,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) + + ! RBF reconstruction of tangential wind component + p_nh%diag%vt(je,jk,jb) = p_int%rbf_vec_coeff_e(1,je,jb) & + * p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & + + p_int%rbf_vec_coeff_e(2,je,jb) & + * p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & + + p_int%rbf_vec_coeff_e(3,je,jb) & + * p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & + + p_int%rbf_vec_coeff_e(4,je,jb) & + * p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) + + ENDDO + ENDDO +!$ACC END PARALLEL + + ELSE + + + !$ser savepoint mo_solve_nonhydro_stencil_31_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing e_flx_avg=p_int%e_flx_avg(:,:,1)' + + !$ser data e_flx_avg=p_int%e_flx_avg(:,:,1) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' + + !$ser data z_vn_avg=z_vn_avg(:,:) +!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) +#ifdef __LOOP_EXCHANGE + DO je = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = 1, nlev +#else +!$NEC outerloop_unroll(8) + DO jk = 1, nlev +!$NEC vovertake + DO je = i_startidx, i_endidx +#endif + ! 
Average normal wind components in order to get nearly second-order accurate divergence + z_vn_avg(je,jk) = p_int%e_flx_avg(je,1,jb)*p_nh%prog(nnew)%vn(je,jk,jb) & + + p_int%e_flx_avg(je,2,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & + + p_int%e_flx_avg(je,3,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & + + p_int%e_flx_avg(je,4,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & + + p_int%e_flx_avg(je,5,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) + ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_31_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing e_flx_avg=p_int%e_flx_avg(:,:,1)' + + !$ser data e_flx_avg=p_int%e_flx_avg(:,:,1) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' + + !$ser data z_vn_avg=z_vn_avg(:,:) + ENDIF + + IF (idiv_method == 1) THEN ! Compute fluxes at edges using averaged velocities + ! 
corresponding computation for idiv_method=2 follows later + + + !$ser savepoint mo_solve_nonhydro_stencil_32_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' + + !$ser data z_rho_e=z_rho_e(:,:,1) + + PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' + + !$ser data z_vn_avg=z_vn_avg(:,:) + + PRINT *, 'Serializing ddqz_z_full_e=p_nh%metrics%ddqz_z_full_e(:,:,1)' + + !$ser data ddqz_z_full_e=p_nh%metrics%ddqz_z_full_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data z_theta_v_e=z_theta_v_e(:,:,1) + + PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' + + !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_fl_e=z_theta_v_fl_e(:,:,1)' + + !$ser data z_theta_v_fl_e=z_theta_v_fl_e(:,:,1) +!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) + DO jk = 1,nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + + p_nh%diag%mass_fl_e(je,jk,jb) = z_rho_e(je,jk,jb) * & + z_vn_avg(je,jk) * p_nh%metrics%ddqz_z_full_e(je,jk,jb) + z_theta_v_fl_e(je,jk,jb) = p_nh%diag%mass_fl_e(je,jk,jb) * & + z_theta_v_e(je,jk,jb) + + ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_32_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' + + !$ser data z_rho_e=z_rho_e(:,:,1) + + PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' + + !$ser data z_vn_avg=z_vn_avg(:,:) + + PRINT *, 'Serializing ddqz_z_full_e=p_nh%metrics%ddqz_z_full_e(:,:,1)' + + !$ser data ddqz_z_full_e=p_nh%metrics%ddqz_z_full_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' + + !$ser data z_theta_v_e=z_theta_v_e(:,:,1) + + PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' + + !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_fl_e=z_theta_v_fl_e(:,:,1)' + + !$ser data z_theta_v_fl_e=z_theta_v_fl_e(:,:,1) + + IF 
(lsave_mflx .AND. istep == 2) THEN ! store mass flux for nest boundary interpolation +#ifndef _OPENACC + DO je = i_startidx, i_endidx + IF (p_patch%edges%refin_ctrl(je,jb) <= -4 .AND. p_patch%edges%refin_ctrl(je,jb) >= -6) THEN +!DIR$ IVDEP + DO jk=1,nlev + p_nh%diag%mass_fl_e_sv(je,jk,jb) = p_nh%diag%mass_fl_e(je,jk,jb) + ENDDO + ENDIF + ENDDO +#else + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) + DO jk=1,nlev + DO je = i_startidx, i_endidx + IF (p_patch%edges%refin_ctrl(je,jb) <= -4 .AND. p_patch%edges%refin_ctrl(je,jb) >= -6) THEN + p_nh%diag%mass_fl_e_sv(je,jk,jb) = p_nh%diag%mass_fl_e(je,jk,jb) + ENDIF + ENDDO + ENDDO + !$ACC END PARALLEL +#endif + ENDIF + + IF (lprep_adv .AND. istep == 2) THEN ! Preprations for tracer advection + IF (lclean_mflx) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_33_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing vn_traj=prep_adv%vn_traj(:,:,1)' + + !$ser data vn_traj=prep_adv%vn_traj(:,:,1) + + PRINT *, 'Serializing mass_flx_me=prep_adv%mass_flx_me(:,:,1)' + + !$ser data mass_flx_me=prep_adv%mass_flx_me(:,:,1) +!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) + DO jk = 1, nlev +!$NEC ivdep + DO je = i_startidx, i_endidx + prep_adv%vn_traj(je,jk,jb) = 0._wp + prep_adv%mass_flx_me(je,jk,jb) = 0._wp + ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_33_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing vn_traj=prep_adv%vn_traj(:,:,1)' + + !$ser data vn_traj=prep_adv%vn_traj(:,:,1) + + PRINT *, 'Serializing mass_flx_me=prep_adv%mass_flx_me(:,:,1)' + + !$ser data mass_flx_me=prep_adv%mass_flx_me(:,:,1) + + ENDIF + + + !$ser savepoint mo_solve_nonhydro_stencil_34_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' + + !$ser data 
r_nsubsteps=r_nsubsteps + + PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' + + !$ser data z_vn_avg=z_vn_avg(:,:) + + PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' + + !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) + + PRINT *, 'Serializing vn_traj=prep_adv%vn_traj(:,:,1)' + + !$ser data vn_traj=prep_adv%vn_traj(:,:,1) + + PRINT *, 'Serializing mass_flx_me=prep_adv%mass_flx_me(:,:,1)' + + !$ser data mass_flx_me=prep_adv%mass_flx_me(:,:,1) +!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) + DO jk = 1, nlev +!$NEC ivdep + DO je = i_startidx, i_endidx + prep_adv%vn_traj(je,jk,jb) = prep_adv%vn_traj(je,jk,jb) + r_nsubsteps*z_vn_avg(je,jk) + prep_adv%mass_flx_me(je,jk,jb) = prep_adv%mass_flx_me(je,jk,jb) + r_nsubsteps*p_nh%diag%mass_fl_e(je,jk,jb) + ENDDO + ENDDO +!$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_34_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' + + !$ser data r_nsubsteps=r_nsubsteps + + PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' + + !$ser data z_vn_avg=z_vn_avg(:,:) + + PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' + + !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) + + PRINT *, 'Serializing vn_traj=prep_adv%vn_traj(:,:,1)' + + !$ser data vn_traj=prep_adv%vn_traj(:,:,1) + + PRINT *, 'Serializing mass_flx_me=prep_adv%mass_flx_me(:,:,1)' + + !$ser data mass_flx_me=prep_adv%mass_flx_me(:,:,1) + + ENDIF + + ENDIF + + IF (istep == 1 .OR. itime_scheme >= 5) THEN + ! 
Compute contravariant correction for vertical velocity at full levels + + + !$ser savepoint mo_solve_nonhydro_stencil_35_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1)' + + !$ser data ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1) + + PRINT *, 'Serializing ddxt_z_full=p_nh%metrics%ddxt_z_full(:,:,1)' + + !$ser data ddxt_z_full=p_nh%metrics%ddxt_z_full(:,:,1) + + PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' + + !$ser data vt=p_nh%diag%vt(:,:,1) + + PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' + + !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = nflatlev(jg), nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + z_w_concorr_me(je,jk,jb) = & + p_nh%prog(nnew)%vn(je,jk,jb)*p_nh%metrics%ddxn_z_full(je,jk,jb) + & + p_nh%diag%vt(je,jk,jb) *p_nh%metrics%ddxt_z_full(je,jk,jb) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_35_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1)' + + !$ser data ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1) + + PRINT *, 'Serializing ddxt_z_full=p_nh%metrics%ddxt_z_full(:,:,1)' + + !$ser data ddxt_z_full=p_nh%metrics%ddxt_z_full(:,:,1) + + PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' + + !$ser data vt=p_nh%diag%vt(:,:,1) + + PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' + + !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) + ENDIF + + IF (istep == 1) THEN + ! Interpolate vn to interface levels and compute horizontal part of kinetic energy on edges + ! 
(needed in velocity tendencies called at istep=2) + + + !$ser savepoint mo_solve_nonhydro_stencil_36_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfac_e=p_nh%metrics%wgtfac_e(:,:,1)' + + !$ser data wgtfac_e=p_nh%metrics%wgtfac_e(:,:,1) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' + + !$ser data vt=p_nh%diag%vt(:,:,1) + + PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' + + !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) + + PRINT *, 'Serializing z_vt_ie=z_vt_ie(:,:,1)' + + !$ser data z_vt_ie=z_vt_ie(:,:,1) + + PRINT *, 'Serializing z_kin_hor_e=z_kin_hor_e(:,:,1)' + + !$ser data z_kin_hor_e=z_kin_hor_e(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) +!$NEC outerloop_unroll(3) + DO jk = 2, nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + p_nh%diag%vn_ie(je,jk,jb) = & + p_nh%metrics%wgtfac_e(je,jk,jb) *p_nh%prog(nnew)%vn(je,jk ,jb) + & + (1._wp - p_nh%metrics%wgtfac_e(je,jk,jb))*p_nh%prog(nnew)%vn(je,jk-1,jb) + z_vt_ie(je,jk,jb) = & + p_nh%metrics%wgtfac_e(je,jk,jb) *p_nh%diag%vt(je,jk ,jb) + & + (1._wp - p_nh%metrics%wgtfac_e(je,jk,jb))*p_nh%diag%vt(je,jk-1,jb) + z_kin_hor_e(je,jk,jb) = 0.5_wp*(p_nh%prog(nnew)%vn(je,jk,jb)**2 + p_nh%diag%vt(je,jk,jb)**2) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_36_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing wgtfac_e=p_nh%metrics%wgtfac_e(:,:,1)' + + !$ser data wgtfac_e=p_nh%metrics%wgtfac_e(:,:,1) + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' + + !$ser data vt=p_nh%diag%vt(:,:,1) + + PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' + + !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) + + PRINT *, 'Serializing z_vt_ie=z_vt_ie(:,:,1)' + + !$ser data 
z_vt_ie=z_vt_ie(:,:,1) + + PRINT *, 'Serializing z_kin_hor_e=z_kin_hor_e(:,:,1)' + + !$ser data z_kin_hor_e=z_kin_hor_e(:,:,1) + + IF (.NOT. l_vert_nested) THEN + ! Top and bottom levels +!DIR$ IVDEP + + + !$ser savepoint mo_solve_nonhydro_stencil_37_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' + + !$ser data vt=p_nh%diag%vt(:,:,1) + + PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' + + !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) + + PRINT *, 'Serializing z_vt_ie=z_vt_ie(:,:,1)' + + !$ser data z_vt_ie=z_vt_ie(:,:,1) + + PRINT *, 'Serializing z_kin_hor_e=z_kin_hor_e(:,:,1)' + + !$ser data z_kin_hor_e=z_kin_hor_e(:,:,1) + + + !$ser savepoint mo_solve_nonhydro_stencil_38_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing wgtfacq_e=p_nh%metrics%wgtfacq_e_dsl(:,:,1)' + + !$ser data wgtfacq_e=p_nh%metrics%wgtfacq_e_dsl(:,:,1) + + PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' + + !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO je = i_startidx, i_endidx + ! Quadratic extrapolation at the top turned out to cause numerical instability in pathological cases, + ! thus we use a no-gradient condition in the upper half layer + p_nh%diag%vn_ie(je,1,jb) = p_nh%prog(nnew)%vn(je,1,jb) + ! vt_ie(jk=1) is actually unused, but we need it for convenience of implementation + z_vt_ie(je,1,jb) = p_nh%diag%vt(je,1,jb) + ! 
+ z_kin_hor_e(je,1,jb) = 0.5_wp*(p_nh%prog(nnew)%vn(je,1,jb)**2 + p_nh%diag%vt(je,1,jb)**2) + p_nh%diag%vn_ie(je,nlevp1,jb) = & + p_nh%metrics%wgtfacq_e(je,1,jb)*p_nh%prog(nnew)%vn(je,nlev,jb) + & + p_nh%metrics%wgtfacq_e(je,2,jb)*p_nh%prog(nnew)%vn(je,nlev-1,jb) + & + p_nh%metrics%wgtfacq_e(je,3,jb)*p_nh%prog(nnew)%vn(je,nlev-2,jb) + ENDDO + !$ACC END PARALLEL + + + !$ser savepoint mo_solve_nonhydro_stencil_37_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' + + !$ser data vt=p_nh%diag%vt(:,:,1) + + PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' + + !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) + + PRINT *, 'Serializing z_vt_ie=z_vt_ie(:,:,1)' + + !$ser data z_vt_ie=z_vt_ie(:,:,1) + + PRINT *, 'Serializing z_kin_hor_e=z_kin_hor_e(:,:,1)' + + !$ser data z_kin_hor_e=z_kin_hor_e(:,:,1) + + !$ser savepoint mo_solve_nonhydro_stencil_38_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' + + !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) + + PRINT *, 'Serializing wgtfacq_e=p_nh%metrics%wgtfacq_e_dsl(:,:,1)' + + !$ser data wgtfacq_e=p_nh%metrics%wgtfacq_e_dsl(:,:,1) + + PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' + + !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) + + ELSE + ! vn_ie(jk=1) is interpolated horizontally from the parent domain, and linearly interpolated in time + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR +!DIR$ IVDEP + DO je = i_startidx, i_endidx + p_nh%diag%vn_ie(je,1,jb) = p_nh%diag%vn_ie_ubc(je,1,jb)+dt_linintp_ubc_nnew*p_nh%diag%vn_ie_ubc(je,2,jb) + ! vt_ie(jk=1) is actually unused, but we need it for convenience of implementation + z_vt_ie(je,1,jb) = p_nh%diag%vt(je,1,jb) + ! 
+ z_kin_hor_e(je,1,jb) = 0.5_wp*(p_nh%prog(nnew)%vn(je,1,jb)**2 + p_nh%diag%vt(je,1,jb)**2) + p_nh%diag%vn_ie(je,nlevp1,jb) = & + p_nh%metrics%wgtfacq_e(je,1,jb)*p_nh%prog(nnew)%vn(je,nlev,jb) + & + p_nh%metrics%wgtfacq_e(je,2,jb)*p_nh%prog(nnew)%vn(je,nlev-1,jb) + & + p_nh%metrics%wgtfacq_e(je,3,jb)*p_nh%prog(nnew)%vn(je,nlev-2,jb) + ENDDO + !$ACC END PARALLEL + ENDIF + ENDIF + + ENDDO +!$OMP END DO + + ! Apply mass fluxes across lateral nest boundary interpolated from parent domain + IF (jg > 1 .AND. grf_intmethod_e >= 5 .AND. idiv_method == 1) THEN + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + ! PGI 21.2 requires GANG-VECTOR on this level. (Having the jk as VECTOR crashes.) + ! PRIVATE clause is required as je,jb are used in each vector thread. + !$ACC LOOP GANG VECTOR PRIVATE(je, jb) + +!$OMP DO PRIVATE(ic,je,jb,jk) ICON_OMP_DEFAULT_SCHEDULE + DO ic = 1, p_nh%metrics%bdy_mflx_e_dim + je = p_nh%metrics%bdy_mflx_e_idx(ic) + jb = p_nh%metrics%bdy_mflx_e_blk(ic) + + ! This is needed for tracer mass consistency along the lateral boundaries + IF (lprep_adv .AND. istep == 2) THEN ! subtract mass flux added previously... + !$ACC LOOP SEQ +!$NEC ivdep + DO jk = 1, nlev + prep_adv%mass_flx_me(je,jk,jb) = prep_adv%mass_flx_me(je,jk,jb) - r_nsubsteps*p_nh%diag%mass_fl_e(je,jk,jb) + prep_adv%vn_traj(je,jk,jb) = prep_adv%vn_traj(je,jk,jb) - r_nsubsteps*p_nh%diag%mass_fl_e(je,jk,jb) / & + (z_rho_e(je,jk,jb) * p_nh%metrics%ddqz_z_full_e(je,jk,jb)) + ENDDO + ENDIF + +!DIR$ IVDEP + !$ACC LOOP SEQ +!$NEC ivdep + DO jk = 1, nlev + p_nh%diag%mass_fl_e(je,jk,jb) = p_nh%diag%grf_bdy_mflx(jk,ic,1) + & + REAL(jstep,wp)*dtime*p_nh%diag%grf_bdy_mflx(jk,ic,2) + z_theta_v_fl_e(je,jk,jb) = p_nh%diag%mass_fl_e(je,jk,jb) * z_theta_v_e(je,jk,jb) + ENDDO + + IF (lprep_adv .AND. istep == 2) THEN ! ... 
and add the corrected one again + !$ACC LOOP SEQ +!$NEC ivdep + DO jk = 1, nlev + prep_adv%mass_flx_me(je,jk,jb) = prep_adv%mass_flx_me(je,jk,jb) + r_nsubsteps*p_nh%diag%mass_fl_e(je,jk,jb) + prep_adv%vn_traj(je,jk,jb) = prep_adv%vn_traj(je,jk,jb) + r_nsubsteps*p_nh%diag%mass_fl_e(je,jk,jb) / & + (z_rho_e(je,jk,jb) * p_nh%metrics%ddqz_z_full_e(je,jk,jb)) + ENDDO + ENDIF + + ENDDO +!$OMP END DO + + !$ACC END PARALLEL + + ENDIF + + + ! It turned out that it is sufficient to compute the contravariant correction in the + ! predictor step at time level n+1; repeating the calculation in the corrector step + ! has negligible impact on the results except in very-high resolution runs with extremely steep mountains + IF (istep == 1 .OR. itime_scheme >= 5) THEN + + rl_start = 3 + rl_end = min_rlcell_int - 1 + + i_startblk = p_patch%cells%start_block(rl_start) + i_endblk = p_patch%cells%end_block(rl_end) + +#ifdef _OPENACC +! +! This is one of the very few code divergences for OPENACC (see comment below) +! + DO jb = i_startblk, i_endblk + + CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + ! ... and to interface levels + + + !$ser savepoint mo_solve_nonhydro_stencil_39_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing e_bln_c_s=p_int%e_bln_c_s(:,:,1)' + + !$ser data e_bln_c_s=p_int%e_bln_c_s(:,:,1) + + PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' + + !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) + + PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' + + !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(32, 4) PRIVATE(z_w_concorr_mc_m1, z_w_concorr_mc_m0) + DO jk = nflatlev(jg)+1, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + ! 
COMMENT: this optimization yields drastically better performance in an OpenACC context + ! Interpolate contravariant correction to cell centers... + z_w_concorr_mc_m1 = & + p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),jk-1,ieblk(jc,jb,1)) + & + p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),jk-1,ieblk(jc,jb,2)) + & + p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),jk-1,ieblk(jc,jb,3)) + z_w_concorr_mc_m0 = & + p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),jk,ieblk(jc,jb,1)) + & + p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),jk,ieblk(jc,jb,2)) + & + p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),jk,ieblk(jc,jb,3)) + p_nh%diag%w_concorr_c(jc,jk,jb) = & + p_nh%metrics%wgtfac_c(jc,jk,jb)*z_w_concorr_mc_m0 + & + (1._vp - p_nh%metrics%wgtfac_c(jc,jk,jb))*z_w_concorr_mc_m1 + ENDDO + ENDDO + !$ACC END PARALLEL + + + !$ser savepoint mo_solve_nonhydro_stencil_39_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing e_bln_c_s=p_int%e_bln_c_s(:,:,1)' + + !$ser data e_bln_c_s=p_int%e_bln_c_s(:,:,1) + + PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' + + !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) + + PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' + + !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + + + !$ser savepoint mo_solve_nonhydro_stencil_40_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing e_bln_c_s=p_int%e_bln_c_s(:,:,1)' + + !$ser data e_bln_c_s=p_int%e_bln_c_s(:,:,1) + + PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' + + !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) + + PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' + + !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data 
w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR PRIVATE(z_w_concorr_mc_m2, z_w_concorr_mc_m1, z_w_concorr_mc_m0) +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + ! Interpolate contravariant correction to cell centers... + z_w_concorr_mc_m2 = & + p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),nlev-2,ieblk(jc,jb,1)) + & + p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),nlev-2,ieblk(jc,jb,2)) + & + p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),nlev-2,ieblk(jc,jb,3)) + + z_w_concorr_mc_m1 = & + p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),nlev-1,ieblk(jc,jb,1)) + & + p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),nlev-1,ieblk(jc,jb,2)) + & + p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),nlev-1,ieblk(jc,jb,3)) + + z_w_concorr_mc_m0 = & + p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),nlev,ieblk(jc,jb,1)) + & + p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),nlev,ieblk(jc,jb,2)) + & + p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),nlev,ieblk(jc,jb,3)) + + p_nh%diag%w_concorr_c(jc,nlevp1,jb) = & + p_nh%metrics%wgtfacq_c(jc,1,jb)*z_w_concorr_mc_m0 + & + p_nh%metrics%wgtfacq_c(jc,2,jb)*z_w_concorr_mc_m1 + & + p_nh%metrics%wgtfacq_c(jc,3,jb)*z_w_concorr_mc_m2 + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_40_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing e_bln_c_s=p_int%e_bln_c_s(:,:,1)' + + !$ser data e_bln_c_s=p_int%e_bln_c_s(:,:,1) + + PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' + + !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) + + PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' + + !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + + ENDDO +#else +! +! OMP-only code +! 
+!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc,z_w_concorr_mc) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk, i_endblk + + CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + ! Interpolate contravariant correction to cell centers... +#ifdef __LOOP_EXCHANGE + DO jc = i_startidx, i_endidx +!DIR$ IVDEP + DO jk = nflatlev(jg), nlev +#else + DO jk = nflatlev(jg), nlev + DO jc = i_startidx, i_endidx +#endif + + z_w_concorr_mc(jc,jk) = & + p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),jk,ieblk(jc,jb,1)) + & + p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),jk,ieblk(jc,jb,2)) + & + p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),jk,ieblk(jc,jb,3)) + + ENDDO + ENDDO + + ! ... and to interface levels + DO jk = nflatlev(jg)+1, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + p_nh%diag%w_concorr_c(jc,jk,jb) = & + p_nh%metrics%wgtfac_c(jc,jk,jb)*z_w_concorr_mc(jc,jk) + & + (1._vp - p_nh%metrics%wgtfac_c(jc,jk,jb))*z_w_concorr_mc(jc,jk-1) + ENDDO + ENDDO +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + p_nh%diag%w_concorr_c(jc,nlevp1,jb) = & + p_nh%metrics%wgtfacq_c(jc,1,jb)*z_w_concorr_mc(jc,nlev) + & + p_nh%metrics%wgtfacq_c(jc,2,jb)*z_w_concorr_mc(jc,nlev-1) + & + p_nh%metrics%wgtfacq_c(jc,3,jb)*z_w_concorr_mc(jc,nlev-2) + ENDDO + + ENDDO +!$OMP END DO +#endif + ENDIF + + IF (idiv_method == 2) THEN ! Compute fluxes at edges from original velocities + rl_start = 7 + rl_end = min_rledge_int - 3 + + i_startblk = p_patch%edges%start_block(rl_start) + i_endblk = p_patch%edges%end_block(rl_end) + + IF (jg > 1 .OR. 
l_limited_area) THEN + + CALL init_zero_contiguous_dp(& + z_theta_v_fl_e(1,1,p_patch%edges%start_block(5)), & + nproma * nlev * (i_startblk - p_patch%edges%start_block(5) + 1), & + opt_acc_async=.TRUE., lacc=i_am_accel_node) +!$OMP BARRIER + ENDIF + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk, i_endblk + + CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 1,nlev +!DIR$ IVDEP + DO je = i_startidx, i_endidx + + p_nh%diag%mass_fl_e(je,jk,jb) = z_rho_e(je,jk,jb) & + * p_nh%prog(nnew)%vn(je,jk,jb) * p_nh%metrics%ddqz_z_full_e(je,jk,jb) + z_theta_v_fl_e(je,jk,jb)= p_nh%diag%mass_fl_e(je,jk,jb) & + * z_theta_v_e(je,jk,jb) + + ENDDO + ENDDO + !$ACC END PARALLEL + + ENDDO +!$OMP END DO + + ENDIF ! idiv_method = 2 + +!$OMP END PARALLEL + + IF (timers_level > 5) THEN + CALL timer_stop(timer_solve_nh_edgecomp) + CALL timer_start(timer_solve_nh_vimpl) + ENDIF + + IF (idiv_method == 2) THEN ! use averaged divergence - idiv_method=1 is inlined for better cache efficiency + +!TODO remove the wait after everything is ASYNC(1) + !$ACC WAIT + + ! 
horizontal divergences of rho and rhotheta are processed in one step for efficiency + CALL div_avg(p_nh%diag%mass_fl_e, p_patch, p_int, p_int%c_bln_avg, z_mass_fl_div, & + opt_in2=z_theta_v_fl_e, opt_out2=z_theta_v_fl_div, opt_rlstart=4, & + opt_rlend=min_rlcell_int) + ENDIF + +!$OMP PARALLEL PRIVATE (rl_start,rl_end,i_startblk,i_endblk,jk_start) + + rl_start = grf_bdywidth_c+1 + rl_end = min_rlcell_int + + i_startblk = p_patch%cells%start_block(rl_start) + i_endblk = p_patch%cells%end_block(rl_end) + + IF (l_vert_nested) THEN + jk_start = 2 + ELSE + jk_start = 1 + ENDIF + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc,z_w_expl,z_contr_w_fl_l,z_rho_expl,z_exner_expl, & +!$OMP z_a,z_b,z_c,z_g,z_q,z_alpha,z_beta,z_gamma,ic,z_flxdiv_mass,z_flxdiv_theta ) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk, i_endblk + + CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + IF (idiv_method == 1) THEN + ! horizontal divergences of rho and rhotheta are inlined and processed in one step for efficiency + + + !$ser savepoint mo_solve_nonhydro_stencil_41_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing geofac_div=p_int%geofac_div(:,:,1)' + + !$ser data geofac_div=p_int%geofac_div(:,:,1) + + PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' + + !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_fl_e=z_theta_v_fl_e(:,:,1)' + + !$ser data z_theta_v_fl_e=z_theta_v_fl_e(:,:,1) + + PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' + + !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) + + PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' + + !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) +#ifdef __LOOP_EXCHANGE + DO jc = i_startidx, i_endidx +!DIR$ IVDEP, PREFERVECTOR + DO jk = 1, nlev +#else +!$NEC outerloop_unroll(8) + DO jk = 1, nlev + DO jc = 
i_startidx, i_endidx +#endif + z_flxdiv_mass(jc,jk) = & + p_nh%diag%mass_fl_e(ieidx(jc,jb,1),jk,ieblk(jc,jb,1)) * p_int%geofac_div(jc,1,jb) + & + p_nh%diag%mass_fl_e(ieidx(jc,jb,2),jk,ieblk(jc,jb,2)) * p_int%geofac_div(jc,2,jb) + & + p_nh%diag%mass_fl_e(ieidx(jc,jb,3),jk,ieblk(jc,jb,3)) * p_int%geofac_div(jc,3,jb) + + z_flxdiv_theta(jc,jk) = & + z_theta_v_fl_e(ieidx(jc,jb,1),jk,ieblk(jc,jb,1)) * p_int%geofac_div(jc,1,jb) + & + z_theta_v_fl_e(ieidx(jc,jb,2),jk,ieblk(jc,jb,2)) * p_int%geofac_div(jc,2,jb) + & + z_theta_v_fl_e(ieidx(jc,jb,3),jk,ieblk(jc,jb,3)) * p_int%geofac_div(jc,3,jb) + END DO + END DO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_41_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing geofac_div=p_int%geofac_div(:,:,1)' + + !$ser data geofac_div=p_int%geofac_div(:,:,1) + + PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' + + !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) + + PRINT *, 'Serializing z_theta_v_fl_e=z_theta_v_fl_e(:,:,1)' + + !$ser data z_theta_v_fl_e=z_theta_v_fl_e(:,:,1) + + PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' + + !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) + + PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' + + !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) + + ELSE ! idiv_method = 2 - just copy values to local 2D array + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 1, nlev + DO jc = i_startidx, i_endidx + z_flxdiv_mass(jc,jk) = z_mass_fl_div(jc,jk,jb) + z_flxdiv_theta(jc,jk) = z_theta_v_fl_div(jc,jk,jb) + END DO + END DO + !$ACC END PARALLEL + + ENDIF + + ! upper boundary conditions for rho_ic and theta_v_ic in the case of vertical nesting + ! + ! kept constant during predictor/corrector step, and linearly interpolated for + ! each dynamics substep. + ! Hence, copying them every dynamics substep during the predictor step (istep=1) is sufficient. + IF (l_vert_nested .AND. 
istep == 1) THEN + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + + p_nh%diag%theta_v_ic(jc,1,jb) = p_nh%diag%theta_v_ic_ubc(jc,jb,1) & + & + dt_linintp_ubc * p_nh%diag%theta_v_ic_ubc(jc,jb,2) + + p_nh%diag%rho_ic(jc,1,jb) = p_nh%diag%rho_ic_ubc(jc,jb,1) & + & + dt_linintp_ubc * p_nh%diag%rho_ic_ubc(jc,jb,2) + + z_mflx_top(jc,jb) = p_nh%diag%mflx_ic_ubc(jc,jb,1) & + & + dt_linintp_ubc * p_nh%diag%mflx_ic_ubc(jc,jb,2) + + ENDDO + !$ACC END PARALLEL + ENDIF + + ! Start of vertically implicit solver part for sound-wave terms; + ! advective terms and gravity-wave terms are treated explicitly + ! + IF (istep == 2 .AND. (itime_scheme >= 4)) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_42_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cpd=cpd' + + !$ser data cpd=cpd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing wgt_nnew_vel=wgt_nnew_vel' + + !$ser data wgt_nnew_vel=wgt_nnew_vel + + PRINT *, 'Serializing wgt_nnow_vel=wgt_nnow_vel' + + !$ser data wgt_nnow_vel=wgt_nnow_vel + + PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' + + !$ser data z_w_expl=z_w_expl(:,:) + + PRINT *, 'Serializing w_nnow=p_nh%prog(nnow)%w(:,:,jb)' + + !$ser data w_nnow=p_nh%prog(nnow)%w(:,:,jb) + + PRINT *, 'Serializing ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1)' + + !$ser data ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1) + + PRINT *, 'Serializing ddt_w_adv_ntl2=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl2)' + + !$ser data ddt_w_adv_ntl2=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl2) + + PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb)' + + !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) + + PRINT *, 
'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) + + PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb)' + + !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 2, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + + ! explicit part for w - use temporally averaged advection terms for better numerical stability + ! the explicit weight for the pressure-gradient term is already included in z_th_ddz_exner_c + z_w_expl(jc,jk) = p_nh%prog(nnow)%w(jc,jk,jb) + dtime * & + (wgt_nnow_vel*p_nh%diag%ddt_w_adv_pc(jc,jk,jb,ntl1) + & + wgt_nnew_vel*p_nh%diag%ddt_w_adv_pc(jc,jk,jb,ntl2) & + -cpd*z_th_ddz_exner_c(jc,jk,jb) ) + + ! contravariant vertical velocity times density for explicit part + z_contr_w_fl_l(jc,jk) = p_nh%diag%rho_ic(jc,jk,jb)*(-p_nh%diag%w_concorr_c(jc,jk,jb) & + + p_nh%metrics%vwind_expl_wgt(jc,jb)*p_nh%prog(nnow)%w(jc,jk,jb) ) + + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_42_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cpd=cpd' + + !$ser data cpd=cpd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing wgt_nnew_vel=wgt_nnew_vel' + + !$ser data wgt_nnew_vel=wgt_nnew_vel + + PRINT *, 'Serializing wgt_nnow_vel=wgt_nnow_vel' + + !$ser data wgt_nnow_vel=wgt_nnow_vel + + PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' + + !$ser data z_w_expl=z_w_expl(:,:) + + PRINT *, 'Serializing w_nnow=p_nh%prog(nnow)%w(:,:,jb)' + + !$ser data w_nnow=p_nh%prog(nnow)%w(:,:,jb) + + PRINT *, 'Serializing ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1)' + + !$ser data ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1) + + PRINT *, 'Serializing ddt_w_adv_ntl2=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl2)' + + !$ser data ddt_w_adv_ntl2=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl2) + + PRINT 
*, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb)' + + !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) + + PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb)' + + !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb) + + ELSE + + + !$ser savepoint mo_solve_nonhydro_stencil_43_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cpd=cpd' + + !$ser data cpd=cpd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' + + !$ser data z_w_expl=z_w_expl(:,:) + + PRINT *, 'Serializing w_nnow=p_nh%prog(nnow)%w(:,:,jb)' + + !$ser data w_nnow=p_nh%prog(nnow)%w(:,:,jb) + + PRINT *, 'Serializing ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1)' + + !$ser data ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1) + + PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb)' + + !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) + + PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb)' + + !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 2, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + + ! 
explicit part for w + z_w_expl(jc,jk) = p_nh%prog(nnow)%w(jc,jk,jb) + dtime * & + (p_nh%diag%ddt_w_adv_pc(jc,jk,jb,ntl1)-cpd*z_th_ddz_exner_c(jc,jk,jb)) + + ! contravariant vertical velocity times density for explicit part + z_contr_w_fl_l(jc,jk) = p_nh%diag%rho_ic(jc,jk,jb)*(-p_nh%diag%w_concorr_c(jc,jk,jb) & + + p_nh%metrics%vwind_expl_wgt(jc,jb)*p_nh%prog(nnow)%w(jc,jk,jb) ) + + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_43_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cpd=cpd' + + !$ser data cpd=cpd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' + + !$ser data z_w_expl=z_w_expl(:,:) + + PRINT *, 'Serializing w_nnow=p_nh%prog(nnow)%w(:,:,jb)' + + !$ser data w_nnow=p_nh%prog(nnow)%w(:,:,jb) + + PRINT *, 'Serializing ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1)' + + !$ser data ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1) + + PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb)' + + !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) + + PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb)' + + !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb) + + ENDIF + + ! 
Solver coefficients + + !$ser savepoint mo_solve_nonhydro_stencil_44_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cvd=cvd' + + !$ser data cvd=cvd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing rd=rd' + + !$ser data rd=rd + + PRINT *, 'Serializing z_beta=z_beta(:,:)' + + !$ser data z_beta=z_beta(:,:) + + PRINT *, 'Serializing exner_nnow=p_nh%prog(nnow)%exner(:,:,jb)' + + !$ser data exner_nnow=p_nh%prog(nnow)%exner(:,:,jb) + + PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' + + !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) + + PRINT *, 'Serializing theta_v_nnow=p_nh%prog(nnow)%theta_v(:,:,jb)' + + !$ser data theta_v_nnow=p_nh%prog(nnow)%theta_v(:,:,jb) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) + + PRINT *, 'Serializing z_alpha=z_alpha(:,:)' + + !$ser data z_alpha=z_alpha(:,:) + + PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,jb)' + + !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,jb) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 1, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + z_beta(jc,jk)=dtime*rd*p_nh%prog(nnow)%exner(jc,jk,jb) / & + (cvd*p_nh%prog(nnow)%rho(jc,jk,jb)*p_nh%prog(nnow)%theta_v(jc,jk,jb)) * & + p_nh%metrics%inv_ddqz_z_full(jc,jk,jb) + + z_alpha(jc,jk)= p_nh%metrics%vwind_impl_wgt(jc,jb)* & + & p_nh%diag%theta_v_ic(jc,jk,jb)*p_nh%diag%rho_ic(jc,jk,jb) + ENDDO + ENDDO + !$ACC END PARALLEL + + + !$ser savepoint mo_solve_nonhydro_stencil_44_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing 
cvd=cvd' + + !$ser data cvd=cvd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing rd=rd' + + !$ser data rd=rd + + PRINT *, 'Serializing z_beta=z_beta(:,:)' + + !$ser data z_beta=z_beta(:,:) + + PRINT *, 'Serializing exner_nnow=p_nh%prog(nnow)%exner(:,:,jb)' + + !$ser data exner_nnow=p_nh%prog(nnow)%exner(:,:,jb) + + PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' + + !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) + + PRINT *, 'Serializing theta_v_nnow=p_nh%prog(nnow)%theta_v(:,:,jb)' + + !$ser data theta_v_nnow=p_nh%prog(nnow)%theta_v(:,:,jb) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) + + PRINT *, 'Serializing z_alpha=z_alpha(:,:)' + + !$ser data z_alpha=z_alpha(:,:) + + PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,jb)' + + !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,jb) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) + + + + !$ser savepoint mo_solve_nonhydro_stencil_45_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_alpha=z_alpha(:,:)' + + !$ser data z_alpha=z_alpha(:,:) + + + !$ser savepoint mo_solve_nonhydro_stencil_45_b_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_q=z_q(:,:)' + + !$ser data z_q=z_q(:,:) + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + z_alpha(jc,nlevp1) = 0.0_wp + ! + ! Note: z_q is used in the tridiagonal matrix solver for w below. + ! z_q(1) is always zero, irrespective of w(1)=0 or w(1)/=0 + ! 
z_q(1)=0 is equivalent to cp(slev)=c(slev)/b(slev) in mo_math_utilities:tdma_solver_vec + z_q(jc,1) = 0._vp + ENDDO + !$ACC END PARALLEL + + + !$ser savepoint mo_solve_nonhydro_stencil_45_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_alpha=z_alpha(:,:)' + + !$ser data z_alpha=z_alpha(:,:) + + !$ser savepoint mo_solve_nonhydro_stencil_45_b_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_q=z_q(:,:)' + + !$ser data z_q=z_q(:,:) + + + ! upper boundary condition for w (interpolated from parent domain in case of vertical nesting) + ! Note: the upper b.c. reduces to w(1) = 0 in the absence of diabatic heating + IF (l_open_ubc .AND. .NOT. l_vert_nested) THEN + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + p_nh%prog(nnew)%w(jc,1,jb) = z_thermal_exp(jc,jb) + z_contr_w_fl_l(jc,1) = p_nh%diag%rho_ic(jc,1,jb)*p_nh%prog(nnow)%w(jc,1,jb) & + * p_nh%metrics%vwind_expl_wgt(jc,jb) + ENDDO + !$ACC END PARALLEL + ELSE IF (.NOT. l_open_ubc .AND. .NOT. 
l_vert_nested) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_46_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing w_nnew=p_nh%prog(nnew)%w(:,:,jb)' + + !$ser data w_nnew=p_nh%prog(nnew)%w(:,:,jb) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + p_nh%prog(nnew)%w(jc,1,jb) = 0._wp + z_contr_w_fl_l(jc,1) = 0._wp + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_46_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing w_nnew=p_nh%prog(nnew)%w(:,:,jb)' + + !$ser data w_nnew=p_nh%prog(nnew)%w(:,:,jb) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + ELSE ! l_vert_nested + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + ! UBC for w: horizontally interpolated from the parent interface level, + ! and linearly interpolated in time. + p_nh%prog(nnew)%w(jc,1,jb) = p_nh%diag%w_ubc(jc,jb,1) & + & + dt_linintp_ubc_nnew * p_nh%diag%w_ubc(jc,jb,2) + ! + z_contr_w_fl_l(jc,1) = z_mflx_top(jc,jb) * p_nh%metrics%vwind_expl_wgt(jc,jb) + ENDDO + !$ACC END PARALLEL + ENDIF + + ! 
lower boundary condition for w, consistent with contravariant correction + + !$ser savepoint mo_solve_nonhydro_stencil_47_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing w_nnew=p_nh%prog(nnew)%w(:,:,jb)' + + !$ser data w_nnew=p_nh%prog(nnew)%w(:,:,jb) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + p_nh%prog(nnew)%w(jc,nlevp1,jb) = p_nh%diag%w_concorr_c(jc,nlevp1,jb) + z_contr_w_fl_l(jc,nlevp1) = 0.0_wp + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_47_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing w_nnew=p_nh%prog(nnew)%w(:,:,jb)' + + !$ser data w_nnew=p_nh%prog(nnew)%w(:,:,jb) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) + + + ! Explicit parts of density and Exner pressure + ! + ! 
Top level first + + + !$ser savepoint mo_solve_nonhydro_stencil_48_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' + + !$ser data z_rho_expl=z_rho_expl(:,:) + + PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' + + !$ser data z_exner_expl=z_exner_expl(:,:) + + PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' + + !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) + + PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' + + !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,jb)' + + !$ser data exner_pr=p_nh%diag%exner_pr(:,:,jb) + + PRINT *, 'Serializing z_beta=z_beta(:,:)' + + !$ser data z_beta=z_beta(:,:) + + PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' + + !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) + + PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb)' + + !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + z_rho_expl(jc,1)= p_nh%prog(nnow)%rho(jc,1,jb) & + & -dtime*p_nh%metrics%inv_ddqz_z_full(jc,1,jb) & + & *(z_flxdiv_mass(jc,1) & + & +z_contr_w_fl_l(jc,1 ) & + & -z_contr_w_fl_l(jc,2 )) + + z_exner_expl(jc,1)= p_nh%diag%exner_pr(jc,1,jb) & + & -z_beta (jc,1)*(z_flxdiv_theta(jc,1) & + & +p_nh%diag%theta_v_ic(jc,1,jb)*z_contr_w_fl_l(jc,1) & + & -p_nh%diag%theta_v_ic(jc,2,jb)*z_contr_w_fl_l(jc,2)) & + & 
+dtime*p_nh%diag%ddt_exner_phy(jc,1,jb) + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_48_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' + + !$ser data z_rho_expl=z_rho_expl(:,:) + + PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' + + !$ser data z_exner_expl=z_exner_expl(:,:) + + PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' + + !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) + + PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' + + !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,jb)' + + !$ser data exner_pr=p_nh%diag%exner_pr(:,:,jb) + + PRINT *, 'Serializing z_beta=z_beta(:,:)' + + !$ser data z_beta=z_beta(:,:) + + PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' + + !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) + + PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb)' + + !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb) + + ! 
Other levels + + !$ser savepoint mo_solve_nonhydro_stencil_49_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' + + !$ser data z_rho_expl=z_rho_expl(:,:) + + PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' + + !$ser data z_exner_expl=z_exner_expl(:,:) + + PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' + + !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) + + PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' + + !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,jb)' + + !$ser data exner_pr=p_nh%diag%exner_pr(:,:,jb) + + PRINT *, 'Serializing z_beta=z_beta(:,:)' + + !$ser data z_beta=z_beta(:,:) + + PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' + + !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) + + PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb)' + + !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 2, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + z_rho_expl(jc,jk)= p_nh%prog(nnow)%rho(jc,jk ,jb) & + & -dtime*p_nh%metrics%inv_ddqz_z_full(jc,jk ,jb) & + & *(z_flxdiv_mass(jc,jk ) & + & +z_contr_w_fl_l(jc,jk ) & + & -z_contr_w_fl_l(jc,jk+1 )) + + z_exner_expl(jc,jk)= p_nh%diag%exner_pr(jc,jk,jb) - z_beta(jc,jk) & + & *(z_flxdiv_theta(jc,jk) & + & +p_nh%diag%theta_v_ic(jc,jk ,jb)*z_contr_w_fl_l(jc,jk ) & + & 
-p_nh%diag%theta_v_ic(jc,jk+1,jb)*z_contr_w_fl_l(jc,jk+1)) & + & +dtime*p_nh%diag%ddt_exner_phy(jc,jk,jb) + + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_49_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' + + !$ser data z_rho_expl=z_rho_expl(:,:) + + PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' + + !$ser data z_exner_expl=z_exner_expl(:,:) + + PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' + + !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) + + PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' + + !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,jb)' + + !$ser data exner_pr=p_nh%diag%exner_pr(:,:,jb) + + PRINT *, 'Serializing z_beta=z_beta(:,:)' + + !$ser data z_beta=z_beta(:,:) + + PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' + + !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) + + PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb)' + + !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb) + + + IF (is_iau_active) THEN ! 
add analysis increments from data assimilation to density and exner pressure + + + !$ser savepoint mo_solve_nonhydro_stencil_50_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing iau_wgt_dyn=iau_wgt_dyn' + + !$ser data iau_wgt_dyn=iau_wgt_dyn + + PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' + + !$ser data z_rho_expl=z_rho_expl(:,:) + + PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' + + !$ser data z_exner_expl=z_exner_expl(:,:) + + PRINT *, 'Serializing rho_incr=p_nh%diag%rho_incr(:,:,jb)' + + !$ser data rho_incr=p_nh%diag%rho_incr(:,:,jb) + + PRINT *, 'Serializing exner_incr=p_nh%diag%exner_incr(:,:,jb)' + + !$ser data exner_incr=p_nh%diag%exner_incr(:,:,jb) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 1, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + z_rho_expl(jc,jk) = z_rho_expl(jc,jk) + iau_wgt_dyn*p_nh%diag%rho_incr(jc,jk,jb) + z_exner_expl(jc,jk) = z_exner_expl(jc,jk) + iau_wgt_dyn*p_nh%diag%exner_incr(jc,jk,jb) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_50_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing iau_wgt_dyn=iau_wgt_dyn' + + !$ser data iau_wgt_dyn=iau_wgt_dyn + + PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' + + !$ser data z_rho_expl=z_rho_expl(:,:) + + PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' + + !$ser data z_exner_expl=z_exner_expl(:,:) + + PRINT *, 'Serializing rho_incr=p_nh%diag%rho_incr(:,:,jb)' + + !$ser data rho_incr=p_nh%diag%rho_incr(:,:,jb) + + PRINT *, 'Serializing exner_incr=p_nh%diag%exner_incr(:,:,jb)' + + !$ser data exner_incr=p_nh%diag%exner_incr(:,:,jb) + + ENDIF + + ! + ! Solve tridiagonal matrix for w + ! +! 
TODO: not parallelized + + + !$ser savepoint mo_solve_nonhydro_stencil_52_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cpd=cpd' + + !$ser data cpd=cpd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' + + !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) + + PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' + + !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) + + PRINT *, 'Serializing z_alpha=z_alpha(:,:)' + + !$ser data z_alpha=z_alpha(:,:) + + PRINT *, 'Serializing z_beta=z_beta(:,:)' + + !$ser data z_beta=z_beta(:,:) + + PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' + + !$ser data z_w_expl=z_w_expl(:,:) + + PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' + + !$ser data z_exner_expl=z_exner_expl(:,:) + + PRINT *, 'Serializing z_q=z_q(:,:)' + + !$ser data z_q=z_q(:,:) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP SEQ +!$NEC outerloop_unroll(8) + DO jk = 2, nlev +!DIR$ IVDEP +!$NEC ivdep + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + z_gamma = dtime*cpd*p_nh%metrics%vwind_impl_wgt(jc,jb)* & + p_nh%diag%theta_v_ic(jc,jk,jb)/p_nh%metrics%ddqz_z_half(jc,jk,jb) + z_a = -z_gamma*z_beta(jc,jk-1)*z_alpha(jc,jk-1) + z_c = -z_gamma*z_beta(jc,jk )*z_alpha(jc,jk+1) + z_b = 1.0_vp+z_gamma*z_alpha(jc,jk) & + *(z_beta(jc,jk-1)+z_beta(jc,jk)) + z_g = 1.0_vp/(z_b+z_a*z_q(jc,jk-1)) + z_q(jc,jk) = - z_c*z_g + p_nh%prog(nnew)%w(jc,jk,jb) = z_w_expl(jc,jk) - z_gamma & + & *(z_exner_expl(jc,jk-1)-z_exner_expl(jc,jk)) + p_nh%prog(nnew)%w(jc,jk,jb) = (p_nh%prog(nnew)%w(jc,jk,jb) & + -z_a*p_nh%prog(nnew)%w(jc,jk-1,jb))*z_g + ENDDO + ENDDO + !$ACC END PARALLEL + + 
!$ser savepoint mo_solve_nonhydro_stencil_52_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cpd=cpd' + + !$ser data cpd=cpd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' + + !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) + + PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' + + !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) + + PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' + + !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) + + PRINT *, 'Serializing z_alpha=z_alpha(:,:)' + + !$ser data z_alpha=z_alpha(:,:) + + PRINT *, 'Serializing z_beta=z_beta(:,:)' + + !$ser data z_beta=z_beta(:,:) + + PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' + + !$ser data z_w_expl=z_w_expl(:,:) + + PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' + + !$ser data z_exner_expl=z_exner_expl(:,:) + + PRINT *, 'Serializing z_q=z_q(:,:)' + + !$ser data z_q=z_q(:,:) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + + !$ser savepoint mo_solve_nonhydro_stencil_53_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_q=z_q' + + !$ser data z_q=z_q + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP SEQ + DO jk = nlev-1, 2, -1 +!DIR$ IVDEP + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + p_nh%prog(nnew)%w(jc,jk,jb) = p_nh%prog(nnew)%w(jc,jk,jb)& + & +p_nh%prog(nnew)%w(jc,jk+1,jb)*z_q(jc,jk) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_53_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_q=z_q' + + !$ser data z_q=z_q + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + ! 
Rayleigh damping mechanism (Klemp,Dudhia,Hassiotis: MWR136,pp.3987-4004) + ! + IF ( rayleigh_type == RAYLEIGH_KLEMP ) THEN + +!$ACC PARALLEL IF( i_am_accel_node ) DEFAULT(PRESENT) ASYNC(1) +!$ACC LOOP GANG VECTOR COLLAPSE(1) +DO jc = 1, nproma + w_1(jc,jb) = p_nh%prog(nnew)%w(jc,1,jb) +ENDDO +!$ACC END PARALLEL + + + !$ser savepoint mo_solve_nonhydro_stencil_54_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_raylfac=z_raylfac(:)' + + !$ser data z_raylfac=z_raylfac(:) + + PRINT *, 'Serializing w_1=w_1(:,1)' + + !$ser data w_1=w_1(:,1) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 2, nrdmax(jg) +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + p_nh%prog(nnew)%w(jc,jk,jb) = z_raylfac(jk)*p_nh%prog(nnew)%w(jc,jk,jb) + & + (1._wp-z_raylfac(jk))*p_nh%prog(nnew)%w(jc,1,jb) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_54_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing z_raylfac=z_raylfac(:)' + + !$ser data z_raylfac=z_raylfac(:) + + PRINT *, 'Serializing w_1=w_1(:,1)' + + !$ser data w_1=w_1(:,1) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + ! Classic Rayleigh damping mechanism for w (requires reference state !!) + ! + ELSE IF ( rayleigh_type == RAYLEIGH_CLASSIC ) THEN + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 2, nrdmax(jg) +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + p_nh%prog(nnew)%w(jc,jk,jb) = p_nh%prog(nnew)%w(jc,jk,jb) & + & - dtime*p_nh%metrics%rayleigh_w(jk) & + & * ( p_nh%prog(nnew)%w(jc,jk,jb) & + & - p_nh%ref%w_ref(jc,jk,jb) ) + ENDDO + ENDDO + !$ACC END PARALLEL + ENDIF + + ! 
Results for thermodynamic variables + + !$ser savepoint mo_solve_nonhydro_stencil_55_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cvd_o_rd=cvd_o_rd' + + !$ser data cvd_o_rd=cvd_o_rd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' + + !$ser data z_rho_expl=z_rho_expl(:,:) + + PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' + + !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' + + !$ser data z_exner_expl=z_exner_expl(:,:) + + PRINT *, 'Serializing exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1)' + + !$ser data exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1) + + PRINT *, 'Serializing z_alpha=z_alpha(:,:)' + + !$ser data z_alpha=z_alpha(:,:) + + PRINT *, 'Serializing z_beta=z_beta' + + !$ser data z_beta=z_beta + + PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing exner_now=p_nh%prog(nnow)%exner(:,:,1)' + + !$ser data exner_now=p_nh%prog(nnow)%exner(:,:,1) + + PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' + + !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) + + PRINT *, 'Serializing exner_new=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) + + PRINT *, 'Serializing theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1)' + + !$ser data theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1) + !$ACC PARALLEL 
IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(128, 1) +!$NEC outerloop_unroll(8) + DO jk = jk_start, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + + ! density + p_nh%prog(nnew)%rho(jc,jk,jb) = z_rho_expl(jc,jk) & + - p_nh%metrics%vwind_impl_wgt(jc,jb)*dtime & + * p_nh%metrics%inv_ddqz_z_full(jc,jk,jb) & + *(p_nh%diag%rho_ic(jc,jk ,jb)*p_nh%prog(nnew)%w(jc,jk ,jb) & + - p_nh%diag%rho_ic(jc,jk+1,jb)*p_nh%prog(nnew)%w(jc,jk+1,jb)) + + ! exner + p_nh%prog(nnew)%exner(jc,jk,jb) = z_exner_expl(jc,jk) & + + p_nh%metrics%exner_ref_mc(jc,jk,jb)-z_beta(jc,jk) & + *(z_alpha(jc,jk )*p_nh%prog(nnew)%w(jc,jk ,jb) & + - z_alpha(jc,jk+1)*p_nh%prog(nnew)%w(jc,jk+1,jb)) + + ! theta + p_nh%prog(nnew)%theta_v(jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb)*p_nh%prog(nnow)%theta_v(jc,jk,jb) & + *( (p_nh%prog(nnew)%exner(jc,jk,jb)/p_nh%prog(nnow)%exner(jc,jk,jb)-1.0_wp) * cvd_o_rd+1.0_wp ) & + / p_nh%prog(nnew)%rho(jc,jk,jb) + + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_55_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing cvd_o_rd=cvd_o_rd' + + !$ser data cvd_o_rd=cvd_o_rd + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' + + !$ser data z_rho_expl=z_rho_expl(:,:) + + PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' + + !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' + + !$ser data z_exner_expl=z_exner_expl(:,:) + + PRINT *, 'Serializing exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1)' + + 
!$ser data exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1) + + PRINT *, 'Serializing z_alpha=z_alpha(:,:)' + + !$ser data z_alpha=z_alpha(:,:) + + PRINT *, 'Serializing z_beta=z_beta' + + !$ser data z_beta=z_beta + + PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing exner_now=p_nh%prog(nnow)%exner(:,:,1)' + + !$ser data exner_now=p_nh%prog(nnow)%exner(:,:,1) + + PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' + + !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) + + PRINT *, 'Serializing exner_new=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) + + PRINT *, 'Serializing theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1)' + + !$ser data theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1) + + ! Special treatment of uppermost layer in the case of vertical nesting + IF (l_vert_nested) THEN + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + + ! density + p_nh%prog(nnew)%rho(jc,1,jb) = z_rho_expl(jc,1) & + - p_nh%metrics%vwind_impl_wgt(jc,jb)*dtime & + * p_nh%metrics%inv_ddqz_z_full(jc,1,jb) & + *(z_mflx_top(jc,jb) - p_nh%diag%rho_ic(jc,2,jb)*p_nh%prog(nnew)%w(jc,2,jb)) + + ! exner + p_nh%prog(nnew)%exner(jc,1,jb) = z_exner_expl(jc,1) & + + p_nh%metrics%exner_ref_mc(jc,1,jb)-z_beta(jc,1) & + *(p_nh%metrics%vwind_impl_wgt(jc,jb)*p_nh%diag%theta_v_ic(jc,1,jb) & + * z_mflx_top(jc,jb) - z_alpha(jc,2)*p_nh%prog(nnew)%w(jc,2,jb)) + + ! theta + p_nh%prog(nnew)%theta_v(jc,1,jb) = p_nh%prog(nnow)%rho(jc,1,jb)*p_nh%prog(nnow)%theta_v(jc,1,jb) & + *( (p_nh%prog(nnew)%exner(jc,1,jb)/p_nh%prog(nnow)%exner(jc,1,jb)-1.0_wp) * cvd_o_rd+1.0_wp ) & + /p_nh%prog(nnew)%rho(jc,1,jb) + + ENDDO + !$ACC END PARALLEL + ENDIF + + + ! 
compute dw/dz for divergence damping term + IF (lhdiff_rcf .AND. istep == 1 .AND. divdamp_type >= 3) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_56_63_8f4253f6-09a1-46f7-a72e-95b3ff4ebb06_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + + PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' + + !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(32, 4) + DO jk = kstart_dd3d(jg), nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + z_dwdz_dd(jc,jk,jb) = p_nh%metrics%inv_ddqz_z_full(jc,jk,jb) * & + ( (p_nh%prog(nnew)%w(jc,jk,jb)-p_nh%prog(nnew)%w(jc,jk+1,jb)) - & + (p_nh%diag%w_concorr_c(jc,jk,jb)-p_nh%diag%w_concorr_c(jc,jk+1,jb)) ) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_56_63_8f4253f6-09a1-46f7-a72e-95b3ff4ebb06_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + + PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' + + !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) + ENDIF + + ! Preparations for tracer advection + IF (lprep_adv .AND. 
istep == 2) THEN + IF (lclean_mflx) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_57_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' + + !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 1, nlev +!$NEC ivdep + DO jc = i_startidx, i_endidx + prep_adv%mass_flx_ic(jc,jk,jb) = 0._wp + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_57_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' + + !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) + + ENDIF + + + !$ser savepoint mo_solve_nonhydro_stencil_58_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' + + !$ser data r_nsubsteps=r_nsubsteps + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) + + PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' + + !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' + + !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = jk_start, nlev +!$NEC ivdep + DO jc = i_startidx, i_endidx + prep_adv%mass_flx_ic(jc,jk,jb) = prep_adv%mass_flx_ic(jc,jk,jb) + r_nsubsteps * ( z_contr_w_fl_l(jc,jk) + & + p_nh%diag%rho_ic(jc,jk,jb) * p_nh%metrics%vwind_impl_wgt(jc,jb) * p_nh%prog(nnew)%w(jc,jk,jb) ) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_58_end 
istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' + + !$ser data r_nsubsteps=r_nsubsteps + + PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' + + !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) + + PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' + + !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' + + !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) + + IF (l_vert_nested) THEN + ! Use mass flux which has been interpolated to the upper nest boundary. + ! This mass flux is also seen by the mass continuity equation (rho). + ! Hence, by using the same mass flux for the tracer mass continuity equations, + ! consistency with continuity (CWC) is ensured. + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + prep_adv%mass_flx_ic(jc,1,jb) = prep_adv%mass_flx_ic(jc,1,jb) + & + r_nsubsteps * z_mflx_top(jc,jb) + ENDDO + !$ACC END PARALLEL + ENDIF + ENDIF + + ! store dynamical part of exner time increment in exner_dyn_incr + ! the conversion into a temperature tendency is done in the NWP interface + IF (istep == 1 .AND. 
idyn_timestep == 1) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_59_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing exner=p_nh%prog(nnow)%exner(:,:,1)' + + !$ser data exner=p_nh%prog(nnow)%exner(:,:,1) + + PRINT *, 'Serializing exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1)' + + !$ser data exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = kstart_moist(jg), nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + p_nh%diag%exner_dyn_incr(jc,jk,jb) = p_nh%prog(nnow)%exner(jc,jk,jb) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_59_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing exner=p_nh%prog(nnow)%exner(:,:,1)' + + !$ser data exner=p_nh%prog(nnow)%exner(:,:,1) + + PRINT *, 'Serializing exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1)' + + !$ser data exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1) + + ELSE IF (istep == 2 .AND. 
idyn_timestep == ndyn_substeps_var(jg)) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_60_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing ndyn_substeps_var=real(ndyn_substeps_var(jg),wp)' + + !$ser data ndyn_substeps_var=real(ndyn_substeps_var(jg),wp) + + PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) + + PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,1)' + + !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,1) + + PRINT *, 'Serializing exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1)' + + !$ser data exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = kstart_moist(jg), nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + p_nh%diag%exner_dyn_incr(jc,jk,jb) = p_nh%prog(nnew)%exner(jc,jk,jb) - & + (p_nh%diag%exner_dyn_incr(jc,jk,jb) + ndyn_substeps_var(jg)*dtime*p_nh%diag%ddt_exner_phy(jc,jk,jb)) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_60_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing ndyn_substeps_var=real(ndyn_substeps_var(jg),wp)' + + !$ser data ndyn_substeps_var=real(ndyn_substeps_var(jg),wp) + + PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) + + PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,1)' + + !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,1) + + PRINT *, 'Serializing exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1)' + + !$ser data exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1) + + ENDIF + + IF (istep == 2 .AND. l_child_vertnest) THEN + ! 
Store values at nest interface levels +!DIR$ IVDEP + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + + p_nh%diag%w_int(jc,jb,idyn_timestep) = & + 0.5_wp*(p_nh%prog(nnow)%w(jc,nshift,jb) + p_nh%prog(nnew)%w(jc,nshift,jb)) + + p_nh%diag%theta_v_ic_int(jc,jb,idyn_timestep) = p_nh%diag%theta_v_ic(jc,nshift,jb) + + p_nh%diag%rho_ic_int(jc,jb,idyn_timestep) = p_nh%diag%rho_ic(jc,nshift,jb) + + p_nh%diag%mflx_ic_int(jc,jb,idyn_timestep) = p_nh%diag%rho_ic(jc,nshift,jb) * & + (p_nh%metrics%vwind_expl_wgt(jc,jb)*p_nh%prog(nnow)%w(jc,nshift,jb) + & + p_nh%metrics%vwind_impl_wgt(jc,jb)*p_nh%prog(nnew)%w(jc,nshift,jb)) + ENDDO + !$ACC END PARALLEL + ENDIF + + ENDDO +!$OMP END DO + + ! Boundary update in case of nesting + IF (l_limited_area .OR. jg > 1) THEN + + rl_start = 1 + rl_end = grf_bdywidth_c + + i_startblk = p_patch%cells%start_block(rl_start) + i_endblk = p_patch%cells%end_block(rl_end) + +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc) ICON_OMP_DEFAULT_SCHEDULE + DO jb = i_startblk, i_endblk + + CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + ! non-MPI-parallelized (serial) case + IF (istep == 1 .AND. my_process_is_mpi_all_seq() ) THEN + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 1, nlev +#if __INTEL_COMPILER != 1400 || __INTEL_COMPILER_UPDATE != 3 +!DIR$ IVDEP +#endif + DO jc = i_startidx, i_endidx + + p_nh%prog(nnew)%rho(jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb) + & + dtime*p_nh%diag%grf_tend_rho(jc,jk,jb) + + p_nh%prog(nnew)%theta_v(jc,jk,jb) = p_nh%prog(nnow)%theta_v(jc,jk,jb) + & + dtime*p_nh%diag%grf_tend_thv(jc,jk,jb) + + ! 
Diagnose exner from rho*theta + p_nh%prog(nnew)%exner(jc,jk,jb) = EXP(rd_o_cvd*LOG(rd_o_p0ref* & + p_nh%prog(nnew)%rho(jc,jk,jb)*p_nh%prog(nnew)%theta_v(jc,jk,jb))) + + p_nh%prog(nnew)%w(jc,jk,jb) = p_nh%prog(nnow)%w(jc,jk,jb) + & + dtime*p_nh%diag%grf_tend_w(jc,jk,jb) + + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + p_nh%prog(nnew)%w(jc,nlevp1,jb) = p_nh%prog(nnow)%w(jc,nlevp1,jb) + & + dtime*p_nh%diag%grf_tend_w(jc,nlevp1,jb) + ENDDO + !$ACC END PARALLEL + + ELSE IF (istep == 1 ) THEN + + ! In the MPI-parallelized case, only rho and w are updated here, + ! and theta_v is preliminarily stored on exner in order to save + ! halo communications + + + + !$ser savepoint mo_solve_nonhydro_stencil_61_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing grf_tend_rho=p_nh%diag%grf_tend_rho(:,:,1)' + + !$ser data grf_tend_rho=p_nh%diag%grf_tend_rho(:,:,1) + + PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing grf_tend_thv=p_nh%diag%grf_tend_thv(:,:,1)' + + !$ser data grf_tend_thv=p_nh%diag%grf_tend_thv(:,:,1) + + PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' + + !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) + + PRINT *, 'Serializing grf_tend_w=p_nh%diag%grf_tend_w(:,:,1)' + + !$ser data grf_tend_w=p_nh%diag%grf_tend_w(:,:,1) + + PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' + + !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) + + PRINT *, 'Serializing exner_new=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) + + PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data 
w_new=p_nh%prog(nnew)%w(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 1, nlev +#if __INTEL_COMPILER != 1400 || __INTEL_COMPILER_UPDATE != 3 +!DIR$ IVDEP +#endif + DO jc = i_startidx, i_endidx + + p_nh%prog(nnew)%rho(jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb) + & + dtime*p_nh%diag%grf_tend_rho(jc,jk,jb) + + ! *** Storing theta_v on exner is done to save MPI communications *** + ! DO NOT TOUCH THIS! + p_nh%prog(nnew)%exner(jc,jk,jb) = p_nh%prog(nnow)%theta_v(jc,jk,jb) + & + dtime*p_nh%diag%grf_tend_thv(jc,jk,jb) + + p_nh%prog(nnew)%w(jc,jk,jb) = p_nh%prog(nnow)%w(jc,jk,jb) + & + dtime*p_nh%diag%grf_tend_w(jc,jk,jb) + + ENDDO + ENDDO + !$ACC END PARALLEL + + + !$ser savepoint mo_solve_nonhydro_stencil_61_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing grf_tend_rho=p_nh%diag%grf_tend_rho(:,:,1)' + + !$ser data grf_tend_rho=p_nh%diag%grf_tend_rho(:,:,1) + + PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing grf_tend_thv=p_nh%diag%grf_tend_thv(:,:,1)' + + !$ser data grf_tend_thv=p_nh%diag%grf_tend_thv(:,:,1) + + PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' + + !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) + + PRINT *, 'Serializing grf_tend_w=p_nh%diag%grf_tend_w(:,:,1)' + + !$ser data grf_tend_w=p_nh%diag%grf_tend_w(:,:,1) + + PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' + + !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) + + PRINT *, 'Serializing exner_new=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) + + PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w_new=p_nh%prog(nnew)%w(:,:,1) + + + !$ser savepoint 
mo_solve_nonhydro_stencil_62_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' + + !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) + + PRINT *, 'Serializing grf_tend_w=p_nh%diag%grf_tend_w(:,:,1)' + + !$ser data grf_tend_w=p_nh%diag%grf_tend_w(:,:,1) + + PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w_new=p_nh%prog(nnew)%w(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + p_nh%prog(nnew)%w(jc,nlevp1,jb) = p_nh%prog(nnow)%w(jc,nlevp1,jb) + & + dtime*p_nh%diag%grf_tend_w(jc,nlevp1,jb) + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_62_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing dtime=dtime' + + !$ser data dtime=dtime + + PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' + + !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) + + PRINT *, 'Serializing grf_tend_w=p_nh%diag%grf_tend_w(:,:,1)' + + !$ser data grf_tend_w=p_nh%diag%grf_tend_w(:,:,1) + + PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w_new=p_nh%prog(nnew)%w(:,:,1) + + ENDIF + + ! compute dw/dz for divergence damping term + IF (lhdiff_rcf .AND. istep == 1 .AND. 
divdamp_type >= 3) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_56_63_3f33cb44-1850-43af-a392-c804c0530f9f_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + + PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' + + !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR TILE(32, 4) + DO jk = kstart_dd3d(jg), nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + z_dwdz_dd(jc,jk,jb) = p_nh%metrics%inv_ddqz_z_full(jc,jk,jb) * & + ( (p_nh%prog(nnew)%w(jc,jk,jb)-p_nh%prog(nnew)%w(jc,jk+1,jb)) - & + (p_nh%diag%w_concorr_c(jc,jk,jb)-p_nh%diag%w_concorr_c(jc,jk+1,jb)) ) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_56_63_3f33cb44-1850-43af-a392-c804c0530f9f_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' + + !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) + + PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + + PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' + + !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) + + ENDIF + + ! Preparations for tracer advection + ! + ! Note that the vertical mass flux at nest boundary points is required in case that + ! vertical tracer transport precedes horizontal tracer transport. + IF (lprep_adv .AND. 
istep == 2) THEN + IF (lclean_mflx) THEN + + + !$ser savepoint mo_solve_nonhydro_stencil_64_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' + + !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) + !$ACC KERNELS IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + prep_adv%mass_flx_ic(i_startidx:i_endidx,:,jb) = 0._wp + !$ACC END KERNELS + + !$ser savepoint mo_solve_nonhydro_stencil_64_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' + + !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) + ENDIF + + + !$ser savepoint mo_solve_nonhydro_stencil_65_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' + + !$ser data r_nsubsteps=r_nsubsteps + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) + + PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' + + !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) + + PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' + + !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) + + PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' + + !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) + + PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w_new=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + + PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' + + !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = jk_start, nlev +!DIR$ IVDEP +!$NEC ivdep + DO jc = i_startidx, i_endidx + prep_adv%mass_flx_ic(jc,jk,jb) = prep_adv%mass_flx_ic(jc,jk,jb) + r_nsubsteps*p_nh%diag%rho_ic(jc,jk,jb)* & + 
(p_nh%metrics%vwind_expl_wgt(jc,jb)*p_nh%prog(nnow)%w(jc,jk,jb) + & + p_nh%metrics%vwind_impl_wgt(jc,jb)*p_nh%prog(nnew)%w(jc,jk,jb) - p_nh%diag%w_concorr_c(jc,jk,jb) ) + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_65_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' + + !$ser data r_nsubsteps=r_nsubsteps + + PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' + + !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) + + PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' + + !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) + + PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' + + !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) + + PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' + + !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) + + PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' + + !$ser data w_new=p_nh%prog(nnew)%w(:,:,1) + + PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' + + !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) + + PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' + + !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) + + IF (l_vert_nested) THEN + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR + DO jc = i_startidx, i_endidx + prep_adv%mass_flx_ic(jc,1,jb) = prep_adv%mass_flx_ic(jc,1,jb) + & + r_nsubsteps * (p_nh%diag%mflx_ic_ubc(jc,jb,1) & + + dt_linintp_ubc * p_nh%diag%mflx_ic_ubc(jc,jb,2)) + ENDDO + !$ACC END PARALLEL + ENDIF + ENDIF + + ENDDO +!$OMP END DO + + ENDIF + +!$OMP END PARALLEL + + + !------------------------- + ! communication phase + + IF (timers_level > 5) THEN + CALL timer_stop(timer_solve_nh_vimpl) + CALL timer_start(timer_solve_nh_exch) + ENDIF + + IF (itype_comm == 1) THEN + IF (istep == 1) THEN + IF (lhdiff_rcf .AND. divdamp_type >= 3) THEN + ! 
Synchronize w and vertical contribution to divergence damping +#ifdef __MIXED_PRECISION + CALL sync_patch_array_mult_mp(SYNC_C,p_patch,1,1,p_nh%prog(nnew)%w,f3din1_sp=z_dwdz_dd, & + & opt_varname="w_nnew and z_dwdz_dd") +#else + CALL sync_patch_array_mult(SYNC_C,p_patch,2,p_nh%prog(nnew)%w,z_dwdz_dd, & + & opt_varname="w_nnew and z_dwdz_dd") +#endif + ELSE + ! Only w needs to be synchronized + CALL sync_patch_array(SYNC_C,p_patch,p_nh%prog(nnew)%w,opt_varname="w_nnew") + ENDIF + ELSE ! istep = 2: synchronize all prognostic variables + CALL sync_patch_array_mult(SYNC_C,p_patch,3,p_nh%prog(nnew)%rho, & + p_nh%prog(nnew)%exner,p_nh%prog(nnew)%w,opt_varname="rho, exner, w_nnew") + ENDIF + ENDIF + + IF (timers_level > 5) CALL timer_stop(timer_solve_nh_exch) + + ! end communication phase + !------------------------- + + ENDDO ! istep-loop + + + ! The remaining computations are needed for MPI-parallelized applications only + IF ( .NOT. my_process_is_mpi_all_seq() ) THEN + +! OpenMP directives are commented for the NEC because the overhead is too large +#if !defined( __SX__ ) +!$OMP PARALLEL PRIVATE(rl_start,rl_end,i_startblk,i_endblk) +#endif + IF (l_limited_area .OR. jg > 1) THEN + + ! Index list over halo points lying in the boundary interpolation zone + ! 
Note: this list typically contains at most 10 grid points + + + !$ser savepoint mo_solve_nonhydro_stencil_66_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing rd_o_cvd=rd_o_cvd' + + !$ser data rd_o_cvd=rd_o_cvd + + PRINT *, 'Serializing rd_o_p0ref=rd_o_p0ref' + + !$ser data rd_o_p0ref=rd_o_p0ref + + PRINT *, 'Serializing bdy_halo_c=p_nh%metrics%mask_prog_halo_c_dsl_low_refin(:,1)' + + !$ser data bdy_halo_c=p_nh%metrics%mask_prog_halo_c_dsl_low_refin(:,1) + + PRINT *, 'Serializing rho=p_nh%prog(nnew)%rho(:,:,1)' + + !$ser data rho=p_nh%prog(nnew)%rho(:,:,1) + + PRINT *, 'Serializing theta_v=p_nh%prog(nnew)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnew)%theta_v(:,:,1) + + PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG +#ifndef __SX__ +!$OMP DO PRIVATE(jb,ic,jk,jc) ICON_OMP_DEFAULT_SCHEDULE +#endif + DO ic = 1, p_nh%metrics%bdy_halo_c_dim + + jb = p_nh%metrics%bdy_halo_c_blk(ic) + jc = p_nh%metrics%bdy_halo_c_idx(ic) +!DIR$ IVDEP + !$ACC LOOP VECTOR + DO jk = 1, nlev + p_nh%prog(nnew)%theta_v(jc,jk,jb) = p_nh%prog(nnew)%exner(jc,jk,jb) + + ! 
Diagnose exner from rho*theta + p_nh%prog(nnew)%exner(jc,jk,jb) = EXP(rd_o_cvd*LOG(rd_o_p0ref* & + p_nh%prog(nnew)%rho(jc,jk,jb)*p_nh%prog(nnew)%theta_v(jc,jk,jb))) + + ENDDO + ENDDO + !$ACC END PARALLEL + + rl_start = min_rlcell_int - 1 + rl_end = min_rlcell + + CALL get_indices_c(p_patch, 1, 1, 1, & + i_startidx, i_endidx, rl_start, rl_end) + + + !$ser savepoint mo_solve_nonhydro_stencil_66_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing rd_o_cvd=rd_o_cvd' + + !$ser data rd_o_cvd=rd_o_cvd + + PRINT *, 'Serializing rd_o_p0ref=rd_o_p0ref' + + !$ser data rd_o_p0ref=rd_o_p0ref + + PRINT *, 'Serializing bdy_halo_c=p_nh%metrics%mask_prog_halo_c_dsl_low_refin(:,1)' + + !$ser data bdy_halo_c=p_nh%metrics%mask_prog_halo_c_dsl_low_refin(:,1) + + PRINT *, 'Serializing rho=p_nh%prog(nnew)%rho(:,:,1)' + + !$ser data rho=p_nh%prog(nnew)%rho(:,:,1) + + PRINT *, 'Serializing theta_v=p_nh%prog(nnew)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnew)%theta_v(:,:,1) + + PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) + +#ifndef __SX__ +!$OMP END DO +#endif + + rl_start = 1 + rl_end = grf_bdywidth_c + + i_startblk = p_patch%cells%start_block(rl_start) + i_endblk = p_patch%cells%end_block(rl_end) + +#ifndef __SX__ +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc) ICON_OMP_DEFAULT_SCHEDULE +#endif + DO jb = i_startblk, i_endblk + + CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + + !$ser savepoint mo_solve_nonhydro_stencil_67_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing rd_o_cvd=rd_o_cvd' + + !$ser data rd_o_cvd=rd_o_cvd + + PRINT *, 'Serializing rd_o_p0ref=rd_o_p0ref' + + !$ser data rd_o_p0ref=rd_o_p0ref + + PRINT *, 'Serializing rho=p_nh%prog(nnew)%rho(:,:,1)' + + !$ser data rho=p_nh%prog(nnew)%rho(:,:,1) + + PRINT *, 'Serializing theta_v=p_nh%prog(nnew)%theta_v(:,:,1)' + + !$ser data 
theta_v=p_nh%prog(nnew)%theta_v(:,:,1) + + PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + !$ACC LOOP GANG VECTOR COLLAPSE(2) + DO jk = 1, nlev +!DIR$ IVDEP + DO jc = i_startidx, i_endidx + + p_nh%prog(nnew)%theta_v(jc,jk,jb) = p_nh%prog(nnew)%exner(jc,jk,jb) + + ! Diagnose exner from rhotheta + p_nh%prog(nnew)%exner(jc,jk,jb) = EXP(rd_o_cvd*LOG(rd_o_p0ref* & + p_nh%prog(nnew)%rho(jc,jk,jb)*p_nh%prog(nnew)%theta_v(jc,jk,jb))) + + ENDDO + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_67_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing rd_o_cvd=rd_o_cvd' + + !$ser data rd_o_cvd=rd_o_cvd + + PRINT *, 'Serializing rd_o_p0ref=rd_o_p0ref' + + !$ser data rd_o_p0ref=rd_o_p0ref + + PRINT *, 'Serializing rho=p_nh%prog(nnew)%rho(:,:,1)' + + !$ser data rho=p_nh%prog(nnew)%rho(:,:,1) + + PRINT *, 'Serializing theta_v=p_nh%prog(nnew)%theta_v(:,:,1)' + + !$ser data theta_v=p_nh%prog(nnew)%theta_v(:,:,1) + + PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) + ENDDO +#ifndef __SX__ +!$OMP END DO +#endif + ENDIF + + rl_start = min_rlcell_int - 1 + rl_end = min_rlcell + + i_startblk = p_patch%cells%start_block(rl_start) + i_endblk = p_patch%cells%end_block(rl_end) + +#ifndef __SX__ +!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc) ICON_OMP_DEFAULT_SCHEDULE +#endif + DO jb = i_startblk, i_endblk + + CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & + i_startidx, i_endidx, rl_start, rl_end) + + + !$ser savepoint mo_solve_nonhydro_stencil_68_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing mask_prog_halo_c=p_nh%metrics%mask_prog_halo_c(:,1)' + + !$ser data mask_prog_halo_c=p_nh%metrics%mask_prog_halo_c(:,1) + + PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data 
rho_now=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing exner_new=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) + + PRINT *, 'Serializing exner_now=p_nh%prog(nnow)%exner(:,:,1)' + + !$ser data exner_now=p_nh%prog(nnow)%exner(:,:,1) + + PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' + + !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) + + PRINT *, 'Serializing theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1)' + + !$ser data theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1) + + PRINT *, 'Serializing cvd_o_rd=cvd_o_rd' + + !$ser data cvd_o_rd=cvd_o_rd + !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) + +#ifdef __LOOP_EXCHANGE + !$ACC LOOP GANG + DO jc = i_startidx, i_endidx + IF (p_nh%metrics%mask_prog_halo_c(jc,jb)) THEN +!DIR$ IVDEP + !$ACC LOOP VECTOR + DO jk = 1, nlev +#else + !$ACC LOOP GANG VECTOR TILE(32, 4) + DO jk = 1, nlev + DO jc = i_startidx, i_endidx + IF (p_nh%metrics%mask_prog_halo_c(jc,jb)) THEN +#endif + p_nh%prog(nnew)%theta_v(jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb)*p_nh%prog(nnow)%theta_v(jc,jk,jb) & + *( (p_nh%prog(nnew)%exner(jc,jk,jb)/p_nh%prog(nnow)%exner(jc,jk,jb)-1.0_wp) * cvd_o_rd+1.0_wp ) & + / p_nh%prog(nnew)%rho(jc,jk,jb) + +#ifdef __LOOP_EXCHANGE + ENDDO + ENDIF +#else + ENDIF + ENDDO +#endif + ENDDO + !$ACC END PARALLEL + + !$ser savepoint mo_solve_nonhydro_stencil_68_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr + + PRINT *, 'Serializing mask_prog_halo_c=p_nh%metrics%mask_prog_halo_c(:,1)' + + !$ser data mask_prog_halo_c=p_nh%metrics%mask_prog_halo_c(:,1) + + PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' + + !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) + + PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' + + !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) + + PRINT *, 'Serializing 
exner_new=p_nh%prog(nnew)%exner(:,:,1)' + + !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) + + PRINT *, 'Serializing exner_now=p_nh%prog(nnow)%exner(:,:,1)' + + !$ser data exner_now=p_nh%prog(nnow)%exner(:,:,1) + + PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' + + !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) + + PRINT *, 'Serializing theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1)' + + !$ser data theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1) + + PRINT *, 'Serializing cvd_o_rd=cvd_o_rd' + + !$ser data cvd_o_rd=cvd_o_rd + + + ENDDO +#ifndef __SX__ +!$OMP END DO NOWAIT +!$OMP END PARALLEL +#endif + + ENDIF ! .NOT. my_process_is_mpi_all_seq() + + IF (ltimer) CALL timer_stop(timer_solve_nh) + CALL message('DSL', 'all dycore kernels ran') + + !$ACC WAIT + !$ACC END DATA + +#if !defined (__LOOP_EXCHANGE) && !defined (__SX__) + CALL btraj%destruct() +#endif + + END SUBROUTINE solve_nh + +#ifdef _OPENACC + + SUBROUTINE h2d_solve_nonhydro( nnow, jstep, jg, idiv_method, grf_intmethod_e, lprep_adv, l_vert_nested, is_iau_active, & + p_nh, prep_adv ) + + INTEGER, INTENT(IN) :: nnow, jstep, jg, idiv_method, grf_intmethod_e + LOGICAL, INTENT(IN) :: l_vert_nested, lprep_adv, is_iau_active + + TYPE(t_nh_state), INTENT(INOUT) :: p_nh + TYPE(t_prepare_adv), TARGET, INTENT(INOUT) :: prep_adv + + REAL(wp), DIMENSION(:,:,:), POINTER :: exner_tmp, rho_tmp, theta_v_tmp, vn_tmp, w_tmp ! p_prog WP + REAL(wp), DIMENSION(:,:,:), POINTER :: vn_ie_ubc_tmp ! p_diag WP 2D + REAL(wp), DIMENSION(:,:,:), POINTER :: w_ubc_tmp, mflx_ic_ubc_tmp, theta_v_ic_ubc_tmp, rho_ic_ubc_tmp ! p_diag WP + + REAL(wp), DIMENSION(:,:,:), POINTER :: theta_v_ic_tmp, rho_ic_tmp ! p_diag WP + REAL(wp), DIMENSION(:,:,:), POINTER :: mass_fl_e_tmp, exner_pr_tmp ! p_diag WP + REAL(wp), DIMENSION(:,:,:), POINTER :: grf_bdy_mflx_tmp ! p_diag WP + + REAL(vp), DIMENSION(:,:,:), POINTER :: vt_tmp, vn_ie_tmp, w_concorr_c_tmp, ddt_exner_phy_tmp ! p_diag VP + REAL(vp), DIMENSION(:,:,:), POINTER :: exner_dyn_incr_tmp ! 
p_diag VP + REAL(vp), DIMENSION(:,:,:), POINTER :: ddt_vn_phy_tmp ! p_diag VP + + REAL(vp), DIMENSION(:,:,:), POINTER :: rho_incr_tmp, exner_incr_tmp ! p_diag VP + REAL(wp), DIMENSION(:,:,:), POINTER :: vn_traj_tmp, mass_flx_me_tmp, mass_flx_ic_tmp ! prep_adv WP + REAL(wp), DIMENSION(:,:,:), POINTER :: vn_ref_tmp, w_ref_tmp ! p_ref WP + + REAL(vp), DIMENSION(:,:,:,:), POINTER :: ddt_vn_apc_pc_tmp + REAL(vp), DIMENSION(:,:,:,:), POINTER :: ddt_vn_cor_pc_tmp + REAL(vp), DIMENSION(:,:,:,:), POINTER :: ddt_w_adv_pc_tmp + + REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_dyn_tmp, ddt_vn_dmp_tmp, ddt_vn_adv_tmp, ddt_vn_cor_tmp ! p_diag WP + REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_pgr_tmp, ddt_vn_phd_tmp, ddt_vn_iau_tmp, ddt_vn_ray_tmp ! p_diag WP + REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_grf_tmp ! p_diag WP + +! p_patch: +! p_patch%cells: edge_idx/blk +! p_patch%edges: cell_idx/blk, vertex_idx/blk, quad_idx/blk, +! primal/dual_normal_cell, inv_primal/dual_edge_length, tangent_orientation, refin_ctrl + +! +! p_nh%metrics: vertidx_gradp, pg_vertidx, pg_edgeidx, pg_edgeblk, +! bdy_halo_c_blk, bdy_halo_c_idx, bdy_mflx_e_blk, bdy_mflx_e_idx, +! coeff_gradp, d_exner_dz_ref_ic, d2dexdz2_fac1_mc, +! ddqz_z_half, ddxn_z_full, ddxt_z_full, ddqz_z_full_e, +! exner_exfac, exner_ref_mc, hmask_dd3d, inv_ddqz_z_full, +! mask_prog_halo_c, nudge_e_blk, nudge_e_idx, pg_exdist, +! rayleigh_vn, rayleigh_w, rho_ref_mc, rho_ref_me, +! scalfac_dd3d, theta_ref_ic, theta_ref_mc, theta_ref_me, +! vwind_expl_wgt, vwind_impl_wgt, +! wgtfac_c, wgtfac_e, wgtfacq_c, wgtfacq1_c, zdiff_gradp + + +! p_nh%prog(nnow) All present (above) + + exner_tmp => p_nh%prog(nnow)%exner + rho_tmp => p_nh%prog(nnow)%rho + theta_v_tmp => p_nh%prog(nnow)%theta_v + vn_tmp => p_nh%prog(nnow)%vn + w_tmp => p_nh%prog(nnow)%w + !$ACC UPDATE DEVICE(exner_tmp, rho_tmp, theta_v_tmp, vn_tmp, w_tmp) + +! 
p_nh%diag: + + rho_ic_tmp => p_nh%diag%rho_ic + theta_v_ic_tmp => p_nh%diag%theta_v_ic + !$ACC UPDATE DEVICE(rho_ic_tmp, theta_v_ic_tmp) + + vt_tmp => p_nh%diag%vt + vn_ie_tmp => p_nh%diag%vn_ie + w_concorr_c_tmp => p_nh%diag%w_concorr_c + !$ACC UPDATE DEVICE(vt_tmp, vn_ie_tmp, w_concorr_c_tmp) + + mass_fl_e_tmp => p_nh%diag%mass_fl_e + exner_pr_tmp => p_nh%diag%exner_pr + exner_dyn_incr_tmp => p_nh%diag%exner_dyn_incr + !$ACC UPDATE DEVICE(mass_fl_e_tmp, exner_pr_tmp, exner_dyn_incr_tmp) + +! WS: I do not think these are necessary, but adding for completeness + ddt_vn_apc_pc_tmp => p_nh%diag%ddt_vn_apc_pc + ddt_w_adv_pc_tmp => p_nh%diag%ddt_w_adv_pc + !$ACC UPDATE DEVICE(ddt_vn_apc_pc_tmp, ddt_w_adv_pc_tmp) + IF (p_nh%diag%ddt_vn_adv_is_associated .OR. p_nh%diag%ddt_vn_cor_is_associated) THEN + ddt_vn_cor_pc_tmp => p_nh%diag%ddt_vn_cor_pc + !$ACC UPDATE DEVICE(ddt_vn_cor_pc_tmp) + END IF + +! MAG: For completeness + ddt_vn_dyn_tmp => p_nh%diag%ddt_vn_dyn + !$ACC UPDATE DEVICE(ddt_vn_dyn_tmp) IF(p_nh%diag%ddt_vn_dyn_is_associated) + ddt_vn_dmp_tmp => p_nh%diag%ddt_vn_dmp + !$ACC UPDATE DEVICE(ddt_vn_dmp_tmp) IF(p_nh%diag%ddt_vn_dmp_is_associated) + ddt_vn_adv_tmp => p_nh%diag%ddt_vn_adv + !$ACC UPDATE DEVICE(ddt_vn_adv_tmp) IF(p_nh%diag%ddt_vn_adv_is_associated) + ddt_vn_cor_tmp => p_nh%diag%ddt_vn_cor + !$ACC UPDATE DEVICE(ddt_vn_cor_tmp) IF(p_nh%diag%ddt_vn_cor_is_associated) + ddt_vn_pgr_tmp => p_nh%diag%ddt_vn_pgr + !$ACC UPDATE DEVICE(ddt_vn_pgr_tmp) IF(p_nh%diag%ddt_vn_pgr_is_associated) + ddt_vn_phd_tmp => p_nh%diag%ddt_vn_phd + !$ACC UPDATE DEVICE(ddt_vn_phd_tmp) IF(p_nh%diag%ddt_vn_phd_is_associated) + ddt_vn_iau_tmp => p_nh%diag%ddt_vn_iau + !$ACC UPDATE DEVICE(ddt_vn_iau_tmp) IF(p_nh%diag%ddt_vn_iau_is_associated) + ddt_vn_ray_tmp => p_nh%diag%ddt_vn_ray + !$ACC UPDATE DEVICE(ddt_vn_ray_tmp) IF(p_nh%diag%ddt_vn_ray_is_associated) + ddt_vn_grf_tmp => p_nh%diag%ddt_vn_grf + !$ACC UPDATE DEVICE(ddt_vn_grf_tmp) IF(p_nh%diag%ddt_vn_grf_is_associated) + + 
mflx_ic_ubc_tmp => p_nh%diag%mflx_ic_ubc + vn_ie_ubc_tmp => p_nh%diag%vn_ie_ubc + theta_v_ic_ubc_tmp => p_nh%diag%theta_v_ic_ubc + rho_ic_ubc_tmp => p_nh%diag%rho_ic_ubc + w_ubc_tmp => p_nh%diag%w_ubc + !$ACC UPDATE DEVICE(mflx_ic_ubc_tmp, vn_ie_ubc_tmp, theta_v_ic_ubc_tmp, rho_ic_ubc_tmp, w_ubc_tmp) IF(l_vert_nested) + + ddt_exner_phy_tmp => p_nh%diag%ddt_exner_phy + ddt_vn_phy_tmp => p_nh%diag%ddt_vn_phy + !$ACC UPDATE DEVICE(ddt_exner_phy_tmp, ddt_vn_phy_tmp) + + rho_incr_tmp => p_nh%diag%rho_incr + exner_incr_tmp => p_nh%diag%exner_incr + !$ACC UPDATE DEVICE(rho_incr_tmp, exner_incr_tmp) + + grf_bdy_mflx_tmp => p_nh%diag%grf_bdy_mflx + !$ACC UPDATE DEVICE(grf_bdy_mflx_tmp) IF((jg > 1) .AND. (grf_intmethod_e >= 5) .AND. (idiv_method == 1) .AND. (jstep == 0)) + +! prep_adv: + + vn_traj_tmp => prep_adv%vn_traj + mass_flx_me_tmp => prep_adv%mass_flx_me + mass_flx_ic_tmp => prep_adv%mass_flx_ic + !$ACC UPDATE DEVICE(vn_traj_tmp, mass_flx_me_tmp, mass_flx_ic_tmp) IF(lprep_adv) + +! p_nh%ref: + + vn_ref_tmp => p_nh%ref%vn_ref + w_ref_tmp => p_nh%ref%w_ref + !$ACC UPDATE DEVICE(vn_ref_tmp, w_ref_tmp) + + END SUBROUTINE h2d_solve_nonhydro + + SUBROUTINE d2h_solve_nonhydro( nnew, jstep, jg, idyn_timestep, grf_intmethod_e, idiv_method, lsave_mflx, & + & l_child_vertnest, lprep_adv, p_nh, prep_adv ) + + INTEGER, INTENT(IN) :: nnew, jstep, jg, idyn_timestep, grf_intmethod_e, idiv_method + LOGICAL, INTENT(IN) :: lsave_mflx, l_child_vertnest, lprep_adv + + TYPE(t_nh_state), INTENT(INOUT) :: p_nh + TYPE(t_prepare_adv), TARGET, INTENT(INOUT) :: prep_adv + + REAL(wp), DIMENSION(:,:,:), POINTER :: exner_tmp, rho_tmp, theta_v_tmp, vn_tmp, w_tmp ! p_prog WP + REAL(wp), DIMENSION(:,:,:), POINTER :: vn_ie_int_tmp ! p_diag WP 2D + REAL(wp), DIMENSION(:,:,:), POINTER :: theta_v_ic_tmp, rho_ic_tmp, rho_ic_int_tmp, w_int_tmp ! p_diag WP + REAL(wp), DIMENSION(:,:,:), POINTER :: theta_v_ic_int_tmp, grf_bdy_mflx_tmp ! 
p_diag WP + REAL(wp), DIMENSION(:,:,:), POINTER :: mass_fl_e_tmp, mflx_ic_int_tmp, exner_pr_tmp ! p_diag WP + + REAL(vp), DIMENSION(:,:,:), POINTER :: vt_tmp, vn_ie_tmp, w_concorr_c_tmp ! p_diag VP + REAL(vp), DIMENSION(:,:,:), POINTER :: mass_fl_e_sv_tmp ! p_diag VP + REAL(vp), DIMENSION(:,:,:), POINTER :: exner_dyn_incr_tmp ! p_diag VP + REAL(wp), DIMENSION(:,:,:), POINTER :: vn_traj_tmp, mass_flx_me_tmp, mass_flx_ic_tmp ! prep_adv WP + REAL(vp), DIMENSION(:,:,:,:), POINTER :: ddt_vn_apc_pc_tmp, ddt_vn_cor_pc_tmp, ddt_w_adv_pc_tmp + + REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_dyn_tmp, ddt_vn_dmp_tmp, ddt_vn_adv_tmp, ddt_vn_cor_tmp ! p_diag WP + REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_pgr_tmp, ddt_vn_phd_tmp, ddt_vn_iau_tmp, ddt_vn_ray_tmp ! p_diag WP + REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_grf_tmp ! p_diag WP + +! The following code is necessary if the Dycore is to be run in isolation on the GPU +! Update all device output on host: the prognostic variables have shifted from nnow to nnew; diagnostics pointers set above + + exner_tmp => p_nh%prog(nnew)%exner + rho_tmp => p_nh%prog(nnew)%rho + theta_v_tmp => p_nh%prog(nnew)%theta_v + vn_tmp => p_nh%prog(nnew)%vn + w_tmp => p_nh%prog(nnew)%w + !$ACC UPDATE HOST(exner_tmp, rho_tmp, theta_v_tmp, vn_tmp, w_tmp) + + vt_tmp => p_nh%diag%vt + vn_ie_tmp => p_nh%diag%vn_ie + rho_ic_tmp => p_nh%diag%rho_ic + theta_v_ic_tmp => p_nh%diag%theta_v_ic + exner_pr_tmp => p_nh%diag%exner_pr + !$ACC UPDATE HOST(vt_tmp, vn_ie_tmp, rho_ic_tmp, theta_v_ic_tmp, exner_pr_tmp) + + w_concorr_c_tmp => p_nh%diag%w_concorr_c + mass_fl_e_tmp => p_nh%diag%mass_fl_e + exner_dyn_incr_tmp => p_nh%diag%exner_dyn_incr + !$ACC UPDATE HOST(w_concorr_c_tmp, mass_fl_e_tmp, exner_dyn_incr_tmp) + + ddt_vn_apc_pc_tmp => p_nh%diag%ddt_vn_apc_pc + ddt_w_adv_pc_tmp => p_nh%diag%ddt_w_adv_pc + !$ACC UPDATE HOST(ddt_vn_apc_pc_tmp, ddt_w_adv_pc_tmp) + IF (p_nh%diag%ddt_vn_adv_is_associated .OR. 
p_nh%diag%ddt_vn_cor_is_associated) THEN + ddt_vn_cor_pc_tmp => p_nh%diag%ddt_vn_cor_pc + !$ACC UPDATE HOST(ddt_vn_cor_pc_tmp) + END IF + +! MAG: For completeness + ddt_vn_dyn_tmp => p_nh%diag%ddt_vn_dyn + !$ACC UPDATE HOST(ddt_vn_dyn_tmp) IF(p_nh%diag%ddt_vn_dyn_is_associated) + ddt_vn_dmp_tmp => p_nh%diag%ddt_vn_dmp + !$ACC UPDATE HOST(ddt_vn_dmp_tmp) IF(p_nh%diag%ddt_vn_dmp_is_associated) + ddt_vn_adv_tmp => p_nh%diag%ddt_vn_adv + !$ACC UPDATE HOST(ddt_vn_adv_tmp) IF(p_nh%diag%ddt_vn_adv_is_associated) + ddt_vn_cor_tmp => p_nh%diag%ddt_vn_cor + !$ACC UPDATE HOST(ddt_vn_cor_tmp) IF(p_nh%diag%ddt_vn_cor_is_associated) + ddt_vn_pgr_tmp => p_nh%diag%ddt_vn_pgr + !$ACC UPDATE HOST(ddt_vn_pgr_tmp) IF(p_nh%diag%ddt_vn_pgr_is_associated) + ddt_vn_phd_tmp => p_nh%diag%ddt_vn_phd + !$ACC UPDATE HOST(ddt_vn_phd_tmp) IF(p_nh%diag%ddt_vn_phd_is_associated) + ddt_vn_iau_tmp => p_nh%diag%ddt_vn_iau + !$ACC UPDATE HOST(ddt_vn_iau_tmp) IF(p_nh%diag%ddt_vn_iau_is_associated) + ddt_vn_ray_tmp => p_nh%diag%ddt_vn_ray + !$ACC UPDATE HOST(ddt_vn_ray_tmp) IF(p_nh%diag%ddt_vn_ray_is_associated) + ddt_vn_grf_tmp => p_nh%diag%ddt_vn_grf + !$ACC UPDATE HOST(ddt_vn_grf_tmp) IF(p_nh%diag%ddt_vn_grf_is_associated) + + mass_fl_e_sv_tmp => p_nh%diag%mass_fl_e_sv + !$ACC UPDATE HOST(mass_fl_e_sv_tmp) IF(lsave_mflx) + + w_int_tmp => p_nh%diag%w_int + mflx_ic_int_tmp => p_nh%diag%mflx_ic_int + theta_v_ic_int_tmp => p_nh%diag%theta_v_ic_int + rho_ic_int_tmp => p_nh%diag%rho_ic_int + !$ACC UPDATE HOST(w_int_tmp, mflx_ic_int_tmp, theta_v_ic_int_tmp, rho_ic_int_tmp) IF(l_child_vertnest) + + vn_ie_int_tmp => p_nh%diag%vn_ie_int + !$ACC UPDATE HOST(vn_ie_int_tmp) IF(idyn_timestep == 1 .AND. l_child_vertnest) + + grf_bdy_mflx_tmp => p_nh%diag%grf_bdy_mflx + !$ACC UPDATE HOST(grf_bdy_mflx_tmp) IF((jg > 1) .AND. (grf_intmethod_e >= 5) .AND. (idiv_method == 1) .AND. 
(jstep == 0)) + + vn_traj_tmp => prep_adv%vn_traj + mass_flx_me_tmp => prep_adv%mass_flx_me + mass_flx_ic_tmp => prep_adv%mass_flx_ic + !$ACC UPDATE HOST(vn_traj_tmp, mass_flx_me_tmp, mass_flx_ic_tmp) IF(lprep_adv) + + END SUBROUTINE d2h_solve_nonhydro + +#endif + +END MODULE mo_solve_nonhydro From 3fbfadb47da9284159c963bc4339b809d0f7981b Mon Sep 17 00:00:00 2001 From: samkellerhals Date: Tue, 9 May 2023 14:42:33 +0200 Subject: [PATCH 13/21] Do not serialise tolerance data --- test.f90 | 7020 ------------------------------------------------------ 1 file changed, 7020 deletions(-) delete mode 100644 test.f90 diff --git a/test.f90 b/test.f90 deleted file mode 100644 index 7a5a424067..0000000000 --- a/test.f90 +++ /dev/null @@ -1,7020 +0,0 @@ -!> -!! mo_solve_nonhydro -!! -!! This module contains the nonhydrostatic dynamical core for the triangular version -!! Its routines were previously contained in mo_divergent_modes and mo_vector_operations -!! but have been extracted for better memory efficiency -!! -!! @author Guenther Zaengl, DWD -!! -!! @par Revision History -!! Initial release by Guenther Zaengl (2010-10-13) based on earlier work -!! by Almut Gassmann, MPI-M -!! Modification by William Sawyer, CSCS (2015-02-06) -!! - OpenACC implementation -!! -!! @par Copyright and License -!! -!! This code is subject to the DWD and MPI-M-Software-License-Agreement in -!! its most recent form. -!! Please see the file LICENSE in the root of the source tree for this code. -!! Where software is supplied by third parties, it is indicated in the -!! headers of the routines. -!! 
- -!---------------------------- -#include "omp_definitions.inc" -!---------------------------- - -MODULE mo_solve_nonhydro - - USE mo_kind, ONLY: wp, vp - USE mo_nonhydrostatic_config,ONLY: itime_scheme,iadv_rhotheta, igradp_method, l_open_ubc, & - kstart_moist, lhdiff_rcf, divdamp_order, & - divdamp_fac, divdamp_fac2, divdamp_fac3, divdamp_fac4, & - divdamp_z, divdamp_z2, divdamp_z3, divdamp_z4, & - divdamp_type, rayleigh_type, rhotheta_offctr, & - veladv_offctr, divdamp_fac_o2, kstart_dd3d, ndyn_substeps_var - USE mo_dynamics_config, ONLY: idiv_method - USE mo_parallel_config, ONLY: nproma, p_test_run, itype_comm, use_dycore_barrier, & - & cpu_min_nproma - USE mo_run_config, ONLY: ltimer, timers_level, lvert_nest - USE mo_model_domain, ONLY: t_patch - USE mo_grid_config, ONLY: l_limited_area - USE mo_gridref_config, ONLY: grf_intmethod_e - USE mo_interpol_config, ONLY: nudge_max_coeff - USE mo_intp_data_strc, ONLY: t_int_state - USE mo_intp, ONLY: cells2verts_scalar - USE mo_nonhydro_types, ONLY: t_nh_state - USE mo_physical_constants,ONLY: cpd, rd, cvd, cvd_o_rd, grav, rd_o_cpd, p0ref - USE mo_math_gradients, ONLY: grad_green_gauss_cell - USE mo_velocity_advection,ONLY: velocity_tendencies - USE mo_math_constants, ONLY: dbl_eps - USE mo_math_divrot, ONLY: div_avg - USE mo_vertical_grid, ONLY: nrdmax, nflat_gradp - USE mo_init_vgrid, ONLY: nflatlev - USE mo_loopindices, ONLY: get_indices_c, get_indices_e - USE mo_impl_constants, ONLY: min_rlcell_int, min_rledge_int, min_rlvert_int, & - & min_rlcell, min_rledge, RAYLEIGH_CLASSIC, RAYLEIGH_KLEMP - USE mo_impl_constants_grf,ONLY: grf_bdywidth_c, grf_bdywidth_e - USE mo_advection_hflux, ONLY: upwind_hflux_miura3 - USE mo_advection_traj, ONLY: t_back_traj, btraj_compute_o1 - USE mo_sync, ONLY: SYNC_E, SYNC_C, sync_patch_array, & - sync_patch_array_mult, sync_patch_array_mult_mp - USE mo_mpi, ONLY: my_process_is_mpi_all_seq, work_mpi_barrier, i_am_accel_node - USE mo_timer, ONLY: timer_solve_nh, timer_barrier, 
timer_start, timer_stop, & - timer_solve_nh_cellcomp, timer_solve_nh_edgecomp, & - timer_solve_nh_vnupd, timer_solve_nh_vimpl, timer_solve_nh_exch - USE mo_exception, ONLY: message - USE mo_icon_comm_lib, ONLY: icon_comm_sync - USE mo_vertical_coord_table,ONLY: vct_a - USE mo_prepadv_types, ONLY: t_prepare_adv - USE mo_initicon_config, ONLY: is_iau_active, iau_wgt_dyn - USE mo_fortran_tools, ONLY: init_zero_contiguous_dp, init_zero_contiguous_sp ! Import both for mixed prec. - !$ser verbatim USE mo_nonhydro_state, ONLY: jstep_ptr, nstep_ptr, mo_solve_nonhydro_ctr -#ifdef _OPENACC - USE mo_mpi, ONLY: my_process_is_work -#endif - - - USE cudafor - USE nvtx - - IMPLICIT NONE - - PRIVATE - - - REAL(wp), PARAMETER :: rd_o_cvd = 1._wp / cvd_o_rd - REAL(wp), PARAMETER :: cpd_o_rd = 1._wp / rd_o_cpd - REAL(wp), PARAMETER :: rd_o_p0ref = rd / p0ref - REAL(wp), PARAMETER :: grav_o_cpd = grav / cpd - - PUBLIC :: solve_nh - -#ifdef _CRAYFTN -#define __CRAY_FTN_VERSION (_RELEASE_MAJOR * 100 + _RELEASE_MINOR) -#endif - - ! On the vectorizing DWD-NEC the diagnostics for the tendencies of the normal wind - ! from terms xyz, ddt_vn_xyz, is disabled by default due to the fear that the - ! conditional storage in conditionally allocated global fields is attempted even if - ! the condition is not given and therefore the global field not allocated. If this - ! happens, this would results in a corrupted memory. - ! (Requested by G. Zaengl based on earlier problems with similar constructs.) -#ifndef __SX__ -#define __ENABLE_DDT_VN_XYZ__ -#endif - - CONTAINS - - - !> - !! solve_nh - !! - !! Main solver routine for nonhydrostatic dynamical core - !! - !! @par Revision History - !! Development started by Guenther Zaengl on 2010-02-03 - !! Modification by Sebastian Borchert, DWD (2017-07-07) - !! (Dear developer, for computational efficiency reasons, a copy of this subroutine - !! exists in 'src/atm_dyn_iconam/mo_nh_deepatmo_solve'. If you would change something here, - !! 
please consider to apply your development there, too, in order to help preventing - !! the copy from diverging and becoming a code corpse sooner or later. Thank you!) - !! - SUBROUTINE solve_nh (p_nh, p_patch, p_int, prep_adv, nnow, nnew, l_init, l_recompute, lsave_mflx, & - lprep_adv, lclean_mflx, idyn_timestep, jstep, dtime) - - TYPE(t_nh_state), TARGET, INTENT(INOUT) :: p_nh - TYPE(t_int_state), TARGET, INTENT(IN) :: p_int - TYPE(t_patch), TARGET, INTENT(INOUT) :: p_patch - TYPE(t_prepare_adv), TARGET, INTENT(INOUT) :: prep_adv - - ! Initialization switch that has to be .TRUE. at the initial time step only (not for restart) - LOGICAL, INTENT(IN) :: l_init - ! Switch to recompute velocity tendencies after a physics call irrespective of the time scheme option - LOGICAL, INTENT(IN) :: l_recompute - ! Switch if mass flux needs to be saved for nest boundary interpolation tendency computation - LOGICAL, INTENT(IN) :: lsave_mflx - ! Switch if preparations for tracer advection shall be computed - LOGICAL, INTENT(IN) :: lprep_adv - ! Switch if mass fluxes computed for tracer advection need to be reinitialized - LOGICAL, INTENT(IN) :: lclean_mflx - ! Counter of dynamics time step within a large time step (ranges from 1 to ndyn_substeps) - INTEGER, INTENT(IN) :: idyn_timestep - ! Time step count since last boundary interpolation (ranges from 0 to 2*ndyn_substeps-1) - INTEGER, INTENT(IN) :: jstep - ! Time levels - INTEGER, INTENT(IN) :: nnow, nnew - ! Dynamics time step - REAL(wp), INTENT(IN) :: dtime - - ! 
Local variables - INTEGER :: jb, jk, jc, je, jks, jg - INTEGER :: nlev, nlevp1 !< number of full levels - INTEGER :: i_startblk, i_endblk, i_startidx, i_endidx, ishift - INTEGER :: rl_start, rl_end, istep, ntl1, ntl2, nvar, nshift, nshift_total - INTEGER :: i_startblk_2, i_endblk_2, i_startidx_2, i_endidx_2, rl_start_2, rl_end_2 - INTEGER :: ic, ie, ilc0, ibc0, ikp1, ikp2 - - REAL(wp) :: z_theta_v_fl_e (nproma,p_patch%nlev ,p_patch%nblks_e), & - z_theta_v_e (nproma,p_patch%nlev ,p_patch%nblks_e), & - z_rho_e (nproma,p_patch%nlev ,p_patch%nblks_e), & - z_mass_fl_div (nproma,p_patch%nlev ,p_patch%nblks_c), & ! used for idiv_method=2 only - z_theta_v_fl_div(nproma,p_patch%nlev ,p_patch%nblks_c), & ! used for idiv_method=2 only - z_theta_v_v (nproma,p_patch%nlev ,p_patch%nblks_v), & ! used for iadv_rhotheta=1 only - z_rho_v (nproma,p_patch%nlev ,p_patch%nblks_v) ! used for iadv_rhotheta=1 only - -#if !defined (__LOOP_EXCHANGE) && !defined (__SX__) - TYPE(t_back_traj), SAVE :: btraj -#endif - - ! The data type vp (variable precision) is by default the same as wp but reduces - ! 
to single precision when the __MIXED_PRECISION cpp flag is set at compile time -#ifdef __SWAPDIM - REAL(vp) :: z_th_ddz_exner_c(nproma,p_patch%nlev ,p_patch%nblks_c), & - z_dexner_dz_c (nproma,p_patch%nlev ,p_patch%nblks_c,2), & - z_vt_ie (nproma,p_patch%nlev ,p_patch%nblks_e), & - z_kin_hor_e (nproma,p_patch%nlev ,p_patch%nblks_e), & - z_exner_ex_pr (nproma,p_patch%nlevp1,p_patch%nblks_c), & - z_gradh_exner (nproma,p_patch%nlev ,p_patch%nblks_e), & - z_rth_pr (nproma,p_patch%nlev ,p_patch%nblks_c,2), & - z_grad_rth (nproma,p_patch%nlev ,p_patch%nblks_c,4), & - z_w_concorr_me (nproma,p_patch%nlev ,p_patch%nblks_e) -#else - REAL(vp) :: z_th_ddz_exner_c(nproma,p_patch%nlev,p_patch%nblks_c), & - z_dexner_dz_c (2,nproma,p_patch%nlev,p_patch%nblks_c), & - z_vt_ie (nproma,p_patch%nlev,p_patch%nblks_e), & - z_kin_hor_e (nproma,p_patch%nlev,p_patch%nblks_e), & - z_exner_ex_pr (nproma,p_patch%nlevp1,p_patch%nblks_c), & ! nlevp1 is intended here - z_gradh_exner (nproma,p_patch%nlev,p_patch%nblks_e), & - z_rth_pr (2,nproma,p_patch%nlev,p_patch%nblks_c), & - z_grad_rth (4,nproma,p_patch%nlev,p_patch%nblks_c), & - z_w_concorr_me (nproma,p_patch%nlev,p_patch%nblks_e) -#endif - ! This field in addition has reversed index order (vertical first) for optimization -#ifdef __LOOP_EXCHANGE - REAL(vp) :: z_graddiv_vn (p_patch%nlev,nproma,p_patch%nblks_e) -#else - REAL(vp) :: z_graddiv_vn (nproma,p_patch%nlev,p_patch%nblks_e) -#endif - - REAL(wp) :: z_w_expl (nproma,p_patch%nlevp1), & - z_thermal_exp (nproma,p_patch%nblks_c), & - z_vn_avg (nproma,p_patch%nlev ), & - z_mflx_top (nproma,p_patch%nblks_c), & - z_contr_w_fl_l (nproma,p_patch%nlevp1), & - z_rho_expl (nproma,p_patch%nlev ), & - z_exner_expl (nproma,p_patch%nlev ) - REAL(wp) :: z_theta_tavg_m1, z_theta_tavg, z_rho_tavg_m1, z_rho_tavg - REAL(wp) :: z_thermal_exp_local ! local variable to use in OpenACC loop - - - - ! The data type vp (variable precision) is by default the same as wp but reduces - ! 
to single precision when the __MIXED_PRECISION cpp flag is set at compile time - - ! TODO : of these, fairly easy to scalarize: z_theta_v_pr_ic - REAL(vp) :: z_alpha (nproma,p_patch%nlevp1), & - z_beta (nproma,p_patch%nlev ), & - z_q (nproma,p_patch%nlev ), & - z_graddiv2_vn (nproma,p_patch%nlev ), & - z_theta_v_pr_ic (nproma,p_patch%nlevp1), & - z_exner_ic (nproma,p_patch%nlevp1), & - z_w_concorr_mc (nproma,p_patch%nlev ), & - z_flxdiv_mass (nproma,p_patch%nlev ), & - z_flxdiv_theta (nproma,p_patch%nlev ), & - z_hydro_corr (nproma,p_patch%nlev,p_patch%nblks_e) - - REAL(vp) :: z_a, z_b, z_c, z_g, z_gamma, & - z_w_backtraj, z_theta_v_pr_mc_m1, z_theta_v_pr_mc - -#ifdef _OPENACC - REAL(vp) :: z_w_concorr_mc_m0, z_w_concorr_mc_m1, z_w_concorr_mc_m2 -#endif - - REAL(wp) :: z_theta1, z_theta2, wgt_nnow_vel, wgt_nnew_vel, & - dt_shift, wgt_nnow_rth, wgt_nnew_rth, dthalf, & - r_nsubsteps, r_dtimensubsteps, scal_divdamp_o2, & - alin, dz32, df32, dz42, df42, bqdr, aqdr, & - zf, dzlin, dzqdr - ! time shifts for linear interpolation of nest UBC - REAL(wp) :: dt_linintp_ubc, dt_linintp_ubc_nnow, dt_linintp_ubc_nnew - REAL(wp) :: z_raylfac(nrdmax(p_patch%id)) - REAL(wp) :: z_ntdistv_bary_1, distv_bary_1, z_ntdistv_bary_2, distv_bary_2 - - REAL(wp), DIMENSION(p_patch%nlev) :: scal_divdamp, bdy_divdamp, enh_divdamp_fac - REAL(vp) :: z_dwdz_dd(nproma,kstart_dd3d(p_patch%id):p_patch%nlev,p_patch%nblks_c) - - ! Local variables for normal wind tendencies and differentials - REAL(wp) :: z_ddt_vn_dyn, z_ddt_vn_apc, z_ddt_vn_cor, & - & z_ddt_vn_pgr, z_ddt_vn_ray, & - & z_d_vn_dmp, z_d_vn_iau - - REAL(wp), DIMENSION(nproma, p_patch%nblks_c) :: w_1 - !-------------------------------------------------------------------------- - ! OUT/INOUT FIELDS DSL - ! - - - - INTEGER, DIMENSION(:,:,:,:), POINTER :: ikoffset_dsl - - ! - ! 
OUT/INOUT FIELDS DSL - !-------------------------------------------------------------------------- - -#ifdef __INTEL_COMPILER -!DIR$ ATTRIBUTES ALIGN : 64 :: z_theta_v_fl_e,z_theta_v_e,z_rho_e,z_mass_fl_div -!DIR$ ATTRIBUTES ALIGN : 64 :: z_theta_v_fl_div,z_theta_v_v,z_rho_v,z_dwdz_dd -!DIR$ ATTRIBUTES ALIGN : 64 :: z_th_ddz_exner_c,z_dexner_dz_c,z_vt_ie,z_kin_hor_e -!DIR$ ATTRIBUTES ALIGN : 64 :: z_exner_ex_pr,z_gradh_exner,z_rth_pr,z_grad_rth -!DIR$ ATTRIBUTES ALIGN : 64 :: z_w_concorr_me,z_graddiv_vn,z_w_expl,z_thermal_exp -!DIR$ ATTRIBUTES ALIGN : 64 :: z_vn_avg,z_mflx_top,z_contr_w_fl_l,z_rho_expl -!DIR$ ATTRIBUTES ALIGN : 64 :: z_exner_expl,z_alpha,z_beta,z_q,z_graddiv2_vn -!DIR$ ATTRIBUTES ALIGN : 64 :: z_theta_v_pr_ic,z_exner_ic,z_w_concorr_mc -!DIR$ ATTRIBUTES ALIGN : 64 :: z_flxdiv_mass,z_flxdiv_theta,z_hydro_corr -!DIR$ ATTRIBUTES ALIGN : 64 :: z_raylfac,scal_divdamp,bdy_divdamp,enh_divdamp_fac -#endif - - INTEGER :: nproma_gradp, nblks_gradp, npromz_gradp, nlen_gradp, jk_start - LOGICAL :: lcompute, lcleanup, lvn_only, lvn_pos - - ! Local variables to control vertical nesting - LOGICAL :: l_vert_nested, l_child_vertnest - - ! Pointers - INTEGER, POINTER, CONTIGUOUS :: & - ! to cell indices - icidx(:,:,:), icblk(:,:,:), & - ! to edge indices - ieidx(:,:,:), ieblk(:,:,:), & - ! to vertex indices - ividx(:,:,:), ivblk(:,:,:), & - ! to vertical neighbor indices for pressure gradient computation - ikidx(:,:,:,:), & - ! to quad edge indices - iqidx(:,:,:), iqblk(:,:,:), & - ! for igradp_method = 3 - iplev(:), ipeidx(:), ipeblk(:) -#if !defined (__LOOP_EXCHANGE) && !defined (__SX__) -! 
These convenience pointers are needed to avoid PGI trying to copy derived type instance btraj back from device to host - INTEGER, POINTER :: p_cell_idx(:,:,:), p_cell_blk(:,:,:) - REAL(vp), POINTER :: p_distv_bary(:,:,:,:) -#endif -#ifdef __SX__ - REAL(wp) :: z_rho_tavg_m1_v(nproma), z_theta_tavg_m1_v(nproma) - REAL(vp) :: z_theta_v_pr_mc_m1_v(nproma) -#endif - !------------------------------------------------------------------- - IF (use_dycore_barrier) THEN - CALL timer_start(timer_barrier) - CALL work_mpi_barrier() - CALL timer_stop(timer_barrier) - ENDIF - !------------------------------------------------------------------- - -#if !defined (__LOOP_EXCHANGE) && !defined (__SX__) - CALL btraj%construct(nproma,p_patch%nlev,p_patch%nblks_e,2) -! These convenience pointers are needed to avoid PGI trying to copy derived type instance btraj back from device to host - p_cell_idx => btraj%cell_idx - p_cell_blk => btraj%cell_blk - p_distv_bary => btraj%distv_bary -#endif - - jg = p_patch%id - - IF (lvert_nest .AND. (p_patch%nshift_total > 0)) THEN - l_vert_nested = .TRUE. - nshift_total = p_patch%nshift_total - ELSE - l_vert_nested = .FALSE. - nshift_total = 0 - ENDIF - IF (lvert_nest .AND. p_patch%n_childdom > 0 .AND. & - (p_patch%nshift_child > 0 .OR. p_patch%nshift_total > 0)) THEN - l_child_vertnest = .TRUE. - nshift = p_patch%nshift_child + 1 - ELSE - l_child_vertnest = .FALSE. - nshift = 0 - ENDIF - dthalf = 0.5_wp*dtime - - CALL message('DSL', 'start running dycore kernels') - IF (ltimer) CALL timer_start(timer_solve_nh) - - ! Inverse value of ndyn_substeps for tracer advection precomputations - r_nsubsteps = 1._wp/REAL(ndyn_substeps_var(jg),wp) - - ! Inverse value of dtime * ndyn_substeps_var - r_dtimensubsteps = 1._wp/(dtime*REAL(ndyn_substeps_var(jg),wp)) - - ! number of vertical levels - nlev = p_patch%nlev - nlevp1 = p_patch%nlevp1 - - ! Set pointers to neighbor cells - icidx => p_patch%edges%cell_idx - icblk => p_patch%edges%cell_blk - - ! 
Set pointers to neighbor edges - ieidx => p_patch%cells%edge_idx - ieblk => p_patch%cells%edge_blk - - ! Set pointers to vertices of an edge - ividx => p_patch%edges%vertex_idx - ivblk => p_patch%edges%vertex_blk - - ! Set pointer to vertical neighbor indices for pressure gradient - ikidx => p_nh%metrics%vertidx_gradp - - ! Set pointers to quad edges - iqidx => p_patch%edges%quad_idx - iqblk => p_patch%edges%quad_blk - - ! DA: moved from below to here to get into the same ACC data section - iplev => p_nh%metrics%pg_vertidx - ipeidx => p_nh%metrics%pg_edgeidx - ipeblk => p_nh%metrics%pg_edgeblk - - !$ser verbatim mo_solve_nonhydro_ctr = mo_solve_nonhydro_ctr + 1 - - ! Precompute Rayleigh damping factor - DO jk = 2, nrdmax(jg) - z_raylfac(jk) = 1.0_wp/(1.0_wp+dtime*p_nh%metrics%rayleigh_w(jk)) - ENDDO - - ! Fourth-order divergence damping - ! - ! The divergence damping factor enh_divdamp_fac is defined as a profile in height z - ! above sea level with 4 height sections: - ! - ! enh_divdamp_fac(z) = divdamp_fac ! z <= divdamp_z - ! enh_divdamp_fac(z) = divdamp_fac + (z-divdamp_z )* alin ! divdamp_z <= z <= divdamp_z2 - ! enh_divdamp_fac(z) = divdamp_fac2 + (z-divdamp_z2)*(aqdr+(z-divdamp_z2)*bqdr) ! divdamp_z2 <= z <= divdamp_z4 - ! enh_divdamp_fac(z) = divdamp_fac4 ! divdamp_z4 <= z - ! - alin = (divdamp_fac2-divdamp_fac)/(divdamp_z2-divdamp_z) - ! - df32 = divdamp_fac3-divdamp_fac2; dz32 = divdamp_z3-divdamp_z2 - df42 = divdamp_fac4-divdamp_fac2; dz42 = divdamp_z4-divdamp_z2 - ! - bqdr = (df42*dz32-df32*dz42)/(dz32*dz42*(dz42-dz32)) - aqdr = df32/dz32-bqdr*dz32 - ! - DO jk = 1, nlev - jks = jk + nshift_total - zf = 0.5_wp*(vct_a(jks)+vct_a(jks+1)) - dzlin = MIN(divdamp_z2-divdamp_z ,MAX(0._wp,zf-divdamp_z )) - dzqdr = MIN(divdamp_z4-divdamp_z2,MAX(0._wp,zf-divdamp_z2)) - ! 
- IF (divdamp_order == 24) THEN - enh_divdamp_fac(jk) = MAX( 0._wp, divdamp_fac + dzlin*alin + dzqdr*(aqdr+dzqdr*bqdr) - 0.25_wp*divdamp_fac_o2 ) - ELSE - enh_divdamp_fac(jk) = divdamp_fac + dzlin*alin + dzqdr*(aqdr+dzqdr*bqdr) - ENDIF - ENDDO - - scal_divdamp(:) = - enh_divdamp_fac(:) * p_patch%geometry_info%mean_cell_area**2 - - ! Time increment for backward-shifting of lateral boundary mass flux - dt_shift = dtime*REAL(2*ndyn_substeps_var(jg)-1,wp)/2._wp ! == dt_phy - 0.5*dtime - - ! Time increment for linear interpolation of nest UBC. - ! The linear interpolation is of the form - ! \phi(t) = \phi0 + (t-t0)*dphi/dt, with t=(jstep+0.5)*dtime, and t0=dt_phy - ! - ! dt_linintp_ubc == (t-t0) - dt_linintp_ubc = jstep*dtime - dt_shift ! valid for center of current time step - dt_linintp_ubc_nnow = dt_linintp_ubc - 0.5_wp*dtime - dt_linintp_ubc_nnew = dt_linintp_ubc + 0.5_wp*dtime - - ! Coefficient for reduced fourth-order divergence damping along nest boundaries - bdy_divdamp(:) = 0.75_wp/(nudge_max_coeff + dbl_eps)*ABS(scal_divdamp(:)) - - !$ACC DATA CREATE(z_kin_hor_e, z_vt_ie, z_w_concorr_me, z_mass_fl_div, z_theta_v_fl_e, z_theta_v_fl_div) & - !$ACC CREATE(z_dexner_dz_c, z_exner_ex_pr, z_gradh_exner, z_rth_pr, z_grad_rth) & - !$ACC CREATE(z_theta_v_pr_ic, z_th_ddz_exner_c, z_w_concorr_mc) & - !$ACC CREATE(z_vn_avg, z_rho_e, z_theta_v_e, z_dwdz_dd, z_thermal_exp, z_mflx_top) & - !$ACC CREATE(z_exner_ic, z_alpha, z_beta, z_q, z_contr_w_fl_l, z_exner_expl) & - !$ACC CREATE(z_flxdiv_mass, z_flxdiv_theta, z_rho_expl, z_w_expl) & - !$ACC CREATE(z_rho_v, z_theta_v_v, z_graddiv_vn, z_hydro_corr, z_graddiv2_vn) & - !$ACC CREATE(w_1) & - !$ACC COPYIN(nflatlev, nflat_gradp, kstart_dd3d, kstart_moist, nrdmax) & - !$ACC COPYIN(z_raylfac, ndyn_substeps_var, scal_divdamp, bdy_divdamp) & -#ifndef __LOOP_EXCHANGE - !$ACC PRESENT(p_cell_idx, p_cell_blk, p_distv_bary) & -#endif - !$ACC PRESENT(prep_adv, p_int, p_patch, p_nh) & - !$ACC PRESENT(icidx, icblk, ividx, ivblk, ieidx, 
ieblk, ikidx, iqidx, iqblk) & - !$ACC PRESENT(ipeidx, ipeblk, iplev) & - !$ACC IF(i_am_accel_node) - - - ! scaling factor for second-order divergence damping: divdamp_fac_o2*delta_x**2 - ! delta_x**2 is approximated by the mean cell area - scal_divdamp_o2 = divdamp_fac_o2 * p_patch%geometry_info%mean_cell_area - - - IF (p_test_run) THEN - !$ACC KERNELS IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - z_rho_e = 0._wp - z_theta_v_e = 0._wp - z_dwdz_dd = 0._wp - z_graddiv_vn= 0._wp - !$ACC END KERNELS - ENDIF - - ! Set time levels of ddt_adv fields for call to velocity_tendencies - IF (itime_scheme >= 4) THEN ! Velocity advection averaging nnow and nnew tendencies - ntl1 = nnow - ntl2 = nnew - ELSE ! Velocity advection is taken at nnew only - ntl1 = 1 - ntl2 = 1 - ENDIF - - ! Weighting coefficients for velocity advection if tendency averaging is used - ! The off-centering specified here turned out to be beneficial to numerical - ! stability in extreme situations - wgt_nnow_vel = 0.5_wp - veladv_offctr ! default value for veladv_offctr is 0.25 - wgt_nnew_vel = 0.5_wp + veladv_offctr - - ! Weighting coefficients for rho and theta at interface levels in the corrector step - ! This empirically determined weighting minimizes the vertical wind off-centering - ! needed for numerical stability of vertical sound wave propagation - wgt_nnew_rth = 0.5_wp + rhotheta_offctr ! default value for rhotheta_offctr is -0.1 - wgt_nnow_rth = 1._wp - wgt_nnew_rth - -!$NEC sparse - DO istep = 1, 2 - - IF (istep == 1) THEN ! predictor step - IF (itime_scheme >= 6 .OR. l_init .OR. l_recompute) THEN - IF (itime_scheme < 6 .AND. .NOT. l_init) THEN - lvn_only = .TRUE. ! Recompute only vn tendency - ELSE - lvn_only = .FALSE. - ENDIF - CALL velocity_tendencies(p_nh%prog(nnow),p_patch,p_int,p_nh%metrics,p_nh%diag,z_w_concorr_me, & - z_kin_hor_e,z_vt_ie,ntl1,istep,lvn_only,dtime,dt_linintp_ubc_nnow) - ENDIF - nvar = nnow - ELSE ! corrector step - lvn_only = .FALSE. 
- CALL velocity_tendencies(p_nh%prog(nnew),p_patch,p_int,p_nh%metrics,p_nh%diag,z_w_concorr_me, & - z_kin_hor_e,z_vt_ie,ntl2,istep,lvn_only,dtime,dt_linintp_ubc_nnew) - nvar = nnew - ENDIF - - - ! Preparations for igradp_method = 3/5 (reformulated extrapolation below the ground) - IF (istep == 1 .AND. (igradp_method == 3 .OR. igradp_method == 5)) THEN - - nproma_gradp = cpu_min_nproma(nproma,256) - nblks_gradp = INT(p_nh%metrics%pg_listdim/nproma_gradp) - npromz_gradp = MOD(p_nh%metrics%pg_listdim,nproma_gradp) - IF (npromz_gradp > 0) THEN - nblks_gradp = nblks_gradp + 1 - ELSE - npromz_gradp = nproma_gradp - ENDIF - - ENDIF - - IF (timers_level > 5) CALL timer_start(timer_solve_nh_cellcomp) - - ! Computations on mass points -!$OMP PARALLEL PRIVATE (rl_start,rl_end,i_startblk,i_endblk) - - rl_start = 3 - IF (istep == 1) THEN - rl_end = min_rlcell_int - 1 - ELSE ! halo points are not needed in step 2 - rl_end = min_rlcell_int - ENDIF - - i_startblk = p_patch%cells%start_block(rl_start) - i_endblk = p_patch%cells%end_block(rl_end) - - ! DSL: Instead of calling init_zero_contiguous_dp to set z_rth_pr to zero, - ! introduce a stencil that does the same thing, but does not touch the - ! padding, so it can be verified. - - rl_start_2 = 1 - rl_end_2 = min_rlcell - - i_startblk_2 = p_patch%cells%start_block(rl_start_2) - i_endblk_2 = p_patch%cells%end_block(rl_end_2) - - ! initialize nest boundary points of z_rth_pr with zero - IF (istep == 1 .AND. (jg > 1 .OR. l_limited_area)) THEN - - CALL get_indices_c(p_patch, 1, i_startblk_2, i_endblk_2, & - i_startidx_2, i_endidx_2, rl_start_2, rl_end_2) - - - !$ser init directory="." 
prefix="liskov-serialisation" - - !$ser savepoint mo_solve_nonhydro_stencil_01_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' - - !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) -!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jk = 1, nlev - DO jc = i_startidx_2, i_endidx_2 - z_rth_pr(jc,jk,1,1) = 0._wp - z_rth_pr(jc,jk,1,2) = 0._wp - ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_01_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' - - !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) -!$OMP BARRIER - ENDIF - -!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc,z_exner_ic,z_theta_v_pr_ic,z_w_backtraj,& -!$OMP z_theta_v_pr_mc_m1,z_theta_v_pr_mc,z_rho_tavg_m1,z_rho_tavg, & -#ifdef __SX__ -!$OMP z_rho_tavg_m1_v,z_theta_tavg_m1_v,z_theta_v_pr_mc_m1_v, & -#endif -!$OMP z_theta_tavg_m1,z_theta_tavg,z_thermal_exp_local) ICON_OMP_DEFAULT_SCHEDULE - DO jb = i_startblk, i_endblk - - CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - IF (istep == 1) THEN ! 
to be executed in predictor step only - - - !$ser savepoint mo_solve_nonhydro_stencil_02_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing exner_exfac=p_nh%metrics%exner_exfac(:,:,1)' - - !$ser data exner_exfac=p_nh%metrics%exner_exfac(:,:,1) - - PRINT *, 'Serializing exner=p_nh%prog(nnow)%exner(:,:,1)' - - !$ser data exner=p_nh%prog(nnow)%exner(:,:,1) - - PRINT *, 'Serializing exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1)' - - !$ser data exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1) - - PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' - - !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 1, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - ! temporally extrapolated perturbation Exner pressure (used for horizontal gradients only) - z_exner_ex_pr(jc,jk,jb) = (1._wp + p_nh%metrics%exner_exfac(jc,jk,jb)) * & - (p_nh%prog(nnow)%exner(jc,jk,jb) - p_nh%metrics%exner_ref_mc(jc,jk,jb)) - & - p_nh%metrics%exner_exfac(jc,jk,jb) * p_nh%diag%exner_pr(jc,jk,jb) - - ! 
non-extrapolated perturbation Exner pressure, saved in exner_pr for the next time step - p_nh%diag%exner_pr(jc,jk,jb) = p_nh%prog(nnow)%exner(jc,jk,jb) - & - p_nh%metrics%exner_ref_mc(jc,jk,jb) - - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_02_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing exner_exfac=p_nh%metrics%exner_exfac(:,:,1)' - - !$ser data exner_exfac=p_nh%metrics%exner_exfac(:,:,1) - - PRINT *, 'Serializing exner=p_nh%prog(nnow)%exner(:,:,1)' - - !$ser data exner=p_nh%prog(nnow)%exner(:,:,1) - - PRINT *, 'Serializing exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1)' - - !$ser data exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1) - - PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' - - !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - ! The purpose of the extra level of exner_pr is to simplify coding for - ! igradp_method=4/5. It is multiplied with zero and thus actually not used - - - !$ser savepoint mo_solve_nonhydro_stencil_03_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - !$ACC KERNELS IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - z_exner_ex_pr(i_startidx:i_endidx,nlevp1,jb) = 0._wp - !$ACC END KERNELS - - !$ser savepoint mo_solve_nonhydro_stencil_03_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - IF (l_open_ubc .AND. .NOT. l_vert_nested) THEN - ! Compute contribution of thermal expansion to vertical wind at model top - ! Isothermal expansion is assumed - -#ifdef _OPENACC -! Exchanging loop order to remove data dep -! 
TODO: evaluate if this makes sense - !$ACC PARALLEL IF(i_am_accel_node) PRIVATE(z_thermal_exp_local) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - z_thermal_exp_local = 0._wp - DO jk = 1, nlev - z_thermal_exp_local= z_thermal_exp_local + cvd_o_rd & - * p_nh%diag%ddt_exner_phy(jc,jk,jb) & - / (p_nh%prog(nnow)%exner(jc,jk,jb)*p_nh%metrics%inv_ddqz_z_full(jc,jk,jb)) - ENDDO - z_thermal_exp(jc,jb) = z_thermal_exp_local - ENDDO - !$ACC END PARALLEL - -#else - z_thermal_exp(:,jb) = 0._wp - DO jk = 1, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - z_thermal_exp(jc,jb) = z_thermal_exp(jc,jb) + cvd_o_rd & - * p_nh%diag%ddt_exner_phy(jc,jk,jb) & - / (p_nh%prog(nnow)%exner(jc,jk,jb)*p_nh%metrics%inv_ddqz_z_full(jc,jk,jb)) - ENDDO - ENDDO -#endif - - ENDIF - - IF (igradp_method <= 3) THEN - ! Perturbation Exner pressure on bottom half level -!DIR$ IVDEP - - - !$ser savepoint mo_solve_nonhydro_stencil_04_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' - - !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' - - !$ser data z_exner_ic=z_exner_ic(:,:) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - z_exner_ic(jc,nlevp1) = & - p_nh%metrics%wgtfacq_c(jc,1,jb)*z_exner_ex_pr(jc,nlev ,jb) + & - p_nh%metrics%wgtfacq_c(jc,2,jb)*z_exner_ex_pr(jc,nlev-1,jb) + & - p_nh%metrics%wgtfacq_c(jc,3,jb)*z_exner_ex_pr(jc,nlev-2,jb) - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_04_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' - - !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) - - PRINT *, 'Serializing 
z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' - - !$ser data z_exner_ic=z_exner_ic(:,:) - -! WS: moved full z_exner_ic calculation here to avoid OpenACC dependency on jk+1 below -! possibly GZ will want to consider the cache ramifications of this change for CPU - - !$ser savepoint mo_solve_nonhydro_stencil_05_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' - - !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' - - !$ser data z_exner_ic=z_exner_ic(:,:) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(32, 4) - DO jk = nlev, MAX(2,nflatlev(jg)), -1 -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - ! Exner pressure on remaining half levels for metric correction term - z_exner_ic(jc,jk) = & - p_nh%metrics%wgtfac_c(jc,jk,jb) *z_exner_ex_pr(jc,jk ,jb) + & - (1._vp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_exner_ex_pr(jc,jk-1,jb) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_05_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' - - !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' - - !$ser data z_exner_ic=z_exner_ic(:,:) - - - - !$ser savepoint mo_solve_nonhydro_stencil_06_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' - - !$ser data z_exner_ic=z_exner_ic(:,:) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' - - !$ser data 
inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) - - PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' - - !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(32, 4) - DO jk = nlev, MAX(2,nflatlev(jg)), -1 -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - - ! First vertical derivative of perturbation Exner pressure -#ifdef __SWAPDIM - z_dexner_dz_c(jc,jk,jb,1) = & -#else - z_dexner_dz_c(1,jc,jk,jb) = & -#endif - (z_exner_ic(jc,jk) - z_exner_ic(jc,jk+1)) * & - p_nh%metrics%inv_ddqz_z_full(jc,jk,jb) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_06_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_exner_ic=z_exner_ic(:,:)' - - !$ser data z_exner_ic=z_exner_ic(:,:) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) - - PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' - - !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) - - IF (nflatlev(jg) == 1) THEN - ! Perturbation Exner pressure on top half level -!DIR$ IVDEP - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - z_exner_ic(jc,1) = & - p_nh%metrics%wgtfacq1_c(jc,1,jb)*z_exner_ex_pr(jc,1,jb) + & - p_nh%metrics%wgtfacq1_c(jc,2,jb)*z_exner_ex_pr(jc,2,jb) + & - p_nh%metrics%wgtfacq1_c(jc,3,jb)*z_exner_ex_pr(jc,3,jb) - - ! 
First vertical derivative of perturbation Exner pressure -#ifdef __SWAPDIM - z_dexner_dz_c(jc,1,jb,1) = & -#else - z_dexner_dz_c(1,jc,1,jb) = & -#endif - (z_exner_ic(jc,1) - z_exner_ic(jc,2)) * & - p_nh%metrics%inv_ddqz_z_full(jc,1,jb) - ENDDO - !$ACC END PARALLEL - ENDIF - - ENDIF - - - !$ser savepoint mo_solve_nonhydro_stencil_07_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing rho=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' - - !$ser data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) - - PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' - - !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) - - PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' - - !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) -#ifdef __SWAPDIM - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - z_rth_pr(jc,1,jb,1) = p_nh%prog(nnow)%rho(jc,1,jb) - & - p_nh%metrics%rho_ref_mc(jc,1,jb) - z_rth_pr(jc,1,jb,2) = p_nh%prog(nnow)%theta_v(jc,1,jb) - & - p_nh%metrics%theta_ref_mc(jc,1,jb) - ENDDO -#else - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - z_rth_pr(1,jc,1,jb) = p_nh%prog(nnow)%rho(jc,1,jb) - & - p_nh%metrics%rho_ref_mc(jc,1,jb) - z_rth_pr(2,jc,1,jb) = p_nh%prog(nnow)%theta_v(jc,1,jb) - & - p_nh%metrics%theta_ref_mc(jc,1,jb) - ENDDO -#endif - !$ACC END PARALLEL - - - !$ser savepoint mo_solve_nonhydro_stencil_07_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing rho=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' - - !$ser 
data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) - - PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' - - !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) - - PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' - - !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) - - - !$ser savepoint mo_solve_nonhydro_stencil_08_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' - - !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) - - PRINT *, 'Serializing rho=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' - - !$ser data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) - - PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' - - !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) - - PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' - - !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(32, 4) - DO jk = 2, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - ! density at interface levels for vertical flux divergence computation - p_nh%diag%rho_ic(jc,jk,jb) = p_nh%metrics%wgtfac_c(jc,jk,jb) *p_nh%prog(nnow)%rho(jc,jk ,jb) + & - (1._wp-p_nh%metrics%wgtfac_c(jc,jk,jb))*p_nh%prog(nnow)%rho(jc,jk-1,jb) - - ! 
perturbation density and virtual potential temperature at main levels for horizontal flux divergence term - ! (needed in the predictor step only) -#ifdef __SWAPDIM - z_rth_pr(jc,jk,jb,1) = p_nh%prog(nnow)%rho(jc,jk,jb) - p_nh%metrics%rho_ref_mc(jc,jk,jb) - z_rth_pr(jc,jk,jb,2) = p_nh%prog(nnow)%theta_v(jc,jk,jb) - p_nh%metrics%theta_ref_mc(jc,jk,jb) -#else - z_rth_pr(1,jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb) - p_nh%metrics%rho_ref_mc(jc,jk,jb) - z_rth_pr(2,jc,jk,jb) = p_nh%prog(nnow)%theta_v(jc,jk,jb) - p_nh%metrics%theta_ref_mc(jc,jk,jb) -#endif -#ifdef _OPENACC - ENDDO - ENDDO - !$ACC END PARALLEL -#endif - - - !$ser savepoint mo_solve_nonhydro_stencil_08_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' - - !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) - - PRINT *, 'Serializing rho=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' - - !$ser data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) - - PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' - - !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) - - PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' - - !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) - - - !$ser savepoint mo_solve_nonhydro_stencil_09_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' - - !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) - - PRINT *, 'Serializing 
theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' - - !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) - - PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' - - !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) - - PRINT *, 'Serializing d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1)' - - !$ser data d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1) - - PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' - - !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) - - PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' - - !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) - - PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1)' - - !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1) -#ifdef _OPENACC - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 2, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx -#endif - - ! perturbation virtual potential temperature at interface levels -#ifdef __SWAPDIM - z_theta_v_pr_ic(jc,jk) = & - p_nh%metrics%wgtfac_c(jc,jk,jb) *z_rth_pr(jc,jk ,jb,2) + & - (1._vp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_rth_pr(jc,jk-1,jb,2) -#else - z_theta_v_pr_ic(jc,jk) = & - p_nh%metrics%wgtfac_c(jc,jk,jb) *z_rth_pr(2,jc,jk ,jb) + & - (1._vp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_rth_pr(2,jc,jk-1,jb) -#endif - ! virtual potential temperature at interface levels - p_nh%diag%theta_v_ic(jc,jk,jb) = & - p_nh%metrics%wgtfac_c(jc,jk,jb) *p_nh%prog(nnow)%theta_v(jc,jk ,jb) + & - (1._wp-p_nh%metrics%wgtfac_c(jc,jk,jb))*p_nh%prog(nnow)%theta_v(jc,jk-1,jb) - - ! 
vertical pressure gradient * theta_v - z_th_ddz_exner_c(jc,jk,jb) = p_nh%metrics%vwind_expl_wgt(jc,jb)* & - p_nh%diag%theta_v_ic(jc,jk,jb) * (p_nh%diag%exner_pr(jc,jk-1,jb)- & - p_nh%diag%exner_pr(jc,jk,jb)) / p_nh%metrics%ddqz_z_half(jc,jk,jb) + & - z_theta_v_pr_ic(jc,jk)*p_nh%metrics%d_exner_dz_ref_ic(jc,jk,jb) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_09_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' - - !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) - - PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' - - !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) - - PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' - - !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) - - PRINT *, 'Serializing d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1)' - - !$ser data d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1) - - PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' - - !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) - - PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' - - !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) - - PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1)' - - !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1) - - - ELSE ! istep = 2 - in this step, an upwind-biased discretization is used for rho_ic and theta_v_ic - ! in order to reduce the numerical dispersion errors -#ifdef __SX__ - ! 
precompute values for jk = 1 which are previous values in first iteration of jk compute loop - jk = 2 - DO jc = i_startidx, i_endidx - z_rho_tavg_m1_v(jc) = wgt_nnow_rth*p_nh%prog(nnow)%rho(jc,jk-1,jb) + & - wgt_nnew_rth*p_nh%prog(nvar)%rho(jc,jk-1,jb) - z_theta_tavg_m1_v(jc) = wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,jk-1,jb) + & - wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,jk-1,jb) - z_theta_v_pr_mc_m1_v(jc) = z_theta_tavg_m1_v(jc) - p_nh%metrics%theta_ref_mc(jc,jk-1,jb) - ENDDO -#endif - - - !$ser savepoint mo_solve_nonhydro_stencil_10_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing wgt_nnew_rth=wgt_nnew_rth' - - !$ser data wgt_nnew_rth=wgt_nnew_rth - - PRINT *, 'Serializing wgt_nnow_rth=wgt_nnow_rth' - - !$ser data wgt_nnow_rth=wgt_nnow_rth - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - - PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' - - !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) - - PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing rho_var=p_nh%prog(nvar)%rho(:,:,1)' - - !$ser data rho_var=p_nh%prog(nvar)%rho(:,:,1) - - PRINT *, 'Serializing theta_now=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_now=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing theta_var=p_nh%prog(nvar)%theta_v(:,:,1)' - - !$ser data theta_var=p_nh%prog(nvar)%theta_v(:,:,1) - - PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' - - !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) - - PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' - - !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) - - PRINT *, 'Serializing 
vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' - - !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) - - PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' - - !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) - - PRINT *, 'Serializing d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1)' - - !$ser data d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) - - PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' - - !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) - - PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1)' - - !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(128, *) & - !$ACC PRIVATE(z_w_backtraj, z_rho_tavg_m1, z_theta_tavg_m1, z_rho_tavg) & - !$ACC PRIVATE(z_theta_tavg, z_theta_v_pr_mc_m1, z_theta_v_pr_mc) - DO jk = 2, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - ! backward trajectory - use w(nnew) in order to be at the same time level as w_concorr - z_w_backtraj = - (p_nh%prog(nnew)%w(jc,jk,jb) - p_nh%diag%w_concorr_c(jc,jk,jb)) * & - dtime*0.5_wp/p_nh%metrics%ddqz_z_half(jc,jk,jb) - - ! temporally averaged density and virtual potential temperature depending on rhotheta_offctr - ! 
(see pre-computation above) -#ifndef __SX__ - z_rho_tavg_m1 = wgt_nnow_rth*p_nh%prog(nnow)%rho(jc,jk-1,jb) + & - wgt_nnew_rth*p_nh%prog(nvar)%rho(jc,jk-1,jb) - z_theta_tavg_m1 = wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,jk-1,jb) + & - wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,jk-1,jb) -#else - z_rho_tavg_m1 = z_rho_tavg_m1_v(jc) - z_theta_tavg_m1 = z_theta_tavg_m1_v(jc) -#endif - - z_rho_tavg = wgt_nnow_rth*p_nh%prog(nnow)%rho(jc,jk,jb) + & - wgt_nnew_rth*p_nh%prog(nvar)%rho(jc,jk,jb) - z_theta_tavg = wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,jk,jb) + & - wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,jk,jb) - - ! density at interface levels for vertical flux divergence computation - p_nh%diag%rho_ic(jc,jk,jb) = p_nh%metrics%wgtfac_c(jc,jk,jb) *z_rho_tavg + & - (1._wp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_rho_tavg_m1 + & - z_w_backtraj*(z_rho_tavg_m1-z_rho_tavg) - - ! perturbation virtual potential temperature at main levels -#ifndef __SX__ - z_theta_v_pr_mc_m1 = z_theta_tavg_m1 - p_nh%metrics%theta_ref_mc(jc,jk-1,jb) -#else - z_theta_v_pr_mc_m1 = z_theta_v_pr_mc_m1_v(jc) -#endif - z_theta_v_pr_mc = z_theta_tavg - p_nh%metrics%theta_ref_mc(jc,jk,jb) - - ! perturbation virtual potential temperature at interface levels - z_theta_v_pr_ic(jc,jk) = & - p_nh%metrics%wgtfac_c(jc,jk,jb) *z_theta_v_pr_mc + & - (1._vp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_theta_v_pr_mc_m1 - - ! virtual potential temperature at interface levels - p_nh%diag%theta_v_ic(jc,jk,jb) = p_nh%metrics%wgtfac_c(jc,jk,jb) *z_theta_tavg + & - (1._wp-p_nh%metrics%wgtfac_c(jc,jk,jb))*z_theta_tavg_m1 + & - z_w_backtraj*(z_theta_tavg_m1-z_theta_tavg) - - ! vertical pressure gradient * theta_v - z_th_ddz_exner_c(jc,jk,jb) = p_nh%metrics%vwind_expl_wgt(jc,jb)* & - p_nh%diag%theta_v_ic(jc,jk,jb) * (p_nh%diag%exner_pr(jc,jk-1,jb)- & - p_nh%diag%exner_pr(jc,jk,jb)) / p_nh%metrics%ddqz_z_half(jc,jk,jb) + & - z_theta_v_pr_ic(jc,jk)*p_nh%metrics%d_exner_dz_ref_ic(jc,jk,jb) - -#ifdef __SX__ - ! 
save current values as previous values for next iteration - z_rho_tavg_m1_v(jc) = z_rho_tavg - z_theta_tavg_m1_v(jc) = z_theta_tavg - z_theta_v_pr_mc_m1_v(jc) = z_theta_v_pr_mc -#endif - - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_10_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing wgt_nnew_rth=wgt_nnew_rth' - - !$ser data wgt_nnew_rth=wgt_nnew_rth - - PRINT *, 'Serializing wgt_nnow_rth=wgt_nnow_rth' - - !$ser data wgt_nnow_rth=wgt_nnow_rth - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - - PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' - - !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) - - PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing rho_var=p_nh%prog(nvar)%rho(:,:,1)' - - !$ser data rho_var=p_nh%prog(nvar)%rho(:,:,1) - - PRINT *, 'Serializing theta_now=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_now=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing theta_var=p_nh%prog(nvar)%theta_v(:,:,1)' - - !$ser data theta_var=p_nh%prog(nvar)%theta_v(:,:,1) - - PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' - - !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) - - PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' - - !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) - - PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' - - !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) - - PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,1)' - - !$ser data exner_pr=p_nh%diag%exner_pr(:,:,1) - - PRINT *, 'Serializing d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1)' - - 
!$ser data d_exner_dz_ref_ic=p_nh%metrics%d_exner_dz_ref_ic(:,:,1) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) - - PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' - - !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) - - PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1)' - - !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,1) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) - - ENDIF ! istep = 1/2 - - ! rho and theta at top level (in case of vertical nesting, upper boundary conditions - ! are set in the vertical solver loop) - IF (l_open_ubc .AND. .NOT. l_vert_nested) THEN - IF ( istep == 1 ) THEN - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) -!DIR$ IVDEP - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - p_nh%diag%theta_v_ic(jc,1,jb) = & - p_nh%metrics%theta_ref_ic(jc,1,jb) + & -#ifdef __SWAPDIM - p_nh%metrics%wgtfacq1_c(jc,1,jb)*z_rth_pr(jc,1,jb,2) + & - p_nh%metrics%wgtfacq1_c(jc,2,jb)*z_rth_pr(jc,2,jb,2) + & - p_nh%metrics%wgtfacq1_c(jc,3,jb)*z_rth_pr(jc,3,jb,2) -#else - p_nh%metrics%wgtfacq1_c(jc,1,jb)*z_rth_pr(2,jc,1,jb) + & - p_nh%metrics%wgtfacq1_c(jc,2,jb)*z_rth_pr(2,jc,2,jb) + & - p_nh%metrics%wgtfacq1_c(jc,3,jb)*z_rth_pr(2,jc,3,jb) -#endif - ENDDO - !$ACC END PARALLEL - ELSE ! 
ISTEP == 2 - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) -!DIR$ IVDEP - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - p_nh%diag%theta_v_ic(jc,1,jb) = p_nh%metrics%theta_ref_ic(jc,1,jb) + & - p_nh%metrics%wgtfacq1_c(jc,1,jb)* ( wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,1,jb) + & - wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,1,jb) - p_nh%metrics%theta_ref_mc(jc,1,jb) ) + & - p_nh%metrics%wgtfacq1_c(jc,2,jb)*( wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,2,jb) + & - wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,2,jb) - p_nh%metrics%theta_ref_mc(jc,2,jb) ) + & - p_nh%metrics%wgtfacq1_c(jc,3,jb)*( wgt_nnow_rth*p_nh%prog(nnow)%theta_v(jc,3,jb) + & - wgt_nnew_rth*p_nh%prog(nvar)%theta_v(jc,3,jb) - p_nh%metrics%theta_ref_mc(jc,3,jb) ) - ENDDO - !$ACC END PARALLEL - ENDIF - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) -!DIR$ IVDEP - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - p_nh%diag%rho_ic(jc,1,jb) = wgt_nnow_rth*( & - p_nh%metrics%wgtfacq1_c(jc,1,jb)*p_nh%prog(nnow)%rho(jc,1,jb) + & - p_nh%metrics%wgtfacq1_c(jc,2,jb)*p_nh%prog(nnow)%rho(jc,2,jb) + & - p_nh%metrics%wgtfacq1_c(jc,3,jb)*p_nh%prog(nnow)%rho(jc,3,jb))+ & - wgt_nnew_rth * ( & - p_nh%metrics%wgtfacq1_c(jc,1,jb)*p_nh%prog(nvar)%rho(jc,1,jb) + & - p_nh%metrics%wgtfacq1_c(jc,2,jb)*p_nh%prog(nvar)%rho(jc,2,jb) + & - p_nh%metrics%wgtfacq1_c(jc,3,jb)*p_nh%prog(nvar)%rho(jc,3,jb) ) - ENDDO - !$ACC END PARALLEL - ENDIF - - IF (istep == 1) THEN - - ! 
Perturbation theta at top and surface levels - - !$ser savepoint mo_solve_nonhydro_stencil_11_lower_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' - - !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) -!DIR$ IVDEP - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - z_theta_v_pr_ic(jc,1) = 0._wp - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_11_lower_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' - - !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) - - - - !$ser savepoint mo_solve_nonhydro_stencil_11_upper_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' - - !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) - - PRINT *, 'Serializing z_rth_pr=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr=z_rth_pr(:,:,1,2) - - PRINT *, 'Serializing theta_ref_ic=p_nh%metrics%theta_ref_ic(:,:,1)' - - !$ser data theta_ref_ic=p_nh%metrics%theta_ref_ic(:,:,1) - - PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' - - !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) -!$ACC PARALLEL IF( i_am_accel_node ) DEFAULT(PRESENT) ASYNC(1) -!DIR$ IVDEP - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - z_theta_v_pr_ic(jc,nlevp1) = & -#ifdef __SWAPDIM - p_nh%metrics%wgtfacq_c(jc,1,jb)*z_rth_pr(jc,nlev ,jb,2) + & - p_nh%metrics%wgtfacq_c(jc,2,jb)*z_rth_pr(jc,nlev-1,jb,2) + & - p_nh%metrics%wgtfacq_c(jc,3,jb)*z_rth_pr(jc,nlev-2,jb,2) -#else - p_nh%metrics%wgtfacq_c(jc,1,jb)*z_rth_pr(2,jc,nlev ,jb) + & - p_nh%metrics%wgtfacq_c(jc,2,jb)*z_rth_pr(2,jc,nlev-1,jb) + & - p_nh%metrics%wgtfacq_c(jc,3,jb)*z_rth_pr(2,jc,nlev-2,jb) -#endif - 
p_nh%diag%theta_v_ic(jc,nlevp1,jb) = & - p_nh%metrics%theta_ref_ic(jc,nlevp1,jb) + z_theta_v_pr_ic(jc,nlevp1) - ENDDO - !$ACC END PARALLEL - - - !$ser savepoint mo_solve_nonhydro_stencil_11_upper_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' - - !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) - - PRINT *, 'Serializing z_rth_pr=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr=z_rth_pr(:,:,1,2) - - PRINT *, 'Serializing theta_ref_ic=p_nh%metrics%theta_ref_ic(:,:,1)' - - !$ser data theta_ref_ic=p_nh%metrics%theta_ref_ic(:,:,1) - - PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' - - !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) - - IF (igradp_method <= 3) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_12_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' - - !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) - - PRINT *, 'Serializing d2dexdz2_fac1_mc=p_nh%metrics%d2dexdz2_fac1_mc(:,:,1)' - - !$ser data d2dexdz2_fac1_mc=p_nh%metrics%d2dexdz2_fac1_mc(:,:,1) - - PRINT *, 'Serializing d2dexdz2_fac2_mc=p_nh%metrics%d2dexdz2_fac2_mc(:,:,1)' - - !$ser data d2dexdz2_fac2_mc=p_nh%metrics%d2dexdz2_fac2_mc(:,:,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) - - PRINT *, 'Serializing z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2)' - - !$ser data z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(32, 4) - DO jk = nflat_gradp(jg), nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - ! 
Second vertical derivative of perturbation Exner pressure (hydrostatic approximation) -#ifdef __SWAPDIM - z_dexner_dz_c(jc,jk,jb,2) = -0.5_vp * & - ((z_theta_v_pr_ic(jc,jk) - z_theta_v_pr_ic(jc,jk+1)) * & - p_nh%metrics%d2dexdz2_fac1_mc(jc,jk,jb) + z_rth_pr(jc,jk,jb,2) * & -#else - z_dexner_dz_c(2,jc,jk,jb) = -0.5_vp * & - ((z_theta_v_pr_ic(jc,jk) - z_theta_v_pr_ic(jc,jk+1)) * & - p_nh%metrics%d2dexdz2_fac1_mc(jc,jk,jb) + z_rth_pr(2,jc,jk,jb) * & -#endif - p_nh%metrics%d2dexdz2_fac2_mc(jc,jk,jb)) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_12_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_theta_v_pr_ic=z_theta_v_pr_ic(:,:)' - - !$ser data z_theta_v_pr_ic=z_theta_v_pr_ic(:,:) - - PRINT *, 'Serializing d2dexdz2_fac1_mc=p_nh%metrics%d2dexdz2_fac1_mc(:,:,1)' - - !$ser data d2dexdz2_fac1_mc=p_nh%metrics%d2dexdz2_fac1_mc(:,:,1) - - PRINT *, 'Serializing d2dexdz2_fac2_mc=p_nh%metrics%d2dexdz2_fac2_mc(:,:,1)' - - !$ser data d2dexdz2_fac2_mc=p_nh%metrics%d2dexdz2_fac2_mc(:,:,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) - - PRINT *, 'Serializing z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2)' - - !$ser data z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2) - ENDIF - - ENDIF ! istep == 1 - - ENDDO -!$OMP END DO NOWAIT - - IF (istep == 1) THEN - ! Add computation of z_grad_rth (perturbation density and virtual potential temperature at main levels) - ! 
at outer halo points: needed for correct calculation of the upwind gradients for Miura scheme - rl_start = min_rlcell_int - 2 - rl_end = min_rlcell_int - 2 - - i_startblk = p_patch%cells%start_block(rl_start) - i_endblk = p_patch%cells%end_block(rl_end) - -!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc) ICON_OMP_DEFAULT_SCHEDULE - DO jb = i_startblk, i_endblk - - CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, i_startidx, i_endidx, rl_start, rl_end) - - - !$ser savepoint mo_solve_nonhydro_stencil_13_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing rho=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' - - !$ser data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) - - PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' - - !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) - - PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' - - !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 1, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx -#ifdef __SWAPDIM - z_rth_pr(jc,jk,jb,1) = p_nh%prog(nnow)%rho(jc,jk,jb) - p_nh%metrics%rho_ref_mc(jc,jk,jb) - z_rth_pr(jc,jk,jb,2) = p_nh%prog(nnow)%theta_v(jc,jk,jb) - p_nh%metrics%theta_ref_mc(jc,jk,jb) -#else - z_rth_pr(1,jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb) - p_nh%metrics%rho_ref_mc(jc,jk,jb) - z_rth_pr(2,jc,jk,jb) = p_nh%prog(nnow)%theta_v(jc,jk,jb) - p_nh%metrics%theta_ref_mc(jc,jk,jb) -#endif - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_13_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing 
rho=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1)' - - !$ser data rho_ref_mc=p_nh%metrics%rho_ref_mc(:,:,1) - - PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1)' - - !$ser data theta_ref_mc=p_nh%metrics%theta_ref_mc(:,:,1) - - PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' - - !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) - - ENDDO -!$OMP END DO NOWAIT - - ENDIF -!$OMP END PARALLEL - - IF (timers_level > 5) THEN - CALL timer_stop(timer_solve_nh_cellcomp) - CALL timer_start(timer_solve_nh_vnupd) - ENDIF - - ! Compute rho and theta at edges for horizontal flux divergence term - IF (istep == 1) THEN - IF (iadv_rhotheta == 1) THEN ! Simplified Miura scheme - !DA: TODO: remove the wait after everything is async - !$ACC WAIT - ! Compute density and potential temperature at vertices - CALL cells2verts_scalar(p_nh%prog(nnow)%rho,p_patch, p_int%cells_aw_verts, & - z_rho_v, opt_rlend=min_rlvert_int-1) - CALL cells2verts_scalar(p_nh%prog(nnow)%theta_v,p_patch, p_int%cells_aw_verts, & - z_theta_v_v, opt_rlend=min_rlvert_int-1) - - ELSE IF (iadv_rhotheta == 2) THEN ! Miura second-order upwind scheme - -#if !defined (__LOOP_EXCHANGE) && !defined (__SX__) && !defined (_OPENACC) - ! Compute backward trajectory - code is inlined for cache-based machines (see below) - CALL btraj_compute_o1( btraj = btraj, & !inout - & ptr_p = p_patch, & !in - & ptr_int = p_int, & !in - & p_vn = p_nh%prog(nnow)%vn, & !in -#ifdef __MIXED_PRECISION - & p_vt = REAL(p_nh%diag%vt,wp), & !in ! this results in differences in distv_bary, not sure why... 
-#else - & p_vt = p_nh%diag%vt, & !in -#endif - & p_dthalf = 0.5_wp*dtime, & !in - & opt_rlstart = 7, & !in - & opt_rlend = min_rledge_int-1, & !in - & opt_acc_async = .TRUE. ) !in -#endif - - ! Compute Green-Gauss gradients for rho and theta -!TODO: grad_green_gauss_cell adjust... - CALL grad_green_gauss_cell(z_rth_pr, p_patch, p_int, z_grad_rth, & - opt_rlstart=3, opt_rlend=min_rlcell_int-1, opt_acc_async=.TRUE.) - - ELSE IF (iadv_rhotheta == 3) THEN ! Third-order Miura scheme (does not perform well yet) - - !DA: TODO: remove the wait after everything is async - !$ACC WAIT - - lcompute =.TRUE. - lcleanup =.FALSE. - ! First call: compute backward trajectory with wind at time level nnow - - CALL upwind_hflux_miura3(p_patch, p_nh%prog(nnow)%rho, p_nh%prog(nnow)%vn, & - p_nh%prog(nnow)%vn, REAL(p_nh%diag%vt,wp), dtime, p_int, & - lcompute, lcleanup, 0, z_rho_e, & - opt_rlstart=7, opt_lout_edge=.TRUE. ) - - ! Second call: compute only reconstructed value for flux divergence - lcompute =.FALSE. - lcleanup =.TRUE. - CALL upwind_hflux_miura3(p_patch, p_nh%prog(nnow)%theta_v, p_nh%prog(nnow)%vn, & - p_nh%prog(nnow)%vn, REAL(p_nh%diag%vt,wp), dtime, p_int, & - lcompute, lcleanup, 0, z_theta_v_e, & - opt_rlstart=7, opt_lout_edge=.TRUE. ) - - ENDIF - ENDIF ! istep = 1 - -!$OMP PARALLEL PRIVATE (rl_start,rl_end,i_startblk,i_endblk) - IF (istep == 1) THEN - ! Compute 'edge values' of density and virtual potential temperature for horizontal - ! flux divergence term; this is included in upwind_hflux_miura3 for option 3 - IF (iadv_rhotheta <= 2) THEN - - rl_start = min_rledge_int-2 - ! Initialize halo edges with zero in order to avoid access of uninitialized array elements - i_startblk = p_patch%edges%start_block(rl_start) - IF (idiv_method == 1) THEN - rl_end = min_rledge_int-2 - i_endblk = p_patch%edges%end_block(rl_end) - ELSE - rl_end = min_rledge_int-3 - i_endblk = p_patch%edges%end_block(rl_end) - ENDIF - - IF (i_endblk >= i_startblk) THEN - ! 
DSL: Instead of calling init_zero_contiguous_dp to set z_rho_e and - ! z_theta_v_e to zero, introduce a stencil that does the same thing, - ! but does not touch the padding, so it can be verified. - - CALL get_indices_e(p_patch, 1, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - - !$ser savepoint mo_solve_nonhydro_stencil_14_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' - - !$ser data z_rho_e=z_rho_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data z_theta_v_e=z_theta_v_e(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jk = 1, nlev - DO jc = i_startidx, i_endidx - z_rho_e(jc,jk,1) = 0._wp - z_theta_v_e(jc,jk,1) = 0._wp - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_14_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' - - !$ser data z_rho_e=z_rho_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data z_theta_v_e=z_theta_v_e(:,:,1) - ENDIF -!$OMP BARRIER - - rl_start = 7 - rl_end = min_rledge_int-1 - - i_startblk = p_patch%edges%start_block(rl_start) - i_endblk = p_patch%edges%end_block (rl_end) - - ! initialize also nest boundary points with zero - IF (jg > 1 .OR. l_limited_area) THEN - ! DSL: Instead of calling init_zero_contiguous_dp to set z_rho_e and - ! z_theta_v_e to zero, introduce a stencil that does the same thing, - ! but does not touch the padding, so it can be verified. 
- - rl_start_2 = 1 - rl_end_2 = min_rledge_int-1 - - i_startblk_2 = p_patch%edges%start_block(rl_start_2) - i_endblk_2 = p_patch%edges%end_block (rl_end_2) - - CALL get_indices_e(p_patch, 1, i_startblk_2, i_endblk_2, & - i_startidx_2, i_endidx_2, rl_start_2, rl_end_2) - - - !$ser savepoint mo_solve_nonhydro_stencil_15_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' - - !$ser data z_rho_e=z_rho_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data z_theta_v_e=z_theta_v_e(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jk = 1, nlev - DO jc = i_startidx_2, i_endidx_2 - z_rho_e(jc,jk,1) = 0._wp - z_theta_v_e(jc,jk,1) = 0._wp - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_15_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' - - !$ser data z_rho_e=z_rho_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data z_theta_v_e=z_theta_v_e(:,:,1) -!$OMP BARRIER - ENDIF - -!$OMP DO PRIVATE(jb,jk,je,i_startidx,i_endidx,ilc0,ibc0,lvn_pos,& -!$OMP z_ntdistv_bary_1,z_ntdistv_bary_2,distv_bary_1,distv_bary_2) ICON_OMP_DEFAULT_SCHEDULE - DO jb = i_startblk, i_endblk - - CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - IF (iadv_rhotheta == 2) THEN - ! Operations from upwind_hflux_miura are inlined in order to process both - ! 
fields in one step - - - !$ser savepoint mo_solve_nonhydro_stencil_16_fused_btraj_traj_o1_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing p_dthalf=0.5_wp*dtime' - - !$ser data p_dthalf=0.5_wp*dtime - - PRINT *, 'Serializing p_vn=p_nh%prog(nnow)%vn(:,:,1)' - - !$ser data p_vn=p_nh%prog(nnow)%vn(:,:,1) - - PRINT *, 'Serializing p_vt=p_nh%diag%vt(:,:,1)' - - !$ser data p_vt=p_nh%diag%vt(:,:,1) - - PRINT *, 'Serializing primal_normal_cell_1=p_patch%edges%primal_normal_cell_x(:,:,1)' - - !$ser data primal_normal_cell_1=p_patch%edges%primal_normal_cell_x(:,:,1) - - PRINT *, 'Serializing dual_normal_cell_1=p_patch%edges%dual_normal_cell_x(:,:,1)' - - !$ser data dual_normal_cell_1=p_patch%edges%dual_normal_cell_x(:,:,1) - - PRINT *, 'Serializing primal_normal_cell_2=p_patch%edges%primal_normal_cell_y(:,:,1)' - - !$ser data primal_normal_cell_2=p_patch%edges%primal_normal_cell_y(:,:,1) - - PRINT *, 'Serializing dual_normal_cell_2=p_patch%edges%dual_normal_cell_y(:,:,1)' - - !$ser data dual_normal_cell_2=p_patch%edges%dual_normal_cell_y(:,:,1) - - PRINT *, 'Serializing rho_ref_me=p_nh%metrics%rho_ref_me(:,:,1)' - - !$ser data rho_ref_me=p_nh%metrics%rho_ref_me(:,:,1) - - PRINT *, 'Serializing theta_ref_me=p_nh%metrics%theta_ref_me(:,:,1)' - - !$ser data theta_ref_me=p_nh%metrics%theta_ref_me(:,:,1) - - PRINT *, 'Serializing z_grad_rth_1=z_grad_rth(:,:,1,1)' - - !$ser data z_grad_rth_1=z_grad_rth(:,:,1,1) - - PRINT *, 'Serializing z_grad_rth_2=z_grad_rth(:,:,1,2)' - - !$ser data z_grad_rth_2=z_grad_rth(:,:,1,2) - - PRINT *, 'Serializing z_grad_rth_3=z_grad_rth(:,:,1,3)' - - !$ser data z_grad_rth_3=z_grad_rth(:,:,1,3) - - PRINT *, 'Serializing z_grad_rth_4=z_grad_rth(:,:,1,4)' - - !$ser data z_grad_rth_4=z_grad_rth(:,:,1,4) - - PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' - - !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data z_rth_pr_2=z_rth_pr(:,:,1,2) - - PRINT *, 
'Serializing z_rho_e=z_rho_e(:,:,1)' - - !$ser data z_rho_e=z_rho_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data z_theta_v_e=z_theta_v_e(:,:,1) - - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) -#if defined (__LOOP_EXCHANGE) || defined (__SX__) || defined (_OPENACC) - ! For cache-based machines, also the back-trajectory computation is inlined to improve efficiency - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) & - !$ACC PRIVATE(lvn_pos, ilc0, ibc0, z_ntdistv_bary_1, z_ntdistv_bary_2, distv_bary_1, distv_bary_2) -#ifdef __LOOP_EXCHANGE - DO je = i_startidx, i_endidx -!DIR$ IVDEP, PREFERVECTOR - DO jk = 1, nlev -#else - DO jk = 1, nlev - DO je = i_startidx, i_endidx -#endif - lvn_pos = p_nh%prog(nnow)%vn(je,jk,jb) >= 0._wp - - ! line and block indices of upwind neighbor cell - ilc0 = MERGE(p_patch%edges%cell_idx(je,jb,1),p_patch%edges%cell_idx(je,jb,2),lvn_pos) - ibc0 = MERGE(p_patch%edges%cell_blk(je,jb,1),p_patch%edges%cell_blk(je,jb,2),lvn_pos) - - ! distances from upwind mass point to the end point of the backward trajectory - ! in edge-normal and tangential directions - z_ntdistv_bary_1 = - ( p_nh%prog(nnow)%vn(je,jk,jb) * dthalf + & - MERGE(p_int%pos_on_tplane_e(je,jb,1,1), p_int%pos_on_tplane_e(je,jb,2,1),lvn_pos)) - - z_ntdistv_bary_2 = - ( p_nh%diag%vt(je,jk,jb) * dthalf + & - MERGE(p_int%pos_on_tplane_e(je,jb,1,2), p_int%pos_on_tplane_e(je,jb,2,2),lvn_pos)) - - ! rotate distance vectors into local lat-lon coordinates: - ! - ! component in longitudinal direction - distv_bary_1 = & - z_ntdistv_bary_1*MERGE(p_patch%edges%primal_normal_cell(je,jb,1)%v1, & - p_patch%edges%primal_normal_cell(je,jb,2)%v1,lvn_pos) & - + z_ntdistv_bary_2*MERGE(p_patch%edges%dual_normal_cell(je,jb,1)%v1, & - p_patch%edges%dual_normal_cell(je,jb,2)%v1,lvn_pos) - - ! 
component in latitudinal direction - distv_bary_2 = & - z_ntdistv_bary_1*MERGE(p_patch%edges%primal_normal_cell(je,jb,1)%v2, & - p_patch%edges%primal_normal_cell(je,jb,2)%v2,lvn_pos) & - + z_ntdistv_bary_2*MERGE(p_patch%edges%dual_normal_cell(je,jb,1)%v2, & - p_patch%edges%dual_normal_cell(je,jb,2)%v2,lvn_pos) - - - ! Calculate "edge values" of rho and theta_v - ! Note: z_rth_pr contains the perturbation values of rho and theta_v, - ! and the corresponding gradients are stored in z_grad_rth. -#ifdef __SWAPDIM - z_rho_e(je,jk,jb) = & - REAL(p_nh%metrics%rho_ref_me(je,jk,jb),wp) + z_rth_pr(ilc0,jk,ibc0,1) & - + distv_bary_1 * z_grad_rth(ilc0,jk,ibc0,1) & - + distv_bary_2 * z_grad_rth(ilc0,jk,ibc0,2) - z_theta_v_e(je,jk,jb) = & - REAL(p_nh%metrics%theta_ref_me(je,jk,jb),wp) + z_rth_pr(ilc0,jk,ibc0,2) & - + distv_bary_1 * z_grad_rth(ilc0,jk,ibc0,3) & - + distv_bary_2 * z_grad_rth(ilc0,jk,ibc0,4) -#else - z_rho_e(je,jk,jb) = REAL(p_nh%metrics%rho_ref_me(je,jk,jb),wp) & - + z_rth_pr(1,ilc0,jk,ibc0) & - + distv_bary_1 * z_grad_rth(1,ilc0,jk,ibc0) & - + distv_bary_2 * z_grad_rth(2,ilc0,jk,ibc0) - - z_theta_v_e(je,jk,jb) = REAL(p_nh%metrics%theta_ref_me(je,jk,jb),wp) & - + z_rth_pr(2,ilc0,jk,ibc0) & - + distv_bary_1 * z_grad_rth(3,ilc0,jk,ibc0) & - + distv_bary_2 * z_grad_rth(4,ilc0,jk,ibc0) -#endif - ENDDO ! loop over vertical levels - ENDDO ! loop over edges -#else - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) PRIVATE(ilc0, ibc0) - DO jk = 1, nlev - DO je = i_startidx, i_endidx - - ilc0 = p_cell_idx(je,jk,jb) - ibc0 = p_cell_blk(je,jk,jb) - - ! Calculate "edge values" of rho and theta_v - ! Note: z_rth_pr contains the perturbation values of rho and theta_v, - ! and the corresponding gradients are stored in z_grad_rth. 
-#ifdef __SWAPDIM - z_rho_e(je,jk,jb) = & - REAL(p_nh%metrics%rho_ref_me(je,jk,jb),wp) + z_rth_pr(ilc0,jk,ibc0,1) & - + p_distv_bary(je,jk,jb,1) * z_grad_rth(ilc0,jk,ibc0,1) & - + p_distv_bary(je,jk,jb,2) * z_grad_rth(ilc0,jk,ibc0,2) - z_theta_v_e(je,jk,jb) = & - REAL(p_nh%metrics%theta_ref_me(je,jk,jb),wp) + z_rth_pr(ilc0,jk,ibc0,2) & - + p_distv_bary(je,jk,jb,1) * z_grad_rth(ilc0,jk,ibc0,3) & - + p_distv_bary(je,jk,jb,2) * z_grad_rth(ilc0,jk,ibc0,4) -#else - z_rho_e(je,jk,jb) = REAL(p_nh%metrics%rho_ref_me(je,jk,jb),wp) & - + z_rth_pr(1,ilc0,jk,ibc0) & - + p_distv_bary(je,jk,jb,1) * z_grad_rth(1,ilc0,jk,ibc0) & - + p_distv_bary(je,jk,jb,2) * z_grad_rth(2,ilc0,jk,ibc0) - z_theta_v_e(je,jk,jb) = REAL(p_nh%metrics%theta_ref_me(je,jk,jb),wp) & - + z_rth_pr(2,ilc0,jk,ibc0) & - + p_distv_bary(je,jk,jb,1) * z_grad_rth(3,ilc0,jk,ibc0) & - + p_distv_bary(je,jk,jb,2) * z_grad_rth(4,ilc0,jk,ibc0) -#endif - - ENDDO ! loop over edges - ENDDO ! loop over vertical levels -#endif - !$ACC END PARALLEL - - ELSE ! iadv_rhotheta = 1 - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(32, 4) -#ifdef __LOOP_EXCHANGE - DO je = i_startidx, i_endidx -!DIR$ IVDEP - DO jk = 1, nlev -#else - DO jk = 1, nlev - DO je = i_startidx, i_endidx -#endif - - ! Compute upwind-biased values for rho and theta starting from centered differences - ! Note: the length of the backward trajectory should be 0.5*dtime*(vn,vt) in order to arrive - ! at a second-order accurate FV discretization, but twice the length is needed for numerical - ! 
stability - z_rho_e(je,jk,jb) = & - p_int%c_lin_e(je,1,jb)*p_nh%prog(nnow)%rho(icidx(je,jb,1),jk,icblk(je,jb,1)) + & - p_int%c_lin_e(je,2,jb)*p_nh%prog(nnow)%rho(icidx(je,jb,2),jk,icblk(je,jb,2)) - & - dtime * (p_nh%prog(nnow)%vn(je,jk,jb)*p_patch%edges%inv_dual_edge_length(je,jb)* & - (p_nh%prog(nnow)%rho(icidx(je,jb,2),jk,icblk(je,jb,2)) - & - p_nh%prog(nnow)%rho(icidx(je,jb,1),jk,icblk(je,jb,1)) ) + p_nh%diag%vt(je,jk,jb) * & - p_patch%edges%inv_primal_edge_length(je,jb) * p_patch%edges%tangent_orientation(je,jb) * & - (z_rho_v(ividx(je,jb,2),jk,ivblk(je,jb,2)) - z_rho_v(ividx(je,jb,1),jk,ivblk(je,jb,1)) ) ) - - z_theta_v_e(je,jk,jb) = & - p_int%c_lin_e(je,1,jb)*p_nh%prog(nnow)%theta_v(icidx(je,jb,1),jk,icblk(je,jb,1)) + & - p_int%c_lin_e(je,2,jb)*p_nh%prog(nnow)%theta_v(icidx(je,jb,2),jk,icblk(je,jb,2)) - & - dtime * (p_nh%prog(nnow)%vn(je,jk,jb)*p_patch%edges%inv_dual_edge_length(je,jb)* & - (p_nh%prog(nnow)%theta_v(icidx(je,jb,2),jk,icblk(je,jb,2)) - & - p_nh%prog(nnow)%theta_v(icidx(je,jb,1),jk,icblk(je,jb,1)) ) + p_nh%diag%vt(je,jk,jb) * & - p_patch%edges%inv_primal_edge_length(je,jb) * p_patch%edges%tangent_orientation(je,jb) * & - (z_theta_v_v(ividx(je,jb,2),jk,ivblk(je,jb,2)) - z_theta_v_v(ividx(je,jb,1),jk,ivblk(je,jb,1)) )) - - ENDDO ! loop over edges - ENDDO ! 
loop over vertical levels - !$ACC END PARALLEL - ENDIF - - - !$ser savepoint mo_solve_nonhydro_stencil_16_fused_btraj_traj_o1_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing p_dthalf=0.5_wp*dtime' - - !$ser data p_dthalf=0.5_wp*dtime - - PRINT *, 'Serializing p_vn=p_nh%prog(nnow)%vn(:,:,1)' - - !$ser data p_vn=p_nh%prog(nnow)%vn(:,:,1) - - PRINT *, 'Serializing p_vt=p_nh%diag%vt(:,:,1)' - - !$ser data p_vt=p_nh%diag%vt(:,:,1) - - PRINT *, 'Serializing primal_normal_cell_1=p_patch%edges%primal_normal_cell_x(:,:,1)' - - !$ser data primal_normal_cell_1=p_patch%edges%primal_normal_cell_x(:,:,1) - - PRINT *, 'Serializing dual_normal_cell_1=p_patch%edges%dual_normal_cell_x(:,:,1)' - - !$ser data dual_normal_cell_1=p_patch%edges%dual_normal_cell_x(:,:,1) - - PRINT *, 'Serializing primal_normal_cell_2=p_patch%edges%primal_normal_cell_y(:,:,1)' - - !$ser data primal_normal_cell_2=p_patch%edges%primal_normal_cell_y(:,:,1) - - PRINT *, 'Serializing dual_normal_cell_2=p_patch%edges%dual_normal_cell_y(:,:,1)' - - !$ser data dual_normal_cell_2=p_patch%edges%dual_normal_cell_y(:,:,1) - - PRINT *, 'Serializing rho_ref_me=p_nh%metrics%rho_ref_me(:,:,1)' - - !$ser data rho_ref_me=p_nh%metrics%rho_ref_me(:,:,1) - - PRINT *, 'Serializing theta_ref_me=p_nh%metrics%theta_ref_me(:,:,1)' - - !$ser data theta_ref_me=p_nh%metrics%theta_ref_me(:,:,1) - - PRINT *, 'Serializing z_grad_rth_1=z_grad_rth(:,:,1,1)' - - !$ser data z_grad_rth_1=z_grad_rth(:,:,1,1) - - PRINT *, 'Serializing z_grad_rth_2=z_grad_rth(:,:,1,2)' - - !$ser data z_grad_rth_2=z_grad_rth(:,:,1,2) - - PRINT *, 'Serializing z_grad_rth_3=z_grad_rth(:,:,1,3)' - - !$ser data z_grad_rth_3=z_grad_rth(:,:,1,3) - - PRINT *, 'Serializing z_grad_rth_4=z_grad_rth(:,:,1,4)' - - !$ser data z_grad_rth_4=z_grad_rth(:,:,1,4) - - PRINT *, 'Serializing z_rth_pr_1=z_rth_pr(:,:,1,1)' - - !$ser data z_rth_pr_1=z_rth_pr(:,:,1,1) - - PRINT *, 'Serializing z_rth_pr_2=z_rth_pr(:,:,1,2)' - - !$ser data 
z_rth_pr_2=z_rth_pr(:,:,1,2) - - PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' - - !$ser data z_rho_e=z_rho_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data z_theta_v_e=z_theta_v_e(:,:,1) - - ENDDO -!$OMP END DO - - ENDIF - - ELSE IF (istep == 2 .AND. lhdiff_rcf .AND. divdamp_type >= 3) THEN ! apply div damping on 3D divergence - - ! add dw/dz contribution to divergence damping term - - rl_start = 7 - rl_end = min_rledge_int-2 - - i_startblk = p_patch%edges%start_block(rl_start) - i_endblk = p_patch%edges%end_block (rl_end) - -!$OMP DO PRIVATE(jb,jk,je,i_startidx,i_endidx) ICON_OMP_DEFAULT_SCHEDULE - DO jb = i_startblk, i_endblk - - CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - - !$ser savepoint mo_solve_nonhydro_stencil_17_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing hmask_dd3d=p_nh%metrics%hmask_dd3d(:,1)' - - !$ser data hmask_dd3d=p_nh%metrics%hmask_dd3d(:,1) - - PRINT *, 'Serializing scalfac_dd3d=p_nh%metrics%scalfac_dd3d(:)' - - !$ser data scalfac_dd3d=p_nh%metrics%scalfac_dd3d(:) - - PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' - - !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) - - PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' - - !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) - - PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' - - !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) -#ifdef __LOOP_EXCHANGE - DO je = i_startidx, i_endidx -!DIR$ IVDEP, PREFERVECTOR - DO jk = kstart_dd3d(jg), nlev - z_graddiv_vn(jk,je,jb) = z_graddiv_vn(jk,je,jb) + p_nh%metrics%hmask_dd3d(je,jb)* & - p_nh%metrics%scalfac_dd3d(jk) * p_patch%edges%inv_dual_edge_length(je,jb)* & - ( z_dwdz_dd(icidx(je,jb,2),jk,icblk(je,jb,2)) - z_dwdz_dd(icidx(je,jb,1),jk,icblk(je,jb,1)) ) -#else - DO jk = 
kstart_dd3d(jg), nlev - DO je = i_startidx, i_endidx - z_graddiv_vn(je,jk,jb) = z_graddiv_vn(je,jk,jb) + p_nh%metrics%hmask_dd3d(je,jb)* & - p_nh%metrics%scalfac_dd3d(jk) * p_patch%edges%inv_dual_edge_length(je,jb)* & - ( z_dwdz_dd(icidx(je,jb,2),jk,icblk(je,jb,2)) - z_dwdz_dd(icidx(je,jb,1),jk,icblk(je,jb,1)) ) -#endif - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_17_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing hmask_dd3d=p_nh%metrics%hmask_dd3d(:,1)' - - !$ser data hmask_dd3d=p_nh%metrics%hmask_dd3d(:,1) - - PRINT *, 'Serializing scalfac_dd3d=p_nh%metrics%scalfac_dd3d(:)' - - !$ser data scalfac_dd3d=p_nh%metrics%scalfac_dd3d(:) - - PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' - - !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) - - PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' - - !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) - - PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' - - !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) - - ENDDO -!$OMP END DO - - ENDIF ! istep = 1/2 - - ! Remaining computations at edge points - - rl_start = grf_bdywidth_e + 1 ! boundary update follows below - rl_end = min_rledge_int - - i_startblk = p_patch%edges%start_block(rl_start) - i_endblk = p_patch%edges%end_block(rl_end) - - IF (istep == 1) THEN - -!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,z_theta1,z_theta2,ikp1,ikp2) ICON_OMP_DEFAULT_SCHEDULE - DO jb = i_startblk, i_endblk - - CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - ! Store values at nest interface levels; this is done here for the first sub-time step, - ! the final averaging is done in mo_nh_nest_utilities:compute_tendencies - IF (idyn_timestep == 1 .AND. 
l_child_vertnest) THEN - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO je = i_startidx, i_endidx - p_nh%diag%vn_ie_int(je,1,jb) = p_nh%diag%vn_ie(je,nshift,jb) - ENDDO - !$ACC END PARALLEL - ENDIF - - - !$ser savepoint mo_solve_nonhydro_stencil_18_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' - - !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) -#ifdef __LOOP_EXCHANGE - DO je = i_startidx, i_endidx - DO jk = 1, nflatlev(jg)-1 -#else - DO jk = 1, nflatlev(jg)-1 - DO je = i_startidx, i_endidx -#endif - ! 
horizontal gradient of Exner pressure where coordinate surfaces are flat - z_gradh_exner(je,jk,jb) = p_patch%edges%inv_dual_edge_length(je,jb)* & - (z_exner_ex_pr(icidx(je,jb,2),jk,icblk(je,jb,2)) - & - z_exner_ex_pr(icidx(je,jb,1),jk,icblk(je,jb,1)) ) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_18_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' - - !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) - - IF (igradp_method <= 3) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_19_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' - - !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - PRINT *, 'Serializing ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1)' - - !$ser data ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1) - - PRINT *, 'Serializing c_lin_e=p_int%c_lin_e(:,:,1)' - - !$ser data c_lin_e=p_int%c_lin_e(:,:,1) - - PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' - - !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) -#ifdef __LOOP_EXCHANGE - DO je = i_startidx, i_endidx -!DIR$ IVDEP - DO jk = nflatlev(jg), nflat_gradp(jg) -#else -!$NEC outerloop_unroll(8) - DO jk = nflatlev(jg), nflat_gradp(jg) - DO je = i_startidx, i_endidx -#endif - ! 
horizontal gradient of Exner pressure, including metric correction - z_gradh_exner(je,jk,jb) = p_patch%edges%inv_dual_edge_length(je,jb)* & - (z_exner_ex_pr(icidx(je,jb,2),jk,icblk(je,jb,2)) - & - z_exner_ex_pr(icidx(je,jb,1),jk,icblk(je,jb,1)) ) - & - p_nh%metrics%ddxn_z_full(je,jk,jb) * & -#ifdef __SWAPDIM - (p_int%c_lin_e(je,1,jb)*z_dexner_dz_c(icidx(je,jb,1),jk,icblk(je,jb,1),1) + & - p_int%c_lin_e(je,2,jb)*z_dexner_dz_c(icidx(je,jb,2),jk,icblk(je,jb,2),1)) -#else - (p_int%c_lin_e(je,1,jb)*z_dexner_dz_c(1,icidx(je,jb,1),jk,icblk(je,jb,1)) + & - p_int%c_lin_e(je,2,jb)*z_dexner_dz_c(1,icidx(je,jb,2),jk,icblk(je,jb,2))) -#endif - ENDDO - ENDDO - !$ACC END PARALLEL - - - !$ser savepoint mo_solve_nonhydro_stencil_19_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' - - !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - PRINT *, 'Serializing ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1)' - - !$ser data ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1) - - PRINT *, 'Serializing c_lin_e=p_int%c_lin_e(:,:,1)' - - !$ser data c_lin_e=p_int%c_lin_e(:,:,1) - - PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' - - !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) - - - !$ser savepoint mo_solve_nonhydro_stencil_20_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' - - !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - PRINT *, 'Serializing zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1)' - - !$ser 
data zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1) - - PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' - - !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) - - PRINT *, 'Serializing z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2)' - - !$ser data z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(32, 4) -#ifdef __LOOP_EXCHANGE - DO je = i_startidx, i_endidx -!DIR$ IVDEP, PREFERVECTOR - DO jk = nflat_gradp(jg)+1, nlev -#else -!$NEC outerloop_unroll(8) - DO jk = nflat_gradp(jg)+1, nlev - DO je = i_startidx, i_endidx -#endif - ! horizontal gradient of Exner pressure, Taylor-expansion-based reconstruction - z_gradh_exner(je,jk,jb) = p_patch%edges%inv_dual_edge_length(je,jb)* & - (z_exner_ex_pr(icidx(je,jb,2),ikidx(2,je,jk,jb),icblk(je,jb,2)) + & - p_nh%metrics%zdiff_gradp(2,je,jk,jb)* & -#ifdef __SWAPDIM - (z_dexner_dz_c(icidx(je,jb,2),ikidx(2,je,jk,jb),icblk(je,jb,2),1) + & - p_nh%metrics%zdiff_gradp(2,je,jk,jb)* & - z_dexner_dz_c(icidx(je,jb,2),ikidx(2,je,jk,jb),icblk(je,jb,2),2)) - & - (z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1)) + & - p_nh%metrics%zdiff_gradp(1,je,jk,jb)* & - (z_dexner_dz_c(icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1),1) + & - p_nh%metrics%zdiff_gradp(1,je,jk,jb)* & - z_dexner_dz_c(icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1),2)))) -#else - (z_dexner_dz_c(1,icidx(je,jb,2),ikidx(2,je,jk,jb),icblk(je,jb,2)) + & - p_nh%metrics%zdiff_gradp(2,je,jk,jb)* & - z_dexner_dz_c(2,icidx(je,jb,2),ikidx(2,je,jk,jb),icblk(je,jb,2))) - & - (z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1)) + & - p_nh%metrics%zdiff_gradp(1,je,jk,jb)* & - (z_dexner_dz_c(1,icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1)) + & - p_nh%metrics%zdiff_gradp(1,je,jk,jb)* & - z_dexner_dz_c(2,icidx(je,jb,1),ikidx(1,je,jk,jb),icblk(je,jb,1))))) -#endif - 
ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_20_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' - - !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) - - PRINT *, 'Serializing z_exner_ex_pr=z_exner_ex_pr(:,:,1)' - - !$ser data z_exner_ex_pr=z_exner_ex_pr(:,:,1) - - PRINT *, 'Serializing zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1)' - - !$ser data zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1) - - PRINT *, 'Serializing z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1)' - - !$ser data z_dexner_dz_c_1=z_dexner_dz_c(:,:,1,1) - - PRINT *, 'Serializing z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2)' - - !$ser data z_dexner_dz_c_2=z_dexner_dz_c(:,:,1,2) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) - - ELSE IF (igradp_method == 4 .OR. igradp_method == 5) THEN - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(32, 4) -#ifdef __LOOP_EXCHANGE - DO je = i_startidx, i_endidx - DO jk = nflatlev(jg), nlev -#else - DO jk = nflatlev(jg), nlev - DO je = i_startidx, i_endidx -#endif - ! 
horizontal gradient of Exner pressure, cubic/quadratic interpolation - z_gradh_exner(je,jk,jb) = p_patch%edges%inv_dual_edge_length(je,jb)* & - (z_exner_ex_pr(icidx(je,jb,2),ikidx(2,je,jk,jb)-1,icblk(je,jb,2)) * & - p_nh%metrics%coeff_gradp(5,je,jk,jb) + & - z_exner_ex_pr(icidx(je,jb,2),ikidx(2,je,jk,jb) ,icblk(je,jb,2)) * & - p_nh%metrics%coeff_gradp(6,je,jk,jb) + & - z_exner_ex_pr(icidx(je,jb,2),ikidx(2,je,jk,jb)+1,icblk(je,jb,2)) * & - p_nh%metrics%coeff_gradp(7,je,jk,jb) + & - z_exner_ex_pr(icidx(je,jb,2),ikidx(2,je,jk,jb)+2,icblk(je,jb,2)) * & - p_nh%metrics%coeff_gradp(8,je,jk,jb) - & - (z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb)-1,icblk(je,jb,1)) * & - p_nh%metrics%coeff_gradp(1,je,jk,jb) + & - z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb) ,icblk(je,jb,1)) * & - p_nh%metrics%coeff_gradp(2,je,jk,jb) + & - z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb)+1,icblk(je,jb,1)) * & - p_nh%metrics%coeff_gradp(3,je,jk,jb) + & - z_exner_ex_pr(icidx(je,jb,1),ikidx(1,je,jk,jb)+2,icblk(je,jb,1)) * & - p_nh%metrics%coeff_gradp(4,je,jk,jb)) ) - - ENDDO - ENDDO - !$ACC END PARALLEL - ENDIF - - ! 
compute hydrostatically approximated correction term that replaces downward extrapolation - IF (igradp_method == 3) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_21_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing grav_o_cpd=grav_o_cpd' - - !$ser data grav_o_cpd=grav_o_cpd - - PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1)' - - !$ser data zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) - - PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' - - !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) - - PRINT *, 'Serializing z_hydro_corr=z_hydro_corr(:,:,1)' - - !$ser data z_hydro_corr=z_hydro_corr(:,:,1) - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR PRIVATE(z_theta1, z_theta2) - DO je = i_startidx, i_endidx - - z_theta1 = & - p_nh%prog(nnow)%theta_v(icidx(je,jb,1),ikidx(1,je,nlev,jb),icblk(je,jb,1)) + & - p_nh%metrics%zdiff_gradp(1,je,nlev,jb)* & - (p_nh%diag%theta_v_ic(icidx(je,jb,1),ikidx(1,je,nlev,jb), icblk(je,jb,1)) - & - p_nh%diag%theta_v_ic(icidx(je,jb,1),ikidx(1,je,nlev,jb)+1,icblk(je,jb,1))) * & - p_nh%metrics%inv_ddqz_z_full(icidx(je,jb,1),ikidx(1,je,nlev,jb),icblk(je,jb,1)) - - z_theta2 = & - p_nh%prog(nnow)%theta_v(icidx(je,jb,2),ikidx(2,je,nlev,jb),icblk(je,jb,2)) + & - p_nh%metrics%zdiff_gradp(2,je,nlev,jb)* & - (p_nh%diag%theta_v_ic(icidx(je,jb,2),ikidx(2,je,nlev,jb), icblk(je,jb,2)) - & - p_nh%diag%theta_v_ic(icidx(je,jb,2),ikidx(2,je,nlev,jb)+1,icblk(je,jb,2))) * & - 
p_nh%metrics%inv_ddqz_z_full(icidx(je,jb,2),ikidx(2,je,nlev,jb),icblk(je,jb,2)) - - z_hydro_corr(je,nlev,jb) = grav_o_cpd*p_patch%edges%inv_dual_edge_length(je,jb)* & - (z_theta2-z_theta1)*4._wp/(z_theta1+z_theta2)**2 - - ENDDO - !$ACC END PARALLEL - - - !$ser savepoint mo_solve_nonhydro_stencil_21_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing grav_o_cpd=grav_o_cpd' - - !$ser data grav_o_cpd=grav_o_cpd - - PRINT *, 'Serializing theta_v=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1)' - - !$ser data zdiff_gradp=p_nh%metrics%zdiff_gradp_dsl(:,:,:,1) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) - - PRINT *, 'Serializing inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1)' - - !$ser data inv_dual_edge_length=p_patch%edges%inv_dual_edge_length(:,1) - - PRINT *, 'Serializing z_hydro_corr=z_hydro_corr(:,:,1)' - - !$ser data z_hydro_corr=z_hydro_corr(:,:,1) - - ELSE IF (igradp_method == 5) THEN - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR PRIVATE(ikp1, ikp2, z_theta1, z_theta2) - DO je = i_startidx, i_endidx - - ikp1 = MIN(nlev,ikidx(1,je,nlev,jb)+2) - ikp2 = MIN(nlev,ikidx(2,je,nlev,jb)+2) - - z_theta1 = & - p_nh%prog(nnow)%theta_v(icidx(je,jb,1),ikidx(1,je,nlev,jb)-1,icblk(je,jb,1)) * & - p_nh%metrics%coeff_gradp(1,je,nlev,jb) + & - p_nh%prog(nnow)%theta_v(icidx(je,jb,1),ikidx(1,je,nlev,jb) ,icblk(je,jb,1)) * & - p_nh%metrics%coeff_gradp(2,je,nlev,jb) + & - p_nh%prog(nnow)%theta_v(icidx(je,jb,1),ikidx(1,je,nlev,jb)+1,icblk(je,jb,1)) * & - p_nh%metrics%coeff_gradp(3,je,nlev,jb) + & - p_nh%prog(nnow)%theta_v(icidx(je,jb,1),ikp1 ,icblk(je,jb,1)) * & - 
p_nh%metrics%coeff_gradp(4,je,nlev,jb) - - z_theta2 = & - p_nh%prog(nnow)%theta_v(icidx(je,jb,2),ikidx(2,je,nlev,jb)-1,icblk(je,jb,2)) * & - p_nh%metrics%coeff_gradp(5,je,nlev,jb) + & - p_nh%prog(nnow)%theta_v(icidx(je,jb,2),ikidx(2,je,nlev,jb) ,icblk(je,jb,2)) * & - p_nh%metrics%coeff_gradp(6,je,nlev,jb) + & - p_nh%prog(nnow)%theta_v(icidx(je,jb,2),ikidx(2,je,nlev,jb)+1,icblk(je,jb,2)) * & - p_nh%metrics%coeff_gradp(7,je,nlev,jb) + & - p_nh%prog(nnow)%theta_v(icidx(je,jb,2),ikp2 ,icblk(je,jb,2)) * & - p_nh%metrics%coeff_gradp(8,je,nlev,jb) - - z_hydro_corr(je,nlev,jb) = grav_o_cpd*p_patch%edges%inv_dual_edge_length(je,jb)* & - (z_theta2-z_theta1)*4._wp/(z_theta1+z_theta2)**2 - - ENDDO - !$ACC END PARALLEL - ENDIF - - ENDDO -!$OMP END DO - - ENDIF ! istep = 1 - - - IF (istep == 1 .AND. (igradp_method == 3 .OR. igradp_method == 5)) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_22_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing ipeidx_dsl=p_nh%metrics%pg_edgeidx_dsl(:,:,1)' - - !$ser data ipeidx_dsl=p_nh%metrics%pg_edgeidx_dsl(:,:,1) - - PRINT *, 'Serializing pg_exdist=p_nh%metrics%pg_exdist_dsl(:,:,1)' - - !$ser data pg_exdist=p_nh%metrics%pg_exdist_dsl(:,:,1) - - PRINT *, 'Serializing z_hydro_corr=z_hydro_corr(:,:,1)' - - !$ser data z_hydro_corr=z_hydro_corr(:,:,1) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) -!$OMP DO PRIVATE(jb,je,ie,nlen_gradp,ishift) ICON_OMP_DEFAULT_SCHEDULE - DO jb = 1, nblks_gradp - IF (jb == nblks_gradp) THEN - nlen_gradp = npromz_gradp - ELSE - nlen_gradp = nproma_gradp - ENDIF - ishift = (jb-1)*nproma_gradp - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) -!$NEC ivdep - !$ACC LOOP GANG VECTOR - DO je = 1, nlen_gradp - ie = ishift+je - - z_gradh_exner(ipeidx(ie),iplev(ie),ipeblk(ie)) = & - z_gradh_exner(ipeidx(ie),iplev(ie),ipeblk(ie)) + & - p_nh%metrics%pg_exdist(ie)*z_hydro_corr(ipeidx(ie),nlev,ipeblk(ie)) - - 
ENDDO - !$ACC END PARALLEL - ENDDO -!$OMP END DO - - rl_start_2 = grf_bdywidth_e+1 - rl_end_2 = min_rledge - - i_startblk_2 = p_patch%edges%start_block(rl_start_2) - i_endblk_2 = p_patch%edges%end_block(rl_end_2) - - CALL get_indices_e(p_patch, 1, i_startblk_2, i_endblk_2, & - i_startidx_2, i_endidx_2, rl_start_2, rl_end_2) - - - !$ser savepoint mo_solve_nonhydro_stencil_22_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing ipeidx_dsl=p_nh%metrics%pg_edgeidx_dsl(:,:,1)' - - !$ser data ipeidx_dsl=p_nh%metrics%pg_edgeidx_dsl(:,:,1) - - PRINT *, 'Serializing pg_exdist=p_nh%metrics%pg_exdist_dsl(:,:,1)' - - !$ser data pg_exdist=p_nh%metrics%pg_exdist_dsl(:,:,1) - - PRINT *, 'Serializing z_hydro_corr=z_hydro_corr(:,:,1)' - - !$ser data z_hydro_corr=z_hydro_corr(:,:,1) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) - - ENDIF - - - ! Update horizontal velocity field: advection, Coriolis force, pressure-gradient term, and physics - -!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,z_graddiv2_vn, & -!$OMP z_ddt_vn_dyn, z_ddt_vn_apc, z_ddt_vn_cor, z_ddt_vn_pgr, z_ddt_vn_ray, z_d_vn_dmp, z_d_vn_iau & -!$OMP ) ICON_OMP_DEFAULT_SCHEDULE - - DO jb = i_startblk, i_endblk - - CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - IF ((itime_scheme >= 4) .AND. istep == 2) THEN ! 
use temporally averaged velocity advection terms - - - !$ser savepoint mo_solve_nonhydro_stencil_23_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cpd=cpd' - - !$ser data cpd=cpd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing wgt_nnew_vel=wgt_nnew_vel' - - !$ser data wgt_nnew_vel=wgt_nnew_vel - - PRINT *, 'Serializing wgt_nnow_vel=wgt_nnow_vel' - - !$ser data wgt_nnow_vel=wgt_nnow_vel - - PRINT *, 'Serializing vn_nnow=p_nh%prog(nnow)%vn(:,:,1)' - - !$ser data vn_nnow=p_nh%prog(nnow)%vn(:,:,1) - - PRINT *, 'Serializing ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1)' - - !$ser data ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1) - - PRINT *, 'Serializing ddt_vn_adv_ntl2=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl2)' - - !$ser data ddt_vn_adv_ntl2=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl2) - - PRINT *, 'Serializing ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1)' - - !$ser data ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data z_theta_v_e=z_theta_v_e(:,:,1) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) - - PRINT *, 'Serializing vn_nnew=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn_nnew=p_nh%prog(nnew)%vn(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - - !$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(z_ddt_vn_dyn, z_ddt_vn_apc, z_ddt_vn_cor, z_ddt_vn_pgr) TILE(32, 4) - DO jk = 1, nlev -!DIR$ IVDEP - DO je = i_startidx, i_endidx - ! - z_ddt_vn_apc = p_nh%diag%ddt_vn_apc_pc(je,jk,jb,ntl1)*wgt_nnow_vel & - & +p_nh%diag%ddt_vn_apc_pc(je,jk,jb,ntl2)*wgt_nnew_vel - z_ddt_vn_pgr = -cpd*z_theta_v_e(je,jk,jb)*z_gradh_exner(je,jk,jb) - ! - z_ddt_vn_dyn = z_ddt_vn_apc & ! advection plus Coriolis - & +z_ddt_vn_pgr & ! pressure gradient - & +p_nh%diag%ddt_vn_phy(je,jk,jb) ! physics applied in dynamics - ! 
- p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnow)%vn(je,jk,jb) + dtime * z_ddt_vn_dyn - ! -#ifdef __ENABLE_DDT_VN_XYZ__ - IF (p_nh%diag%ddt_vn_adv_is_associated .OR. p_nh%diag%ddt_vn_cor_is_associated) THEN - z_ddt_vn_cor = p_nh%diag%ddt_vn_cor_pc(je,jk,jb,ntl1)*wgt_nnow_vel & - & +p_nh%diag%ddt_vn_cor_pc(je,jk,jb,ntl2)*wgt_nnew_vel - ! - IF (p_nh%diag%ddt_vn_adv_is_associated) THEN - p_nh%diag%ddt_vn_adv(je,jk,jb)= p_nh%diag%ddt_vn_adv(je,jk,jb) + r_nsubsteps *(z_ddt_vn_apc-z_ddt_vn_cor) - END IF - ! - IF (p_nh%diag%ddt_vn_cor_is_associated) THEN - p_nh%diag%ddt_vn_cor(je,jk,jb)= p_nh%diag%ddt_vn_cor(je,jk,jb) + r_nsubsteps * z_ddt_vn_cor - END IF - ! - END IF - ! - IF (p_nh%diag%ddt_vn_pgr_is_associated) THEN - p_nh%diag%ddt_vn_pgr(je,jk,jb) = p_nh%diag%ddt_vn_pgr(je,jk,jb) + r_nsubsteps * z_ddt_vn_pgr - END IF - ! - IF (p_nh%diag%ddt_vn_phd_is_associated) THEN - p_nh%diag%ddt_vn_phd(je,jk,jb) = p_nh%diag%ddt_vn_phd(je,jk,jb) + r_nsubsteps * p_nh%diag%ddt_vn_phy(je,jk,jb) - END IF - ! - IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN - p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + r_nsubsteps * z_ddt_vn_dyn - END IF -#endif - ! 
- ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_23_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cpd=cpd' - - !$ser data cpd=cpd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing wgt_nnew_vel=wgt_nnew_vel' - - !$ser data wgt_nnew_vel=wgt_nnew_vel - - PRINT *, 'Serializing wgt_nnow_vel=wgt_nnow_vel' - - !$ser data wgt_nnow_vel=wgt_nnow_vel - - PRINT *, 'Serializing vn_nnow=p_nh%prog(nnow)%vn(:,:,1)' - - !$ser data vn_nnow=p_nh%prog(nnow)%vn(:,:,1) - - PRINT *, 'Serializing ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1)' - - !$ser data ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1) - - PRINT *, 'Serializing ddt_vn_adv_ntl2=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl2)' - - !$ser data ddt_vn_adv_ntl2=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl2) - - PRINT *, 'Serializing ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1)' - - !$ser data ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data z_theta_v_e=z_theta_v_e(:,:,1) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) - - PRINT *, 'Serializing vn_nnew=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn_nnew=p_nh%prog(nnew)%vn(:,:,1) - - ELSE - - - !$ser savepoint mo_solve_nonhydro_stencil_24_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cpd=cpd' - - !$ser data cpd=cpd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing vn_nnow=p_nh%prog(nnow)%vn(:,:,1)' - - !$ser data vn_nnow=p_nh%prog(nnow)%vn(:,:,1) - - PRINT *, 'Serializing ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1)' - - !$ser data ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1) - - PRINT *, 'Serializing ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1)' - - !$ser data ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data 
z_theta_v_e=z_theta_v_e(:,:,1) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) - - PRINT *, 'Serializing vn_nnew=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn_nnew=p_nh%prog(nnew)%vn(:,:,1) -!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) - DO jk = 1, nlev -!DIR$ IVDEP - DO je = i_startidx, i_endidx - ! - p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnow)%vn(je,jk,jb) + dtime * & - & ( p_nh%diag%ddt_vn_apc_pc(je,jk,jb,ntl1) & - & -cpd*z_theta_v_e(je,jk,jb)*z_gradh_exner(je,jk,jb) & - & +p_nh%diag%ddt_vn_phy(je,jk,jb) ) - ! - ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_24_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cpd=cpd' - - !$ser data cpd=cpd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing vn_nnow=p_nh%prog(nnow)%vn(:,:,1)' - - !$ser data vn_nnow=p_nh%prog(nnow)%vn(:,:,1) - - PRINT *, 'Serializing ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1)' - - !$ser data ddt_vn_adv_ntl1=p_nh%diag%ddt_vn_apc_pc(:,:,1,ntl1) - - PRINT *, 'Serializing ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1)' - - !$ser data ddt_vn_phy=p_nh%diag%ddt_vn_phy(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data z_theta_v_e=z_theta_v_e(:,:,1) - - PRINT *, 'Serializing z_gradh_exner=z_gradh_exner(:,:,1)' - - !$ser data z_gradh_exner=z_gradh_exner(:,:,1) - - PRINT *, 'Serializing vn_nnew=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn_nnew=p_nh%prog(nnew)%vn(:,:,1) - ENDIF - - IF (lhdiff_rcf .AND. istep == 2 .AND. (divdamp_order == 4 .OR. divdamp_order == 24)) THEN ! fourth-order divergence damping - ! 
Compute gradient of divergence of gradient of divergence for fourth-order divergence damping - - - !$ser savepoint mo_solve_nonhydro_stencil_25_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing geofac_grdiv=p_int%geofac_grdiv(:,:,1)' - - !$ser data geofac_grdiv=p_int%geofac_grdiv(:,:,1) - - PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' - - !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) - - PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' - - !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) -!$ACC PARALLEL IF( i_am_accel_node ) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) -#ifdef __LOOP_EXCHANGE - DO je = i_startidx, i_endidx -!DIR$ IVDEP - DO jk = 1, nlev - z_graddiv2_vn(je,jk) = p_int%geofac_grdiv(je,1,jb)*z_graddiv_vn(jk,je,jb) & - + p_int%geofac_grdiv(je,2,jb)*z_graddiv_vn(jk,iqidx(je,jb,1),iqblk(je,jb,1)) & - + p_int%geofac_grdiv(je,3,jb)*z_graddiv_vn(jk,iqidx(je,jb,2),iqblk(je,jb,2)) & - + p_int%geofac_grdiv(je,4,jb)*z_graddiv_vn(jk,iqidx(je,jb,3),iqblk(je,jb,3)) & - + p_int%geofac_grdiv(je,5,jb)*z_graddiv_vn(jk,iqidx(je,jb,4),iqblk(je,jb,4)) -#else -!$NEC outerloop_unroll(6) - DO jk = 1, nlev - DO je = i_startidx, i_endidx - z_graddiv2_vn(je,jk) = p_int%geofac_grdiv(je,1,jb)*z_graddiv_vn(je,jk,jb) & - + p_int%geofac_grdiv(je,2,jb)*z_graddiv_vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & - + p_int%geofac_grdiv(je,3,jb)*z_graddiv_vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & - + p_int%geofac_grdiv(je,4,jb)*z_graddiv_vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & - + p_int%geofac_grdiv(je,5,jb)*z_graddiv_vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) -#endif - - ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_25_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing geofac_grdiv=p_int%geofac_grdiv(:,:,1)' - - !$ser data geofac_grdiv=p_int%geofac_grdiv(:,:,1) - - PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' - - !$ser data 
z_graddiv_vn=z_graddiv_vn(:,:,1) - - PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' - - !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) - - ENDIF - - IF (lhdiff_rcf .AND. istep == 2) THEN - ! apply divergence damping if diffusion is not called every sound-wave time step - IF (divdamp_order == 2 .OR. (divdamp_order == 24 .AND. scal_divdamp_o2 > 1.e-6_wp) ) THEN ! 2nd-order divergence damping - - - !$ser savepoint mo_solve_nonhydro_stencil_26_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing scal_divdamp_o2=scal_divdamp_o2' - - !$ser data scal_divdamp_o2=scal_divdamp_o2 - - PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' - - !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) -!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) PRIVATE(z_d_vn_dmp) - DO jk = 1, nlev -!DIR$ IVDEP - DO je = i_startidx, i_endidx - ! -#ifdef __LOOP_EXCHANGE - z_d_vn_dmp = scal_divdamp_o2*z_graddiv_vn(jk,je,jb) -#else - z_d_vn_dmp = scal_divdamp_o2*z_graddiv_vn(je,jk,jb) -#endif - ! - p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnew)%vn(je,jk,jb) + z_d_vn_dmp - ! -#ifdef __ENABLE_DDT_VN_XYZ__ - IF (p_nh%diag%ddt_vn_dmp_is_associated) THEN - p_nh%diag%ddt_vn_dmp(je,jk,jb) = p_nh%diag%ddt_vn_dmp(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps - END IF - ! - IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN - p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps - END IF -#endif - ! 
- ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_26_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing scal_divdamp_o2=scal_divdamp_o2' - - !$ser data scal_divdamp_o2=scal_divdamp_o2 - - PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' - - !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - ENDIF - IF (divdamp_order == 4 .OR. (divdamp_order == 24 .AND. divdamp_fac_o2 <= 4._wp*divdamp_fac) ) THEN - IF (l_limited_area .OR. jg > 1) THEN - ! fourth-order divergence damping with reduced damping coefficient along nest boundary - ! (scal_divdamp is negative whereas bdy_divdamp is positive; decreasing the divergence - ! damping along nest boundaries is beneficial because this reduces the interference - ! with the increased diffusion applied in nh_diffusion) - - - !$ser savepoint mo_solve_nonhydro_stencil_27_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing scal_divdamp=scal_divdamp(:)' - - !$ser data scal_divdamp=scal_divdamp(:) - - PRINT *, 'Serializing bdy_divdamp=bdy_divdamp(:)' - - !$ser data bdy_divdamp=bdy_divdamp(:) - - PRINT *, 'Serializing nudgecoeff_e=p_int%nudgecoeff_e(:,1)' - - !$ser data nudgecoeff_e=p_int%nudgecoeff_e(:,1) - - PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' - - !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) -!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) PRIVATE(z_d_vn_dmp) - DO jk = 1, nlev -!DIR$ IVDEP -!$NEC ivdep - DO je = i_startidx, i_endidx - ! - z_d_vn_dmp = (scal_divdamp(jk)+bdy_divdamp(jk)*p_int%nudgecoeff_e(je,jb))*z_graddiv2_vn(je,jk) - ! - p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnew)%vn(je,jk,jb) + z_d_vn_dmp - ! 
-#ifdef __ENABLE_DDT_VN_XYZ__ - IF (p_nh%diag%ddt_vn_dmp_is_associated) THEN - p_nh%diag%ddt_vn_dmp(je,jk,jb) = p_nh%diag%ddt_vn_dmp(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps - END IF - ! - IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN - p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps - END IF -#endif - ! - ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_27_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing scal_divdamp=scal_divdamp(:)' - - !$ser data scal_divdamp=scal_divdamp(:) - - PRINT *, 'Serializing bdy_divdamp=bdy_divdamp(:)' - - !$ser data bdy_divdamp=bdy_divdamp(:) - - PRINT *, 'Serializing nudgecoeff_e=p_int%nudgecoeff_e(:,1)' - - !$ser data nudgecoeff_e=p_int%nudgecoeff_e(:,1) - - PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' - - !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - ELSE ! fourth-order divergence damping - - - !$ser savepoint mo_solve_nonhydro_4th_order_divdamp_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing scal_divdamp=scal_divdamp(:)' - - !$ser data scal_divdamp=scal_divdamp(:) - - PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' - - !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - -!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) PRIVATE(z_d_vn_dmp) - DO jk = 1, nlev -!DIR$ IVDEP - DO je = i_startidx, i_endidx - ! - z_d_vn_dmp = scal_divdamp(jk)*z_graddiv2_vn(je,jk) - ! - p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnew)%vn(je,jk,jb) + z_d_vn_dmp - ! 
-#ifdef __ENABLE_DDT_VN_XYZ__ - IF (p_nh%diag%ddt_vn_dmp_is_associated) THEN - p_nh%diag%ddt_vn_dmp(je,jk,jb) = p_nh%diag%ddt_vn_dmp(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps - END IF - ! - IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN - p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + z_d_vn_dmp * r_dtimensubsteps - END IF -#endif - ! - ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_4th_order_divdamp_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing scal_divdamp=scal_divdamp(:)' - - !$ser data scal_divdamp=scal_divdamp(:) - - PRINT *, 'Serializing z_graddiv2_vn=z_graddiv2_vn(:,:)' - - !$ser data z_graddiv2_vn=z_graddiv2_vn(:,:) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - ENDIF - ENDIF - ENDIF - - IF (is_iau_active) THEN ! add analysis increment from data assimilation - - - !$ser savepoint mo_solve_nonhydro_stencil_28_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing iau_wgt_dyn=iau_wgt_dyn' - - !$ser data iau_wgt_dyn=iau_wgt_dyn - - PRINT *, 'Serializing vn_incr=p_nh%diag%vn_incr(:,:,1)' - - !$ser data vn_incr=p_nh%diag%vn_incr(:,:,1) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) PRIVATE(z_d_vn_iau) - DO jk = 1, nlev -!DIR$ IVDEP - DO je = i_startidx, i_endidx - ! - z_d_vn_iau = iau_wgt_dyn*p_nh%diag%vn_incr(je,jk,jb) - ! - p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnew)%vn(je,jk,jb) + z_d_vn_iau - ! -#ifdef __ENABLE_DDT_VN_XYZ__ - IF (istep == 2) THEN - IF (p_nh%diag%ddt_vn_iau_is_associated) THEN - p_nh%diag%ddt_vn_iau(je,jk,jb) = p_nh%diag%ddt_vn_iau(je,jk,jb) + z_d_vn_iau * r_dtimensubsteps - END IF - ! 
- IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN - p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + z_d_vn_iau * r_dtimensubsteps - END IF - END IF -#endif - ! - ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_28_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing iau_wgt_dyn=iau_wgt_dyn' - - !$ser data iau_wgt_dyn=iau_wgt_dyn - - PRINT *, 'Serializing vn_incr=p_nh%diag%vn_incr(:,:,1)' - - !$ser data vn_incr=p_nh%diag%vn_incr(:,:,1) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - ENDIF - - ! Classic Rayleigh damping mechanism for vn (requires reference state !!) - ! - IF ( rayleigh_type == RAYLEIGH_CLASSIC ) THEN - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) PRIVATE(z_ddt_vn_ray) - DO jk = 1, nrdmax(jg) -!DIR$ IVDEP - DO je = i_startidx, i_endidx - ! - z_ddt_vn_ray = -p_nh%metrics%rayleigh_vn(jk) * (p_nh%prog(nnew)%vn(je,jk,jb) - p_nh%ref%vn_ref(je,jk,jb)) - ! - p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnew)%vn(je,jk,jb) + z_ddt_vn_ray * dtime - ! -#ifdef __ENABLE_DDT_VN_XYZ__ - IF (istep == 2) THEN - IF (p_nh%diag%ddt_vn_ray_is_associated) THEN - p_nh%diag%ddt_vn_ray(je,jk,jb) = p_nh%diag%ddt_vn_ray(je,jk,jb) + z_ddt_vn_ray * r_nsubsteps - END IF - ! - IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN - p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + z_ddt_vn_ray * r_nsubsteps - END IF - END IF -#endif - ! - ENDDO - ENDDO - !$ACC END PARALLEL - ENDIF - ENDDO -!$OMP END DO - - ! Boundary update of horizontal velocity - IF (istep == 1 .AND. (l_limited_area .OR. 
jg > 1)) THEN - rl_start = 1 - rl_end = grf_bdywidth_e - - i_startblk = p_patch%edges%start_block(rl_start) - i_endblk = p_patch%edges%end_block(rl_end) - -!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je) ICON_OMP_DEFAULT_SCHEDULE - DO jb = i_startblk, i_endblk - - CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - - !$ser savepoint mo_solve_nonhydro_stencil_29_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing grf_tend_vn=p_nh%diag%grf_tend_vn(:,:,1)' - - !$ser data grf_tend_vn=p_nh%diag%grf_tend_vn(:,:,1) - - PRINT *, 'Serializing vn_now=p_nh%prog(nnow)%vn(:,:,1)' - - !$ser data vn_now=p_nh%prog(nnow)%vn(:,:,1) - - PRINT *, 'Serializing vn_new=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn_new=p_nh%prog(nnew)%vn(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 1, nlev -!DIR$ IVDEP - DO je = i_startidx, i_endidx - ! - p_nh%prog(nnew)%vn(je,jk,jb) = p_nh%prog(nnow)%vn(je,jk,jb) + p_nh%diag%grf_tend_vn(je,jk,jb) * dtime - ! -#ifdef __ENABLE_DDT_VN_XYZ__ - IF (p_nh%diag%ddt_vn_grf_is_associated) THEN - p_nh%diag%ddt_vn_grf(je,jk,jb) = p_nh%diag%ddt_vn_grf(je,jk,jb) + p_nh%diag%grf_tend_vn(je,jk,jb) * r_nsubsteps - END IF - ! - IF (p_nh%diag%ddt_vn_dyn_is_associated) THEN - p_nh%diag%ddt_vn_dyn(je,jk,jb) = p_nh%diag%ddt_vn_dyn(je,jk,jb) + p_nh%diag%grf_tend_vn(je,jk,jb) * r_nsubsteps - END IF -#endif - ! 
- ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_29_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing grf_tend_vn=p_nh%diag%grf_tend_vn(:,:,1)' - - !$ser data grf_tend_vn=p_nh%diag%grf_tend_vn(:,:,1) - - PRINT *, 'Serializing vn_now=p_nh%prog(nnow)%vn(:,:,1)' - - !$ser data vn_now=p_nh%prog(nnow)%vn(:,:,1) - - PRINT *, 'Serializing vn_new=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn_new=p_nh%prog(nnew)%vn(:,:,1) - - ENDDO -!$OMP END DO - - ENDIF - - ! Preparations for nest boundary interpolation of mass fluxes from parent domain - IF (jg > 1 .AND. grf_intmethod_e >= 5 .AND. idiv_method == 1 .AND. jstep == 0 .AND. istep == 1) THEN - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG - -!$OMP DO PRIVATE(ic,je,jb,jk) ICON_OMP_DEFAULT_SCHEDULE - DO ic = 1, p_nh%metrics%bdy_mflx_e_dim - je = p_nh%metrics%bdy_mflx_e_idx(ic) - jb = p_nh%metrics%bdy_mflx_e_blk(ic) -!DIR$ IVDEP - !$ACC LOOP VECTOR - DO jk = 1, nlev - p_nh%diag%grf_bdy_mflx(jk,ic,2) = p_nh%diag%grf_tend_mflx(je,jk,jb) - p_nh%diag%grf_bdy_mflx(jk,ic,1) = prep_adv%mass_flx_me(je,jk,jb) - dt_shift*p_nh%diag%grf_bdy_mflx(jk,ic,2) - ENDDO - - ENDDO -!$OMP END DO - - !$ACC END PARALLEL - - ENDIF - -!$OMP END PARALLEL - - - !------------------------- - ! communication phase - IF (timers_level > 5) THEN - CALL timer_stop(timer_solve_nh_vnupd) - CALL timer_start(timer_solve_nh_exch) - ENDIF - - IF (itype_comm == 1) THEN - IF (istep == 1) THEN - CALL sync_patch_array_mult(SYNC_E,p_patch,2,p_nh%prog(nnew)%vn,z_rho_e,opt_varname="vn_nnew and z_rho_e") - ELSE - CALL sync_patch_array(SYNC_E,p_patch,p_nh%prog(nnew)%vn,opt_varname="vn_nnew") - ENDIF - ENDIF - - IF (idiv_method == 2 .AND. 
istep == 1) THEN - CALL sync_patch_array(SYNC_E,p_patch,z_theta_v_e,opt_varname="z_theta_v_e") - END IF - - IF (timers_level > 5) THEN - CALL timer_stop(timer_solve_nh_exch) - CALL timer_start(timer_solve_nh_edgecomp) - ENDIF - ! end communication phase - !------------------------- - -!$OMP PARALLEL PRIVATE (rl_start,rl_end,i_startblk,i_endblk) - rl_start = 5 - rl_end = min_rledge_int - 2 - - i_startblk = p_patch%edges%start_block(rl_start) - i_endblk = p_patch%edges%end_block(rl_end) - - rl_start_2 = 1 - rl_end_2 = min_rledge - - i_startblk_2 = p_patch%edges%start_block(rl_start_2) - i_endblk_2 = p_patch%edges%end_block(rl_end_2) - - CALL get_indices_e(p_patch, 1, i_startblk_2, i_endblk_2, & - i_startidx_2, i_endidx_2, rl_start_2, rl_end_2) - -!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je,z_vn_avg) ICON_OMP_DEFAULT_SCHEDULE - DO jb = i_startblk, i_endblk - - CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - IF (istep == 1) THEN - - - - !$ser savepoint mo_solve_nonhydro_stencil_30_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing e_flx_avg=p_int%e_flx_avg(:,:,1)' - - !$ser data e_flx_avg=p_int%e_flx_avg(:,:,1) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing geofac_grdiv=p_int%geofac_grdiv(:,:,1)' - - !$ser data geofac_grdiv=p_int%geofac_grdiv(:,:,1) - - PRINT *, 'Serializing rbf_vec_coeff_e=p_int%rbf_vec_coeff_e_dsl(:,:,1)' - - !$ser data rbf_vec_coeff_e=p_int%rbf_vec_coeff_e_dsl(:,:,1) - - PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' - - !$ser data z_vn_avg=z_vn_avg(:,:) - - PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' - - !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) - - PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' - - !$ser data vt=p_nh%diag%vt(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) -#ifdef __LOOP_EXCHANGE 
- DO je = i_startidx, i_endidx -!DIR$ IVDEP - DO jk = 1, nlev -#else -!$NEC outerloop_unroll(8) - DO jk = 1, nlev -!$NEC vovertake - DO je = i_startidx, i_endidx -#endif - ! Average normal wind components in order to get nearly second-order accurate divergence - z_vn_avg(je,jk) = p_int%e_flx_avg(je,1,jb)*p_nh%prog(nnew)%vn(je,jk,jb) & - + p_int%e_flx_avg(je,2,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & - + p_int%e_flx_avg(je,3,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & - + p_int%e_flx_avg(je,4,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & - + p_int%e_flx_avg(je,5,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) - - ! Compute gradient of divergence of vn for divergence damping -#ifdef __LOOP_EXCHANGE - z_graddiv_vn(jk,je,jb) = p_int%geofac_grdiv(je,1,jb)*p_nh%prog(nnew)%vn(je,jk,jb) & -#else - z_graddiv_vn(je,jk,jb) = p_int%geofac_grdiv(je,1,jb)*p_nh%prog(nnew)%vn(je,jk,jb) & -#endif - + p_int%geofac_grdiv(je,2,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & - + p_int%geofac_grdiv(je,3,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & - + p_int%geofac_grdiv(je,4,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & - + p_int%geofac_grdiv(je,5,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) - - ! 
RBF reconstruction of tangential wind component - p_nh%diag%vt(je,jk,jb) = p_int%rbf_vec_coeff_e(1,je,jb) & - * p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & - + p_int%rbf_vec_coeff_e(2,je,jb) & - * p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & - + p_int%rbf_vec_coeff_e(3,je,jb) & - * p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & - + p_int%rbf_vec_coeff_e(4,je,jb) & - * p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) - ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_30_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing e_flx_avg=p_int%e_flx_avg(:,:,1)' - - !$ser data e_flx_avg=p_int%e_flx_avg(:,:,1) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing geofac_grdiv=p_int%geofac_grdiv(:,:,1)' - - !$ser data geofac_grdiv=p_int%geofac_grdiv(:,:,1) - - PRINT *, 'Serializing rbf_vec_coeff_e=p_int%rbf_vec_coeff_e_dsl(:,:,1)' - - !$ser data rbf_vec_coeff_e=p_int%rbf_vec_coeff_e_dsl(:,:,1) - - PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' - - !$ser data z_vn_avg=z_vn_avg(:,:) - - PRINT *, 'Serializing z_graddiv_vn=z_graddiv_vn(:,:,1)' - - !$ser data z_graddiv_vn=z_graddiv_vn(:,:,1) - - PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' - - !$ser data vt=p_nh%diag%vt(:,:,1) - - ELSE IF (itime_scheme >= 5) THEN - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) -#ifdef __LOOP_EXCHANGE - DO je = i_startidx, i_endidx -!DIR$ IVDEP - DO jk = 1, nlev -#else - DO jk = 1, nlev - DO je = i_startidx, i_endidx -#endif - ! 
Average normal wind components in order to get nearly second-order accurate divergence - z_vn_avg(je,jk) = p_int%e_flx_avg(je,1,jb)*p_nh%prog(nnew)%vn(je,jk,jb) & - + p_int%e_flx_avg(je,2,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & - + p_int%e_flx_avg(je,3,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & - + p_int%e_flx_avg(je,4,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & - + p_int%e_flx_avg(je,5,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) - - ! RBF reconstruction of tangential wind component - p_nh%diag%vt(je,jk,jb) = p_int%rbf_vec_coeff_e(1,je,jb) & - * p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & - + p_int%rbf_vec_coeff_e(2,je,jb) & - * p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & - + p_int%rbf_vec_coeff_e(3,je,jb) & - * p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & - + p_int%rbf_vec_coeff_e(4,je,jb) & - * p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) - - ENDDO - ENDDO -!$ACC END PARALLEL - - ELSE - - - !$ser savepoint mo_solve_nonhydro_stencil_31_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing e_flx_avg=p_int%e_flx_avg(:,:,1)' - - !$ser data e_flx_avg=p_int%e_flx_avg(:,:,1) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' - - !$ser data z_vn_avg=z_vn_avg(:,:) -!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) -#ifdef __LOOP_EXCHANGE - DO je = i_startidx, i_endidx -!DIR$ IVDEP - DO jk = 1, nlev -#else -!$NEC outerloop_unroll(8) - DO jk = 1, nlev -!$NEC vovertake - DO je = i_startidx, i_endidx -#endif - ! 
Average normal wind components in order to get nearly second-order accurate divergence - z_vn_avg(je,jk) = p_int%e_flx_avg(je,1,jb)*p_nh%prog(nnew)%vn(je,jk,jb) & - + p_int%e_flx_avg(je,2,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,1),jk,iqblk(je,jb,1)) & - + p_int%e_flx_avg(je,3,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,2),jk,iqblk(je,jb,2)) & - + p_int%e_flx_avg(je,4,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,3),jk,iqblk(je,jb,3)) & - + p_int%e_flx_avg(je,5,jb)*p_nh%prog(nnew)%vn(iqidx(je,jb,4),jk,iqblk(je,jb,4)) - ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_31_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing e_flx_avg=p_int%e_flx_avg(:,:,1)' - - !$ser data e_flx_avg=p_int%e_flx_avg(:,:,1) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' - - !$ser data z_vn_avg=z_vn_avg(:,:) - ENDIF - - IF (idiv_method == 1) THEN ! Compute fluxes at edges using averaged velocities - ! 
corresponding computation for idiv_method=2 follows later - - - !$ser savepoint mo_solve_nonhydro_stencil_32_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' - - !$ser data z_rho_e=z_rho_e(:,:,1) - - PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' - - !$ser data z_vn_avg=z_vn_avg(:,:) - - PRINT *, 'Serializing ddqz_z_full_e=p_nh%metrics%ddqz_z_full_e(:,:,1)' - - !$ser data ddqz_z_full_e=p_nh%metrics%ddqz_z_full_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data z_theta_v_e=z_theta_v_e(:,:,1) - - PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' - - !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_fl_e=z_theta_v_fl_e(:,:,1)' - - !$ser data z_theta_v_fl_e=z_theta_v_fl_e(:,:,1) -!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) - DO jk = 1,nlev -!DIR$ IVDEP - DO je = i_startidx, i_endidx - - p_nh%diag%mass_fl_e(je,jk,jb) = z_rho_e(je,jk,jb) * & - z_vn_avg(je,jk) * p_nh%metrics%ddqz_z_full_e(je,jk,jb) - z_theta_v_fl_e(je,jk,jb) = p_nh%diag%mass_fl_e(je,jk,jb) * & - z_theta_v_e(je,jk,jb) - - ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_32_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_rho_e=z_rho_e(:,:,1)' - - !$ser data z_rho_e=z_rho_e(:,:,1) - - PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' - - !$ser data z_vn_avg=z_vn_avg(:,:) - - PRINT *, 'Serializing ddqz_z_full_e=p_nh%metrics%ddqz_z_full_e(:,:,1)' - - !$ser data ddqz_z_full_e=p_nh%metrics%ddqz_z_full_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_e=z_theta_v_e(:,:,1)' - - !$ser data z_theta_v_e=z_theta_v_e(:,:,1) - - PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' - - !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_fl_e=z_theta_v_fl_e(:,:,1)' - - !$ser data z_theta_v_fl_e=z_theta_v_fl_e(:,:,1) - - IF 
(lsave_mflx .AND. istep == 2) THEN ! store mass flux for nest boundary interpolation -#ifndef _OPENACC - DO je = i_startidx, i_endidx - IF (p_patch%edges%refin_ctrl(je,jb) <= -4 .AND. p_patch%edges%refin_ctrl(je,jb) >= -6) THEN -!DIR$ IVDEP - DO jk=1,nlev - p_nh%diag%mass_fl_e_sv(je,jk,jb) = p_nh%diag%mass_fl_e(je,jk,jb) - ENDDO - ENDIF - ENDDO -#else - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) - DO jk=1,nlev - DO je = i_startidx, i_endidx - IF (p_patch%edges%refin_ctrl(je,jb) <= -4 .AND. p_patch%edges%refin_ctrl(je,jb) >= -6) THEN - p_nh%diag%mass_fl_e_sv(je,jk,jb) = p_nh%diag%mass_fl_e(je,jk,jb) - ENDIF - ENDDO - ENDDO - !$ACC END PARALLEL -#endif - ENDIF - - IF (lprep_adv .AND. istep == 2) THEN ! Preprations for tracer advection - IF (lclean_mflx) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_33_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing vn_traj=prep_adv%vn_traj(:,:,1)' - - !$ser data vn_traj=prep_adv%vn_traj(:,:,1) - - PRINT *, 'Serializing mass_flx_me=prep_adv%mass_flx_me(:,:,1)' - - !$ser data mass_flx_me=prep_adv%mass_flx_me(:,:,1) -!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) - DO jk = 1, nlev -!$NEC ivdep - DO je = i_startidx, i_endidx - prep_adv%vn_traj(je,jk,jb) = 0._wp - prep_adv%mass_flx_me(je,jk,jb) = 0._wp - ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_33_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing vn_traj=prep_adv%vn_traj(:,:,1)' - - !$ser data vn_traj=prep_adv%vn_traj(:,:,1) - - PRINT *, 'Serializing mass_flx_me=prep_adv%mass_flx_me(:,:,1)' - - !$ser data mass_flx_me=prep_adv%mass_flx_me(:,:,1) - - ENDIF - - - !$ser savepoint mo_solve_nonhydro_stencil_34_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' - - !$ser data 
r_nsubsteps=r_nsubsteps - - PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' - - !$ser data z_vn_avg=z_vn_avg(:,:) - - PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' - - !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) - - PRINT *, 'Serializing vn_traj=prep_adv%vn_traj(:,:,1)' - - !$ser data vn_traj=prep_adv%vn_traj(:,:,1) - - PRINT *, 'Serializing mass_flx_me=prep_adv%mass_flx_me(:,:,1)' - - !$ser data mass_flx_me=prep_adv%mass_flx_me(:,:,1) -!$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG(STATIC: 1) VECTOR TILE(32, 4) - DO jk = 1, nlev -!$NEC ivdep - DO je = i_startidx, i_endidx - prep_adv%vn_traj(je,jk,jb) = prep_adv%vn_traj(je,jk,jb) + r_nsubsteps*z_vn_avg(je,jk) - prep_adv%mass_flx_me(je,jk,jb) = prep_adv%mass_flx_me(je,jk,jb) + r_nsubsteps*p_nh%diag%mass_fl_e(je,jk,jb) - ENDDO - ENDDO -!$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_34_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' - - !$ser data r_nsubsteps=r_nsubsteps - - PRINT *, 'Serializing z_vn_avg=z_vn_avg(:,:)' - - !$ser data z_vn_avg=z_vn_avg(:,:) - - PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' - - !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) - - PRINT *, 'Serializing vn_traj=prep_adv%vn_traj(:,:,1)' - - !$ser data vn_traj=prep_adv%vn_traj(:,:,1) - - PRINT *, 'Serializing mass_flx_me=prep_adv%mass_flx_me(:,:,1)' - - !$ser data mass_flx_me=prep_adv%mass_flx_me(:,:,1) - - ENDIF - - ENDIF - - IF (istep == 1 .OR. itime_scheme >= 5) THEN - ! 
Compute contravariant correction for vertical velocity at full levels - - - !$ser savepoint mo_solve_nonhydro_stencil_35_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1)' - - !$ser data ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1) - - PRINT *, 'Serializing ddxt_z_full=p_nh%metrics%ddxt_z_full(:,:,1)' - - !$ser data ddxt_z_full=p_nh%metrics%ddxt_z_full(:,:,1) - - PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' - - !$ser data vt=p_nh%diag%vt(:,:,1) - - PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' - - !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = nflatlev(jg), nlev -!DIR$ IVDEP - DO je = i_startidx, i_endidx - z_w_concorr_me(je,jk,jb) = & - p_nh%prog(nnew)%vn(je,jk,jb)*p_nh%metrics%ddxn_z_full(je,jk,jb) + & - p_nh%diag%vt(je,jk,jb) *p_nh%metrics%ddxt_z_full(je,jk,jb) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_35_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1)' - - !$ser data ddxn_z_full=p_nh%metrics%ddxn_z_full(:,:,1) - - PRINT *, 'Serializing ddxt_z_full=p_nh%metrics%ddxt_z_full(:,:,1)' - - !$ser data ddxt_z_full=p_nh%metrics%ddxt_z_full(:,:,1) - - PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' - - !$ser data vt=p_nh%diag%vt(:,:,1) - - PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' - - !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) - ENDIF - - IF (istep == 1) THEN - ! Interpolate vn to interface levels and compute horizontal part of kinetic energy on edges - ! 
(needed in velocity tendencies called at istep=2) - - - !$ser savepoint mo_solve_nonhydro_stencil_36_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfac_e=p_nh%metrics%wgtfac_e(:,:,1)' - - !$ser data wgtfac_e=p_nh%metrics%wgtfac_e(:,:,1) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' - - !$ser data vt=p_nh%diag%vt(:,:,1) - - PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' - - !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) - - PRINT *, 'Serializing z_vt_ie=z_vt_ie(:,:,1)' - - !$ser data z_vt_ie=z_vt_ie(:,:,1) - - PRINT *, 'Serializing z_kin_hor_e=z_kin_hor_e(:,:,1)' - - !$ser data z_kin_hor_e=z_kin_hor_e(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) -!$NEC outerloop_unroll(3) - DO jk = 2, nlev -!DIR$ IVDEP - DO je = i_startidx, i_endidx - p_nh%diag%vn_ie(je,jk,jb) = & - p_nh%metrics%wgtfac_e(je,jk,jb) *p_nh%prog(nnew)%vn(je,jk ,jb) + & - (1._wp - p_nh%metrics%wgtfac_e(je,jk,jb))*p_nh%prog(nnew)%vn(je,jk-1,jb) - z_vt_ie(je,jk,jb) = & - p_nh%metrics%wgtfac_e(je,jk,jb) *p_nh%diag%vt(je,jk ,jb) + & - (1._wp - p_nh%metrics%wgtfac_e(je,jk,jb))*p_nh%diag%vt(je,jk-1,jb) - z_kin_hor_e(je,jk,jb) = 0.5_wp*(p_nh%prog(nnew)%vn(je,jk,jb)**2 + p_nh%diag%vt(je,jk,jb)**2) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_36_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing wgtfac_e=p_nh%metrics%wgtfac_e(:,:,1)' - - !$ser data wgtfac_e=p_nh%metrics%wgtfac_e(:,:,1) - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' - - !$ser data vt=p_nh%diag%vt(:,:,1) - - PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' - - !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) - - PRINT *, 'Serializing z_vt_ie=z_vt_ie(:,:,1)' - - !$ser data 
z_vt_ie=z_vt_ie(:,:,1) - - PRINT *, 'Serializing z_kin_hor_e=z_kin_hor_e(:,:,1)' - - !$ser data z_kin_hor_e=z_kin_hor_e(:,:,1) - - IF (.NOT. l_vert_nested) THEN - ! Top and bottom levels -!DIR$ IVDEP - - - !$ser savepoint mo_solve_nonhydro_stencil_37_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' - - !$ser data vt=p_nh%diag%vt(:,:,1) - - PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' - - !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) - - PRINT *, 'Serializing z_vt_ie=z_vt_ie(:,:,1)' - - !$ser data z_vt_ie=z_vt_ie(:,:,1) - - PRINT *, 'Serializing z_kin_hor_e=z_kin_hor_e(:,:,1)' - - !$ser data z_kin_hor_e=z_kin_hor_e(:,:,1) - - - !$ser savepoint mo_solve_nonhydro_stencil_38_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing wgtfacq_e=p_nh%metrics%wgtfacq_e_dsl(:,:,1)' - - !$ser data wgtfacq_e=p_nh%metrics%wgtfacq_e_dsl(:,:,1) - - PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' - - !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO je = i_startidx, i_endidx - ! Quadratic extrapolation at the top turned out to cause numerical instability in pathological cases, - ! thus we use a no-gradient condition in the upper half layer - p_nh%diag%vn_ie(je,1,jb) = p_nh%prog(nnew)%vn(je,1,jb) - ! vt_ie(jk=1) is actually unused, but we need it for convenience of implementation - z_vt_ie(je,1,jb) = p_nh%diag%vt(je,1,jb) - ! 
- z_kin_hor_e(je,1,jb) = 0.5_wp*(p_nh%prog(nnew)%vn(je,1,jb)**2 + p_nh%diag%vt(je,1,jb)**2) - p_nh%diag%vn_ie(je,nlevp1,jb) = & - p_nh%metrics%wgtfacq_e(je,1,jb)*p_nh%prog(nnew)%vn(je,nlev,jb) + & - p_nh%metrics%wgtfacq_e(je,2,jb)*p_nh%prog(nnew)%vn(je,nlev-1,jb) + & - p_nh%metrics%wgtfacq_e(je,3,jb)*p_nh%prog(nnew)%vn(je,nlev-2,jb) - ENDDO - !$ACC END PARALLEL - - - !$ser savepoint mo_solve_nonhydro_stencil_37_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing vt=p_nh%diag%vt(:,:,1)' - - !$ser data vt=p_nh%diag%vt(:,:,1) - - PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' - - !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) - - PRINT *, 'Serializing z_vt_ie=z_vt_ie(:,:,1)' - - !$ser data z_vt_ie=z_vt_ie(:,:,1) - - PRINT *, 'Serializing z_kin_hor_e=z_kin_hor_e(:,:,1)' - - !$ser data z_kin_hor_e=z_kin_hor_e(:,:,1) - - !$ser savepoint mo_solve_nonhydro_stencil_38_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing vn=p_nh%prog(nnew)%vn(:,:,1)' - - !$ser data vn=p_nh%prog(nnew)%vn(:,:,1) - - PRINT *, 'Serializing wgtfacq_e=p_nh%metrics%wgtfacq_e_dsl(:,:,1)' - - !$ser data wgtfacq_e=p_nh%metrics%wgtfacq_e_dsl(:,:,1) - - PRINT *, 'Serializing vn_ie=p_nh%diag%vn_ie(:,:,1)' - - !$ser data vn_ie=p_nh%diag%vn_ie(:,:,1) - - ELSE - ! vn_ie(jk=1) is interpolated horizontally from the parent domain, and linearly interpolated in time - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR -!DIR$ IVDEP - DO je = i_startidx, i_endidx - p_nh%diag%vn_ie(je,1,jb) = p_nh%diag%vn_ie_ubc(je,1,jb)+dt_linintp_ubc_nnew*p_nh%diag%vn_ie_ubc(je,2,jb) - ! vt_ie(jk=1) is actually unused, but we need it for convenience of implementation - z_vt_ie(je,1,jb) = p_nh%diag%vt(je,1,jb) - ! 
- z_kin_hor_e(je,1,jb) = 0.5_wp*(p_nh%prog(nnew)%vn(je,1,jb)**2 + p_nh%diag%vt(je,1,jb)**2) - p_nh%diag%vn_ie(je,nlevp1,jb) = & - p_nh%metrics%wgtfacq_e(je,1,jb)*p_nh%prog(nnew)%vn(je,nlev,jb) + & - p_nh%metrics%wgtfacq_e(je,2,jb)*p_nh%prog(nnew)%vn(je,nlev-1,jb) + & - p_nh%metrics%wgtfacq_e(je,3,jb)*p_nh%prog(nnew)%vn(je,nlev-2,jb) - ENDDO - !$ACC END PARALLEL - ENDIF - ENDIF - - ENDDO -!$OMP END DO - - ! Apply mass fluxes across lateral nest boundary interpolated from parent domain - IF (jg > 1 .AND. grf_intmethod_e >= 5 .AND. idiv_method == 1) THEN - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - ! PGI 21.2 requires GANG-VECTOR on this level. (Having the jk as VECTOR crashes.) - ! PRIVATE clause is required as je,jb are used in each vector thread. - !$ACC LOOP GANG VECTOR PRIVATE(je, jb) - -!$OMP DO PRIVATE(ic,je,jb,jk) ICON_OMP_DEFAULT_SCHEDULE - DO ic = 1, p_nh%metrics%bdy_mflx_e_dim - je = p_nh%metrics%bdy_mflx_e_idx(ic) - jb = p_nh%metrics%bdy_mflx_e_blk(ic) - - ! This is needed for tracer mass consistency along the lateral boundaries - IF (lprep_adv .AND. istep == 2) THEN ! subtract mass flux added previously... - !$ACC LOOP SEQ -!$NEC ivdep - DO jk = 1, nlev - prep_adv%mass_flx_me(je,jk,jb) = prep_adv%mass_flx_me(je,jk,jb) - r_nsubsteps*p_nh%diag%mass_fl_e(je,jk,jb) - prep_adv%vn_traj(je,jk,jb) = prep_adv%vn_traj(je,jk,jb) - r_nsubsteps*p_nh%diag%mass_fl_e(je,jk,jb) / & - (z_rho_e(je,jk,jb) * p_nh%metrics%ddqz_z_full_e(je,jk,jb)) - ENDDO - ENDIF - -!DIR$ IVDEP - !$ACC LOOP SEQ -!$NEC ivdep - DO jk = 1, nlev - p_nh%diag%mass_fl_e(je,jk,jb) = p_nh%diag%grf_bdy_mflx(jk,ic,1) + & - REAL(jstep,wp)*dtime*p_nh%diag%grf_bdy_mflx(jk,ic,2) - z_theta_v_fl_e(je,jk,jb) = p_nh%diag%mass_fl_e(je,jk,jb) * z_theta_v_e(je,jk,jb) - ENDDO - - IF (lprep_adv .AND. istep == 2) THEN ! ... 
and add the corrected one again - !$ACC LOOP SEQ -!$NEC ivdep - DO jk = 1, nlev - prep_adv%mass_flx_me(je,jk,jb) = prep_adv%mass_flx_me(je,jk,jb) + r_nsubsteps*p_nh%diag%mass_fl_e(je,jk,jb) - prep_adv%vn_traj(je,jk,jb) = prep_adv%vn_traj(je,jk,jb) + r_nsubsteps*p_nh%diag%mass_fl_e(je,jk,jb) / & - (z_rho_e(je,jk,jb) * p_nh%metrics%ddqz_z_full_e(je,jk,jb)) - ENDDO - ENDIF - - ENDDO -!$OMP END DO - - !$ACC END PARALLEL - - ENDIF - - - ! It turned out that it is sufficient to compute the contravariant correction in the - ! predictor step at time level n+1; repeating the calculation in the corrector step - ! has negligible impact on the results except in very-high resolution runs with extremely steep mountains - IF (istep == 1 .OR. itime_scheme >= 5) THEN - - rl_start = 3 - rl_end = min_rlcell_int - 1 - - i_startblk = p_patch%cells%start_block(rl_start) - i_endblk = p_patch%cells%end_block(rl_end) - -#ifdef _OPENACC -! -! This is one of the very few code divergences for OPENACC (see comment below) -! - DO jb = i_startblk, i_endblk - - CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - ! ... and to interface levels - - - !$ser savepoint mo_solve_nonhydro_stencil_39_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing e_bln_c_s=p_int%e_bln_c_s(:,:,1)' - - !$ser data e_bln_c_s=p_int%e_bln_c_s(:,:,1) - - PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' - - !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) - - PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' - - !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(32, 4) PRIVATE(z_w_concorr_mc_m1, z_w_concorr_mc_m0) - DO jk = nflatlev(jg)+1, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - ! 
COMMENT: this optimization yields drastically better performance in an OpenACC context - ! Interpolate contravariant correction to cell centers... - z_w_concorr_mc_m1 = & - p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),jk-1,ieblk(jc,jb,1)) + & - p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),jk-1,ieblk(jc,jb,2)) + & - p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),jk-1,ieblk(jc,jb,3)) - z_w_concorr_mc_m0 = & - p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),jk,ieblk(jc,jb,1)) + & - p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),jk,ieblk(jc,jb,2)) + & - p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),jk,ieblk(jc,jb,3)) - p_nh%diag%w_concorr_c(jc,jk,jb) = & - p_nh%metrics%wgtfac_c(jc,jk,jb)*z_w_concorr_mc_m0 + & - (1._vp - p_nh%metrics%wgtfac_c(jc,jk,jb))*z_w_concorr_mc_m1 - ENDDO - ENDDO - !$ACC END PARALLEL - - - !$ser savepoint mo_solve_nonhydro_stencil_39_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing e_bln_c_s=p_int%e_bln_c_s(:,:,1)' - - !$ser data e_bln_c_s=p_int%e_bln_c_s(:,:,1) - - PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' - - !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) - - PRINT *, 'Serializing wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1)' - - !$ser data wgtfac_c=p_nh%metrics%wgtfac_c(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - - - !$ser savepoint mo_solve_nonhydro_stencil_40_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing e_bln_c_s=p_int%e_bln_c_s(:,:,1)' - - !$ser data e_bln_c_s=p_int%e_bln_c_s(:,:,1) - - PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' - - !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) - - PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' - - !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data 
w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR PRIVATE(z_w_concorr_mc_m2, z_w_concorr_mc_m1, z_w_concorr_mc_m0) -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - ! Interpolate contravariant correction to cell centers... - z_w_concorr_mc_m2 = & - p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),nlev-2,ieblk(jc,jb,1)) + & - p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),nlev-2,ieblk(jc,jb,2)) + & - p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),nlev-2,ieblk(jc,jb,3)) - - z_w_concorr_mc_m1 = & - p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),nlev-1,ieblk(jc,jb,1)) + & - p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),nlev-1,ieblk(jc,jb,2)) + & - p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),nlev-1,ieblk(jc,jb,3)) - - z_w_concorr_mc_m0 = & - p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),nlev,ieblk(jc,jb,1)) + & - p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),nlev,ieblk(jc,jb,2)) + & - p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),nlev,ieblk(jc,jb,3)) - - p_nh%diag%w_concorr_c(jc,nlevp1,jb) = & - p_nh%metrics%wgtfacq_c(jc,1,jb)*z_w_concorr_mc_m0 + & - p_nh%metrics%wgtfacq_c(jc,2,jb)*z_w_concorr_mc_m1 + & - p_nh%metrics%wgtfacq_c(jc,3,jb)*z_w_concorr_mc_m2 - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_40_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing e_bln_c_s=p_int%e_bln_c_s(:,:,1)' - - !$ser data e_bln_c_s=p_int%e_bln_c_s(:,:,1) - - PRINT *, 'Serializing z_w_concorr_me=z_w_concorr_me(:,:,1)' - - !$ser data z_w_concorr_me=z_w_concorr_me(:,:,1) - - PRINT *, 'Serializing wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1)' - - !$ser data wgtfacq_c=p_nh%metrics%wgtfacq_c_dsl(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - - ENDDO -#else -! -! OMP-only code -! 
-!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc,z_w_concorr_mc) ICON_OMP_DEFAULT_SCHEDULE - DO jb = i_startblk, i_endblk - - CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - ! Interpolate contravariant correction to cell centers... -#ifdef __LOOP_EXCHANGE - DO jc = i_startidx, i_endidx -!DIR$ IVDEP - DO jk = nflatlev(jg), nlev -#else - DO jk = nflatlev(jg), nlev - DO jc = i_startidx, i_endidx -#endif - - z_w_concorr_mc(jc,jk) = & - p_int%e_bln_c_s(jc,1,jb)*z_w_concorr_me(ieidx(jc,jb,1),jk,ieblk(jc,jb,1)) + & - p_int%e_bln_c_s(jc,2,jb)*z_w_concorr_me(ieidx(jc,jb,2),jk,ieblk(jc,jb,2)) + & - p_int%e_bln_c_s(jc,3,jb)*z_w_concorr_me(ieidx(jc,jb,3),jk,ieblk(jc,jb,3)) - - ENDDO - ENDDO - - ! ... and to interface levels - DO jk = nflatlev(jg)+1, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - p_nh%diag%w_concorr_c(jc,jk,jb) = & - p_nh%metrics%wgtfac_c(jc,jk,jb)*z_w_concorr_mc(jc,jk) + & - (1._vp - p_nh%metrics%wgtfac_c(jc,jk,jb))*z_w_concorr_mc(jc,jk-1) - ENDDO - ENDDO -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - p_nh%diag%w_concorr_c(jc,nlevp1,jb) = & - p_nh%metrics%wgtfacq_c(jc,1,jb)*z_w_concorr_mc(jc,nlev) + & - p_nh%metrics%wgtfacq_c(jc,2,jb)*z_w_concorr_mc(jc,nlev-1) + & - p_nh%metrics%wgtfacq_c(jc,3,jb)*z_w_concorr_mc(jc,nlev-2) - ENDDO - - ENDDO -!$OMP END DO -#endif - ENDIF - - IF (idiv_method == 2) THEN ! Compute fluxes at edges from original velocities - rl_start = 7 - rl_end = min_rledge_int - 3 - - i_startblk = p_patch%edges%start_block(rl_start) - i_endblk = p_patch%edges%end_block(rl_end) - - IF (jg > 1 .OR. 
l_limited_area) THEN - - CALL init_zero_contiguous_dp(& - z_theta_v_fl_e(1,1,p_patch%edges%start_block(5)), & - nproma * nlev * (i_startblk - p_patch%edges%start_block(5) + 1), & - opt_acc_async=.TRUE., lacc=i_am_accel_node) -!$OMP BARRIER - ENDIF - -!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,je) ICON_OMP_DEFAULT_SCHEDULE - DO jb = i_startblk, i_endblk - - CALL get_indices_e(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 1,nlev -!DIR$ IVDEP - DO je = i_startidx, i_endidx - - p_nh%diag%mass_fl_e(je,jk,jb) = z_rho_e(je,jk,jb) & - * p_nh%prog(nnew)%vn(je,jk,jb) * p_nh%metrics%ddqz_z_full_e(je,jk,jb) - z_theta_v_fl_e(je,jk,jb)= p_nh%diag%mass_fl_e(je,jk,jb) & - * z_theta_v_e(je,jk,jb) - - ENDDO - ENDDO - !$ACC END PARALLEL - - ENDDO -!$OMP END DO - - ENDIF ! idiv_method = 2 - -!$OMP END PARALLEL - - IF (timers_level > 5) THEN - CALL timer_stop(timer_solve_nh_edgecomp) - CALL timer_start(timer_solve_nh_vimpl) - ENDIF - - IF (idiv_method == 2) THEN ! use averaged divergence - idiv_method=1 is inlined for better cache efficiency - -!TODO remove the wait after everything is ASYNC(1) - !$ACC WAIT - - ! 
horizontal divergences of rho and rhotheta are processed in one step for efficiency - CALL div_avg(p_nh%diag%mass_fl_e, p_patch, p_int, p_int%c_bln_avg, z_mass_fl_div, & - opt_in2=z_theta_v_fl_e, opt_out2=z_theta_v_fl_div, opt_rlstart=4, & - opt_rlend=min_rlcell_int) - ENDIF - -!$OMP PARALLEL PRIVATE (rl_start,rl_end,i_startblk,i_endblk,jk_start) - - rl_start = grf_bdywidth_c+1 - rl_end = min_rlcell_int - - i_startblk = p_patch%cells%start_block(rl_start) - i_endblk = p_patch%cells%end_block(rl_end) - - IF (l_vert_nested) THEN - jk_start = 2 - ELSE - jk_start = 1 - ENDIF - -!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc,z_w_expl,z_contr_w_fl_l,z_rho_expl,z_exner_expl, & -!$OMP z_a,z_b,z_c,z_g,z_q,z_alpha,z_beta,z_gamma,ic,z_flxdiv_mass,z_flxdiv_theta ) ICON_OMP_DEFAULT_SCHEDULE - DO jb = i_startblk, i_endblk - - CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - IF (idiv_method == 1) THEN - ! horizontal divergences of rho and rhotheta are inlined and processed in one step for efficiency - - - !$ser savepoint mo_solve_nonhydro_stencil_41_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing geofac_div=p_int%geofac_div(:,:,1)' - - !$ser data geofac_div=p_int%geofac_div(:,:,1) - - PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' - - !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_fl_e=z_theta_v_fl_e(:,:,1)' - - !$ser data z_theta_v_fl_e=z_theta_v_fl_e(:,:,1) - - PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' - - !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) - - PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' - - !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) -#ifdef __LOOP_EXCHANGE - DO jc = i_startidx, i_endidx -!DIR$ IVDEP, PREFERVECTOR - DO jk = 1, nlev -#else -!$NEC outerloop_unroll(8) - DO jk = 1, nlev - DO jc = 
i_startidx, i_endidx -#endif - z_flxdiv_mass(jc,jk) = & - p_nh%diag%mass_fl_e(ieidx(jc,jb,1),jk,ieblk(jc,jb,1)) * p_int%geofac_div(jc,1,jb) + & - p_nh%diag%mass_fl_e(ieidx(jc,jb,2),jk,ieblk(jc,jb,2)) * p_int%geofac_div(jc,2,jb) + & - p_nh%diag%mass_fl_e(ieidx(jc,jb,3),jk,ieblk(jc,jb,3)) * p_int%geofac_div(jc,3,jb) - - z_flxdiv_theta(jc,jk) = & - z_theta_v_fl_e(ieidx(jc,jb,1),jk,ieblk(jc,jb,1)) * p_int%geofac_div(jc,1,jb) + & - z_theta_v_fl_e(ieidx(jc,jb,2),jk,ieblk(jc,jb,2)) * p_int%geofac_div(jc,2,jb) + & - z_theta_v_fl_e(ieidx(jc,jb,3),jk,ieblk(jc,jb,3)) * p_int%geofac_div(jc,3,jb) - END DO - END DO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_41_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing geofac_div=p_int%geofac_div(:,:,1)' - - !$ser data geofac_div=p_int%geofac_div(:,:,1) - - PRINT *, 'Serializing mass_fl_e=p_nh%diag%mass_fl_e(:,:,1)' - - !$ser data mass_fl_e=p_nh%diag%mass_fl_e(:,:,1) - - PRINT *, 'Serializing z_theta_v_fl_e=z_theta_v_fl_e(:,:,1)' - - !$ser data z_theta_v_fl_e=z_theta_v_fl_e(:,:,1) - - PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' - - !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) - - PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' - - !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) - - ELSE ! idiv_method = 2 - just copy values to local 2D array - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 1, nlev - DO jc = i_startidx, i_endidx - z_flxdiv_mass(jc,jk) = z_mass_fl_div(jc,jk,jb) - z_flxdiv_theta(jc,jk) = z_theta_v_fl_div(jc,jk,jb) - END DO - END DO - !$ACC END PARALLEL - - ENDIF - - ! upper boundary conditions for rho_ic and theta_v_ic in the case of vertical nesting - ! - ! kept constant during predictor/corrector step, and linearly interpolated for - ! each dynamics substep. - ! Hence, copying them every dynamics substep during the predictor step (istep=1) is sufficient. - IF (l_vert_nested .AND. 
istep == 1) THEN - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - - p_nh%diag%theta_v_ic(jc,1,jb) = p_nh%diag%theta_v_ic_ubc(jc,jb,1) & - & + dt_linintp_ubc * p_nh%diag%theta_v_ic_ubc(jc,jb,2) - - p_nh%diag%rho_ic(jc,1,jb) = p_nh%diag%rho_ic_ubc(jc,jb,1) & - & + dt_linintp_ubc * p_nh%diag%rho_ic_ubc(jc,jb,2) - - z_mflx_top(jc,jb) = p_nh%diag%mflx_ic_ubc(jc,jb,1) & - & + dt_linintp_ubc * p_nh%diag%mflx_ic_ubc(jc,jb,2) - - ENDDO - !$ACC END PARALLEL - ENDIF - - ! Start of vertically implicit solver part for sound-wave terms; - ! advective terms and gravity-wave terms are treated explicitly - ! - IF (istep == 2 .AND. (itime_scheme >= 4)) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_42_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cpd=cpd' - - !$ser data cpd=cpd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing wgt_nnew_vel=wgt_nnew_vel' - - !$ser data wgt_nnew_vel=wgt_nnew_vel - - PRINT *, 'Serializing wgt_nnow_vel=wgt_nnow_vel' - - !$ser data wgt_nnow_vel=wgt_nnow_vel - - PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' - - !$ser data z_w_expl=z_w_expl(:,:) - - PRINT *, 'Serializing w_nnow=p_nh%prog(nnow)%w(:,:,jb)' - - !$ser data w_nnow=p_nh%prog(nnow)%w(:,:,jb) - - PRINT *, 'Serializing ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1)' - - !$ser data ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1) - - PRINT *, 'Serializing ddt_w_adv_ntl2=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl2)' - - !$ser data ddt_w_adv_ntl2=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl2) - - PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb)' - - !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) - - PRINT *, 
'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) - - PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb)' - - !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 2, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - - ! explicit part for w - use temporally averaged advection terms for better numerical stability - ! the explicit weight for the pressure-gradient term is already included in z_th_ddz_exner_c - z_w_expl(jc,jk) = p_nh%prog(nnow)%w(jc,jk,jb) + dtime * & - (wgt_nnow_vel*p_nh%diag%ddt_w_adv_pc(jc,jk,jb,ntl1) + & - wgt_nnew_vel*p_nh%diag%ddt_w_adv_pc(jc,jk,jb,ntl2) & - -cpd*z_th_ddz_exner_c(jc,jk,jb) ) - - ! contravariant vertical velocity times density for explicit part - z_contr_w_fl_l(jc,jk) = p_nh%diag%rho_ic(jc,jk,jb)*(-p_nh%diag%w_concorr_c(jc,jk,jb) & - + p_nh%metrics%vwind_expl_wgt(jc,jb)*p_nh%prog(nnow)%w(jc,jk,jb) ) - - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_42_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cpd=cpd' - - !$ser data cpd=cpd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing wgt_nnew_vel=wgt_nnew_vel' - - !$ser data wgt_nnew_vel=wgt_nnew_vel - - PRINT *, 'Serializing wgt_nnow_vel=wgt_nnow_vel' - - !$ser data wgt_nnow_vel=wgt_nnow_vel - - PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' - - !$ser data z_w_expl=z_w_expl(:,:) - - PRINT *, 'Serializing w_nnow=p_nh%prog(nnow)%w(:,:,jb)' - - !$ser data w_nnow=p_nh%prog(nnow)%w(:,:,jb) - - PRINT *, 'Serializing ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1)' - - !$ser data ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1) - - PRINT *, 'Serializing ddt_w_adv_ntl2=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl2)' - - !$ser data ddt_w_adv_ntl2=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl2) - - PRINT 
*, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb)' - - !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) - - PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb)' - - !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb) - - ELSE - - - !$ser savepoint mo_solve_nonhydro_stencil_43_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cpd=cpd' - - !$ser data cpd=cpd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' - - !$ser data z_w_expl=z_w_expl(:,:) - - PRINT *, 'Serializing w_nnow=p_nh%prog(nnow)%w(:,:,jb)' - - !$ser data w_nnow=p_nh%prog(nnow)%w(:,:,jb) - - PRINT *, 'Serializing ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1)' - - !$ser data ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1) - - PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb)' - - !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) - - PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb)' - - !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 2, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - - ! 
explicit part for w - z_w_expl(jc,jk) = p_nh%prog(nnow)%w(jc,jk,jb) + dtime * & - (p_nh%diag%ddt_w_adv_pc(jc,jk,jb,ntl1)-cpd*z_th_ddz_exner_c(jc,jk,jb)) - - ! contravariant vertical velocity times density for explicit part - z_contr_w_fl_l(jc,jk) = p_nh%diag%rho_ic(jc,jk,jb)*(-p_nh%diag%w_concorr_c(jc,jk,jb) & - + p_nh%metrics%vwind_expl_wgt(jc,jb)*p_nh%prog(nnow)%w(jc,jk,jb) ) - - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_43_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cpd=cpd' - - !$ser data cpd=cpd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' - - !$ser data z_w_expl=z_w_expl(:,:) - - PRINT *, 'Serializing w_nnow=p_nh%prog(nnow)%w(:,:,jb)' - - !$ser data w_nnow=p_nh%prog(nnow)%w(:,:,jb) - - PRINT *, 'Serializing ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1)' - - !$ser data ddt_w_adv_ntl1=p_nh%diag%ddt_w_adv_pc(:,:,jb,ntl1) - - PRINT *, 'Serializing z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb)' - - !$ser data z_th_ddz_exner_c=z_th_ddz_exner_c(:,:,jb) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) - - PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb)' - - !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,jb) - - ENDIF - - ! 
Solver coefficients - - !$ser savepoint mo_solve_nonhydro_stencil_44_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cvd=cvd' - - !$ser data cvd=cvd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing rd=rd' - - !$ser data rd=rd - - PRINT *, 'Serializing z_beta=z_beta(:,:)' - - !$ser data z_beta=z_beta(:,:) - - PRINT *, 'Serializing exner_nnow=p_nh%prog(nnow)%exner(:,:,jb)' - - !$ser data exner_nnow=p_nh%prog(nnow)%exner(:,:,jb) - - PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' - - !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) - - PRINT *, 'Serializing theta_v_nnow=p_nh%prog(nnow)%theta_v(:,:,jb)' - - !$ser data theta_v_nnow=p_nh%prog(nnow)%theta_v(:,:,jb) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) - - PRINT *, 'Serializing z_alpha=z_alpha(:,:)' - - !$ser data z_alpha=z_alpha(:,:) - - PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,jb)' - - !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,jb) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 1, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - z_beta(jc,jk)=dtime*rd*p_nh%prog(nnow)%exner(jc,jk,jb) / & - (cvd*p_nh%prog(nnow)%rho(jc,jk,jb)*p_nh%prog(nnow)%theta_v(jc,jk,jb)) * & - p_nh%metrics%inv_ddqz_z_full(jc,jk,jb) - - z_alpha(jc,jk)= p_nh%metrics%vwind_impl_wgt(jc,jb)* & - & p_nh%diag%theta_v_ic(jc,jk,jb)*p_nh%diag%rho_ic(jc,jk,jb) - ENDDO - ENDDO - !$ACC END PARALLEL - - - !$ser savepoint mo_solve_nonhydro_stencil_44_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing 
cvd=cvd' - - !$ser data cvd=cvd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing rd=rd' - - !$ser data rd=rd - - PRINT *, 'Serializing z_beta=z_beta(:,:)' - - !$ser data z_beta=z_beta(:,:) - - PRINT *, 'Serializing exner_nnow=p_nh%prog(nnow)%exner(:,:,jb)' - - !$ser data exner_nnow=p_nh%prog(nnow)%exner(:,:,jb) - - PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' - - !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) - - PRINT *, 'Serializing theta_v_nnow=p_nh%prog(nnow)%theta_v(:,:,jb)' - - !$ser data theta_v_nnow=p_nh%prog(nnow)%theta_v(:,:,jb) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) - - PRINT *, 'Serializing z_alpha=z_alpha(:,:)' - - !$ser data z_alpha=z_alpha(:,:) - - PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,jb)' - - !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,jb) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,jb)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,jb) - - - - !$ser savepoint mo_solve_nonhydro_stencil_45_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_alpha=z_alpha(:,:)' - - !$ser data z_alpha=z_alpha(:,:) - - - !$ser savepoint mo_solve_nonhydro_stencil_45_b_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_q=z_q(:,:)' - - !$ser data z_q=z_q(:,:) - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - z_alpha(jc,nlevp1) = 0.0_wp - ! - ! Note: z_q is used in the tridiagonal matrix solver for w below. - ! z_q(1) is always zero, irrespective of w(1)=0 or w(1)/=0 - ! 
z_q(1)=0 is equivalent to cp(slev)=c(slev)/b(slev) in mo_math_utilities:tdma_solver_vec - z_q(jc,1) = 0._vp - ENDDO - !$ACC END PARALLEL - - - !$ser savepoint mo_solve_nonhydro_stencil_45_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_alpha=z_alpha(:,:)' - - !$ser data z_alpha=z_alpha(:,:) - - !$ser savepoint mo_solve_nonhydro_stencil_45_b_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_q=z_q(:,:)' - - !$ser data z_q=z_q(:,:) - - - ! upper boundary condition for w (interpolated from parent domain in case of vertical nesting) - ! Note: the upper b.c. reduces to w(1) = 0 in the absence of diabatic heating - IF (l_open_ubc .AND. .NOT. l_vert_nested) THEN - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - p_nh%prog(nnew)%w(jc,1,jb) = z_thermal_exp(jc,jb) - z_contr_w_fl_l(jc,1) = p_nh%diag%rho_ic(jc,1,jb)*p_nh%prog(nnow)%w(jc,1,jb) & - * p_nh%metrics%vwind_expl_wgt(jc,jb) - ENDDO - !$ACC END PARALLEL - ELSE IF (.NOT. l_open_ubc .AND. .NOT. 
l_vert_nested) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_46_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing w_nnew=p_nh%prog(nnew)%w(:,:,jb)' - - !$ser data w_nnew=p_nh%prog(nnew)%w(:,:,jb) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - p_nh%prog(nnew)%w(jc,1,jb) = 0._wp - z_contr_w_fl_l(jc,1) = 0._wp - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_46_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing w_nnew=p_nh%prog(nnew)%w(:,:,jb)' - - !$ser data w_nnew=p_nh%prog(nnew)%w(:,:,jb) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - ELSE ! l_vert_nested - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - ! UBC for w: horizontally interpolated from the parent interface level, - ! and linearly interpolated in time. - p_nh%prog(nnew)%w(jc,1,jb) = p_nh%diag%w_ubc(jc,jb,1) & - & + dt_linintp_ubc_nnew * p_nh%diag%w_ubc(jc,jb,2) - ! - z_contr_w_fl_l(jc,1) = z_mflx_top(jc,jb) * p_nh%metrics%vwind_expl_wgt(jc,jb) - ENDDO - !$ACC END PARALLEL - ENDIF - - ! 
lower boundary condition for w, consistent with contravariant correction - - !$ser savepoint mo_solve_nonhydro_stencil_47_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing w_nnew=p_nh%prog(nnew)%w(:,:,jb)' - - !$ser data w_nnew=p_nh%prog(nnew)%w(:,:,jb) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - p_nh%prog(nnew)%w(jc,nlevp1,jb) = p_nh%diag%w_concorr_c(jc,nlevp1,jb) - z_contr_w_fl_l(jc,nlevp1) = 0.0_wp - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_47_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing w_nnew=p_nh%prog(nnew)%w(:,:,jb)' - - !$ser data w_nnew=p_nh%prog(nnew)%w(:,:,jb) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,jb) - - - ! Explicit parts of density and Exner pressure - ! - ! 
Top level first - - - !$ser savepoint mo_solve_nonhydro_stencil_48_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' - - !$ser data z_rho_expl=z_rho_expl(:,:) - - PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' - - !$ser data z_exner_expl=z_exner_expl(:,:) - - PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' - - !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) - - PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' - - !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,jb)' - - !$ser data exner_pr=p_nh%diag%exner_pr(:,:,jb) - - PRINT *, 'Serializing z_beta=z_beta(:,:)' - - !$ser data z_beta=z_beta(:,:) - - PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' - - !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) - - PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb)' - - !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - z_rho_expl(jc,1)= p_nh%prog(nnow)%rho(jc,1,jb) & - & -dtime*p_nh%metrics%inv_ddqz_z_full(jc,1,jb) & - & *(z_flxdiv_mass(jc,1) & - & +z_contr_w_fl_l(jc,1 ) & - & -z_contr_w_fl_l(jc,2 )) - - z_exner_expl(jc,1)= p_nh%diag%exner_pr(jc,1,jb) & - & -z_beta (jc,1)*(z_flxdiv_theta(jc,1) & - & +p_nh%diag%theta_v_ic(jc,1,jb)*z_contr_w_fl_l(jc,1) & - & -p_nh%diag%theta_v_ic(jc,2,jb)*z_contr_w_fl_l(jc,2)) & - & 
+dtime*p_nh%diag%ddt_exner_phy(jc,1,jb) - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_48_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' - - !$ser data z_rho_expl=z_rho_expl(:,:) - - PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' - - !$ser data z_exner_expl=z_exner_expl(:,:) - - PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' - - !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) - - PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' - - !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,jb)' - - !$ser data exner_pr=p_nh%diag%exner_pr(:,:,jb) - - PRINT *, 'Serializing z_beta=z_beta(:,:)' - - !$ser data z_beta=z_beta(:,:) - - PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' - - !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) - - PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb)' - - !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb) - - ! 
Other levels - - !$ser savepoint mo_solve_nonhydro_stencil_49_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' - - !$ser data z_rho_expl=z_rho_expl(:,:) - - PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' - - !$ser data z_exner_expl=z_exner_expl(:,:) - - PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' - - !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) - - PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' - - !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,jb)' - - !$ser data exner_pr=p_nh%diag%exner_pr(:,:,jb) - - PRINT *, 'Serializing z_beta=z_beta(:,:)' - - !$ser data z_beta=z_beta(:,:) - - PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' - - !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) - - PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb)' - - !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 2, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - z_rho_expl(jc,jk)= p_nh%prog(nnow)%rho(jc,jk ,jb) & - & -dtime*p_nh%metrics%inv_ddqz_z_full(jc,jk ,jb) & - & *(z_flxdiv_mass(jc,jk ) & - & +z_contr_w_fl_l(jc,jk ) & - & -z_contr_w_fl_l(jc,jk+1 )) - - z_exner_expl(jc,jk)= p_nh%diag%exner_pr(jc,jk,jb) - z_beta(jc,jk) & - & *(z_flxdiv_theta(jc,jk) & - & +p_nh%diag%theta_v_ic(jc,jk ,jb)*z_contr_w_fl_l(jc,jk ) & - & 
-p_nh%diag%theta_v_ic(jc,jk+1,jb)*z_contr_w_fl_l(jc,jk+1)) & - & +dtime*p_nh%diag%ddt_exner_phy(jc,jk,jb) - - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_49_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' - - !$ser data z_rho_expl=z_rho_expl(:,:) - - PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' - - !$ser data z_exner_expl=z_exner_expl(:,:) - - PRINT *, 'Serializing rho_nnow=p_nh%prog(nnow)%rho(:,:,jb)' - - !$ser data rho_nnow=p_nh%prog(nnow)%rho(:,:,jb) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,jb) - - PRINT *, 'Serializing z_flxdiv_mass=z_flxdiv_mass(:,:)' - - !$ser data z_flxdiv_mass=z_flxdiv_mass(:,:) - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing exner_pr=p_nh%diag%exner_pr(:,:,jb)' - - !$ser data exner_pr=p_nh%diag%exner_pr(:,:,jb) - - PRINT *, 'Serializing z_beta=z_beta(:,:)' - - !$ser data z_beta=z_beta(:,:) - - PRINT *, 'Serializing z_flxdiv_theta=z_flxdiv_theta(:,:)' - - !$ser data z_flxdiv_theta=z_flxdiv_theta(:,:) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,jb) - - PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb)' - - !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,jb) - - - IF (is_iau_active) THEN ! 
add analysis increments from data assimilation to density and exner pressure - - - !$ser savepoint mo_solve_nonhydro_stencil_50_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing iau_wgt_dyn=iau_wgt_dyn' - - !$ser data iau_wgt_dyn=iau_wgt_dyn - - PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' - - !$ser data z_rho_expl=z_rho_expl(:,:) - - PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' - - !$ser data z_exner_expl=z_exner_expl(:,:) - - PRINT *, 'Serializing rho_incr=p_nh%diag%rho_incr(:,:,jb)' - - !$ser data rho_incr=p_nh%diag%rho_incr(:,:,jb) - - PRINT *, 'Serializing exner_incr=p_nh%diag%exner_incr(:,:,jb)' - - !$ser data exner_incr=p_nh%diag%exner_incr(:,:,jb) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 1, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - z_rho_expl(jc,jk) = z_rho_expl(jc,jk) + iau_wgt_dyn*p_nh%diag%rho_incr(jc,jk,jb) - z_exner_expl(jc,jk) = z_exner_expl(jc,jk) + iau_wgt_dyn*p_nh%diag%exner_incr(jc,jk,jb) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_50_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing iau_wgt_dyn=iau_wgt_dyn' - - !$ser data iau_wgt_dyn=iau_wgt_dyn - - PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' - - !$ser data z_rho_expl=z_rho_expl(:,:) - - PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' - - !$ser data z_exner_expl=z_exner_expl(:,:) - - PRINT *, 'Serializing rho_incr=p_nh%diag%rho_incr(:,:,jb)' - - !$ser data rho_incr=p_nh%diag%rho_incr(:,:,jb) - - PRINT *, 'Serializing exner_incr=p_nh%diag%exner_incr(:,:,jb)' - - !$ser data exner_incr=p_nh%diag%exner_incr(:,:,jb) - - ENDIF - - ! - ! Solve tridiagonal matrix for w - ! -! 
TODO: not parallelized - - - !$ser savepoint mo_solve_nonhydro_stencil_52_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cpd=cpd' - - !$ser data cpd=cpd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' - - !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) - - PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' - - !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) - - PRINT *, 'Serializing z_alpha=z_alpha(:,:)' - - !$ser data z_alpha=z_alpha(:,:) - - PRINT *, 'Serializing z_beta=z_beta(:,:)' - - !$ser data z_beta=z_beta(:,:) - - PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' - - !$ser data z_w_expl=z_w_expl(:,:) - - PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' - - !$ser data z_exner_expl=z_exner_expl(:,:) - - PRINT *, 'Serializing z_q=z_q(:,:)' - - !$ser data z_q=z_q(:,:) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP SEQ -!$NEC outerloop_unroll(8) - DO jk = 2, nlev -!DIR$ IVDEP -!$NEC ivdep - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - z_gamma = dtime*cpd*p_nh%metrics%vwind_impl_wgt(jc,jb)* & - p_nh%diag%theta_v_ic(jc,jk,jb)/p_nh%metrics%ddqz_z_half(jc,jk,jb) - z_a = -z_gamma*z_beta(jc,jk-1)*z_alpha(jc,jk-1) - z_c = -z_gamma*z_beta(jc,jk )*z_alpha(jc,jk+1) - z_b = 1.0_vp+z_gamma*z_alpha(jc,jk) & - *(z_beta(jc,jk-1)+z_beta(jc,jk)) - z_g = 1.0_vp/(z_b+z_a*z_q(jc,jk-1)) - z_q(jc,jk) = - z_c*z_g - p_nh%prog(nnew)%w(jc,jk,jb) = z_w_expl(jc,jk) - z_gamma & - & *(z_exner_expl(jc,jk-1)-z_exner_expl(jc,jk)) - p_nh%prog(nnew)%w(jc,jk,jb) = (p_nh%prog(nnew)%w(jc,jk,jb) & - -z_a*p_nh%prog(nnew)%w(jc,jk-1,jb))*z_g - ENDDO - ENDDO - !$ACC END PARALLEL - - 
!$ser savepoint mo_solve_nonhydro_stencil_52_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cpd=cpd' - - !$ser data cpd=cpd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' - - !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) - - PRINT *, 'Serializing theta_v_ic=p_nh%diag%theta_v_ic(:,:,1)' - - !$ser data theta_v_ic=p_nh%diag%theta_v_ic(:,:,1) - - PRINT *, 'Serializing ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1)' - - !$ser data ddqz_z_half=p_nh%metrics%ddqz_z_half(:,:,1) - - PRINT *, 'Serializing z_alpha=z_alpha(:,:)' - - !$ser data z_alpha=z_alpha(:,:) - - PRINT *, 'Serializing z_beta=z_beta(:,:)' - - !$ser data z_beta=z_beta(:,:) - - PRINT *, 'Serializing z_w_expl=z_w_expl(:,:)' - - !$ser data z_w_expl=z_w_expl(:,:) - - PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' - - !$ser data z_exner_expl=z_exner_expl(:,:) - - PRINT *, 'Serializing z_q=z_q(:,:)' - - !$ser data z_q=z_q(:,:) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - - !$ser savepoint mo_solve_nonhydro_stencil_53_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_q=z_q' - - !$ser data z_q=z_q - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP SEQ - DO jk = nlev-1, 2, -1 -!DIR$ IVDEP - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - p_nh%prog(nnew)%w(jc,jk,jb) = p_nh%prog(nnew)%w(jc,jk,jb)& - & +p_nh%prog(nnew)%w(jc,jk+1,jb)*z_q(jc,jk) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_53_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_q=z_q' - - !$ser data z_q=z_q - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - ! 
Rayleigh damping mechanism (Klemp,Dudhia,Hassiotis: MWR136,pp.3987-4004) - ! - IF ( rayleigh_type == RAYLEIGH_KLEMP ) THEN - -!$ACC PARALLEL IF( i_am_accel_node ) DEFAULT(PRESENT) ASYNC(1) -!$ACC LOOP GANG VECTOR COLLAPSE(1) -DO jc = 1, nproma - w_1(jc,jb) = p_nh%prog(nnew)%w(jc,1,jb) -ENDDO -!$ACC END PARALLEL - - - !$ser savepoint mo_solve_nonhydro_stencil_54_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_raylfac=z_raylfac(:)' - - !$ser data z_raylfac=z_raylfac(:) - - PRINT *, 'Serializing w_1=w_1(:,1)' - - !$ser data w_1=w_1(:,1) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 2, nrdmax(jg) -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - p_nh%prog(nnew)%w(jc,jk,jb) = z_raylfac(jk)*p_nh%prog(nnew)%w(jc,jk,jb) + & - (1._wp-z_raylfac(jk))*p_nh%prog(nnew)%w(jc,1,jb) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_54_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing z_raylfac=z_raylfac(:)' - - !$ser data z_raylfac=z_raylfac(:) - - PRINT *, 'Serializing w_1=w_1(:,1)' - - !$ser data w_1=w_1(:,1) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - ! Classic Rayleigh damping mechanism for w (requires reference state !!) - ! - ELSE IF ( rayleigh_type == RAYLEIGH_CLASSIC ) THEN - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 2, nrdmax(jg) -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - p_nh%prog(nnew)%w(jc,jk,jb) = p_nh%prog(nnew)%w(jc,jk,jb) & - & - dtime*p_nh%metrics%rayleigh_w(jk) & - & * ( p_nh%prog(nnew)%w(jc,jk,jb) & - & - p_nh%ref%w_ref(jc,jk,jb) ) - ENDDO - ENDDO - !$ACC END PARALLEL - ENDIF - - ! 
Results for thermodynamic variables - - !$ser savepoint mo_solve_nonhydro_stencil_55_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cvd_o_rd=cvd_o_rd' - - !$ser data cvd_o_rd=cvd_o_rd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' - - !$ser data z_rho_expl=z_rho_expl(:,:) - - PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' - - !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' - - !$ser data z_exner_expl=z_exner_expl(:,:) - - PRINT *, 'Serializing exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1)' - - !$ser data exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1) - - PRINT *, 'Serializing z_alpha=z_alpha(:,:)' - - !$ser data z_alpha=z_alpha(:,:) - - PRINT *, 'Serializing z_beta=z_beta' - - !$ser data z_beta=z_beta - - PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing exner_now=p_nh%prog(nnow)%exner(:,:,1)' - - !$ser data exner_now=p_nh%prog(nnow)%exner(:,:,1) - - PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' - - !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) - - PRINT *, 'Serializing exner_new=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) - - PRINT *, 'Serializing theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1)' - - !$ser data theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1) - !$ACC PARALLEL 
IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(128, 1) -!$NEC outerloop_unroll(8) - DO jk = jk_start, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - - ! density - p_nh%prog(nnew)%rho(jc,jk,jb) = z_rho_expl(jc,jk) & - - p_nh%metrics%vwind_impl_wgt(jc,jb)*dtime & - * p_nh%metrics%inv_ddqz_z_full(jc,jk,jb) & - *(p_nh%diag%rho_ic(jc,jk ,jb)*p_nh%prog(nnew)%w(jc,jk ,jb) & - - p_nh%diag%rho_ic(jc,jk+1,jb)*p_nh%prog(nnew)%w(jc,jk+1,jb)) - - ! exner - p_nh%prog(nnew)%exner(jc,jk,jb) = z_exner_expl(jc,jk) & - + p_nh%metrics%exner_ref_mc(jc,jk,jb)-z_beta(jc,jk) & - *(z_alpha(jc,jk )*p_nh%prog(nnew)%w(jc,jk ,jb) & - - z_alpha(jc,jk+1)*p_nh%prog(nnew)%w(jc,jk+1,jb)) - - ! theta - p_nh%prog(nnew)%theta_v(jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb)*p_nh%prog(nnow)%theta_v(jc,jk,jb) & - *( (p_nh%prog(nnew)%exner(jc,jk,jb)/p_nh%prog(nnow)%exner(jc,jk,jb)-1.0_wp) * cvd_o_rd+1.0_wp ) & - / p_nh%prog(nnew)%rho(jc,jk,jb) - - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_55_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing cvd_o_rd=cvd_o_rd' - - !$ser data cvd_o_rd=cvd_o_rd - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing z_rho_expl=z_rho_expl(:,:)' - - !$ser data z_rho_expl=z_rho_expl(:,:) - - PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' - - !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing z_exner_expl=z_exner_expl(:,:)' - - !$ser data z_exner_expl=z_exner_expl(:,:) - - PRINT *, 'Serializing exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1)' - - 
!$ser data exner_ref_mc=p_nh%metrics%exner_ref_mc(:,:,1) - - PRINT *, 'Serializing z_alpha=z_alpha(:,:)' - - !$ser data z_alpha=z_alpha(:,:) - - PRINT *, 'Serializing z_beta=z_beta' - - !$ser data z_beta=z_beta - - PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing exner_now=p_nh%prog(nnow)%exner(:,:,1)' - - !$ser data exner_now=p_nh%prog(nnow)%exner(:,:,1) - - PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' - - !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) - - PRINT *, 'Serializing exner_new=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) - - PRINT *, 'Serializing theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1)' - - !$ser data theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1) - - ! Special treatment of uppermost layer in the case of vertical nesting - IF (l_vert_nested) THEN - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - - ! density - p_nh%prog(nnew)%rho(jc,1,jb) = z_rho_expl(jc,1) & - - p_nh%metrics%vwind_impl_wgt(jc,jb)*dtime & - * p_nh%metrics%inv_ddqz_z_full(jc,1,jb) & - *(z_mflx_top(jc,jb) - p_nh%diag%rho_ic(jc,2,jb)*p_nh%prog(nnew)%w(jc,2,jb)) - - ! exner - p_nh%prog(nnew)%exner(jc,1,jb) = z_exner_expl(jc,1) & - + p_nh%metrics%exner_ref_mc(jc,1,jb)-z_beta(jc,1) & - *(p_nh%metrics%vwind_impl_wgt(jc,jb)*p_nh%diag%theta_v_ic(jc,1,jb) & - * z_mflx_top(jc,jb) - z_alpha(jc,2)*p_nh%prog(nnew)%w(jc,2,jb)) - - ! theta - p_nh%prog(nnew)%theta_v(jc,1,jb) = p_nh%prog(nnow)%rho(jc,1,jb)*p_nh%prog(nnow)%theta_v(jc,1,jb) & - *( (p_nh%prog(nnew)%exner(jc,1,jb)/p_nh%prog(nnow)%exner(jc,1,jb)-1.0_wp) * cvd_o_rd+1.0_wp ) & - /p_nh%prog(nnew)%rho(jc,1,jb) - - ENDDO - !$ACC END PARALLEL - ENDIF - - - ! 
compute dw/dz for divergence damping term - IF (lhdiff_rcf .AND. istep == 1 .AND. divdamp_type >= 3) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_56_63_8f4253f6-09a1-46f7-a72e-95b3ff4ebb06_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - - PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' - - !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(32, 4) - DO jk = kstart_dd3d(jg), nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - z_dwdz_dd(jc,jk,jb) = p_nh%metrics%inv_ddqz_z_full(jc,jk,jb) * & - ( (p_nh%prog(nnew)%w(jc,jk,jb)-p_nh%prog(nnew)%w(jc,jk+1,jb)) - & - (p_nh%diag%w_concorr_c(jc,jk,jb)-p_nh%diag%w_concorr_c(jc,jk+1,jb)) ) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_56_63_8f4253f6-09a1-46f7-a72e-95b3ff4ebb06_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - - PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' - - !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) - ENDIF - - ! Preparations for tracer advection - IF (lprep_adv .AND. 
istep == 2) THEN - IF (lclean_mflx) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_57_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' - - !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 1, nlev -!$NEC ivdep - DO jc = i_startidx, i_endidx - prep_adv%mass_flx_ic(jc,jk,jb) = 0._wp - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_57_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' - - !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) - - ENDIF - - - !$ser savepoint mo_solve_nonhydro_stencil_58_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' - - !$ser data r_nsubsteps=r_nsubsteps - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) - - PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' - - !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' - - !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = jk_start, nlev -!$NEC ivdep - DO jc = i_startidx, i_endidx - prep_adv%mass_flx_ic(jc,jk,jb) = prep_adv%mass_flx_ic(jc,jk,jb) + r_nsubsteps * ( z_contr_w_fl_l(jc,jk) + & - p_nh%diag%rho_ic(jc,jk,jb) * p_nh%metrics%vwind_impl_wgt(jc,jb) * p_nh%prog(nnew)%w(jc,jk,jb) ) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_58_end 
istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' - - !$ser data r_nsubsteps=r_nsubsteps - - PRINT *, 'Serializing z_contr_w_fl_l=z_contr_w_fl_l(:,:)' - - !$ser data z_contr_w_fl_l=z_contr_w_fl_l(:,:) - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) - - PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' - - !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' - - !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) - - IF (l_vert_nested) THEN - ! Use mass flux which has been interpolated to the upper nest boundary. - ! This mass flux is also seen by the mass continuity equation (rho). - ! Hence, by using the same mass flux for the tracer mass continuity equations, - ! consistency with continuity (CWC) is ensured. - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - prep_adv%mass_flx_ic(jc,1,jb) = prep_adv%mass_flx_ic(jc,1,jb) + & - r_nsubsteps * z_mflx_top(jc,jb) - ENDDO - !$ACC END PARALLEL - ENDIF - ENDIF - - ! store dynamical part of exner time increment in exner_dyn_incr - ! the conversion into a temperature tendency is done in the NWP interface - IF (istep == 1 .AND. 
idyn_timestep == 1) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_59_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing exner=p_nh%prog(nnow)%exner(:,:,1)' - - !$ser data exner=p_nh%prog(nnow)%exner(:,:,1) - - PRINT *, 'Serializing exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1)' - - !$ser data exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = kstart_moist(jg), nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - p_nh%diag%exner_dyn_incr(jc,jk,jb) = p_nh%prog(nnow)%exner(jc,jk,jb) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_59_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing exner=p_nh%prog(nnow)%exner(:,:,1)' - - !$ser data exner=p_nh%prog(nnow)%exner(:,:,1) - - PRINT *, 'Serializing exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1)' - - !$ser data exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1) - - ELSE IF (istep == 2 .AND. 
idyn_timestep == ndyn_substeps_var(jg)) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_60_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing ndyn_substeps_var=real(ndyn_substeps_var(jg),wp)' - - !$ser data ndyn_substeps_var=real(ndyn_substeps_var(jg),wp) - - PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) - - PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,1)' - - !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,1) - - PRINT *, 'Serializing exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1)' - - !$ser data exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = kstart_moist(jg), nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - p_nh%diag%exner_dyn_incr(jc,jk,jb) = p_nh%prog(nnew)%exner(jc,jk,jb) - & - (p_nh%diag%exner_dyn_incr(jc,jk,jb) + ndyn_substeps_var(jg)*dtime*p_nh%diag%ddt_exner_phy(jc,jk,jb)) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_60_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing ndyn_substeps_var=real(ndyn_substeps_var(jg),wp)' - - !$ser data ndyn_substeps_var=real(ndyn_substeps_var(jg),wp) - - PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) - - PRINT *, 'Serializing ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,1)' - - !$ser data ddt_exner_phy=p_nh%diag%ddt_exner_phy(:,:,1) - - PRINT *, 'Serializing exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1)' - - !$ser data exner_dyn_incr=p_nh%diag%exner_dyn_incr(:,:,1) - - ENDIF - - IF (istep == 2 .AND. l_child_vertnest) THEN - ! 
Store values at nest interface levels -!DIR$ IVDEP - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - - p_nh%diag%w_int(jc,jb,idyn_timestep) = & - 0.5_wp*(p_nh%prog(nnow)%w(jc,nshift,jb) + p_nh%prog(nnew)%w(jc,nshift,jb)) - - p_nh%diag%theta_v_ic_int(jc,jb,idyn_timestep) = p_nh%diag%theta_v_ic(jc,nshift,jb) - - p_nh%diag%rho_ic_int(jc,jb,idyn_timestep) = p_nh%diag%rho_ic(jc,nshift,jb) - - p_nh%diag%mflx_ic_int(jc,jb,idyn_timestep) = p_nh%diag%rho_ic(jc,nshift,jb) * & - (p_nh%metrics%vwind_expl_wgt(jc,jb)*p_nh%prog(nnow)%w(jc,nshift,jb) + & - p_nh%metrics%vwind_impl_wgt(jc,jb)*p_nh%prog(nnew)%w(jc,nshift,jb)) - ENDDO - !$ACC END PARALLEL - ENDIF - - ENDDO -!$OMP END DO - - ! Boundary update in case of nesting - IF (l_limited_area .OR. jg > 1) THEN - - rl_start = 1 - rl_end = grf_bdywidth_c - - i_startblk = p_patch%cells%start_block(rl_start) - i_endblk = p_patch%cells%end_block(rl_end) - -!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc) ICON_OMP_DEFAULT_SCHEDULE - DO jb = i_startblk, i_endblk - - CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - ! non-MPI-parallelized (serial) case - IF (istep == 1 .AND. my_process_is_mpi_all_seq() ) THEN - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 1, nlev -#if __INTEL_COMPILER != 1400 || __INTEL_COMPILER_UPDATE != 3 -!DIR$ IVDEP -#endif - DO jc = i_startidx, i_endidx - - p_nh%prog(nnew)%rho(jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb) + & - dtime*p_nh%diag%grf_tend_rho(jc,jk,jb) - - p_nh%prog(nnew)%theta_v(jc,jk,jb) = p_nh%prog(nnow)%theta_v(jc,jk,jb) + & - dtime*p_nh%diag%grf_tend_thv(jc,jk,jb) - - ! 
Diagnose exner from rho*theta - p_nh%prog(nnew)%exner(jc,jk,jb) = EXP(rd_o_cvd*LOG(rd_o_p0ref* & - p_nh%prog(nnew)%rho(jc,jk,jb)*p_nh%prog(nnew)%theta_v(jc,jk,jb))) - - p_nh%prog(nnew)%w(jc,jk,jb) = p_nh%prog(nnow)%w(jc,jk,jb) + & - dtime*p_nh%diag%grf_tend_w(jc,jk,jb) - - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - p_nh%prog(nnew)%w(jc,nlevp1,jb) = p_nh%prog(nnow)%w(jc,nlevp1,jb) + & - dtime*p_nh%diag%grf_tend_w(jc,nlevp1,jb) - ENDDO - !$ACC END PARALLEL - - ELSE IF (istep == 1 ) THEN - - ! In the MPI-parallelized case, only rho and w are updated here, - ! and theta_v is preliminarily stored on exner in order to save - ! halo communications - - - - !$ser savepoint mo_solve_nonhydro_stencil_61_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing grf_tend_rho=p_nh%diag%grf_tend_rho(:,:,1)' - - !$ser data grf_tend_rho=p_nh%diag%grf_tend_rho(:,:,1) - - PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing grf_tend_thv=p_nh%diag%grf_tend_thv(:,:,1)' - - !$ser data grf_tend_thv=p_nh%diag%grf_tend_thv(:,:,1) - - PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' - - !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) - - PRINT *, 'Serializing grf_tend_w=p_nh%diag%grf_tend_w(:,:,1)' - - !$ser data grf_tend_w=p_nh%diag%grf_tend_w(:,:,1) - - PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' - - !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) - - PRINT *, 'Serializing exner_new=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) - - PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data 
w_new=p_nh%prog(nnew)%w(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 1, nlev -#if __INTEL_COMPILER != 1400 || __INTEL_COMPILER_UPDATE != 3 -!DIR$ IVDEP -#endif - DO jc = i_startidx, i_endidx - - p_nh%prog(nnew)%rho(jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb) + & - dtime*p_nh%diag%grf_tend_rho(jc,jk,jb) - - ! *** Storing theta_v on exner is done to save MPI communications *** - ! DO NOT TOUCH THIS! - p_nh%prog(nnew)%exner(jc,jk,jb) = p_nh%prog(nnow)%theta_v(jc,jk,jb) + & - dtime*p_nh%diag%grf_tend_thv(jc,jk,jb) - - p_nh%prog(nnew)%w(jc,jk,jb) = p_nh%prog(nnow)%w(jc,jk,jb) + & - dtime*p_nh%diag%grf_tend_w(jc,jk,jb) - - ENDDO - ENDDO - !$ACC END PARALLEL - - - !$ser savepoint mo_solve_nonhydro_stencil_61_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing grf_tend_rho=p_nh%diag%grf_tend_rho(:,:,1)' - - !$ser data grf_tend_rho=p_nh%diag%grf_tend_rho(:,:,1) - - PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing grf_tend_thv=p_nh%diag%grf_tend_thv(:,:,1)' - - !$ser data grf_tend_thv=p_nh%diag%grf_tend_thv(:,:,1) - - PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' - - !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) - - PRINT *, 'Serializing grf_tend_w=p_nh%diag%grf_tend_w(:,:,1)' - - !$ser data grf_tend_w=p_nh%diag%grf_tend_w(:,:,1) - - PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' - - !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) - - PRINT *, 'Serializing exner_new=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) - - PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w_new=p_nh%prog(nnew)%w(:,:,1) - - - !$ser savepoint 
mo_solve_nonhydro_stencil_62_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' - - !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) - - PRINT *, 'Serializing grf_tend_w=p_nh%diag%grf_tend_w(:,:,1)' - - !$ser data grf_tend_w=p_nh%diag%grf_tend_w(:,:,1) - - PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w_new=p_nh%prog(nnew)%w(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - p_nh%prog(nnew)%w(jc,nlevp1,jb) = p_nh%prog(nnow)%w(jc,nlevp1,jb) + & - dtime*p_nh%diag%grf_tend_w(jc,nlevp1,jb) - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_62_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing dtime=dtime' - - !$ser data dtime=dtime - - PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' - - !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) - - PRINT *, 'Serializing grf_tend_w=p_nh%diag%grf_tend_w(:,:,1)' - - !$ser data grf_tend_w=p_nh%diag%grf_tend_w(:,:,1) - - PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w_new=p_nh%prog(nnew)%w(:,:,1) - - ENDIF - - ! compute dw/dz for divergence damping term - IF (lhdiff_rcf .AND. istep == 1 .AND. 
divdamp_type >= 3) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_56_63_3f33cb44-1850-43af-a392-c804c0530f9f_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - - PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' - - !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR TILE(32, 4) - DO jk = kstart_dd3d(jg), nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - z_dwdz_dd(jc,jk,jb) = p_nh%metrics%inv_ddqz_z_full(jc,jk,jb) * & - ( (p_nh%prog(nnew)%w(jc,jk,jb)-p_nh%prog(nnew)%w(jc,jk+1,jb)) - & - (p_nh%diag%w_concorr_c(jc,jk,jb)-p_nh%diag%w_concorr_c(jc,jk+1,jb)) ) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_56_63_3f33cb44-1850-43af-a392-c804c0530f9f_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1)' - - !$ser data inv_ddqz_z_full=p_nh%metrics%inv_ddqz_z_full(:,:,1) - - PRINT *, 'Serializing w=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - - PRINT *, 'Serializing z_dwdz_dd=z_dwdz_dd(:,:,1)' - - !$ser data z_dwdz_dd=z_dwdz_dd(:,:,1) - - ENDIF - - ! Preparations for tracer advection - ! - ! Note that the vertical mass flux at nest boundary points is required in case that - ! vertical tracer transport precedes horizontal tracer transport. - IF (lprep_adv .AND. 
istep == 2) THEN - IF (lclean_mflx) THEN - - - !$ser savepoint mo_solve_nonhydro_stencil_64_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' - - !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) - !$ACC KERNELS IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - prep_adv%mass_flx_ic(i_startidx:i_endidx,:,jb) = 0._wp - !$ACC END KERNELS - - !$ser savepoint mo_solve_nonhydro_stencil_64_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' - - !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) - ENDIF - - - !$ser savepoint mo_solve_nonhydro_stencil_65_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' - - !$ser data r_nsubsteps=r_nsubsteps - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) - - PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' - - !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) - - PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' - - !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) - - PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' - - !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) - - PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w_new=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - - PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' - - !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = jk_start, nlev -!DIR$ IVDEP -!$NEC ivdep - DO jc = i_startidx, i_endidx - prep_adv%mass_flx_ic(jc,jk,jb) = prep_adv%mass_flx_ic(jc,jk,jb) + r_nsubsteps*p_nh%diag%rho_ic(jc,jk,jb)* & - 
(p_nh%metrics%vwind_expl_wgt(jc,jb)*p_nh%prog(nnow)%w(jc,jk,jb) + & - p_nh%metrics%vwind_impl_wgt(jc,jb)*p_nh%prog(nnew)%w(jc,jk,jb) - p_nh%diag%w_concorr_c(jc,jk,jb) ) - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_65_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing r_nsubsteps=r_nsubsteps' - - !$ser data r_nsubsteps=r_nsubsteps - - PRINT *, 'Serializing rho_ic=p_nh%diag%rho_ic(:,:,1)' - - !$ser data rho_ic=p_nh%diag%rho_ic(:,:,1) - - PRINT *, 'Serializing vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1)' - - !$ser data vwind_expl_wgt=p_nh%metrics%vwind_expl_wgt(:,1) - - PRINT *, 'Serializing vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1)' - - !$ser data vwind_impl_wgt=p_nh%metrics%vwind_impl_wgt(:,1) - - PRINT *, 'Serializing w_now=p_nh%prog(nnow)%w(:,:,1)' - - !$ser data w_now=p_nh%prog(nnow)%w(:,:,1) - - PRINT *, 'Serializing w_new=p_nh%prog(nnew)%w(:,:,1)' - - !$ser data w_new=p_nh%prog(nnew)%w(:,:,1) - - PRINT *, 'Serializing w_concorr_c=p_nh%diag%w_concorr_c(:,:,1)' - - !$ser data w_concorr_c=p_nh%diag%w_concorr_c(:,:,1) - - PRINT *, 'Serializing mass_flx_ic=prep_adv%mass_flx_ic(:,:,1)' - - !$ser data mass_flx_ic=prep_adv%mass_flx_ic(:,:,1) - - IF (l_vert_nested) THEN - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR - DO jc = i_startidx, i_endidx - prep_adv%mass_flx_ic(jc,1,jb) = prep_adv%mass_flx_ic(jc,1,jb) + & - r_nsubsteps * (p_nh%diag%mflx_ic_ubc(jc,jb,1) & - + dt_linintp_ubc * p_nh%diag%mflx_ic_ubc(jc,jb,2)) - ENDDO - !$ACC END PARALLEL - ENDIF - ENDIF - - ENDDO -!$OMP END DO - - ENDIF - -!$OMP END PARALLEL - - - !------------------------- - ! communication phase - - IF (timers_level > 5) THEN - CALL timer_stop(timer_solve_nh_vimpl) - CALL timer_start(timer_solve_nh_exch) - ENDIF - - IF (itype_comm == 1) THEN - IF (istep == 1) THEN - IF (lhdiff_rcf .AND. divdamp_type >= 3) THEN - ! 
Synchronize w and vertical contribution to divergence damping -#ifdef __MIXED_PRECISION - CALL sync_patch_array_mult_mp(SYNC_C,p_patch,1,1,p_nh%prog(nnew)%w,f3din1_sp=z_dwdz_dd, & - & opt_varname="w_nnew and z_dwdz_dd") -#else - CALL sync_patch_array_mult(SYNC_C,p_patch,2,p_nh%prog(nnew)%w,z_dwdz_dd, & - & opt_varname="w_nnew and z_dwdz_dd") -#endif - ELSE - ! Only w needs to be synchronized - CALL sync_patch_array(SYNC_C,p_patch,p_nh%prog(nnew)%w,opt_varname="w_nnew") - ENDIF - ELSE ! istep = 2: synchronize all prognostic variables - CALL sync_patch_array_mult(SYNC_C,p_patch,3,p_nh%prog(nnew)%rho, & - p_nh%prog(nnew)%exner,p_nh%prog(nnew)%w,opt_varname="rho, exner, w_nnew") - ENDIF - ENDIF - - IF (timers_level > 5) CALL timer_stop(timer_solve_nh_exch) - - ! end communication phase - !------------------------- - - ENDDO ! istep-loop - - - ! The remaining computations are needed for MPI-parallelized applications only - IF ( .NOT. my_process_is_mpi_all_seq() ) THEN - -! OpenMP directives are commented for the NEC because the overhead is too large -#if !defined( __SX__ ) -!$OMP PARALLEL PRIVATE(rl_start,rl_end,i_startblk,i_endblk) -#endif - IF (l_limited_area .OR. jg > 1) THEN - - ! Index list over halo points lying in the boundary interpolation zone - ! 
Note: this list typically contains at most 10 grid points - - - !$ser savepoint mo_solve_nonhydro_stencil_66_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing rd_o_cvd=rd_o_cvd' - - !$ser data rd_o_cvd=rd_o_cvd - - PRINT *, 'Serializing rd_o_p0ref=rd_o_p0ref' - - !$ser data rd_o_p0ref=rd_o_p0ref - - PRINT *, 'Serializing bdy_halo_c=p_nh%metrics%mask_prog_halo_c_dsl_low_refin(:,1)' - - !$ser data bdy_halo_c=p_nh%metrics%mask_prog_halo_c_dsl_low_refin(:,1) - - PRINT *, 'Serializing rho=p_nh%prog(nnew)%rho(:,:,1)' - - !$ser data rho=p_nh%prog(nnew)%rho(:,:,1) - - PRINT *, 'Serializing theta_v=p_nh%prog(nnew)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnew)%theta_v(:,:,1) - - PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG -#ifndef __SX__ -!$OMP DO PRIVATE(jb,ic,jk,jc) ICON_OMP_DEFAULT_SCHEDULE -#endif - DO ic = 1, p_nh%metrics%bdy_halo_c_dim - - jb = p_nh%metrics%bdy_halo_c_blk(ic) - jc = p_nh%metrics%bdy_halo_c_idx(ic) -!DIR$ IVDEP - !$ACC LOOP VECTOR - DO jk = 1, nlev - p_nh%prog(nnew)%theta_v(jc,jk,jb) = p_nh%prog(nnew)%exner(jc,jk,jb) - - ! 
Diagnose exner from rho*theta - p_nh%prog(nnew)%exner(jc,jk,jb) = EXP(rd_o_cvd*LOG(rd_o_p0ref* & - p_nh%prog(nnew)%rho(jc,jk,jb)*p_nh%prog(nnew)%theta_v(jc,jk,jb))) - - ENDDO - ENDDO - !$ACC END PARALLEL - - rl_start = min_rlcell_int - 1 - rl_end = min_rlcell - - CALL get_indices_c(p_patch, 1, 1, 1, & - i_startidx, i_endidx, rl_start, rl_end) - - - !$ser savepoint mo_solve_nonhydro_stencil_66_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing rd_o_cvd=rd_o_cvd' - - !$ser data rd_o_cvd=rd_o_cvd - - PRINT *, 'Serializing rd_o_p0ref=rd_o_p0ref' - - !$ser data rd_o_p0ref=rd_o_p0ref - - PRINT *, 'Serializing bdy_halo_c=p_nh%metrics%mask_prog_halo_c_dsl_low_refin(:,1)' - - !$ser data bdy_halo_c=p_nh%metrics%mask_prog_halo_c_dsl_low_refin(:,1) - - PRINT *, 'Serializing rho=p_nh%prog(nnew)%rho(:,:,1)' - - !$ser data rho=p_nh%prog(nnew)%rho(:,:,1) - - PRINT *, 'Serializing theta_v=p_nh%prog(nnew)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnew)%theta_v(:,:,1) - - PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) - -#ifndef __SX__ -!$OMP END DO -#endif - - rl_start = 1 - rl_end = grf_bdywidth_c - - i_startblk = p_patch%cells%start_block(rl_start) - i_endblk = p_patch%cells%end_block(rl_end) - -#ifndef __SX__ -!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc) ICON_OMP_DEFAULT_SCHEDULE -#endif - DO jb = i_startblk, i_endblk - - CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - - !$ser savepoint mo_solve_nonhydro_stencil_67_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing rd_o_cvd=rd_o_cvd' - - !$ser data rd_o_cvd=rd_o_cvd - - PRINT *, 'Serializing rd_o_p0ref=rd_o_p0ref' - - !$ser data rd_o_p0ref=rd_o_p0ref - - PRINT *, 'Serializing rho=p_nh%prog(nnew)%rho(:,:,1)' - - !$ser data rho=p_nh%prog(nnew)%rho(:,:,1) - - PRINT *, 'Serializing theta_v=p_nh%prog(nnew)%theta_v(:,:,1)' - - !$ser data 
theta_v=p_nh%prog(nnew)%theta_v(:,:,1) - - PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - !$ACC LOOP GANG VECTOR COLLAPSE(2) - DO jk = 1, nlev -!DIR$ IVDEP - DO jc = i_startidx, i_endidx - - p_nh%prog(nnew)%theta_v(jc,jk,jb) = p_nh%prog(nnew)%exner(jc,jk,jb) - - ! Diagnose exner from rhotheta - p_nh%prog(nnew)%exner(jc,jk,jb) = EXP(rd_o_cvd*LOG(rd_o_p0ref* & - p_nh%prog(nnew)%rho(jc,jk,jb)*p_nh%prog(nnew)%theta_v(jc,jk,jb))) - - ENDDO - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_67_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing rd_o_cvd=rd_o_cvd' - - !$ser data rd_o_cvd=rd_o_cvd - - PRINT *, 'Serializing rd_o_p0ref=rd_o_p0ref' - - !$ser data rd_o_p0ref=rd_o_p0ref - - PRINT *, 'Serializing rho=p_nh%prog(nnew)%rho(:,:,1)' - - !$ser data rho=p_nh%prog(nnew)%rho(:,:,1) - - PRINT *, 'Serializing theta_v=p_nh%prog(nnew)%theta_v(:,:,1)' - - !$ser data theta_v=p_nh%prog(nnew)%theta_v(:,:,1) - - PRINT *, 'Serializing exner=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner=p_nh%prog(nnew)%exner(:,:,1) - ENDDO -#ifndef __SX__ -!$OMP END DO -#endif - ENDIF - - rl_start = min_rlcell_int - 1 - rl_end = min_rlcell - - i_startblk = p_patch%cells%start_block(rl_start) - i_endblk = p_patch%cells%end_block(rl_end) - -#ifndef __SX__ -!$OMP DO PRIVATE(jb,i_startidx,i_endidx,jk,jc) ICON_OMP_DEFAULT_SCHEDULE -#endif - DO jb = i_startblk, i_endblk - - CALL get_indices_c(p_patch, jb, i_startblk, i_endblk, & - i_startidx, i_endidx, rl_start, rl_end) - - - !$ser savepoint mo_solve_nonhydro_stencil_68_start istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing mask_prog_halo_c=p_nh%metrics%mask_prog_halo_c(:,1)' - - !$ser data mask_prog_halo_c=p_nh%metrics%mask_prog_halo_c(:,1) - - PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data 
rho_now=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing exner_new=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) - - PRINT *, 'Serializing exner_now=p_nh%prog(nnow)%exner(:,:,1)' - - !$ser data exner_now=p_nh%prog(nnow)%exner(:,:,1) - - PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' - - !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) - - PRINT *, 'Serializing theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1)' - - !$ser data theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1) - - PRINT *, 'Serializing cvd_o_rd=cvd_o_rd' - - !$ser data cvd_o_rd=cvd_o_rd - !$ACC PARALLEL IF(i_am_accel_node) DEFAULT(PRESENT) ASYNC(1) - -#ifdef __LOOP_EXCHANGE - !$ACC LOOP GANG - DO jc = i_startidx, i_endidx - IF (p_nh%metrics%mask_prog_halo_c(jc,jb)) THEN -!DIR$ IVDEP - !$ACC LOOP VECTOR - DO jk = 1, nlev -#else - !$ACC LOOP GANG VECTOR TILE(32, 4) - DO jk = 1, nlev - DO jc = i_startidx, i_endidx - IF (p_nh%metrics%mask_prog_halo_c(jc,jb)) THEN -#endif - p_nh%prog(nnew)%theta_v(jc,jk,jb) = p_nh%prog(nnow)%rho(jc,jk,jb)*p_nh%prog(nnow)%theta_v(jc,jk,jb) & - *( (p_nh%prog(nnew)%exner(jc,jk,jb)/p_nh%prog(nnow)%exner(jc,jk,jb)-1.0_wp) * cvd_o_rd+1.0_wp ) & - / p_nh%prog(nnew)%rho(jc,jk,jb) - -#ifdef __LOOP_EXCHANGE - ENDDO - ENDIF -#else - ENDIF - ENDDO -#endif - ENDDO - !$ACC END PARALLEL - - !$ser savepoint mo_solve_nonhydro_stencil_68_end istep=istep mo_solve_nonhydro_ctr=mo_solve_nonhydro_ctr - - PRINT *, 'Serializing mask_prog_halo_c=p_nh%metrics%mask_prog_halo_c(:,1)' - - !$ser data mask_prog_halo_c=p_nh%metrics%mask_prog_halo_c(:,1) - - PRINT *, 'Serializing rho_now=p_nh%prog(nnow)%rho(:,:,1)' - - !$ser data rho_now=p_nh%prog(nnow)%rho(:,:,1) - - PRINT *, 'Serializing theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1)' - - !$ser data theta_v_now=p_nh%prog(nnow)%theta_v(:,:,1) - - PRINT *, 'Serializing 
exner_new=p_nh%prog(nnew)%exner(:,:,1)' - - !$ser data exner_new=p_nh%prog(nnew)%exner(:,:,1) - - PRINT *, 'Serializing exner_now=p_nh%prog(nnow)%exner(:,:,1)' - - !$ser data exner_now=p_nh%prog(nnow)%exner(:,:,1) - - PRINT *, 'Serializing rho_new=p_nh%prog(nnew)%rho(:,:,1)' - - !$ser data rho_new=p_nh%prog(nnew)%rho(:,:,1) - - PRINT *, 'Serializing theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1)' - - !$ser data theta_v_new=p_nh%prog(nnew)%theta_v(:,:,1) - - PRINT *, 'Serializing cvd_o_rd=cvd_o_rd' - - !$ser data cvd_o_rd=cvd_o_rd - - - ENDDO -#ifndef __SX__ -!$OMP END DO NOWAIT -!$OMP END PARALLEL -#endif - - ENDIF ! .NOT. my_process_is_mpi_all_seq() - - IF (ltimer) CALL timer_stop(timer_solve_nh) - CALL message('DSL', 'all dycore kernels ran') - - !$ACC WAIT - !$ACC END DATA - -#if !defined (__LOOP_EXCHANGE) && !defined (__SX__) - CALL btraj%destruct() -#endif - - END SUBROUTINE solve_nh - -#ifdef _OPENACC - - SUBROUTINE h2d_solve_nonhydro( nnow, jstep, jg, idiv_method, grf_intmethod_e, lprep_adv, l_vert_nested, is_iau_active, & - p_nh, prep_adv ) - - INTEGER, INTENT(IN) :: nnow, jstep, jg, idiv_method, grf_intmethod_e - LOGICAL, INTENT(IN) :: l_vert_nested, lprep_adv, is_iau_active - - TYPE(t_nh_state), INTENT(INOUT) :: p_nh - TYPE(t_prepare_adv), TARGET, INTENT(INOUT) :: prep_adv - - REAL(wp), DIMENSION(:,:,:), POINTER :: exner_tmp, rho_tmp, theta_v_tmp, vn_tmp, w_tmp ! p_prog WP - REAL(wp), DIMENSION(:,:,:), POINTER :: vn_ie_ubc_tmp ! p_diag WP 2D - REAL(wp), DIMENSION(:,:,:), POINTER :: w_ubc_tmp, mflx_ic_ubc_tmp, theta_v_ic_ubc_tmp, rho_ic_ubc_tmp ! p_diag WP - - REAL(wp), DIMENSION(:,:,:), POINTER :: theta_v_ic_tmp, rho_ic_tmp ! p_diag WP - REAL(wp), DIMENSION(:,:,:), POINTER :: mass_fl_e_tmp, exner_pr_tmp ! p_diag WP - REAL(wp), DIMENSION(:,:,:), POINTER :: grf_bdy_mflx_tmp ! p_diag WP - - REAL(vp), DIMENSION(:,:,:), POINTER :: vt_tmp, vn_ie_tmp, w_concorr_c_tmp, ddt_exner_phy_tmp ! p_diag VP - REAL(vp), DIMENSION(:,:,:), POINTER :: exner_dyn_incr_tmp ! 
p_diag VP - REAL(vp), DIMENSION(:,:,:), POINTER :: ddt_vn_phy_tmp ! p_diag VP - - REAL(vp), DIMENSION(:,:,:), POINTER :: rho_incr_tmp, exner_incr_tmp ! p_diag VP - REAL(wp), DIMENSION(:,:,:), POINTER :: vn_traj_tmp, mass_flx_me_tmp, mass_flx_ic_tmp ! prep_adv WP - REAL(wp), DIMENSION(:,:,:), POINTER :: vn_ref_tmp, w_ref_tmp ! p_ref WP - - REAL(vp), DIMENSION(:,:,:,:), POINTER :: ddt_vn_apc_pc_tmp - REAL(vp), DIMENSION(:,:,:,:), POINTER :: ddt_vn_cor_pc_tmp - REAL(vp), DIMENSION(:,:,:,:), POINTER :: ddt_w_adv_pc_tmp - - REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_dyn_tmp, ddt_vn_dmp_tmp, ddt_vn_adv_tmp, ddt_vn_cor_tmp ! p_diag WP - REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_pgr_tmp, ddt_vn_phd_tmp, ddt_vn_iau_tmp, ddt_vn_ray_tmp ! p_diag WP - REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_grf_tmp ! p_diag WP - -! p_patch: -! p_patch%cells: edge_idx/blk -! p_patch%edges: cell_idx/blk, vertex_idx/blk, quad_idx/blk, -! primal/dual_normal_cell, inv_primal/dual_edge_length, tangent_orientation, refin_ctrl - -! -! p_nh%metrics: vertidx_gradp, pg_vertidx, pg_edgeidx, pg_edgeblk, -! bdy_halo_c_blk, bdy_halo_c_idx, bdy_mflx_e_blk, bdy_mflx_e_idx, -! coeff_gradp, d_exner_dz_ref_ic, d2dexdz2_fac1_mc, -! ddqz_z_half, ddxn_z_full, ddxt_z_full, ddqz_z_full_e, -! exner_exfac, exner_ref_mc, hmask_dd3d, inv_ddqz_z_full, -! mask_prog_halo_c, nudge_e_blk, nudge_e_idx, pg_exdist, -! rayleigh_vn, rayleigh_w, rho_ref_mc, rho_ref_me, -! scalfac_dd3d, theta_ref_ic, theta_ref_mc, theta_ref_me, -! vwind_expl_wgt, vwind_impl_wgt, -! wgtfac_c, wgtfac_e, wgtfacq_c, wgtfacq1_c, zdiff_gradp - - -! p_nh%prog(nnow) All present (above) - - exner_tmp => p_nh%prog(nnow)%exner - rho_tmp => p_nh%prog(nnow)%rho - theta_v_tmp => p_nh%prog(nnow)%theta_v - vn_tmp => p_nh%prog(nnow)%vn - w_tmp => p_nh%prog(nnow)%w - !$ACC UPDATE DEVICE(exner_tmp, rho_tmp, theta_v_tmp, vn_tmp, w_tmp) - -! 
p_nh%diag: - - rho_ic_tmp => p_nh%diag%rho_ic - theta_v_ic_tmp => p_nh%diag%theta_v_ic - !$ACC UPDATE DEVICE(rho_ic_tmp, theta_v_ic_tmp) - - vt_tmp => p_nh%diag%vt - vn_ie_tmp => p_nh%diag%vn_ie - w_concorr_c_tmp => p_nh%diag%w_concorr_c - !$ACC UPDATE DEVICE(vt_tmp, vn_ie_tmp, w_concorr_c_tmp) - - mass_fl_e_tmp => p_nh%diag%mass_fl_e - exner_pr_tmp => p_nh%diag%exner_pr - exner_dyn_incr_tmp => p_nh%diag%exner_dyn_incr - !$ACC UPDATE DEVICE(mass_fl_e_tmp, exner_pr_tmp, exner_dyn_incr_tmp) - -! WS: I do not think these are necessary, but adding for completeness - ddt_vn_apc_pc_tmp => p_nh%diag%ddt_vn_apc_pc - ddt_w_adv_pc_tmp => p_nh%diag%ddt_w_adv_pc - !$ACC UPDATE DEVICE(ddt_vn_apc_pc_tmp, ddt_w_adv_pc_tmp) - IF (p_nh%diag%ddt_vn_adv_is_associated .OR. p_nh%diag%ddt_vn_cor_is_associated) THEN - ddt_vn_cor_pc_tmp => p_nh%diag%ddt_vn_cor_pc - !$ACC UPDATE DEVICE(ddt_vn_cor_pc_tmp) - END IF - -! MAG: For completeness - ddt_vn_dyn_tmp => p_nh%diag%ddt_vn_dyn - !$ACC UPDATE DEVICE(ddt_vn_dyn_tmp) IF(p_nh%diag%ddt_vn_dyn_is_associated) - ddt_vn_dmp_tmp => p_nh%diag%ddt_vn_dmp - !$ACC UPDATE DEVICE(ddt_vn_dmp_tmp) IF(p_nh%diag%ddt_vn_dmp_is_associated) - ddt_vn_adv_tmp => p_nh%diag%ddt_vn_adv - !$ACC UPDATE DEVICE(ddt_vn_adv_tmp) IF(p_nh%diag%ddt_vn_adv_is_associated) - ddt_vn_cor_tmp => p_nh%diag%ddt_vn_cor - !$ACC UPDATE DEVICE(ddt_vn_cor_tmp) IF(p_nh%diag%ddt_vn_cor_is_associated) - ddt_vn_pgr_tmp => p_nh%diag%ddt_vn_pgr - !$ACC UPDATE DEVICE(ddt_vn_pgr_tmp) IF(p_nh%diag%ddt_vn_pgr_is_associated) - ddt_vn_phd_tmp => p_nh%diag%ddt_vn_phd - !$ACC UPDATE DEVICE(ddt_vn_phd_tmp) IF(p_nh%diag%ddt_vn_phd_is_associated) - ddt_vn_iau_tmp => p_nh%diag%ddt_vn_iau - !$ACC UPDATE DEVICE(ddt_vn_iau_tmp) IF(p_nh%diag%ddt_vn_iau_is_associated) - ddt_vn_ray_tmp => p_nh%diag%ddt_vn_ray - !$ACC UPDATE DEVICE(ddt_vn_ray_tmp) IF(p_nh%diag%ddt_vn_ray_is_associated) - ddt_vn_grf_tmp => p_nh%diag%ddt_vn_grf - !$ACC UPDATE DEVICE(ddt_vn_grf_tmp) IF(p_nh%diag%ddt_vn_grf_is_associated) - - 
mflx_ic_ubc_tmp => p_nh%diag%mflx_ic_ubc - vn_ie_ubc_tmp => p_nh%diag%vn_ie_ubc - theta_v_ic_ubc_tmp => p_nh%diag%theta_v_ic_ubc - rho_ic_ubc_tmp => p_nh%diag%rho_ic_ubc - w_ubc_tmp => p_nh%diag%w_ubc - !$ACC UPDATE DEVICE(mflx_ic_ubc_tmp, vn_ie_ubc_tmp, theta_v_ic_ubc_tmp, rho_ic_ubc_tmp, w_ubc_tmp) IF(l_vert_nested) - - ddt_exner_phy_tmp => p_nh%diag%ddt_exner_phy - ddt_vn_phy_tmp => p_nh%diag%ddt_vn_phy - !$ACC UPDATE DEVICE(ddt_exner_phy_tmp, ddt_vn_phy_tmp) - - rho_incr_tmp => p_nh%diag%rho_incr - exner_incr_tmp => p_nh%diag%exner_incr - !$ACC UPDATE DEVICE(rho_incr_tmp, exner_incr_tmp) - - grf_bdy_mflx_tmp => p_nh%diag%grf_bdy_mflx - !$ACC UPDATE DEVICE(grf_bdy_mflx_tmp) IF((jg > 1) .AND. (grf_intmethod_e >= 5) .AND. (idiv_method == 1) .AND. (jstep == 0)) - -! prep_adv: - - vn_traj_tmp => prep_adv%vn_traj - mass_flx_me_tmp => prep_adv%mass_flx_me - mass_flx_ic_tmp => prep_adv%mass_flx_ic - !$ACC UPDATE DEVICE(vn_traj_tmp, mass_flx_me_tmp, mass_flx_ic_tmp) IF(lprep_adv) - -! p_nh%ref: - - vn_ref_tmp => p_nh%ref%vn_ref - w_ref_tmp => p_nh%ref%w_ref - !$ACC UPDATE DEVICE(vn_ref_tmp, w_ref_tmp) - - END SUBROUTINE h2d_solve_nonhydro - - SUBROUTINE d2h_solve_nonhydro( nnew, jstep, jg, idyn_timestep, grf_intmethod_e, idiv_method, lsave_mflx, & - & l_child_vertnest, lprep_adv, p_nh, prep_adv ) - - INTEGER, INTENT(IN) :: nnew, jstep, jg, idyn_timestep, grf_intmethod_e, idiv_method - LOGICAL, INTENT(IN) :: lsave_mflx, l_child_vertnest, lprep_adv - - TYPE(t_nh_state), INTENT(INOUT) :: p_nh - TYPE(t_prepare_adv), TARGET, INTENT(INOUT) :: prep_adv - - REAL(wp), DIMENSION(:,:,:), POINTER :: exner_tmp, rho_tmp, theta_v_tmp, vn_tmp, w_tmp ! p_prog WP - REAL(wp), DIMENSION(:,:,:), POINTER :: vn_ie_int_tmp ! p_diag WP 2D - REAL(wp), DIMENSION(:,:,:), POINTER :: theta_v_ic_tmp, rho_ic_tmp, rho_ic_int_tmp, w_int_tmp ! p_diag WP - REAL(wp), DIMENSION(:,:,:), POINTER :: theta_v_ic_int_tmp, grf_bdy_mflx_tmp ! 
p_diag WP - REAL(wp), DIMENSION(:,:,:), POINTER :: mass_fl_e_tmp, mflx_ic_int_tmp, exner_pr_tmp ! p_diag WP - - REAL(vp), DIMENSION(:,:,:), POINTER :: vt_tmp, vn_ie_tmp, w_concorr_c_tmp ! p_diag VP - REAL(vp), DIMENSION(:,:,:), POINTER :: mass_fl_e_sv_tmp ! p_diag VP - REAL(vp), DIMENSION(:,:,:), POINTER :: exner_dyn_incr_tmp ! p_diag VP - REAL(wp), DIMENSION(:,:,:), POINTER :: vn_traj_tmp, mass_flx_me_tmp, mass_flx_ic_tmp ! prep_adv WP - REAL(vp), DIMENSION(:,:,:,:), POINTER :: ddt_vn_apc_pc_tmp, ddt_vn_cor_pc_tmp, ddt_w_adv_pc_tmp - - REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_dyn_tmp, ddt_vn_dmp_tmp, ddt_vn_adv_tmp, ddt_vn_cor_tmp ! p_diag WP - REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_pgr_tmp, ddt_vn_phd_tmp, ddt_vn_iau_tmp, ddt_vn_ray_tmp ! p_diag WP - REAL(wp), DIMENSION(:,:,:), POINTER :: ddt_vn_grf_tmp ! p_diag WP - -! The following code is necessary if the Dycore is to be run in isolation on the GPU -! Update all device output on host: the prognostic variables have shifted from nnow to nnew; diagnostics pointers set above - - exner_tmp => p_nh%prog(nnew)%exner - rho_tmp => p_nh%prog(nnew)%rho - theta_v_tmp => p_nh%prog(nnew)%theta_v - vn_tmp => p_nh%prog(nnew)%vn - w_tmp => p_nh%prog(nnew)%w - !$ACC UPDATE HOST(exner_tmp, rho_tmp, theta_v_tmp, vn_tmp, w_tmp) - - vt_tmp => p_nh%diag%vt - vn_ie_tmp => p_nh%diag%vn_ie - rho_ic_tmp => p_nh%diag%rho_ic - theta_v_ic_tmp => p_nh%diag%theta_v_ic - exner_pr_tmp => p_nh%diag%exner_pr - !$ACC UPDATE HOST(vt_tmp, vn_ie_tmp, rho_ic_tmp, theta_v_ic_tmp, exner_pr_tmp) - - w_concorr_c_tmp => p_nh%diag%w_concorr_c - mass_fl_e_tmp => p_nh%diag%mass_fl_e - exner_dyn_incr_tmp => p_nh%diag%exner_dyn_incr - !$ACC UPDATE HOST(w_concorr_c_tmp, mass_fl_e_tmp, exner_dyn_incr_tmp) - - ddt_vn_apc_pc_tmp => p_nh%diag%ddt_vn_apc_pc - ddt_w_adv_pc_tmp => p_nh%diag%ddt_w_adv_pc - !$ACC UPDATE HOST(ddt_vn_apc_pc_tmp, ddt_w_adv_pc_tmp) - IF (p_nh%diag%ddt_vn_adv_is_associated .OR. 
p_nh%diag%ddt_vn_cor_is_associated) THEN - ddt_vn_cor_pc_tmp => p_nh%diag%ddt_vn_cor_pc - !$ACC UPDATE HOST(ddt_vn_cor_pc_tmp) - END IF - -! MAG: For completeness - ddt_vn_dyn_tmp => p_nh%diag%ddt_vn_dyn - !$ACC UPDATE HOST(ddt_vn_dyn_tmp) IF(p_nh%diag%ddt_vn_dyn_is_associated) - ddt_vn_dmp_tmp => p_nh%diag%ddt_vn_dmp - !$ACC UPDATE HOST(ddt_vn_dmp_tmp) IF(p_nh%diag%ddt_vn_dmp_is_associated) - ddt_vn_adv_tmp => p_nh%diag%ddt_vn_adv - !$ACC UPDATE HOST(ddt_vn_adv_tmp) IF(p_nh%diag%ddt_vn_adv_is_associated) - ddt_vn_cor_tmp => p_nh%diag%ddt_vn_cor - !$ACC UPDATE HOST(ddt_vn_cor_tmp) IF(p_nh%diag%ddt_vn_cor_is_associated) - ddt_vn_pgr_tmp => p_nh%diag%ddt_vn_pgr - !$ACC UPDATE HOST(ddt_vn_pgr_tmp) IF(p_nh%diag%ddt_vn_pgr_is_associated) - ddt_vn_phd_tmp => p_nh%diag%ddt_vn_phd - !$ACC UPDATE HOST(ddt_vn_phd_tmp) IF(p_nh%diag%ddt_vn_phd_is_associated) - ddt_vn_iau_tmp => p_nh%diag%ddt_vn_iau - !$ACC UPDATE HOST(ddt_vn_iau_tmp) IF(p_nh%diag%ddt_vn_iau_is_associated) - ddt_vn_ray_tmp => p_nh%diag%ddt_vn_ray - !$ACC UPDATE HOST(ddt_vn_ray_tmp) IF(p_nh%diag%ddt_vn_ray_is_associated) - ddt_vn_grf_tmp => p_nh%diag%ddt_vn_grf - !$ACC UPDATE HOST(ddt_vn_grf_tmp) IF(p_nh%diag%ddt_vn_grf_is_associated) - - mass_fl_e_sv_tmp => p_nh%diag%mass_fl_e_sv - !$ACC UPDATE HOST(mass_fl_e_sv_tmp) IF(lsave_mflx) - - w_int_tmp => p_nh%diag%w_int - mflx_ic_int_tmp => p_nh%diag%mflx_ic_int - theta_v_ic_int_tmp => p_nh%diag%theta_v_ic_int - rho_ic_int_tmp => p_nh%diag%rho_ic_int - !$ACC UPDATE HOST(w_int_tmp, mflx_ic_int_tmp, theta_v_ic_int_tmp, rho_ic_int_tmp) IF(l_child_vertnest) - - vn_ie_int_tmp => p_nh%diag%vn_ie_int - !$ACC UPDATE HOST(vn_ie_int_tmp) IF(idyn_timestep == 1 .AND. l_child_vertnest) - - grf_bdy_mflx_tmp => p_nh%diag%grf_bdy_mflx - !$ACC UPDATE HOST(grf_bdy_mflx_tmp) IF((jg > 1) .AND. (grf_intmethod_e >= 5) .AND. (idiv_method == 1) .AND. 
(jstep == 0)) - - vn_traj_tmp => prep_adv%vn_traj - mass_flx_me_tmp => prep_adv%mass_flx_me - mass_flx_ic_tmp => prep_adv%mass_flx_ic - !$ACC UPDATE HOST(vn_traj_tmp, mass_flx_me_tmp, mass_flx_ic_tmp) IF(lprep_adv) - - END SUBROUTINE d2h_solve_nonhydro - -#endif - -END MODULE mo_solve_nonhydro From c21a8ae2f963c120723682752dbab16922341c0a Mon Sep 17 00:00:00 2001 From: Christopher Bignamini Date: Tue, 9 May 2023 16:45:13 +0200 Subject: [PATCH 14/21] Fortran serialization codegen tests (#198) Co-authored-by: samkellerhals --- pyutils/src/icon4py/f2ser/parse.py | 11 +- pyutils/tests/f2ser/test_f2ser_cli.py | 19 +- pyutils/tests/f2ser/test_f2ser_codegen.py | 94 ++- .../expected_diffusion_granule_savepoint.f90 | 656 ++++++++++++++++++ 4 files changed, 767 insertions(+), 13 deletions(-) create mode 100644 testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 diff --git a/pyutils/src/icon4py/f2ser/parse.py b/pyutils/src/icon4py/f2ser/parse.py index 609859485e..09755f92a4 100644 --- a/pyutils/src/icon4py/f2ser/parse.py +++ b/pyutils/src/icon4py/f2ser/parse.py @@ -159,11 +159,12 @@ def _parse_derived_types(self, derived_types: dict) -> dict: MissingDerivedTypeError: If the type definition for a derived type could not be found in any of the dependency files. 
""" derived_type_defs = {} - for dep in self.dependencies: - parsed = crack(dep) - for block in parsed["body"]: - if block["block"] == "type": - derived_type_defs[block["name"]] = block["vars"] + if self.dependencies: + for dep in self.dependencies: + parsed = crack(dep) + for block in parsed["body"]: + if block["block"] == "type": + derived_type_defs[block["name"]] = block["vars"] for _, subroutine_vars in derived_types.items(): for _, intent_vars in subroutine_vars.items(): diff --git a/pyutils/tests/f2ser/test_f2ser_cli.py b/pyutils/tests/f2ser/test_f2ser_cli.py index ac544211ea..b32d87e1d9 100644 --- a/pyutils/tests/f2ser/test_f2ser_cli.py +++ b/pyutils/tests/f2ser/test_f2ser_cli.py @@ -43,6 +43,15 @@ def test_cli_no_deps(no_deps_source_file, outfile, cli): assert result.exit_code == 0 +def test_cli_wrong_deps(diffusion_granule, samples_path, outfile, cli): + inp = str(diffusion_granule) + deps = [str(samples_path / "wrong_derived_types_example.f90")] + args = [inp, outfile, "-d", *deps] + result = cli.invoke(main, args) + assert result.exit_code == 2 + assert "Invalid value for '--dependencies' / '-d'" in result.output + + def test_cli_missing_deps(diffusion_granule, outfile, cli): inp = str(diffusion_granule) args = [inp, outfile] @@ -50,10 +59,16 @@ def test_cli_missing_deps(diffusion_granule, outfile, cli): assert isinstance(result.exception, MissingDerivedTypeError) +def test_cli_wrong_source(outfile, cli): + inp = str("foo.90") + args = [inp, outfile] + result = cli.invoke(main, args) + assert "Invalid value for 'GRANULE_PATH'" in result.output + + def test_cli_missing_source(not_existing_diffusion_granule, outfile, cli): inp = str(not_existing_diffusion_granule) args = [inp, outfile] result = cli.invoke(main, args) - error_search = result.stdout.find("Invalid value for 'GRANULE_PATH'") - assert error_search != -1 assert isinstance(result.exception, SystemExit) + assert "Invalid value for 'GRANULE_PATH'" in result.output diff --git 
a/pyutils/tests/f2ser/test_f2ser_codegen.py b/pyutils/tests/f2ser/test_f2ser_codegen.py index 9f5368373b..b6d0c38ed3 100644 --- a/pyutils/tests/f2ser/test_f2ser_codegen.py +++ b/pyutils/tests/f2ser/test_f2ser_codegen.py @@ -11,18 +11,100 @@ # # SPDX-License-Identifier: GPL-3.0-or-later +import pytest + from icon4py.f2ser.deserialise import ParsedGranuleDeserialiser from icon4py.f2ser.parse import GranuleParser from icon4py.liskov.codegen.serialisation.generate import ( SerialisationCodeGenerator, ) +from icon4py.liskov.codegen.shared.types import GeneratedCode def test_deserialiser_diffusion_codegen(diffusion_granule, diffusion_granule_deps): - parser = GranuleParser(diffusion_granule, diffusion_granule_deps) - parsed = parser() - deserialiser = ParsedGranuleDeserialiser(parsed, directory=".", prefix="test") - interface = deserialiser() - generator = SerialisationCodeGenerator(interface) - generated = generator() + parsed = GranuleParser(diffusion_granule, diffusion_granule_deps)() + interface = ParsedGranuleDeserialiser(parsed, directory=".", prefix="test")() + generated = SerialisationCodeGenerator(interface)() assert len(generated) == 3 + + +@pytest.fixture +def expected_no_deps_serialization_directives(): + serialization_directives = [ + GeneratedCode( + startln=12, + source="\n" + ' !$ser init directory="." 
prefix="test"\n' + "\n" + " !$ser savepoint no_deps_init_in\n" + "\n" + " PRINT *, 'Serializing a=a'\n" + "\n" + " !$ser data a=a\n" + "\n" + " PRINT *, 'Serializing b=b'\n" + "\n" + " !$ser data b=b", + ), + GeneratedCode( + startln=14, + source="\n" + " !$ser savepoint no_deps_init_out\n" + "\n" + " PRINT *, 'Serializing c=c'\n" + "\n" + " !$ser data c=c\n" + "\n" + " PRINT *, 'Serializing b=b'\n" + "\n" + " !$ser data b=b", + ), + GeneratedCode( + startln=20, + source="\n" + " !$ser savepoint no_deps_run_in\n" + "\n" + " PRINT *, 'Serializing a=a'\n" + "\n" + " !$ser data a=a\n" + "\n" + " PRINT *, 'Serializing b=b'\n" + "\n" + " !$ser data b=b", + ), + GeneratedCode( + startln=22, + source="\n" + " !$ser savepoint no_deps_run_out\n" + "\n" + " PRINT *, 'Serializing c=c'\n" + "\n" + " !$ser data c=c\n" + "\n" + " PRINT *, 'Serializing b=b'\n" + "\n" + " !$ser data b=b", + ), + ] + return serialization_directives + + +def test_deserialiser_directives_no_deps_codegen( + no_deps_source_file, expected_no_deps_serialization_directives +): + parsed = GranuleParser(no_deps_source_file)() + interface = ParsedGranuleDeserialiser(parsed, directory=".", prefix="test")() + generated = SerialisationCodeGenerator(interface)() + assert generated == expected_no_deps_serialization_directives + + +def test_deserialiser_directives_diffusion_codegen( + diffusion_granule, diffusion_granule_deps, samples_path +): + parsed = GranuleParser(diffusion_granule, diffusion_granule_deps)() + interface = ParsedGranuleDeserialiser(parsed, directory=".", prefix="test")() + generated = SerialisationCodeGenerator(interface)() + reference_savepoint = ( + samples_path / "expected_diffusion_granule_savepoint.f90" + ).read_text() + assert generated[0].source == reference_savepoint.rstrip() diff --git a/testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 b/testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 new file mode 100644 index 
0000000000..19a3653d97 --- /dev/null +++ b/testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 @@ -0,0 +1,656 @@ + + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_primal_normal_vert_v1 + + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_primal_normal_vert_v2 + + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_vert_v1 + + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_vert_v2 + + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_primal_normal_cell_v1 + + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_primal_normal_cell_v2 + + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_cell_v1 + + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_cell_v2 + + !$ser init directory="." prefix="test" + + !$ser savepoint diffusion_init_in + + !$ser verbatim allocate(edges_primal_normal_vert_v1(size(edges_primal_normal_vert, 1),size(edges_primal_normal_vert, 2),size(edges_primal_normal_vert, 3))) + !$ser data edges_primal_normal_vert_v1=edges_primal_normal_vert_v1(:,:,:) + !$ser verbatim deallocate(edges_primal_normal_vert_v1) + + !$ser verbatim allocate(edges_primal_normal_vert_v2(size(edges_primal_normal_vert, 1),size(edges_primal_normal_vert, 2),size(edges_primal_normal_vert, 3))) + !$ser data edges_primal_normal_vert_v2=edges_primal_normal_vert_v2(:,:,:) + !$ser verbatim deallocate(edges_primal_normal_vert_v2) + + !$ser verbatim allocate(edges_dual_normal_vert_v1(size(edges_dual_normal_vert, 1),size(edges_dual_normal_vert, 2),size(edges_dual_normal_vert, 3))) + !$ser data edges_dual_normal_vert_v1=edges_dual_normal_vert_v1(:,:,:) + !$ser verbatim deallocate(edges_dual_normal_vert_v1) + + !$ser verbatim allocate(edges_dual_normal_vert_v2(size(edges_dual_normal_vert, 1),size(edges_dual_normal_vert, 2),size(edges_dual_normal_vert, 3))) + !$ser data edges_dual_normal_vert_v2=edges_dual_normal_vert_v2(:,:,:) + !$ser 
verbatim deallocate(edges_dual_normal_vert_v2) + + !$ser verbatim allocate(edges_primal_normal_cell_v1(size(edges_primal_normal_cell, 1),size(edges_primal_normal_cell, 2),size(edges_primal_normal_cell, 3))) + !$ser data edges_primal_normal_cell_v1=edges_primal_normal_cell_v1(:,:,:) + !$ser verbatim deallocate(edges_primal_normal_cell_v1) + + !$ser verbatim allocate(edges_primal_normal_cell_v2(size(edges_primal_normal_cell, 1),size(edges_primal_normal_cell, 2),size(edges_primal_normal_cell, 3))) + !$ser data edges_primal_normal_cell_v2=edges_primal_normal_cell_v2(:,:,:) + !$ser verbatim deallocate(edges_primal_normal_cell_v2) + + !$ser verbatim allocate(edges_dual_normal_cell_v1(size(edges_dual_normal_cell, 1),size(edges_dual_normal_cell, 2),size(edges_dual_normal_cell, 3))) + !$ser data edges_dual_normal_cell_v1=edges_dual_normal_cell_v1(:,:,:) + !$ser verbatim deallocate(edges_dual_normal_cell_v1) + + !$ser verbatim allocate(edges_dual_normal_cell_v2(size(edges_dual_normal_cell, 1),size(edges_dual_normal_cell, 2),size(edges_dual_normal_cell, 3))) + !$ser data edges_dual_normal_cell_v2=edges_dual_normal_cell_v2(:,:,:) + !$ser verbatim deallocate(edges_dual_normal_cell_v2) + + PRINT *, 'Serializing cvd_o_rd=cvd_o_rd' + + !$ser data cvd_o_rd=cvd_o_rd + + PRINT *, 'Serializing grav=grav' + + !$ser data grav=grav + + PRINT *, 'Serializing jg=jg' + + !$ser data jg=jg + + PRINT *, 'Serializing nproma=nproma' + + !$ser data nproma=nproma + + PRINT *, 'Serializing nlev=nlev' + + !$ser data nlev=nlev + + PRINT *, 'Serializing nblks_e=nblks_e' + + !$ser data nblks_e=nblks_e + + PRINT *, 'Serializing nblks_v=nblks_v' + + !$ser data nblks_v=nblks_v + + PRINT *, 'Serializing nblks_c=nblks_c' + + !$ser data nblks_c=nblks_c + + PRINT *, 'Serializing nshift=nshift' + + !$ser data nshift=nshift + + PRINT *, 'Serializing nshift_total=nshift_total' + + !$ser data nshift_total=nshift_total + + PRINT *, 'Serializing nrdmax=nrdmax' + + !$ser data nrdmax=nrdmax + + PRINT *, 'Serializing 
ndyn_substeps=ndyn_substeps' + + !$ser data ndyn_substeps=ndyn_substeps + + PRINT *, 'Serializing hdiff_order=hdiff_order' + + !$ser data hdiff_order=hdiff_order + + PRINT *, 'Serializing itype_comm=itype_comm' + + !$ser data itype_comm=itype_comm + + PRINT *, 'Serializing itype_sher=itype_sher' + + !$ser data itype_sher=itype_sher + + PRINT *, 'Serializing itype_vn_diffu=itype_vn_diffu' + + !$ser data itype_vn_diffu=itype_vn_diffu + + PRINT *, 'Serializing itype_t_diffu=itype_t_diffu' + + !$ser data itype_t_diffu=itype_t_diffu + + PRINT *, 'Serializing hdiff_smag_z=hdiff_smag_z' + + !$ser data hdiff_smag_z=hdiff_smag_z + + PRINT *, 'Serializing hdiff_smag_z2=hdiff_smag_z2' + + !$ser data hdiff_smag_z2=hdiff_smag_z2 + + PRINT *, 'Serializing hdiff_smag_z3=hdiff_smag_z3' + + !$ser data hdiff_smag_z3=hdiff_smag_z3 + + PRINT *, 'Serializing hdiff_smag_z4=hdiff_smag_z4' + + !$ser data hdiff_smag_z4=hdiff_smag_z4 + + PRINT *, 'Serializing hdiff_smag_fac=hdiff_smag_fac' + + !$ser data hdiff_smag_fac=hdiff_smag_fac + + PRINT *, 'Serializing hdiff_smag_fac2=hdiff_smag_fac2' + + !$ser data hdiff_smag_fac2=hdiff_smag_fac2 + + PRINT *, 'Serializing hdiff_smag_fac3=hdiff_smag_fac3' + + !$ser data hdiff_smag_fac3=hdiff_smag_fac3 + + PRINT *, 'Serializing hdiff_smag_fac4=hdiff_smag_fac4' + + !$ser data hdiff_smag_fac4=hdiff_smag_fac4 + + PRINT *, 'Serializing hdiff_efdt_ratio=hdiff_efdt_ratio' + + !$ser data hdiff_efdt_ratio=hdiff_efdt_ratio + + PRINT *, 'Serializing k4=k4' + + !$ser data k4=k4 + + PRINT *, 'Serializing k4w=k4w' + + !$ser data k4w=k4w + + PRINT *, 'Serializing nudge_max_coeff=nudge_max_coeff' + + !$ser data nudge_max_coeff=nudge_max_coeff + + PRINT *, 'Serializing denom_diffu_v=denom_diffu_v' + + !$ser data denom_diffu_v=denom_diffu_v + + PRINT *, 'Serializing p_test_run=p_test_run' + + !$ser data p_test_run=p_test_run + + PRINT *, 'Serializing lphys=lphys' + + !$ser data lphys=lphys + + PRINT *, 'Serializing lhdiff_rcf=lhdiff_rcf' + + !$ser data 
lhdiff_rcf=lhdiff_rcf + + PRINT *, 'Serializing lhdiff_w=lhdiff_w' + + !$ser data lhdiff_w=lhdiff_w + + PRINT *, 'Serializing lhdiff_temp=lhdiff_temp' + + !$ser data lhdiff_temp=lhdiff_temp + + PRINT *, 'Serializing l_zdiffu_t=l_zdiffu_t' + + !$ser data l_zdiffu_t=l_zdiffu_t + + PRINT *, 'Serializing l_limited_area=l_limited_area' + + !$ser data l_limited_area=l_limited_area + + PRINT *, 'Serializing lfeedback=lfeedback' + + !$ser data lfeedback=lfeedback + + PRINT *, 'Serializing ltkeshs=ltkeshs' + + !$ser data ltkeshs=ltkeshs + + PRINT *, 'Serializing lsmag_3d=lsmag_3d' + + !$ser data lsmag_3d=lsmag_3d + + PRINT *, 'Serializing lvert_nest=lvert_nest' + + !$ser data lvert_nest=lvert_nest + + PRINT *, 'Serializing ltimer=ltimer' + + !$ser data ltimer=ltimer + + PRINT *, 'Serializing ddt_vn_hdf_is_associated=ddt_vn_hdf_is_associated' + + !$ser data ddt_vn_hdf_is_associated=ddt_vn_hdf_is_associated + + PRINT *, 'Serializing ddt_vn_dyn_is_associated=ddt_vn_dyn_is_associated' + + !$ser data ddt_vn_dyn_is_associated=ddt_vn_dyn_is_associated + + PRINT *, 'Serializing vct_a=vct_a(:)' + + IF (SIZE(vct_a) > 0) THEN + !$ser data vct_a=vct_a(:) + ELSE + PRINT *, 'Warning: Array vct_a has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing c_lin_e=c_lin_e(:,:,:)' + + IF (SIZE(c_lin_e) > 0) THEN + !$ser data c_lin_e=c_lin_e(:,:,:) + ELSE + PRINT *, 'Warning: Array c_lin_e has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing e_bln_c_s=e_bln_c_s(:,:,:)' + + IF (SIZE(e_bln_c_s) > 0) THEN + !$ser data e_bln_c_s=e_bln_c_s(:,:,:) + ELSE + PRINT *, 'Warning: Array e_bln_c_s has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing e_bln_c_u=e_bln_c_u(:,:,:)' + + IF (SIZE(e_bln_c_u) > 0) THEN + !$ser data e_bln_c_u=e_bln_c_u(:,:,:) + ELSE + PRINT *, 'Warning: Array e_bln_c_u has size 0. Not serializing array.' 
+ END IF + + PRINT *, 'Serializing e_bln_c_v=e_bln_c_v(:,:,:)' + + IF (SIZE(e_bln_c_v) > 0) THEN + !$ser data e_bln_c_v=e_bln_c_v(:,:,:) + ELSE + PRINT *, 'Warning: Array e_bln_c_v has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing cells_aw_verts=cells_aw_verts(:,:,:)' + + IF (SIZE(cells_aw_verts) > 0) THEN + !$ser data cells_aw_verts=cells_aw_verts(:,:,:) + ELSE + PRINT *, 'Warning: Array cells_aw_verts has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing geofac_div=geofac_div(:,:,:)' + + IF (SIZE(geofac_div) > 0) THEN + !$ser data geofac_div=geofac_div(:,:,:) + ELSE + PRINT *, 'Warning: Array geofac_div has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing geofac_rot=geofac_rot(:,:,:)' + + IF (SIZE(geofac_rot) > 0) THEN + !$ser data geofac_rot=geofac_rot(:,:,:) + ELSE + PRINT *, 'Warning: Array geofac_rot has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing geofac_n2s=geofac_n2s(:,:,:)' + + IF (SIZE(geofac_n2s) > 0) THEN + !$ser data geofac_n2s=geofac_n2s(:,:,:) + ELSE + PRINT *, 'Warning: Array geofac_n2s has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing geofac_grg=geofac_grg(:,:,:,:)' + + IF (SIZE(geofac_grg) > 0) THEN + !$ser data geofac_grg=geofac_grg(:,:,:,:) + ELSE + PRINT *, 'Warning: Array geofac_grg has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing nudgecoeff_e=nudgecoeff_e(:,:)' + + IF (SIZE(nudgecoeff_e) > 0) THEN + !$ser data nudgecoeff_e=nudgecoeff_e(:,:) + ELSE + PRINT *, 'Warning: Array nudgecoeff_e has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing rbf_vec_idx_v=rbf_vec_idx_v(:,:,:)' + + IF (SIZE(rbf_vec_idx_v) > 0) THEN + !$ser data rbf_vec_idx_v=rbf_vec_idx_v(:,:,:) + ELSE + PRINT *, 'Warning: Array rbf_vec_idx_v has size 0. Not serializing array.' 
+ END IF + + PRINT *, 'Serializing rbf_vec_blk_v=rbf_vec_blk_v(:,:,:)' + + IF (SIZE(rbf_vec_blk_v) > 0) THEN + !$ser data rbf_vec_blk_v=rbf_vec_blk_v(:,:,:) + ELSE + PRINT *, 'Warning: Array rbf_vec_blk_v has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing rbf_vec_coeff_v=rbf_vec_coeff_v(:,:,:,:)' + + IF (SIZE(rbf_vec_coeff_v) > 0) THEN + !$ser data rbf_vec_coeff_v=rbf_vec_coeff_v(:,:,:,:) + ELSE + PRINT *, 'Warning: Array rbf_vec_coeff_v has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing enhfac_diffu=enhfac_diffu(:)' + + IF (SIZE(enhfac_diffu) > 0) THEN + !$ser data enhfac_diffu=enhfac_diffu(:) + ELSE + PRINT *, 'Warning: Array enhfac_diffu has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing zd_intcoef=zd_intcoef(:,:)' + + IF (SIZE(zd_intcoef) > 0) THEN + !$ser data zd_intcoef=zd_intcoef(:,:) + ELSE + PRINT *, 'Warning: Array zd_intcoef has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing zd_geofac=zd_geofac(:,:)' + + IF (SIZE(zd_geofac) > 0) THEN + !$ser data zd_geofac=zd_geofac(:,:) + ELSE + PRINT *, 'Warning: Array zd_geofac has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing zd_diffcoef=zd_diffcoef(:)' + + IF (SIZE(zd_diffcoef) > 0) THEN + !$ser data zd_diffcoef=zd_diffcoef(:) + ELSE + PRINT *, 'Warning: Array zd_diffcoef has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing wgtfac_c=wgtfac_c(:,:,:)' + + IF (SIZE(wgtfac_c) > 0) THEN + !$ser data wgtfac_c=wgtfac_c(:,:,:) + ELSE + PRINT *, 'Warning: Array wgtfac_c has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing wgtfac_e=wgtfac_e(:,:,:)' + + IF (SIZE(wgtfac_e) > 0) THEN + !$ser data wgtfac_e=wgtfac_e(:,:,:) + ELSE + PRINT *, 'Warning: Array wgtfac_e has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing wgtfacq_e=wgtfacq_e(:,:,:)' + + IF (SIZE(wgtfacq_e) > 0) THEN + !$ser data wgtfacq_e=wgtfacq_e(:,:,:) + ELSE + PRINT *, 'Warning: Array wgtfacq_e has size 0. 
Not serializing array.' + END IF + + PRINT *, 'Serializing wgtfacq1_e=wgtfacq1_e(:,:,:)' + + IF (SIZE(wgtfacq1_e) > 0) THEN + !$ser data wgtfacq1_e=wgtfacq1_e(:,:,:) + ELSE + PRINT *, 'Warning: Array wgtfacq1_e has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing ddqz_z_full_e=ddqz_z_full_e(:,:,:)' + + IF (SIZE(ddqz_z_full_e) > 0) THEN + !$ser data ddqz_z_full_e=ddqz_z_full_e(:,:,:) + ELSE + PRINT *, 'Warning: Array ddqz_z_full_e has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing theta_ref_mc=theta_ref_mc(:,:,:)' + + IF (SIZE(theta_ref_mc) > 0) THEN + !$ser data theta_ref_mc=theta_ref_mc(:,:,:) + ELSE + PRINT *, 'Warning: Array theta_ref_mc has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing zd_indlist=zd_indlist(:,:)' + + IF (SIZE(zd_indlist) > 0) THEN + !$ser data zd_indlist=zd_indlist(:,:) + ELSE + PRINT *, 'Warning: Array zd_indlist has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing zd_blklist=zd_blklist(:,:)' + + IF (SIZE(zd_blklist) > 0) THEN + !$ser data zd_blklist=zd_blklist(:,:) + ELSE + PRINT *, 'Warning: Array zd_blklist has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing zd_vertidx=zd_vertidx(:,:)' + + IF (SIZE(zd_vertidx) > 0) THEN + !$ser data zd_vertidx=zd_vertidx(:,:) + ELSE + PRINT *, 'Warning: Array zd_vertidx has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing zd_listdim=zd_listdim' + + !$ser data zd_listdim=zd_listdim + + PRINT *, 'Serializing edges_start_block=edges_start_block(min_rledge:)' + + IF (SIZE(edges_start_block) > 0) THEN + !$ser data edges_start_block=edges_start_block(min_rledge:) + ELSE + PRINT *, 'Warning: Array edges_start_block has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing edges_end_block=edges_end_block(min_rledge:)' + + IF (SIZE(edges_end_block) > 0) THEN + !$ser data edges_end_block=edges_end_block(min_rledge:) + ELSE + PRINT *, 'Warning: Array edges_end_block has size 0. 
Not serializing array.' + END IF + + PRINT *, 'Serializing edges_start_index=edges_start_index(min_rledge:)' + + IF (SIZE(edges_start_index) > 0) THEN + !$ser data edges_start_index=edges_start_index(min_rledge:) + ELSE + PRINT *, 'Warning: Array edges_start_index has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing edges_end_index=edges_end_index(min_rledge:)' + + IF (SIZE(edges_end_index) > 0) THEN + !$ser data edges_end_index=edges_end_index(min_rledge:) + ELSE + PRINT *, 'Warning: Array edges_end_index has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing edges_vertex_idx=edges_vertex_idx(:,:,:)' + + IF (SIZE(edges_vertex_idx) > 0) THEN + !$ser data edges_vertex_idx=edges_vertex_idx(:,:,:) + ELSE + PRINT *, 'Warning: Array edges_vertex_idx has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing edges_vertex_blk=edges_vertex_blk(:,:,:)' + + IF (SIZE(edges_vertex_blk) > 0) THEN + !$ser data edges_vertex_blk=edges_vertex_blk(:,:,:) + ELSE + PRINT *, 'Warning: Array edges_vertex_blk has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing edges_cell_idx=edges_cell_idx(:,:,:)' + + IF (SIZE(edges_cell_idx) > 0) THEN + !$ser data edges_cell_idx=edges_cell_idx(:,:,:) + ELSE + PRINT *, 'Warning: Array edges_cell_idx has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing edges_cell_blk=edges_cell_blk(:,:,:)' + + IF (SIZE(edges_cell_blk) > 0) THEN + !$ser data edges_cell_blk=edges_cell_blk(:,:,:) + ELSE + PRINT *, 'Warning: Array edges_cell_blk has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing edges_tangent_orientation=edges_tangent_orientation(:,:)' + + IF (SIZE(edges_tangent_orientation) > 0) THEN + !$ser data edges_tangent_orientation=edges_tangent_orientation(:,:) + ELSE + PRINT *, 'Warning: Array edges_tangent_orientation has size 0. Not serializing array.' 
+ END IF + + PRINT *, 'Serializing edges_inv_vert_vert_length=edges_inv_vert_vert_length(:,:)' + + IF (SIZE(edges_inv_vert_vert_length) > 0) THEN + !$ser data edges_inv_vert_vert_length=edges_inv_vert_vert_length(:,:) + ELSE + PRINT *, 'Warning: Array edges_inv_vert_vert_length has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing edges_inv_primal_edge_length=edges_inv_primal_edge_length(:,:)' + + IF (SIZE(edges_inv_primal_edge_length) > 0) THEN + !$ser data edges_inv_primal_edge_length=edges_inv_primal_edge_length(:,:) + ELSE + PRINT *, 'Warning: Array edges_inv_primal_edge_length has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing edges_inv_dual_edge_length=edges_inv_dual_edge_length(:,:)' + + IF (SIZE(edges_inv_dual_edge_length) > 0) THEN + !$ser data edges_inv_dual_edge_length=edges_inv_dual_edge_length(:,:) + ELSE + PRINT *, 'Warning: Array edges_inv_dual_edge_length has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing edges_area_edge=edges_area_edge(:,:)' + + IF (SIZE(edges_area_edge) > 0) THEN + !$ser data edges_area_edge=edges_area_edge(:,:) + ELSE + PRINT *, 'Warning: Array edges_area_edge has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing cells_start_block=cells_start_block(min_rlcell:)' + + IF (SIZE(cells_start_block) > 0) THEN + !$ser data cells_start_block=cells_start_block(min_rlcell:) + ELSE + PRINT *, 'Warning: Array cells_start_block has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing cells_end_block=cells_end_block(min_rlcell:)' + + IF (SIZE(cells_end_block) > 0) THEN + !$ser data cells_end_block=cells_end_block(min_rlcell:) + ELSE + PRINT *, 'Warning: Array cells_end_block has size 0. Not serializing array.' 
+ END IF + + PRINT *, 'Serializing cells_start_index=cells_start_index(min_rlcell:)' + + IF (SIZE(cells_start_index) > 0) THEN + !$ser data cells_start_index=cells_start_index(min_rlcell:) + ELSE + PRINT *, 'Warning: Array cells_start_index has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing cells_end_index=cells_end_index(min_rlcell:)' + + IF (SIZE(cells_end_index) > 0) THEN + !$ser data cells_end_index=cells_end_index(min_rlcell:) + ELSE + PRINT *, 'Warning: Array cells_end_index has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing cells_neighbor_idx=cells_neighbor_idx(:,:,:)' + + IF (SIZE(cells_neighbor_idx) > 0) THEN + !$ser data cells_neighbor_idx=cells_neighbor_idx(:,:,:) + ELSE + PRINT *, 'Warning: Array cells_neighbor_idx has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing cells_neighbor_blk=cells_neighbor_blk(:,:,:)' + + IF (SIZE(cells_neighbor_blk) > 0) THEN + !$ser data cells_neighbor_blk=cells_neighbor_blk(:,:,:) + ELSE + PRINT *, 'Warning: Array cells_neighbor_blk has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing cells_edge_idx=cells_edge_idx(:,:,:)' + + IF (SIZE(cells_edge_idx) > 0) THEN + !$ser data cells_edge_idx=cells_edge_idx(:,:,:) + ELSE + PRINT *, 'Warning: Array cells_edge_idx has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing cells_edge_blk=cells_edge_blk(:,:,:)' + + IF (SIZE(cells_edge_blk) > 0) THEN + !$ser data cells_edge_blk=cells_edge_blk(:,:,:) + ELSE + PRINT *, 'Warning: Array cells_edge_blk has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing cells_area=cells_area(:,:)' + + IF (SIZE(cells_area) > 0) THEN + !$ser data cells_area=cells_area(:,:) + ELSE + PRINT *, 'Warning: Array cells_area has size 0. Not serializing array.' 
+ END IF + + PRINT *, 'Serializing verts_start_block=verts_start_block(min_rlvert:)' + + IF (SIZE(verts_start_block) > 0) THEN + !$ser data verts_start_block=verts_start_block(min_rlvert:) + ELSE + PRINT *, 'Warning: Array verts_start_block has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing verts_end_block=verts_end_block(min_rlvert:)' + + IF (SIZE(verts_end_block) > 0) THEN + !$ser data verts_end_block=verts_end_block(min_rlvert:) + ELSE + PRINT *, 'Warning: Array verts_end_block has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing verts_start_index=verts_start_index(min_rlvert:)' + + IF (SIZE(verts_start_index) > 0) THEN + !$ser data verts_start_index=verts_start_index(min_rlvert:) + ELSE + PRINT *, 'Warning: Array verts_start_index has size 0. Not serializing array.' + END IF + + PRINT *, 'Serializing verts_end_index=verts_end_index(min_rlvert:)' + + IF (SIZE(verts_end_index) > 0) THEN + !$ser data verts_end_index=verts_end_index(min_rlvert:) + ELSE + PRINT *, 'Warning: Array verts_end_index has size 0. Not serializing array.' 
+ END IF From 1f97fca22f05b589ec8ce48ae19770c769edfd94 Mon Sep 17 00:00:00 2001 From: samkellerhals Date: Thu, 11 May 2023 13:42:36 +0200 Subject: [PATCH 15/21] Fix f2ser and liskov serialisation --- .../codegen/serialisation/deserialise.py | 16 +++++++- .../liskov/codegen/serialisation/interface.py | 1 + .../liskov/codegen/serialisation/template.py | 10 +++-- .../expected_diffusion_granule_savepoint.f90 | 40 +++++++++++-------- 4 files changed, 45 insertions(+), 22 deletions(-) diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py b/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py index 3ce3c81ef1..30ce8ffd17 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py @@ -17,6 +17,7 @@ from icon4py.common.logger import setup_logger from icon4py.liskov.codegen.integration.deserialise import ( TOLERANCE_ARGS, + DeclareDataFactory, _extract_stencil_name, pop_item_from_dict, ) @@ -63,6 +64,7 @@ def __call__(self, parsed: ts.ParsedDict) -> list[SavepointData]: end_stencil = extract_directive( parsed["directives"], icon4py.liskov.parsing.parse.EndStencil ) + gpu_fields = self.get_gpu_fields(parsed) repeated = self._find_repeated_stencils(parsed["content"]) @@ -82,7 +84,7 @@ def __call__(self, parsed: ts.ParsedDict) -> list[SavepointData]: for k, v in self._get_timestep_variables(stencil_name).items() ] - fields = self._make_fields(field_names) + fields = self._make_fields(field_names, gpu_fields) for intent, ln in [("start", start.startln), ("end", end.startln)]: savepoint = SavepointData( @@ -96,6 +98,15 @@ def __call__(self, parsed: ts.ParsedDict) -> list[SavepointData]: return deserialised + def get_gpu_fields(self, parsed: ts.ParsedDict) -> set[str]: + """Get declared fields which will be loaded on GPU and thus need to be serialised using accdata.""" + declare = DeclareDataFactory()(parsed) + fields = [] + for d in declare: + for f in 
d.declarations: + fields.append(f) + return set(fields) + @staticmethod def _remove_unnecessary_keys(named_args: dict) -> dict: """Remove unnecessary keys from named_args, and only return field names.""" @@ -108,7 +119,7 @@ def _remove_unnecessary_keys(named_args: dict) -> dict: return copy @staticmethod - def _make_fields(named_args: dict) -> list[FieldSerialisationData]: + def _make_fields(named_args: dict, gpu_fields: set) -> list[FieldSerialisationData]: """Create a list of FieldSerialisationData objects based on named arguments.""" fields = [ FieldSerialisationData( @@ -121,6 +132,7 @@ def _make_fields(named_args: dict) -> list[FieldSerialisationData]: typespec=None, typename=None, ptr_var=None, + device="gpu" if variable in gpu_fields else "cpu", ) for variable, association in named_args.items() if variable not in SKIP_VARS diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py index 364fc65f50..8868b1935f 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py @@ -34,6 +34,7 @@ class FieldSerialisationData: variable: str association: str decomposed: bool = False + device: Optional[str] = "cpu" dimension: Optional[list[str]] = None typespec: Optional[str] = None typename: Optional[str] = None diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/template.py b/liskov/src/icon4py/liskov/codegen/serialisation/template.py index 6603624797..dc942e6a7d 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/template.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/template.py @@ -28,6 +28,7 @@ class Field(eve.Node): typespec: Optional[str] typename: Optional[str] ptr_var: Optional[str] + device: str class StandardFields(eve.Node): @@ -88,12 +89,12 @@ class SavepointStatementGenerator(TemplatedGenerator): PRINT *, 'Serializing {{ f.variable }}={{ f.association }}' {% if f.dimension 
%} IF (SIZE({{ f.variable }}) > 0) THEN - !$ser data {{ f.variable }}={{ f.association }} + !$ser {% if f.device == 'gpu' %}accdata {% else %}data {% endif %}{{ f.variable }}={{ f.association }} ELSE PRINT *, 'Warning: Array {{ f.variable }} has size 0. Not serializing array.' ENDIF {% else %} - !$ser data {{ f.variable }}={{ f.association }} + !$ser {% if f.device == 'gpu' %}accdata {% else %}data {% endif %}{{ f.variable }}={{ f.association }} {% endif %} {% endfor %} """ @@ -102,7 +103,7 @@ class SavepointStatementGenerator(TemplatedGenerator): DecomposedFieldDeclarations = as_jinja( """ {% for f in _this_node.fields %} - !$ser verbatim {{ f.typespec }}, dimension({{ ",".join(f.dimension) }}), allocatable :: {{ f.variable }}_{{ f.ptr_var}} + !$ser verbatim {{ f.typespec }}, dimension({{ ",".join(f.dimension) }}), allocatable :: {{ f.variable }}_{{ f.ptr_var}}({{ ",".join(f.dimension) }}) {% endfor %} """ ) @@ -111,7 +112,8 @@ class SavepointStatementGenerator(TemplatedGenerator): """ {% for f in _this_node.fields %} !$ser verbatim allocate({{ f.variable }}_{{ f.ptr_var}}({{ f.alloc_dims }})) - !$ser data {{ f.variable }}_{{ f.ptr_var}}={{ f.association }} + !$ser verbatim {{ f.variable }}_{{ f.ptr_var}} = {{ f.variable }}%{{ f.ptr_var}} + !$ser {% if f.device == 'gpu' %}accdata {% else %}data {% endif %}{{ f.variable }}_{{ f.ptr_var}}={{ f.variable }}_{{ f.ptr_var}} !$ser verbatim deallocate({{ f.variable }}_{{ f.ptr_var}}) {% endfor %} """ diff --git a/testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 b/testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 index 19a3653d97..ba2d232dcc 100644 --- a/testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 +++ b/testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 @@ -1,54 +1,62 @@ - !$ser verbatim real, dimension(:,:,:), allocatable :: edges_primal_normal_vert_v1 + !$ser verbatim real, dimension(:,:,:), 
allocatable :: edges_primal_normal_vert_v1(:,:,:) - !$ser verbatim real, dimension(:,:,:), allocatable :: edges_primal_normal_vert_v2 + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_primal_normal_vert_v2(:,:,:) - !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_vert_v1 + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_vert_v1(:,:,:) - !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_vert_v2 + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_vert_v2(:,:,:) - !$ser verbatim real, dimension(:,:,:), allocatable :: edges_primal_normal_cell_v1 + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_primal_normal_cell_v1(:,:,:) - !$ser verbatim real, dimension(:,:,:), allocatable :: edges_primal_normal_cell_v2 + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_primal_normal_cell_v2(:,:,:) - !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_cell_v1 + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_cell_v1(:,:,:) - !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_cell_v2 + !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_cell_v2(:,:,:) !$ser init directory="." 
prefix="test" !$ser savepoint diffusion_init_in !$ser verbatim allocate(edges_primal_normal_vert_v1(size(edges_primal_normal_vert, 1),size(edges_primal_normal_vert, 2),size(edges_primal_normal_vert, 3))) - !$ser data edges_primal_normal_vert_v1=edges_primal_normal_vert_v1(:,:,:) + !$ser verbatim edges_primal_normal_vert_v1 = edges_primal_normal_vert%v1 + !$ser data edges_primal_normal_vert_v1=edges_primal_normal_vert_v1 !$ser verbatim deallocate(edges_primal_normal_vert_v1) !$ser verbatim allocate(edges_primal_normal_vert_v2(size(edges_primal_normal_vert, 1),size(edges_primal_normal_vert, 2),size(edges_primal_normal_vert, 3))) - !$ser data edges_primal_normal_vert_v2=edges_primal_normal_vert_v2(:,:,:) + !$ser verbatim edges_primal_normal_vert_v2 = edges_primal_normal_vert%v2 + !$ser data edges_primal_normal_vert_v2=edges_primal_normal_vert_v2 !$ser verbatim deallocate(edges_primal_normal_vert_v2) !$ser verbatim allocate(edges_dual_normal_vert_v1(size(edges_dual_normal_vert, 1),size(edges_dual_normal_vert, 2),size(edges_dual_normal_vert, 3))) - !$ser data edges_dual_normal_vert_v1=edges_dual_normal_vert_v1(:,:,:) + !$ser verbatim edges_dual_normal_vert_v1 = edges_dual_normal_vert%v1 + !$ser data edges_dual_normal_vert_v1=edges_dual_normal_vert_v1 !$ser verbatim deallocate(edges_dual_normal_vert_v1) !$ser verbatim allocate(edges_dual_normal_vert_v2(size(edges_dual_normal_vert, 1),size(edges_dual_normal_vert, 2),size(edges_dual_normal_vert, 3))) - !$ser data edges_dual_normal_vert_v2=edges_dual_normal_vert_v2(:,:,:) + !$ser verbatim edges_dual_normal_vert_v2 = edges_dual_normal_vert%v2 + !$ser data edges_dual_normal_vert_v2=edges_dual_normal_vert_v2 !$ser verbatim deallocate(edges_dual_normal_vert_v2) !$ser verbatim allocate(edges_primal_normal_cell_v1(size(edges_primal_normal_cell, 1),size(edges_primal_normal_cell, 2),size(edges_primal_normal_cell, 3))) - !$ser data edges_primal_normal_cell_v1=edges_primal_normal_cell_v1(:,:,:) + !$ser verbatim 
edges_primal_normal_cell_v1 = edges_primal_normal_cell%v1 + !$ser data edges_primal_normal_cell_v1=edges_primal_normal_cell_v1 !$ser verbatim deallocate(edges_primal_normal_cell_v1) !$ser verbatim allocate(edges_primal_normal_cell_v2(size(edges_primal_normal_cell, 1),size(edges_primal_normal_cell, 2),size(edges_primal_normal_cell, 3))) - !$ser data edges_primal_normal_cell_v2=edges_primal_normal_cell_v2(:,:,:) + !$ser verbatim edges_primal_normal_cell_v2 = edges_primal_normal_cell%v2 + !$ser data edges_primal_normal_cell_v2=edges_primal_normal_cell_v2 !$ser verbatim deallocate(edges_primal_normal_cell_v2) !$ser verbatim allocate(edges_dual_normal_cell_v1(size(edges_dual_normal_cell, 1),size(edges_dual_normal_cell, 2),size(edges_dual_normal_cell, 3))) - !$ser data edges_dual_normal_cell_v1=edges_dual_normal_cell_v1(:,:,:) + !$ser verbatim edges_dual_normal_cell_v1 = edges_dual_normal_cell%v1 + !$ser data edges_dual_normal_cell_v1=edges_dual_normal_cell_v1 !$ser verbatim deallocate(edges_dual_normal_cell_v1) !$ser verbatim allocate(edges_dual_normal_cell_v2(size(edges_dual_normal_cell, 1),size(edges_dual_normal_cell, 2),size(edges_dual_normal_cell, 3))) - !$ser data edges_dual_normal_cell_v2=edges_dual_normal_cell_v2(:,:,:) + !$ser verbatim edges_dual_normal_cell_v2 = edges_dual_normal_cell%v2 + !$ser data edges_dual_normal_cell_v2=edges_dual_normal_cell_v2 !$ser verbatim deallocate(edges_dual_normal_cell_v2) PRINT *, 'Serializing cvd_o_rd=cvd_o_rd' From 5d1c204ceff0a2717d5bbc8069a7f51d121ec801 Mon Sep 17 00:00:00 2001 From: samkellerhals Date: Thu, 11 May 2023 14:06:20 +0200 Subject: [PATCH 16/21] Update README --- liskov/README.md | 4 ++-- pyutils/README.md | 56 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/liskov/README.md b/liskov/README.md index ffa4a5580f..a7bf033633 100644 --- a/liskov/README.md +++ b/liskov/README.md @@ -15,10 +15,10 @@ The icon4py-liskov package includes the `icon_liskov` CLI 
tool which takes a for To use the `icon_liskov` tool, run the following command: ```bash -icon_liskov <input_filepath> <output_filepath> [--profile] [--metadatagen] +icon_liskov <input_filepath> <output_filepath> [--profile] [--metadatagen] [--ppser] ``` -Where `input_filepath` is the path to the input file to be processed, and `output_filepath` is the path to the output file. The optional `--profile` flag adds nvtx profile statements to the stencils. +Where `input_filepath` is the path to the input file to be processed, and `output_filepath` is the path to the output file. The optional `--profile` flag adds nvtx profile statements to the stencils. The `--metadatagen` flag generates a metadata header at the top of the file which includes information on icon_liskov such as the git tag and commit hash. The `--ppser` flag activates serialisation mode and will trigger the generation of `ppser` serialisation statements serialising all variables at the start and end of each stencil directive. The data will be saved at the default folder location of the currently run experiment and will have a prefix of `liskov-serialisation`. ### Preprocessor directives diff --git a/pyutils/README.md b/pyutils/README.md index 931007a2b4..d57eed8d3f 100644 --- a/pyutils/README.md +++ b/pyutils/README.md @@ -1,21 +1,34 @@ # icon4py-pyutils -## Description +## icon4pygen -Python utilities for ICON4Py. +The `icon4pygen` tool generates GridTools C++ code as well as Fortran and C++ bindings for an icon4py fencil, so that it can be executed from within ICON. -### icongen +### Usage: -Generates from GT4Py Field View programs +`icon4pygen [OPTIONS] FENCIL BLOCK_SIZE LEVELS_PER_THREAD OUTPATH` -- GridTools C++ `gridtools::fn` code, -- field metadata to be used in dawn bindings generator for GT4Py. +#### Arguments: -## Installation instructions +``` +FENCIL: The fencil to generate code for. It can be specified as <module>:<member>, where <module> is the dotted name of the containing module and <member> is the name of the fencil. +BLOCK_SIZE: The number of threads per block to use in a cuda kernel. 
+LEVELS_PER_THREAD: How many k-levels to process per thread. +OUTPATH: The folder in which to write all generated code. +``` + +#### Options + +``` +--is_global: Flag to indicate if the stencil is global. +--imperative: Flag to indicate if the generated code should be written imperatively. +``` + +#### Example: -Check `README.md` file in the root of the repository. +`icon4pygen icon4py.atm_dyn_iconam.mo_velocity_advection_stencil_1:mo_velocity_advection_stencil_1 128 4 /path/to/output/folder` -## Autocomplete +#### Autocomplete In order to turn on autocomplete in your shell for `icon4pygen` you need to execute the following in your shell: @@ -24,3 +37,28 @@ eval "$(_ICON4PYGEN_COMPLETE=bash_source icon4pygen)" ``` To permanently enable autocomplete on your system add the above statement to your `~/.bashrc` file. + +## f2ser + +This tool is designed to parse a well-defined Fortran granule interface and generate ppser statements for each variable in the interface. It uses the `f2py` library to perform the parsing and `liskov` for the generation tasks. + +### Usage + +`f2ser [OPTIONS] GRANULE_PATH OUTPUT_FILEPATH` + +### Arguments + +``` +GRANULE_PATH A path to the Fortran source file to be parsed. +OUTPUT_FILEPATH A path to the output Fortran source file to be generated. +``` + +### Options + +``` +--dependencies PATH Optional list of dependency paths. +--directory TEXT The directory to serialise the variables to. +--prefix TEXT The prefix to use for each serialised variable. +``` + +**Note:** The output of f2ser still has to be preprocessed using `pp_ser.py`, which then yields a compilable unit. The serialised files will have `f2ser` as their prefix in the default folder location of the experiment. 
From 230f1a54a12b8fa4184058c3ee9438a08a213c3d Mon Sep 17 00:00:00 2001 From: Samuel Date: Wed, 17 May 2023 16:32:07 +0200 Subject: [PATCH 17/21] Add multinode serialisation (#207) --- liskov/src/icon4py/liskov/cli.py | 11 ++- .../codegen/serialisation/deserialise.py | 16 ++- .../liskov/codegen/serialisation/generate.py | 16 ++- .../liskov/codegen/serialisation/interface.py | 5 + .../liskov/codegen/serialisation/template.py | 11 ++- .../src/icon4py/liskov/pipeline/collection.py | 7 +- liskov/tests/test_cli.py | 1 + liskov/tests/test_generation.py | 27 ++++-- pyutils/src/icon4py/f2ser/cli.py | 11 ++- pyutils/src/icon4py/f2ser/deserialise.py | 24 +++-- pyutils/src/icon4py/f2ser/parse.py | 53 +++++++--- pyutils/tests/f2ser/test_f2ser_cli.py | 5 +- pyutils/tests/f2ser/test_f2ser_codegen.py | 8 +- .../tests/f2ser/test_granule_deserialiser.py | 97 ++++++++++--------- pyutils/tests/f2ser/test_parsing.py | 27 +++--- .../expected_diffusion_granule_savepoint.f90 | 2 +- 16 files changed, 223 insertions(+), 98 deletions(-) diff --git a/liskov/src/icon4py/liskov/cli.py b/liskov/src/icon4py/liskov/cli.py index fb67f550ff..31889b3d7f 100644 --- a/liskov/src/icon4py/liskov/cli.py +++ b/liskov/src/icon4py/liskov/cli.py @@ -55,12 +55,20 @@ is_flag=True, help="Add metadata header with information about program (requires git).", ) +@click.option( + "--multinode", + is_flag=True, + type=bool, + help="Specify whether it is a multinode run.", + default=False, +) def main( input_path: pathlib.Path, output_path: pathlib.Path, ppser: bool, profile: bool, metadatagen: bool, + multinode: bool, ) -> None: """Command line interface for interacting with the ICON-Liskov DSL Preprocessor. @@ -71,6 +79,7 @@ def main( -p --profile Add nvtx profile statements to stencils. -m --metadatagen Add metadata header with information about program (requires git). --ppser Generate ppser serialization statements instead of integration code. + --multinode: Considers this a multinode run. 
Arguments: input_path: Path to the input file to process. @@ -80,7 +89,7 @@ def main( def run_serialisation() -> None: iface = parse_fortran_file(input_path, output_path, mode) - run_code_generation(input_path, output_path, mode, iface) + run_code_generation(input_path, output_path, mode, iface, multinode=multinode) def run_integration() -> None: iface = parse_fortran_file(input_path, output_path, mode) diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py b/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py index 30ce8ffd17..385016d821 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py @@ -23,6 +23,7 @@ ) from icon4py.liskov.codegen.serialisation.interface import ( FieldSerialisationData, + ImportData, InitData, Metadata, SavepointData, @@ -56,6 +57,8 @@ def __call__(self, parsed: ts.ParsedDict) -> InitData: class SavepointDataFactory: + dtype = SavepointData + def __call__(self, parsed: ts.ParsedDict) -> list[SavepointData]: """Create a list of Start and End Savepoints for each Start and End Stencil directive.""" start_stencil = extract_directive( @@ -87,7 +90,7 @@ def __call__(self, parsed: ts.ParsedDict) -> list[SavepointData]: fields = self._make_fields(field_names, gpu_fields) for intent, ln in [("start", start.startln), ("end", end.startln)]: - savepoint = SavepointData( + savepoint = self.dtype( subroutine=f"{stencil_name}", intent=intent, startln=ln, @@ -203,9 +206,20 @@ def _find_repeated_stencils(self, content): return set(repeated_names) +class ImportDataFactory: + dtype = ImportData + + def __call__(self, parsed: ts.ParsedDict) -> ImportData: + imports = extract_directive( + parsed["directives"], icon4py.liskov.parsing.parse.Imports + )[0] + return self.dtype(startln=imports.startln) + + class SerialisationCodeDeserialiser(Deserialiser): _FACTORIES = { "Init": InitDataFactory(), "Savepoint": SavepointDataFactory(), + "Import": 
ImportDataFactory(), } _INTERFACE_TYPE = SerialisationCodeInterface diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/generate.py b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py index 25d42c7f4a..f1b56b30a5 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/generate.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py @@ -17,6 +17,8 @@ SerialisationCodeInterface, ) from icon4py.liskov.codegen.serialisation.template import ( + ImportStatement, + ImportStatementGenerator, SavepointStatement, SavepointStatementGenerator, ) @@ -28,15 +30,25 @@ class SerialisationCodeGenerator(CodeGenerator): - def __init__(self, interface: SerialisationCodeInterface): + def __init__(self, interface: SerialisationCodeInterface, multinode: bool = False): super().__init__() self.interface = interface + self.multinode = multinode def __call__(self, data: Any = None) -> list[GeneratedCode]: """Generate all f90 code for integration.""" + self._generate_import() self._generate_savepoints() return self.generated + def _generate_import(self) -> None: + if self.multinode: + self._generate( + ImportStatement, + ImportStatementGenerator, + self.interface.Import.startln, + ) + def _generate_savepoints(self) -> None: init_complete = False for i, savepoint in enumerate(self.interface.Savepoint): @@ -47,6 +59,7 @@ def _generate_savepoints(self) -> None: SavepointStatementGenerator, self.interface.Savepoint[i].startln, savepoint=savepoint, + multinode=self.multinode, ) else: self._generate( @@ -55,5 +68,6 @@ def _generate_savepoints(self) -> None: self.interface.Savepoint[i].startln, savepoint=savepoint, init=self.interface.Init, + multinode=self.multinode, ) init_complete = True diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py index 8868b1935f..301a4d2366 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/interface.py +++ 
b/liskov/src/icon4py/liskov/codegen/serialisation/interface.py @@ -49,7 +49,12 @@ class SavepointData(CodeGenInput): metadata: Optional[list[Metadata]] +class ImportData(CodeGenInput): + ... + + @dataclass class SerialisationCodeInterface: + Import: ImportData Init: InitData Savepoint: list[SavepointData] diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/template.py b/liskov/src/icon4py/liskov/codegen/serialisation/template.py index dc942e6a7d..120e73bed2 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/template.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/template.py @@ -46,6 +46,7 @@ class DecomposedFieldDeclarations(DecomposedFields): class SavepointStatement(eve.Node): savepoint: SavepointData init: Optional[InitData] = eve.datamodels.field(default=None) + multinode: bool standard_fields: StandardFields = eve.datamodels.field(init=False) decomposed_fields: DecomposedFields = eve.datamodels.field(init=False) decomposed_field_declarations: DecomposedFieldDeclarations = eve.datamodels.field( @@ -72,7 +73,7 @@ class SavepointStatementGenerator(TemplatedGenerator): {{ decomposed_field_declarations }} {% if _this_node.init %} - !$ser init directory="{{_this_node.init.directory}}" prefix="{{ _this_node.init.prefix }}" + !$ser init directory="{{_this_node.init.directory}}" prefix="{{ _this_node.init.prefix }}" {% if _this_node.multinode %}mpi_rank=get_my_mpi_work_id(){% endif %} {% endif %} !$ser savepoint {{ _this_node.savepoint.subroutine }}_{{ _this_node.savepoint.intent }} {% if _this_node.savepoint.metadata %} {%- for m in _this_node.savepoint.metadata -%} {{ m.key }}={{ m.value }} {% endfor -%} {% endif %} @@ -131,3 +132,11 @@ def generate_size_strings(colon_list, var_name): f.alloc_dims = ",".join(generate_size_strings(f.dimension, f.variable)) return self.generic_visit(node) + + +class ImportStatement(eve.Node): + ... 
+ + +class ImportStatementGenerator(TemplatedGenerator): + ImportStatement = as_jinja(" USE mo_mpi, ONLY: get_my_mpi_work_id") diff --git a/liskov/src/icon4py/liskov/pipeline/collection.py b/liskov/src/icon4py/liskov/pipeline/collection.py index 5994cd28fd..115e83f685 100644 --- a/liskov/src/icon4py/liskov/pipeline/collection.py +++ b/liskov/src/icon4py/liskov/pipeline/collection.py @@ -43,7 +43,10 @@ @linear_pipeline def parse_fortran_file( - input_filepath: Path, output_filepath: Path, deserialiser_type: str + input_filepath: Path, + output_filepath: Path, + deserialiser_type: str, + **kwargs, ) -> list[Step]: """Execute a pipeline to parse and deserialize directives from a file. @@ -66,7 +69,7 @@ def parse_fortran_file( return [ DirectivesScanner(input_filepath), DirectivesParser(input_filepath, output_filepath), - deserialiser(), + deserialiser(**kwargs), ] diff --git a/liskov/tests/test_cli.py b/liskov/tests/test_cli.py index efbc68b47d..2eefb00cfd 100644 --- a/liskov/tests/test_cli.py +++ b/liskov/tests/test_cli.py @@ -52,6 +52,7 @@ def outfile(tmp_path): (MULTIPLE_STENCILS, ["--profile"]), (REPEATED_STENCILS, ["--ppser", "--profile"]), (REPEATED_STENCILS, ["--profile"]), + (MULTIPLE_STENCILS, ["--ppser", "--multinode"]), ], ) def test_cli(make_f90_tmpfile, cli, file, outfile, options): diff --git a/liskov/tests/test_generation.py b/liskov/tests/test_generation.py index 7c82076bf6..fb61064055 100644 --- a/liskov/tests/test_generation.py +++ b/liskov/tests/test_generation.py @@ -36,6 +36,7 @@ ) from icon4py.liskov.codegen.serialisation.interface import ( FieldSerialisationData, + ImportData, InitData, Metadata, SavepointData, @@ -242,11 +243,11 @@ def test_integration_code_generation( assert generated[9].source == expected_insert_source -# TODO: fix tests to adapt to new custom output fields @pytest.fixture def serialisation_code_interface(): interface = { - "Init": InitData(startln=0, directory=".", prefix="liskov-serialisation"), + "Import": 
ImportData(startln=0), + "Init": InitData(startln=1, directory=".", prefix="liskov-serialisation"), "Savepoint": [ SavepointData( startln=9, @@ -306,8 +307,9 @@ def serialisation_code_interface(): @pytest.fixture def expected_savepoints(): return [ + " USE mo_mpi, ONLY: get_my_mpi_work_id", """ - !$ser init directory="." prefix="liskov-serialisation" + !$ser init directory="." prefix="liskov-serialisation" mpi_rank=get_my_mpi_work_id() !$ser savepoint apply_nabla2_to_vn_in_lateral_boundary_start jstep=jstep_ptr diffctr=diffctr @@ -327,10 +329,19 @@ def expected_savepoints(): ] +@pytest.mark.parametrize("multinode", [False, True]) def test_serialisation_code_generation( - serialisation_code_interface, expected_savepoints + serialisation_code_interface, expected_savepoints, multinode ): - generated = SerialisationCodeGenerator(serialisation_code_interface)() - assert len(generated) == 2 - assert generated[0].source == expected_savepoints[0] - assert generated[1].source == expected_savepoints[1] + generated = SerialisationCodeGenerator( + serialisation_code_interface, multinode=multinode + )() + + if multinode: + assert len(generated) == 3 + assert generated[0].source == expected_savepoints[0] + assert generated[1].source == expected_savepoints[1] + assert generated[2].source == expected_savepoints[2] + else: + assert len(generated) == 2 + assert generated[1].source == expected_savepoints[2] diff --git a/pyutils/src/icon4py/f2ser/cli.py b/pyutils/src/icon4py/f2ser/cli.py index 0dfeba93a3..1f23b6738e 100644 --- a/pyutils/src/icon4py/f2ser/cli.py +++ b/pyutils/src/icon4py/f2ser/cli.py @@ -48,12 +48,20 @@ @click.option( "--prefix", type=str, help="Prefix to use for serialised files.", default="f2ser" ) +@click.option( + "--multinode", + is_flag=True, + type=bool, + help="Specify whether it is a multinode run.", + default=False, +) def main( granule_path: pathlib.Path, dependencies: Optional[list[pathlib.Path]], output_filepath: pathlib.Path, directory: str, prefix: str, 
+ multinode: bool, ) -> None: """Command line interface for interacting with the ICON-f2ser serialization Preprocessor. @@ -62,10 +70,11 @@ def main( output_filepath (Path): A path to the output Fortran source file to be generated. directory (str): The directory to serialise the variables to. prefix (str): The prefix to use for each serialised variable. + multinode (bool): Specify whether this is a multinode run. """ parsed = GranuleParser(granule_path, dependencies)() interface = ParsedGranuleDeserialiser(parsed, directory=directory, prefix=prefix)() - generated = SerialisationCodeGenerator(interface)() + generated = SerialisationCodeGenerator(interface, multinode=multinode)() CodegenWriter(granule_path, output_filepath)(generated) diff --git a/pyutils/src/icon4py/f2ser/deserialise.py b/pyutils/src/icon4py/f2ser/deserialise.py index 2b32240f23..8705236914 100644 --- a/pyutils/src/icon4py/f2ser/deserialise.py +++ b/pyutils/src/icon4py/f2ser/deserialise.py @@ -14,6 +14,7 @@ from icon4py.f2ser.parse import CodegenContext, ParsedGranule from icon4py.liskov.codegen.serialisation.interface import ( FieldSerialisationData, + ImportData, InitData, SavepointData, SerialisationCodeInterface, @@ -21,11 +22,16 @@ class ParsedGranuleDeserialiser: - def __init__(self, parsed: ParsedGranule, directory: str, prefix: str): + def __init__( + self, + parsed: ParsedGranule, + directory: str = ".", + prefix: str = "f2ser", + ): self.parsed = parsed self.directory = directory self.prefix = prefix - self.data = {"Savepoint": [], "Init": ...} + self.data = {"Savepoint": [], "Init": ..., "Import": ...} def __call__(self) -> SerialisationCodeInterface: """Deserialise the parsed granule and returns a serialisation interface. 
@@ -36,6 +42,7 @@ def __call__(self) -> SerialisationCodeInterface: self._merge_out_inout_fields() self._make_savepoints() self._make_init_data() + self._make_imports() return SerialisationCodeInterface(**self.data) def _make_savepoints(self) -> None: @@ -44,7 +51,7 @@ def _make_savepoints(self) -> None: Returns: None. """ - for subroutine_name, intent_dict in self.parsed.items(): + for subroutine_name, intent_dict in self.parsed.subroutines.items(): for intent, var_dict in intent_dict.items(): self._create_savepoint(subroutine_name, intent, var_dict) @@ -128,7 +135,7 @@ def _make_init_data(self) -> None: """ first_intent_in_subroutine = [ var_dict - for intent_dict in self.parsed.values() + for intent_dict in self.parsed.subroutines.values() for intent, var_dict in intent_dict.items() if intent == "in" ][0] @@ -136,7 +143,9 @@ def _make_init_data(self) -> None: first_intent_in_subroutine["codegen_ctx"], "init" ) self.data["Init"] = InitData( - startln=startln, directory=self.directory, prefix=self.prefix + startln=startln, + directory=self.directory, + prefix=self.prefix, ) def _merge_out_inout_fields(self): @@ -145,7 +154,7 @@ def _merge_out_inout_fields(self): Returns: None. 
""" - for _, intent_dict in self.parsed.items(): + for _, intent_dict in self.parsed.subroutines.items(): if "inout" in intent_dict: intent_dict["in"].update(intent_dict["inout"]) intent_dict["out"].update(intent_dict["inout"]) @@ -161,3 +170,6 @@ def _get_codegen_line(ctx: CodegenContext, intent: str): return ctx.first_declaration_ln else: raise ValueError(f"Unrecognized intent: {intent}") + + def _make_imports(self): + self.data["Import"] = ImportData(startln=self.parsed.last_import_ln) diff --git a/pyutils/src/icon4py/f2ser/parse.py b/pyutils/src/icon4py/f2ser/parse.py index 09755f92a4..5080ce9ba4 100644 --- a/pyutils/src/icon4py/f2ser/parse.py +++ b/pyutils/src/icon4py/f2ser/parse.py @@ -38,7 +38,13 @@ class CodegenContext: end_subroutine_ln: int -ParsedGranule = dict[str, dict[str, dict[str, any] | CodegenContext]] +ParsedSubroutines = dict[str, dict[str, dict[str, any] | CodegenContext]] + + +@dataclass +class ParsedGranule: + subroutines: ParsedSubroutines + last_import_ln: int class GranuleParser: @@ -56,12 +62,17 @@ class GranuleParser: def __init__( self, granule: Path, dependencies: Optional[list[Path]] = None ) -> None: - self.granule = granule + self.granule_path = granule self.dependencies = dependencies def __call__(self) -> ParsedGranule: """Parse the granule and return the parsed data.""" - subroutines = self._extract_subroutines(crack(self.granule)) + subroutines = self.parse_subroutines() + last_import_ln = self.find_last_fortran_use_statement() + return ParsedGranule(subroutines=subroutines, last_import_ln=last_import_ln) + + def parse_subroutines(self): + subroutines = self._extract_subroutines(crack(self.granule_path)) variables_grouped_by_intent = { name: self._extract_intent_vars(routine) for name, routine in subroutines.items() @@ -70,7 +81,8 @@ def __call__(self) -> ParsedGranule: variables_grouped_by_intent ) combined_type_vars = self._combine_types(derived_type_vars, intrinsic_type_vars) - return 
self._update_with_codegen_lines(combined_type_vars) + with_lines = self._update_with_codegen_lines(combined_type_vars) + return with_lines def _extract_subroutines(self, parsed: dict[str, any]) -> dict[str, any]: """Extract the _init and _run subroutines from the parsed granule. @@ -89,7 +101,7 @@ def _extract_subroutines(self, parsed: dict[str, any]) -> dict[str, any]: if len(subroutines) != 2: raise ParsingError( - f"Did not find _init and _run subroutines in {self.granule}" + f"Did not find _init and _run subroutines in {self.granule_path}" ) return subroutines @@ -243,12 +255,31 @@ def _update_with_codegen_lines(self, parsed_types: dict) -> dict: with_lines = deepcopy(parsed_types) for subroutine in with_lines: for intent in with_lines[subroutine]: - with_lines[subroutine][intent]["codegen_ctx"] = self.get_line_numbers( - subroutine - ) + with_lines[subroutine][intent][ + "codegen_ctx" + ] = self.get_subroutine_lines(subroutine) return with_lines - def get_line_numbers(self, subroutine_name: str) -> CodegenContext: + def find_last_fortran_use_statement(self): + with open(self.granule_path) as f: + file_contents = f.readlines() + + # Reverse the order of the lines so we can search from the end + file_contents.reverse() + + # Look for the last USE statement + use_ln = None + for i, line in enumerate(file_contents): + if line.strip().lower().startswith("use"): + use_ln = len(file_contents) - i + if i > 0 and file_contents[i - 1].strip().lower() == "#endif": + # If the USE statement is preceded by an #endif statement, return the line number after the #endif statement + return use_ln + 1 + else: + return use_ln + return None + + def get_subroutine_lines(self, subroutine_name: str) -> CodegenContext: """Return CodegenContext object containing line numbers of the last declaration statement and the code before the end of the given subroutine. 
Args: @@ -257,7 +288,7 @@ def get_line_numbers(self, subroutine_name: str) -> CodegenContext: Returns: CodegenContext: Object containing the line number of the last declaration statement and the line number of the last line of the code before the end of the given subroutine. """ - with open(self.granule, "r") as f: + with open(self.granule_path) as f: code = f.read() # Find the line number where the subroutine is defined @@ -280,7 +311,7 @@ def get_line_numbers(self, subroutine_name: str) -> CodegenContext: if re.search(declaration_pattern, line) ] if not declaration_pattern_lines: - raise ParsingError(f"No declarations found in {self.granule}") + raise ParsingError(f"No declarations found in {self.granule_path}") last_declaration_ln = declaration_pattern_lines[-1] + start_subroutine_ln + 1 first_declaration_ln = declaration_pattern_lines[0] + start_subroutine_ln diff --git a/pyutils/tests/f2ser/test_f2ser_cli.py b/pyutils/tests/f2ser/test_f2ser_cli.py index b32d87e1d9..e4f27dcdbd 100644 --- a/pyutils/tests/f2ser/test_f2ser_cli.py +++ b/pyutils/tests/f2ser/test_f2ser_cli.py @@ -28,10 +28,13 @@ def cli(): return CliRunner() -def test_cli(diffusion_granule, diffusion_granule_deps, outfile, cli): +@pytest.mark.parametrize("multinode", [False, True]) +def test_cli(diffusion_granule, diffusion_granule_deps, outfile, cli, multinode): inp = str(diffusion_granule) deps = [str(p) for p in diffusion_granule_deps] args = [inp, outfile, "-d", ",".join(deps)] + if multinode: + args.append("--multinode") result = cli.invoke(main, args) assert result.exit_code == 0 diff --git a/pyutils/tests/f2ser/test_f2ser_codegen.py b/pyutils/tests/f2ser/test_f2ser_codegen.py index b6d0c38ed3..23604e7d93 100644 --- a/pyutils/tests/f2ser/test_f2ser_codegen.py +++ b/pyutils/tests/f2ser/test_f2ser_codegen.py @@ -23,7 +23,7 @@ def test_deserialiser_diffusion_codegen(diffusion_granule, diffusion_granule_deps): parsed = GranuleParser(diffusion_granule, diffusion_granule_deps)() - interface = 
ParsedGranuleDeserialiser(parsed, directory=".", prefix="test")() + interface = ParsedGranuleDeserialiser(parsed)() generated = SerialisationCodeGenerator(interface)() assert len(generated) == 3 @@ -34,7 +34,7 @@ def expected_no_deps_serialization_directives(): GeneratedCode( startln=12, source="\n" - ' !$ser init directory="." prefix="test"\n' + ' !$ser init directory="." prefix="f2ser"\n' "\n" " !$ser savepoint no_deps_init_in\n" "\n" @@ -93,7 +93,7 @@ def test_deserialiser_directives_no_deps_codegen( no_deps_source_file, expected_no_deps_serialization_directives ): parsed = GranuleParser(no_deps_source_file)() - interface = ParsedGranuleDeserialiser(parsed, directory=".", prefix="test")() + interface = ParsedGranuleDeserialiser(parsed)() generated = SerialisationCodeGenerator(interface)() assert generated == expected_no_deps_serialization_directives @@ -102,7 +102,7 @@ def test_deserialiser_directives_diffusion_codegen( diffusion_granule, diffusion_granule_deps, samples_path ): parsed = GranuleParser(diffusion_granule, diffusion_granule_deps)() - interface = ParsedGranuleDeserialiser(parsed, directory=".", prefix="test")() + interface = ParsedGranuleDeserialiser(parsed)() generated = SerialisationCodeGenerator(interface)() reference_savepoint = ( samples_path / "expected_diffusion_granule_savepoint.f90" diff --git a/pyutils/tests/f2ser/test_granule_deserialiser.py b/pyutils/tests/f2ser/test_granule_deserialiser.py index cbe16a325d..8c8a60b912 100644 --- a/pyutils/tests/f2ser/test_granule_deserialiser.py +++ b/pyutils/tests/f2ser/test_granule_deserialiser.py @@ -13,7 +13,7 @@ import pytest from icon4py.f2ser.deserialise import ParsedGranuleDeserialiser -from icon4py.f2ser.parse import CodegenContext, GranuleParser +from icon4py.f2ser.parse import CodegenContext, GranuleParser, ParsedGranule from icon4py.liskov.codegen.serialisation.interface import ( FieldSerialisationData, SavepointData, @@ -23,60 +23,61 @@ @pytest.fixture def mock_parsed_granule(): - return { - 
"diffusion_init": { - "in": { - "jg": {"typespec": "integer", "attrspec": [], "intent": ["in"]}, - "vt": { - "typespec": "real", - "kindselector": {"kind": "vp"}, - "attrspec": [], - "intent": ["in"], - "dimension": [":", ":", ":"], - }, - "codegen_ctx": CodegenContext(432, 450, 600), - } - }, - "diffusion_run": { - "out": { - "vert_idx": { - "typespec": "logical", - "kindselector": {"kind": "vp"}, - "attrspec": [], - "intent": ["in"], - "dimension": [":", ":", ":"], - }, - "codegen_ctx": CodegenContext(800, 850, 1000), + return ParsedGranule( + subroutines={ + "diffusion_init": { + "in": { + "jg": {"typespec": "integer", "attrspec": [], "intent": ["in"]}, + "vt": { + "typespec": "real", + "kindselector": {"kind": "vp"}, + "attrspec": [], + "intent": ["in"], + "dimension": [":", ":", ":"], + }, + "codegen_ctx": CodegenContext(432, 450, 600), + } }, - "in": { - "vn": {"typespec": "integer", "attrspec": [], "intent": ["out"]}, - "vert_idx": { - "typespec": "logical", - "kindselector": {"kind": "vp"}, - "attrspec": [], - "intent": ["in"], - "dimension": [":", ":", ":"], + "diffusion_run": { + "out": { + "vert_idx": { + "typespec": "logical", + "kindselector": {"kind": "vp"}, + "attrspec": [], + "intent": ["in"], + "dimension": [":", ":", ":"], + }, + "codegen_ctx": CodegenContext(800, 850, 1000), }, - "codegen_ctx": CodegenContext(600, 690, 750), - }, - "inout": { - "vn": {"typespec": "integer", "attrspec": [], "intent": ["out"]}, - "vert_idx": { - "typespec": "logical", - "kindselector": {"kind": "vp"}, - "attrspec": [], - "intent": ["in"], - "dimension": [":", ":", ":"], + "in": { + "vn": {"typespec": "integer", "attrspec": [], "intent": ["out"]}, + "vert_idx": { + "typespec": "logical", + "kindselector": {"kind": "vp"}, + "attrspec": [], + "intent": ["in"], + "dimension": [":", ":", ":"], + }, + "codegen_ctx": CodegenContext(600, 690, 750), + }, + "inout": { + "vn": {"typespec": "integer", "attrspec": [], "intent": ["out"]}, + "vert_idx": { + "typespec": "logical", 
+ "kindselector": {"kind": "vp"}, + "attrspec": [], + "intent": ["in"], + "dimension": [":", ":", ":"], + }, }, }, }, - } + last_import_ln=59, + ) def test_deserialiser_mock(mock_parsed_granule): - deserialiser = ParsedGranuleDeserialiser( - mock_parsed_granule, directory=".", prefix="f2ser" - ) + deserialiser = ParsedGranuleDeserialiser(mock_parsed_granule) interface = deserialiser() assert isinstance(interface, SerialisationCodeInterface) assert len(interface.Savepoint) == 3 @@ -93,6 +94,6 @@ def test_deserialiser_mock(mock_parsed_granule): def test_deserialiser_diffusion_granule(diffusion_granule, diffusion_granule_deps): parser = GranuleParser(diffusion_granule, diffusion_granule_deps) parsed = parser() - deserialiser = ParsedGranuleDeserialiser(parsed, directory=".", prefix="f2ser") + deserialiser = ParsedGranuleDeserialiser(parsed) interface = deserialiser() assert len(interface.Savepoint) == 3 diff --git a/pyutils/tests/f2ser/test_parsing.py b/pyutils/tests/f2ser/test_parsing.py index 78e6985275..2b606262b5 100644 --- a/pyutils/tests/f2ser/test_parsing.py +++ b/pyutils/tests/f2ser/test_parsing.py @@ -19,30 +19,33 @@ def test_granule_parsing(diffusion_granule, diffusion_granule_deps): parser = GranuleParser(diffusion_granule, diffusion_granule_deps) - parsed = parser() + parsed_granule = parser() - assert list(parsed) == ["diffusion_init", "diffusion_run"] + subroutines = parsed_granule.subroutines - assert list(parsed["diffusion_init"]) == ["in"] - assert len(parsed["diffusion_init"]["in"]) == 107 - assert parsed["diffusion_init"]["in"]["codegen_ctx"] == CodegenContext( + assert list(subroutines) == ["diffusion_init", "diffusion_run"] + + assert list(subroutines["diffusion_init"]) == ["in"] + assert len(subroutines["diffusion_init"]["in"]) == 107 + assert subroutines["diffusion_init"]["in"]["codegen_ctx"] == CodegenContext( first_declaration_ln=190, last_declaration_ln=280, end_subroutine_ln=401 ) - assert list(parsed["diffusion_run"]) == ["in", "inout", 
"out"] - assert len(parsed["diffusion_run"]["in"]) == 5 - assert parsed["diffusion_run"]["in"]["codegen_ctx"] == CodegenContext( + assert list(subroutines["diffusion_run"]) == ["in", "inout", "out"] + assert len(subroutines["diffusion_run"]["in"]) == 5 + assert subroutines["diffusion_run"]["in"]["codegen_ctx"] == CodegenContext( first_declaration_ln=417, last_declaration_ln=492, end_subroutine_ln=1965 ) - assert len(parsed["diffusion_run"]["inout"]) == 8 + assert len(subroutines["diffusion_run"]["inout"]) == 8 - assert len(parsed["diffusion_run"]["out"]) == 5 - assert parsed["diffusion_run"]["out"]["codegen_ctx"] == CodegenContext( + assert len(subroutines["diffusion_run"]["out"]) == 5 + assert subroutines["diffusion_run"]["out"]["codegen_ctx"] == CodegenContext( first_declaration_ln=417, last_declaration_ln=492, end_subroutine_ln=1965 ) - assert isinstance(parsed, dict) + assert isinstance(subroutines, dict) + assert parsed_granule.last_import_ln == 60 def test_granule_parsing_missing_derived_typedef(diffusion_granule, samples_path): diff --git a/testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 b/testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 index ba2d232dcc..c7b8173854 100644 --- a/testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 +++ b/testutils/src/icon4py/testutils/fortran/expected_diffusion_granule_savepoint.f90 @@ -15,7 +15,7 @@ !$ser verbatim real, dimension(:,:,:), allocatable :: edges_dual_normal_cell_v2(:,:,:) - !$ser init directory="." prefix="test" + !$ser init directory="." 
prefix="f2ser" !$ser savepoint diffusion_init_in From e6142022c53a790419aff8965a2be44a1c58fcc9 Mon Sep 17 00:00:00 2001 From: samkellerhals Date: Mon, 22 May 2023 10:05:40 +0200 Subject: [PATCH 18/21] Standardise CLI documentation --- liskov/src/icon4py/liskov/cli.py | 15 +++------------ pyutils/src/icon4py/f2ser/cli.py | 15 ++++++--------- pyutils/src/icon4py/icon4pygen/cli.py | 25 ++++++++++++++----------- 3 files changed, 23 insertions(+), 32 deletions(-) diff --git a/liskov/src/icon4py/liskov/cli.py b/liskov/src/icon4py/liskov/cli.py index 31889b3d7f..2fbefaef88 100644 --- a/liskov/src/icon4py/liskov/cli.py +++ b/liskov/src/icon4py/liskov/cli.py @@ -53,7 +53,7 @@ "--metadatagen", "-m", is_flag=True, - help="Add metadata header with information about program (requires git).", + help="Add metadata header with information about program.", ) @click.option( "--multinode", @@ -72,18 +72,9 @@ def main( ) -> None: """Command line interface for interacting with the ICON-Liskov DSL Preprocessor. - Usage: - icon_liskov [-p] [-m] - - Options: - -p --profile Add nvtx profile statements to stencils. - -m --metadatagen Add metadata header with information about program (requires git). - --ppser Generate ppser serialization statements instead of integration code. - --multinode: Considers this a multinode run. - Arguments: - input_path: Path to the input file to process. - output_path: Path to the output file to generate. + INPUT_PATH: Path to input file containing Liskov directives. + OUTPUT_PATH: Path to new file to be generated. 
""" mode = "serialisation" if ppser else "integration" diff --git a/pyutils/src/icon4py/f2ser/cli.py b/pyutils/src/icon4py/f2ser/cli.py index 1f23b6738e..2c0d41c56f 100644 --- a/pyutils/src/icon4py/f2ser/cli.py +++ b/pyutils/src/icon4py/f2ser/cli.py @@ -31,6 +31,10 @@ exists=True, dir_okay=False, resolve_path=True, path_type=pathlib.Path ), ) +@click.argument( + "output_filepath", + type=click.Path(dir_okay=False, resolve_path=True, path_type=pathlib.Path), +) @click.option( "--dependencies", "-d", @@ -38,10 +42,6 @@ type=click.Path(exists=True), help="Optional list of dependency paths.", ) -@click.argument( - "output_filepath", - type=click.Path(dir_okay=False, resolve_path=True, path_type=pathlib.Path), -) @click.option( "--directory", type=str, help="Directory to serialise variables to.", default="." ) @@ -66,11 +66,8 @@ def main( """Command line interface for interacting with the ICON-f2ser serialization Preprocessor. Arguments: - granule_path (Path): A path to the Fortran source file to be parsed. - output_filepath (Path): A path to the output Fortran source file to be generated. - directory (str): The directory to serialise the variables to. - prefix (str): The prefix to use for each serialised variable. - multinode (bool): Specify whether this is a multinode run. + GRANULE_PATH: A path to the Fortran source file to be parsed. + OUTPUT_FILEPATH: A path to the output Fortran source file to be generated. 
""" parsed = GranuleParser(granule_path, dependencies)() interface = ParsedGranuleDeserialiser(parsed, directory=directory, prefix=prefix)() diff --git a/pyutils/src/icon4py/icon4pygen/cli.py b/pyutils/src/icon4py/icon4pygen/cli.py index 382726abf5..b27c529d31 100644 --- a/pyutils/src/icon4py/icon4pygen/cli.py +++ b/pyutils/src/icon4py/icon4pygen/cli.py @@ -45,13 +45,20 @@ def shell_complete(self, ctx, param, incomplete): @click.argument("fencil", type=ModuleType()) @click.argument("block_size", type=int, default=128) @click.argument("levels_per_thread", type=int, default=4) -@click.option("--is_global", is_flag=True, type=bool) +@click.option( + "--is_global", is_flag=True, type=bool, help="Whether this is a global run." +) @click.argument( "outpath", type=click.Path(dir_okay=True, resolve_path=True, path_type=pathlib.Path), default=".", ) -@click.option("--imperative", is_flag=True, type=bool) +@click.option( + "--imperative", + is_flag=True, + type=bool, + help="Whether to use the imperative code generation backend.", +) def main( fencil: str, block_size: int, @@ -63,15 +70,11 @@ def main( """ Generate Gridtools C++ code for an icon4py fencil as well as all the associated C++ and Fortran bindings. - Args: - fencil: may be specified as :, where is the dotted name of the containing module - and is the name of the fencil. - - block_size: refers to the number of threads per block to use in a cuda kernel. - - levels_per_thread: how many k-levels to process per thread. - - outpath: represents a path to the folder in which to write all generated code. + Arguments: + FENCIL: may be specified as :, where is the dotted name of the containing module and is the name of the fencil. + BLOCK_SIZE: refers to the number of threads per block to use in a cuda kernel. + LEVELS_PER_THREAD: how many k-levels to process per thread. + OUTPATH: represents a path to the folder in which to write all generated code. 
""" from icon4py.icon4pygen.backend import GTHeader from icon4py.icon4pygen.bindings.workflow import PyBindGen From bee0752c93b3ebee410ede5a6279ba3c17b97b3f Mon Sep 17 00:00:00 2001 From: samkellerhals Date: Mon, 22 May 2023 11:56:38 +0200 Subject: [PATCH 19/21] PR Review Fixes --- liskov/README.md | 10 +++++++++- .../icon4py/liskov/codegen/integration/deserialise.py | 2 +- .../src/icon4py/liskov/codegen/integration/generate.py | 2 +- .../liskov/codegen/serialisation/deserialise.py | 2 +- .../icon4py/liskov/codegen/serialisation/generate.py | 2 +- .../codegen/shared/{deserialiser.py => deserialise.py} | 0 .../codegen/shared/{generator.py => generate.py} | 0 pyutils/src/icon4py/f2ser/deserialise.py | 2 +- .../icon4pygen/bindings/codegen/render/field.py | 2 +- 9 files changed, 15 insertions(+), 7 deletions(-) rename liskov/src/icon4py/liskov/codegen/shared/{deserialiser.py => deserialise.py} (100%) rename liskov/src/icon4py/liskov/codegen/shared/{generator.py => generate.py} (100%) diff --git a/liskov/README.md b/liskov/README.md index a7bf033633..ca8236fc8f 100644 --- a/liskov/README.md +++ b/liskov/README.md @@ -18,7 +18,15 @@ To use the `icon_liskov` tool, run the following command: icon_liskov <input_filepath> <output_filepath> [--profile] [--metadatagen] [--ppser] ``` -Where `input_filepath` is the path to the input file to be processed, and `output_filepath` is the path to the output file. The optional `--profile` flag adds nvtx profile statements to the stencils. The `--metadatagen` flag generates a metadata header at the top of the file which includes information on icon_liskov such as the git tag and commit hash. The `--ppser` flag activates serialisation mode and will trigger the generation of `ppser` serialisation statements serialising all variables at the start and end of each stencil directive. The data will be saved at the default folder location of the currently run experiment and will have a prefix of `liskov-serialisation`. 
+The following are descriptions of the arguments and options: + +- input_filepath: path to the input file to be processed. +- output_filepath: path to the output file. +- profile flag: adds nvtx profile statements to the stencils (optional). +- metadatagen flag: generates a metadata header at the top of the file which includes information on icon_liskov such as the version used. +- ppser flag: activates serialisation mode and will trigger the generation of ppser serialisation statements serialising all variables at the start and end of each stencil directive. + +**Note**: By default the data will be saved at the default folder location of the currently run experiment and will have a prefix of `liskov-serialisation`. ### Preprocessor directives diff --git a/liskov/src/icon4py/liskov/codegen/integration/deserialise.py b/liskov/src/icon4py/liskov/codegen/integration/deserialise.py index 1832e501d6..46d09eaca8 100644 --- a/liskov/src/icon4py/liskov/codegen/integration/deserialise.py +++ b/liskov/src/icon4py/liskov/codegen/integration/deserialise.py @@ -32,7 +32,7 @@ StartStencilData, UnusedDirective, ) -from icon4py.liskov.codegen.shared.deserialiser import Deserialiser +from icon4py.liskov.codegen.shared.deserialise import Deserialiser from icon4py.liskov.codegen.shared.types import CodeGenInput from icon4py.liskov.parsing.exceptions import ( DirectiveSyntaxError, diff --git a/liskov/src/icon4py/liskov/codegen/integration/generate.py b/liskov/src/icon4py/liskov/codegen/integration/generate.py index c34bc47fef..cc8af30138 100644 --- a/liskov/src/icon4py/liskov/codegen/integration/generate.py +++ b/liskov/src/icon4py/liskov/codegen/integration/generate.py @@ -43,7 +43,7 @@ StartStencilStatement, StartStencilStatementGenerator, ) -from icon4py.liskov.codegen.shared.generator import CodeGenerator +from icon4py.liskov.codegen.shared.generate import CodeGenerator from icon4py.liskov.codegen.shared.types import GeneratedCode from icon4py.liskov.external.metadata import 
CodeMetadata diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py b/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py index 385016d821..f477747260 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/deserialise.py @@ -29,7 +29,7 @@ SavepointData, SerialisationCodeInterface, ) -from icon4py.liskov.codegen.shared.deserialiser import Deserialiser +from icon4py.liskov.codegen.shared.deserialise import Deserialiser from icon4py.liskov.parsing.utils import extract_directive diff --git a/liskov/src/icon4py/liskov/codegen/serialisation/generate.py b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py index f1b56b30a5..7fda920d2c 100644 --- a/liskov/src/icon4py/liskov/codegen/serialisation/generate.py +++ b/liskov/src/icon4py/liskov/codegen/serialisation/generate.py @@ -22,7 +22,7 @@ SavepointStatement, SavepointStatementGenerator, ) -from icon4py.liskov.codegen.shared.generator import CodeGenerator +from icon4py.liskov.codegen.shared.generate import CodeGenerator from icon4py.liskov.codegen.shared.types import GeneratedCode diff --git a/liskov/src/icon4py/liskov/codegen/shared/deserialiser.py b/liskov/src/icon4py/liskov/codegen/shared/deserialise.py similarity index 100% rename from liskov/src/icon4py/liskov/codegen/shared/deserialiser.py rename to liskov/src/icon4py/liskov/codegen/shared/deserialise.py diff --git a/liskov/src/icon4py/liskov/codegen/shared/generator.py b/liskov/src/icon4py/liskov/codegen/shared/generate.py similarity index 100% rename from liskov/src/icon4py/liskov/codegen/shared/generator.py rename to liskov/src/icon4py/liskov/codegen/shared/generate.py diff --git a/pyutils/src/icon4py/f2ser/deserialise.py b/pyutils/src/icon4py/f2ser/deserialise.py index 8705236914..daeee81950 100644 --- a/pyutils/src/icon4py/f2ser/deserialise.py +++ b/pyutils/src/icon4py/f2ser/deserialise.py @@ -90,7 +90,7 @@ def _create_savepoint( intent=intent, 
startln=self._get_codegen_line(var_dict["codegen_ctx"], intent), fields=fields, - metadata=None, # todo: currently not using metadata + metadata=None, ) ) diff --git a/pyutils/src/icon4py/icon4pygen/bindings/codegen/render/field.py b/pyutils/src/icon4py/icon4pygen/bindings/codegen/render/field.py index 6797204d25..6d9a2d5faa 100644 --- a/pyutils/src/icon4py/icon4pygen/bindings/codegen/render/field.py +++ b/pyutils/src/icon4py/icon4pygen/bindings/codegen/render/field.py @@ -68,7 +68,7 @@ def render_ranked_dim_string(self) -> str: ) def render_serialise_func(self) -> str: - """Render c++ f2ser function.""" + """Render c++ serialisation function.""" _serializers = { "E": "serialize_dense_edges", "C": "serialize_dense_cells", From 60d2bcde2a16f8e6de4b0ed3256c71b137cabda8 Mon Sep 17 00:00:00 2001 From: samkellerhals Date: Mon, 22 May 2023 13:43:33 +0200 Subject: [PATCH 20/21] Rename writer --- .../src/icon4py/liskov/codegen/shared/{writer.py => write.py} | 0 liskov/src/icon4py/liskov/pipeline/collection.py | 2 +- liskov/tests/test_writer.py | 2 +- pyutils/src/icon4py/f2ser/cli.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename liskov/src/icon4py/liskov/codegen/shared/{writer.py => write.py} (100%) diff --git a/liskov/src/icon4py/liskov/codegen/shared/writer.py b/liskov/src/icon4py/liskov/codegen/shared/write.py similarity index 100% rename from liskov/src/icon4py/liskov/codegen/shared/writer.py rename to liskov/src/icon4py/liskov/codegen/shared/write.py diff --git a/liskov/src/icon4py/liskov/pipeline/collection.py b/liskov/src/icon4py/liskov/pipeline/collection.py index 115e83f685..5bc730c36c 100644 --- a/liskov/src/icon4py/liskov/pipeline/collection.py +++ b/liskov/src/icon4py/liskov/pipeline/collection.py @@ -23,7 +23,7 @@ from icon4py.liskov.codegen.serialisation.generate import ( SerialisationCodeGenerator, ) -from icon4py.liskov.codegen.shared.writer import CodegenWriter +from icon4py.liskov.codegen.shared.write import CodegenWriter from 
icon4py.liskov.external.gt4py import UpdateFieldsWithGt4PyStencils from icon4py.liskov.parsing.parse import DirectivesParser from icon4py.liskov.parsing.scan import DirectivesScanner diff --git a/liskov/tests/test_writer.py b/liskov/tests/test_writer.py index 5b7866d5d9..41db096973 100644 --- a/liskov/tests/test_writer.py +++ b/liskov/tests/test_writer.py @@ -15,7 +15,7 @@ from tempfile import TemporaryDirectory from icon4py.liskov.codegen.shared.types import GeneratedCode -from icon4py.liskov.codegen.shared.writer import DIRECTIVE_IDENT, CodegenWriter +from icon4py.liskov.codegen.shared.write import DIRECTIVE_IDENT, CodegenWriter def test_write_from(): diff --git a/pyutils/src/icon4py/f2ser/cli.py b/pyutils/src/icon4py/f2ser/cli.py index 2c0d41c56f..8a65333249 100644 --- a/pyutils/src/icon4py/f2ser/cli.py +++ b/pyutils/src/icon4py/f2ser/cli.py @@ -21,7 +21,7 @@ from icon4py.liskov.codegen.serialisation.generate import ( SerialisationCodeGenerator, ) -from icon4py.liskov.codegen.shared.writer import CodegenWriter +from icon4py.liskov.codegen.shared.write import CodegenWriter @click.command("icon_f2ser") From ea354f0dd7d04ca4c0ef336feb78493e2cc6cc65 Mon Sep 17 00:00:00 2001 From: samkellerhals Date: Wed, 24 May 2023 14:08:59 +0200 Subject: [PATCH 21/21] Add nested commands to icon_liskov --- liskov/src/icon4py/liskov/cli.py | 93 ++++++++++--------- .../src/icon4py/liskov/external/metadata.py | 4 +- liskov/tests/test_cli.py | 59 ++++++------ 3 files changed, 83 insertions(+), 73 deletions(-) diff --git a/liskov/src/icon4py/liskov/cli.py b/liskov/src/icon4py/liskov/cli.py index 2fbefaef88..b53eed9d8f 100644 --- a/liskov/src/icon4py/liskov/cli.py +++ b/liskov/src/icon4py/liskov/cli.py @@ -26,7 +26,8 @@ logger = setup_logger(__name__) -@click.command("icon_liskov") +@click.group(invoke_without_command=True) +@click.pass_context @click.argument( "input_path", type=click.Path( @@ -37,12 +38,24 @@ "output_path", type=click.Path(dir_okay=False, resolve_path=True, 
path_type=pathlib.Path), ) -@click.option( - "--ppser", - is_flag=True, - type=str, - help="Generate ppser serialization statements instead of integration code.", -) +def main(ctx, input_path, output_path): + """Command line interface for interacting with the ICON-Liskov DSL Preprocessor. + + Arguments: + INPUT_PATH: Path to input file containing Liskov directives. + OUTPUT_PATH: Path to new file to be generated. + """ + if ctx.invoked_subcommand is None: + click.echo( + "Need to choose one of the following commands:\nintegrate\nserialise" + ) + else: + ctx.ensure_object(dict) + ctx.obj["INPUT"] = input_path + ctx.obj["OUTPUT"] = output_path + + +@main.command() @click.option( "--profile", "-p", @@ -55,51 +68,39 @@ is_flag=True, help="Add metadata header with information about program.", ) +@click.pass_context +def integrate(ctx, profile, metadatagen): + mode = "integration" + inp = ctx.obj["INPUT"] + out = ctx.obj["OUTPUT"] + + iface = parse_fortran_file(inp, out, mode) + iface_gt4py = load_gt4py_stencils(iface) + run_code_generation( + inp, + out, + mode, + iface_gt4py, + profile=profile, + metadatagen=metadatagen, + ) + + +@main.command() @click.option( "--multinode", is_flag=True, type=bool, - help="Specify whether it is a multinode run.", + help="Specify whether it is a multinode run. Will generate mpi rank information.", default=False, ) -def main( - input_path: pathlib.Path, - output_path: pathlib.Path, - ppser: bool, - profile: bool, - metadatagen: bool, - multinode: bool, -) -> None: - """Command line interface for interacting with the ICON-Liskov DSL Preprocessor. - - Arguments: - INPUT_PATH: Path to input file containing Liskov directives. - OUTPUT_PATH: Path to new file to be generated. 
- """ - mode = "serialisation" if ppser else "integration" - - def run_serialisation() -> None: - iface = parse_fortran_file(input_path, output_path, mode) - run_code_generation(input_path, output_path, mode, iface, multinode=multinode) - - def run_integration() -> None: - iface = parse_fortran_file(input_path, output_path, mode) - iface_gt4py = load_gt4py_stencils(iface) - run_code_generation( - input_path, - output_path, - mode, - iface_gt4py, - profile=profile, - metadatagen=metadatagen, - ) - - mode_dispatcher = { - "serialisation": run_serialisation, - "integration": run_integration, - } - - mode_dispatcher[mode]() +@click.pass_context +def serialise(ctx, multinode): + mode = "serialisation" + inp = ctx.obj["INPUT"] + out = ctx.obj["OUTPUT"] + iface = parse_fortran_file(inp, out, mode) + run_code_generation(inp, out, mode, iface, multinode=multinode) if __name__ == "__main__": diff --git a/liskov/src/icon4py/liskov/external/metadata.py b/liskov/src/icon4py/liskov/external/metadata.py index e1b5ff2c47..756d92dcea 100644 --- a/liskov/src/icon4py/liskov/external/metadata.py +++ b/liskov/src/icon4py/liskov/external/metadata.py @@ -30,7 +30,9 @@ def generated_on(self) -> str: def cli_params(self) -> dict[str, Any]: try: ctx = click.get_current_context() - return ctx.params + params = ctx.params.copy() + params.update(ctx.parent.params) + return params except Exception as e: raise MissingClickContextError( f"Cannot fetch click context in this thread as no click command has been executed.\n {e}" diff --git a/liskov/tests/test_cli.py b/liskov/tests/test_cli.py index 2eefb00cfd..6fdffde422 100644 --- a/liskov/tests/test_cli.py +++ b/liskov/tests/test_cli.py @@ -11,6 +11,8 @@ # # SPDX-License-Identifier: GPL-3.0-or-later +import itertools + import pytest from icon4py.liskov.cli import main @@ -29,34 +31,39 @@ def outfile(tmp_path): return str(tmp_path / "gen.f90") +test_cases = [] + +files = [ + ("NO_DIRECTIVES", NO_DIRECTIVES_STENCIL), + ("SINGLE", SINGLE_STENCIL), + 
("CONSECUTIVE", CONSECUTIVE_STENCIL), + ("FREE_FORM", FREE_FORM_STENCIL), + ("MULTIPLE", MULTIPLE_STENCILS), + ("REPEATED", REPEATED_STENCILS), +] + +flags = {"serialise": ["--multinode"], "integrate": ["-p", "-m"]} + +for file_name, file_content in files: + for cmd in flags.keys(): + flag_combinations = [] + for r in range(1, len(flags[cmd]) + 1): + flag_combinations.extend(itertools.combinations(flags[cmd], r)) + for flags_selected in flag_combinations: + args = (file_name, file_content, cmd, list(flags_selected)) + test_cases.append(args) + + @pytest.mark.parametrize( - "file, options", - [ - (NO_DIRECTIVES_STENCIL, ["--ppser"]), - (NO_DIRECTIVES_STENCIL, []), - (SINGLE_STENCIL, ["--ppser"]), - (SINGLE_STENCIL, []), - (CONSECUTIVE_STENCIL, ["--ppser"]), - (CONSECUTIVE_STENCIL, []), - (FREE_FORM_STENCIL, ["--ppser"]), - (FREE_FORM_STENCIL, []), - (MULTIPLE_STENCILS, ["--ppser"]), - (MULTIPLE_STENCILS, []), - (SINGLE_STENCIL, ["--ppser"]), - (SINGLE_STENCIL, ["--profile"]), - (CONSECUTIVE_STENCIL, ["--ppser", "--profile"]), - (CONSECUTIVE_STENCIL, ["--profile"]), - (FREE_FORM_STENCIL, ["--ppser", "--profile"]), - (FREE_FORM_STENCIL, ["--profile"]), - (MULTIPLE_STENCILS, ["--ppser", "--profile"]), - (MULTIPLE_STENCILS, ["--profile"]), - (REPEATED_STENCILS, ["--ppser", "--profile"]), - (REPEATED_STENCILS, ["--profile"]), - (MULTIPLE_STENCILS, ["--ppser", "--multinode"]), + "file_name, file_content, cmd, cmd_flags", + test_cases, + ids=[ + "file={}, command={}, flags={}".format(file_name, cmd, ",".join(cmd_flags)) + for file_name, file_content, cmd, cmd_flags in test_cases ], ) -def test_cli(make_f90_tmpfile, cli, file, outfile, options): - fpath = str(make_f90_tmpfile(content=file)) - args = [fpath, outfile, *options] +def test_cli(make_f90_tmpfile, cli, outfile, file_name, file_content, cmd, cmd_flags): + fpath = str(make_f90_tmpfile(content=file_content)) + args = [fpath, outfile, cmd, *cmd_flags] result = cli.invoke(main, args) assert result.exit_code == 0