diff --git a/maintainer/walberla_kernels/generate_lb_kernels.py b/maintainer/walberla_kernels/generate_lb_kernels.py index cfe346507a..21300af894 100644 --- a/maintainer/walberla_kernels/generate_lb_kernels.py +++ b/maintainer/walberla_kernels/generate_lb_kernels.py @@ -193,6 +193,30 @@ def paramlist(parameters, keys): ctx, config, method, templates ) + # generate PackInfo + assignments = pystencils_espresso.generate_pack_info_pdfs_field_assignments( + fields, streaming_pattern="pull") + spec = pystencils_espresso.generate_pack_info_vector_field_specifications( + config, stencil, force_field.layout) + for params, target_suffix in paramlist(parameters, ["CPU"]): + pystencils_walberla.generate_pack_info_from_kernel( + ctx, f"PackInfoPdf{precision_prefix}{target_suffix}", assignments, + kind="pull", **params) + pystencils_walberla.generate_pack_info( + ctx, f"PackInfoVec{precision_prefix}{target_suffix}", spec, **params) + if target_suffix == "CUDA": + continue + token = "\n //TODO: optimize by generating kernel for this case\n" + for field_suffix in ["Pdf", "Vec"]: + class_name = f"PackInfo{field_suffix}{precision_prefix}{target_suffix}" # nopep8 + with open(f"{class_name}.h", "r+") as f: + content = f.read() + assert token in content + content = content.replace(token, "\n") + f.seek(0) + f.truncate() + f.write(content) + # boundary conditions ubb_dynamic = lbmpy_espresso.UBB( lambda *args: None, dim=3, data_type=config.data_type.default_factory()) diff --git a/maintainer/walberla_kernels/pystencils_espresso.py b/maintainer/walberla_kernels/pystencils_espresso.py index 0b6a9d70bb..3cf6edfbf3 100644 --- a/maintainer/walberla_kernels/pystencils_espresso.py +++ b/maintainer/walberla_kernels/pystencils_espresso.py @@ -207,6 +207,60 @@ def generate_fields(config, stencil, field_layout='fzyx'): return fields +def generate_pack_info_pdfs_field_assignments(fields, streaming_pattern): + """ + Visualize the stencil directions with:: + + import lbmpy + import matplotlib.pyplot 
as plt + stencil = lbmpy.LBStencil(lbmpy.Stencil.D3Q19) + stencil.plot(data=[i for i in range(19)]) + plt.show() + + """ + stencil = lbmpy.enums.Stencil.D3Q19 + lbm_config = lbmpy.LBMConfig(stencil=stencil, + method=lbmpy.Method.CUMULANT, + compressible=True, + zero_centered=False, + weighted=True, + streaming_pattern=streaming_pattern, + relaxation_rate=sp.Symbol("omega_shear"), + ) + lbm_opt = lbmpy.LBMOptimisation( + symbolic_field=fields["pdfs" if streaming_pattern == + "pull" else "pdfs_tmp"], + symbolic_temporary_field=fields["pdfs" if streaming_pattern == + "push" else "pdfs_tmp"], + field_layout=fields['pdfs'].layout) + lbm_update_rule = lbmpy.create_lb_update_rule( + lbm_config=lbm_config, + lbm_optimisation=lbm_opt) + return lbm_update_rule.all_assignments + + +def generate_pack_info_vector_field_specifications(config, stencil, layout): + import collections + import itertools + field = ps.Field.create_generic( + "field", + 3, + data_type_np[config.data_type.default_factory().c_name], + index_dimensions=1, + layout=layout, + index_shape=(3,) + ) + q = len(stencil) + coord = itertools.product(*[(-1, 0, 1)] * 3) + if q == 19: + dirs = tuple((i, j, k) for i, j, k in coord if i**2 + j**2 + k**2 != 3) + else: + dirs = tuple((i, j, k) for i, j, k in coord) + spec = collections.defaultdict(set) + spec[dirs] = {field[0, 0, 0](i) for i in range(3)} + return spec + + def generate_config(ctx, params): return pystencils_walberla.utility.config_from_context(ctx, **params) diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp index 91266fa05a..6f1fedae10 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp @@ -119,6 +119,11 @@ class LBWalberlaImpl : public LBWalberlaBase { using VectorField = field::GhostLayerField; template using PackInfo = field::communication::PackInfo; + template + using 
PackInfoStreaming = + std::conditional_t, + typename detail::KernelTrait::PackInfoPdf, + typename detail::KernelTrait::PackInfoVec>; template using RegularCommScheme = blockforest::communication::UniformBufferedScheme; @@ -133,6 +138,8 @@ class LBWalberlaImpl : public LBWalberlaBase { using VectorField = gpu::GPUField; template using PackInfo = gpu::communication::MemcpyPackInfo; + template + using PackInfoStreaming = gpu::communication::MemcpyPackInfo; template using RegularCommScheme = gpu::communication::UniformGPUScheme; template @@ -284,6 +291,7 @@ class LBWalberlaImpl : public LBWalberlaBase { /** Flag for boundary cells. */ FlagUID const Boundary_flag{"boundary"}; + bool m_has_boundaries{false}; /** * @brief Full communicator. @@ -307,6 +315,10 @@ class LBWalberlaImpl : public LBWalberlaBase { template using PackInfo = typename FieldTrait::template PackInfo; + template + using PackInfoStreaming = + typename FieldTrait::template PackInfoStreaming; // communicators std::shared_ptr m_boundary_communicator; @@ -414,6 +426,24 @@ class LBWalberlaImpl : public LBWalberlaBase { #endif } + void setup_streaming_communicator() { + auto const setup = [this]() { + auto const &blocks = m_lattice->get_blocks(); + m_pdf_streaming_communicator = + std::make_shared(blocks); + m_pdf_streaming_communicator->addPackInfo( + std::make_shared(m_pdf_field_id)); + m_pdf_streaming_communicator->addPackInfo( + std::make_shared>( + m_last_applied_force_field_id)); + }; + if (m_has_boundaries or (m_collision_model and has_lees_edwards_bc())) { + setup.template operator()>(); + } else { + setup.template operator()>(); + } + } + public: LBWalberlaImpl(std::shared_ptr lattice, double viscosity, double density) @@ -448,12 +478,7 @@ class LBWalberlaImpl : public LBWalberlaBase { reset_boundary_handling(); // Set up the communication and register fields - m_pdf_streaming_communicator = - std::make_shared(blocks); - m_pdf_streaming_communicator->addPackInfo( - 
std::make_shared>(m_pdf_field_id)); - m_pdf_streaming_communicator->addPackInfo( - std::make_shared>(m_last_applied_force_field_id)); + setup_streaming_communicator(); m_full_communicator = std::make_shared(blocks); m_full_communicator->addPackInfo( @@ -555,7 +580,9 @@ class LBWalberlaImpl : public LBWalberlaBase { integrate_collide(blocks); m_pdf_streaming_communicator->communicate(); // Handle boundaries - integrate_boundaries(blocks); + if (m_has_boundaries) { + integrate_boundaries(blocks); + } // LB stream integrate_stream(blocks); // Mark pending ghost layer updates @@ -569,7 +596,9 @@ class LBWalberlaImpl : public LBWalberlaBase { void integrate_pull_scheme() { auto const &blocks = get_lattice().get_blocks(); // Handle boundaries - integrate_boundaries(blocks); + if (m_has_boundaries) { + integrate_boundaries(blocks); + } // LB stream integrate_stream(blocks); // LB collide @@ -690,6 +719,7 @@ class LBWalberlaImpl : public LBWalberlaBase { omega_odd, omega, seed, uint32_t{0u}); m_collision_model = std::make_shared(std::move(obj)); m_run_collide_sweep = CollideSweepVisitor(blocks); + setup_streaming_communicator(); } void set_collision_model( @@ -734,6 +764,7 @@ class LBWalberlaImpl : public LBWalberlaBase { blocks, m_last_applied_force_field_id, m_vec_tmp_field_id, n_ghost_layers, shear_direction, shear_plane_normal, m_lees_edwards_callbacks->get_pos_offset); + setup_streaming_communicator(); } void check_lebc(unsigned int shear_direction, @@ -765,10 +796,12 @@ class LBWalberlaImpl : public LBWalberlaBase { bool consider_ghosts = false) const override { assert(not(consider_ghosts and m_pending_ghost_comm.test(GhostComm::VEL))); assert(not(consider_ghosts and m_pending_ghost_comm.test(GhostComm::UBB))); - auto const is_boundary = get_node_is_boundary(node, consider_ghosts); - if (is_boundary) // is info available locally - if (*is_boundary) // is the node a boundary + if (m_has_boundaries) { + auto const is_boundary = get_node_is_boundary(node, 
consider_ghosts); + if (is_boundary and *is_boundary) { return get_node_velocity_at_boundary(node, consider_ghosts); + } + } auto const bc = get_block_and_cell(get_lattice(), node, consider_ghosts); if (!bc) return std::nullopt; @@ -1266,6 +1299,7 @@ class LBWalberlaImpl : public LBWalberlaBase { bool set_node_velocity_at_boundary(Utils::Vector3i const &node, Utils::Vector3d const &velocity) override { + on_boundary_add(); m_pending_ghost_comm.set(GhostComm::UBB); auto bc = get_block_and_cell(get_lattice(), node, true); if (bc) { @@ -1307,6 +1341,7 @@ class LBWalberlaImpl : public LBWalberlaBase { void set_slice_velocity_at_boundary( Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner, std::vector> const &velocity) override { + on_boundary_add(); m_pending_ghost_comm.set(GhostComm::UBB); if (auto const ci = get_interval(lower_corner, upper_corner)) { auto const &lattice = get_lattice(); @@ -1388,19 +1423,30 @@ class LBWalberlaImpl : public LBWalberlaBase { void reallocate_ubb_field() override { m_boundary->boundary_update(); } + void on_boundary_add() { + if (not m_has_boundaries) { + m_has_boundaries = true; + setup_streaming_communicator(); + } + m_has_boundaries = true; + } + void clear_boundaries() override { reset_boundary_handling(); m_pending_ghost_comm.set(GhostComm::UBB); ghost_communication(); + m_has_boundaries = false; + setup_streaming_communicator(); } void update_boundary_from_shape(std::vector const &raster_flat, std::vector const &data_flat) override { + on_boundary_add(); + m_pending_ghost_comm.set(GhostComm::UBB); auto const grid_size = get_lattice().get_grid_dimensions(); auto const data = fill_3D_vector_array(data_flat, grid_size); set_boundary_from_grid(*m_boundary, get_lattice(), raster_flat, data); - m_pending_ghost_comm.set(GhostComm::UBB); ghost_communication(); reallocate_ubb_field(); } diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt 
b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt index c2bf4267a8..434d968d52 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt @@ -20,6 +20,8 @@ target_sources( espresso_walberla PRIVATE StreamSweepSinglePrecision.cpp StreamSweepDoublePrecision.cpp + PackInfoPdfSinglePrecision.cpp PackInfoPdfDoublePrecision.cpp + PackInfoVecSinglePrecision.cpp PackInfoVecDoublePrecision.cpp InitialPDFsSetterSinglePrecision.cpp InitialPDFsSetterDoublePrecision.cpp Dynamic_UBB_single_precision.cpp Dynamic_UBB_double_precision.cpp) diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp new file mode 100644 index 0000000000..abee661f39 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp @@ -0,0 +1,1358 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoPdfDoublePrecision.cpp +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "PackInfoPdfDoublePrecision.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "stencil/Directions.h" + +#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +namespace walberla { +namespace pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_pack_SW { +static FUNC_PREFIX void pack_SW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_SW + +namespace internal_pack_BW { +static FUNC_PREFIX void pack_BW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t 
ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BW + +namespace internal_pack_W { +static FUNC_PREFIX void pack_W(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // 
namespace internal_pack_W + +namespace internal_pack_TW { +static FUNC_PREFIX void pack_TW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TW + +namespace internal_pack_NW { +static FUNC_PREFIX void pack_NW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_NW + +namespace internal_pack_BS { +static FUNC_PREFIX void pack_BS(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; 
ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BS + +namespace internal_pack_S { +static FUNC_PREFIX void pack_S(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_S + +namespace internal_pack_TS { +static FUNC_PREFIX void pack_TS(double 
*RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TS + +namespace internal_pack_B { +static FUNC_PREFIX void pack_B(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + 
_stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_B + +namespace internal_pack_T { +static FUNC_PREFIX void pack_T(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_T + +namespace internal_pack_BN { +static FUNC_PREFIX void pack_BN(double *RESTRICT 
_data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BN + +namespace internal_pack_N { +static FUNC_PREFIX void pack_N(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * 
ctr_2 + 7 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_N + +namespace internal_pack_TN { +static FUNC_PREFIX void pack_TN(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TN + +namespace internal_pack_SE { +static FUNC_PREFIX void pack_SE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_SE + +namespace internal_pack_BE { +static FUNC_PREFIX void pack_BE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, 
int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BE + +namespace internal_pack_E { +static FUNC_PREFIX void pack_E(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * 
_size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_E + +namespace internal_pack_TE { +static FUNC_PREFIX void pack_TE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TE + +namespace internal_pack_NE { +static FUNC_PREFIX void pack_NE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_NE + +namespace internal_unpack_SW { +static FUNC_PREFIX void unpack_SW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, 
int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_SW + +namespace internal_unpack_BW { +static FUNC_PREFIX void unpack_BW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BW + +namespace internal_unpack_W { +static FUNC_PREFIX void unpack_W(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + 
_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_W + +namespace internal_unpack_TW { +static FUNC_PREFIX void unpack_TW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TW + +namespace internal_unpack_NW { +static FUNC_PREFIX void unpack_NW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for 
(int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_NW + +namespace internal_unpack_BS { +static FUNC_PREFIX void unpack_BS(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BS + +namespace internal_unpack_S { +static FUNC_PREFIX void unpack_S(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 
* ctr_2 + 12 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_S + +namespace internal_unpack_TS { +static FUNC_PREFIX void unpack_TS(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TS + +namespace internal_unpack_B { +static FUNC_PREFIX void unpack_B(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 
0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_B + +namespace internal_unpack_T { +static FUNC_PREFIX void unpack_T(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * 
ctr_2 + 12 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_T + +namespace internal_unpack_BN { +static FUNC_PREFIX void unpack_BN(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BN + +namespace internal_unpack_N { +static FUNC_PREFIX void unpack_N(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 
0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_N + +namespace internal_unpack_TN { +static FUNC_PREFIX void unpack_TN(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TN + +namespace internal_unpack_SE { +static 
FUNC_PREFIX void unpack_SE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_SE + +namespace internal_unpack_BE { +static FUNC_PREFIX void unpack_BE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BE + +namespace internal_unpack_E { +static FUNC_PREFIX void unpack_E(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < 
_size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_E + +namespace internal_unpack_TE { +static FUNC_PREFIX void unpack_TE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TE + +namespace internal_unpack_NE { +static FUNC_PREFIX void unpack_NE(double *RESTRICT const _data_buffer, double 
*RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_NE + +void PackInfoPdfDoublePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + double *buffer = reinterpret_cast(byte_buffer); + + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t 
_stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_SW::pack_SW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BW::pack_BW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::W: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const 
int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_W::pack_W(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TW::pack_TW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, 
_size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_NW::pack_NW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BS: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), 
int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BS::pack_BS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::S: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_S::pack_S(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TS: { + double 
*RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TS::pack_TS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::B: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + 
WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_B::pack_B(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::T: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_T::pack_T(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BN: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), 
-int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BN::pack_BN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::N: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t 
_size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_N::pack_N(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TN: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TN::pack_TN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::SE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + 
WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_SE::pack_SE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const 
int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BE::pack_BE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::E: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_E::pack_E(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), 
ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TE::pack_TE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 
= int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_NE::pack_NE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +void PackInfoPdfDoublePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + double *buffer = reinterpret_cast(byte_buffer); + + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch (communciationDirection) { + case stencil::SW: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_SW::unpack_SW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BW: { + double *RESTRICT const _data_buffer = buffer; + 
WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BW::unpack_BW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::W: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), 
int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_W::unpack_W(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TW: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TW::unpack_TW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NW: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + 
WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_NW::unpack_NW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BS: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 
0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BS::unpack_BS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::S: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_S::unpack_S(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TS: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), 
-int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TS::unpack_TS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::B: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = 
int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_B::unpack_B(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::T: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_T::unpack_T(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BN: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 
0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BN::unpack_BN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::N: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + internal_unpack_N::unpack_N(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TN: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TN::unpack_TN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::SE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = 
int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_SE::unpack_SE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BE::unpack_BE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, 
_stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::E: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_E::unpack_E(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 
0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TE::unpack_TE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_NE::unpack_NE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + 
+uint_t PackInfoPdfDoublePrecision::size(stencil::Direction dir, const IBlock *block) const { + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getGhostRegion(dir, ci, 1, false); + + uint_t elementsPerCell = 0; + + switch (dir) { + case stencil::SW: + elementsPerCell = 1; + break; + + case stencil::BW: + elementsPerCell = 1; + break; + + case stencil::W: + elementsPerCell = 5; + break; + + case stencil::TW: + elementsPerCell = 1; + break; + + case stencil::NW: + elementsPerCell = 1; + break; + + case stencil::BS: + elementsPerCell = 1; + break; + + case stencil::S: + elementsPerCell = 5; + break; + + case stencil::TS: + elementsPerCell = 1; + break; + + case stencil::B: + elementsPerCell = 5; + break; + + case stencil::T: + elementsPerCell = 5; + break; + + case stencil::BN: + elementsPerCell = 1; + break; + + case stencil::N: + elementsPerCell = 5; + break; + + case stencil::TN: + elementsPerCell = 1; + break; + + case stencil::SE: + elementsPerCell = 1; + break; + + case stencil::BE: + elementsPerCell = 1; + break; + + case stencil::E: + elementsPerCell = 5; + break; + + case stencil::TE: + elementsPerCell = 1; + break; + + case stencil::NE: + elementsPerCell = 1; + break; + + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof(double); +} + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.h new file mode 100644 index 0000000000..d2c205023c --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.h @@ -0,0 +1,84 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. 
waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoPdfDoublePrecision.h +//! \\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, +// lbmpy_walberla/pystencils_walberla from waLBerla commit +// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#pragma once +#include "communication/UniformPackInfo.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "domain_decomposition/IBlock.h" +#include "field/GhostLayerField.h" +#include "stencil/Directions.h" + +#define FUNC_PREFIX + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace pystencils { + +class PackInfoPdfDoublePrecision + : public ::walberla::communication::UniformPackInfo { +public: + PackInfoPdfDoublePrecision(BlockDataID pdfsID_) : pdfsID(pdfsID_){}; + virtual ~PackInfoPdfDoublePrecision() {} + + bool constantDataExchange() const { return true; } + bool threadsafeReceiving() const { return true; } + + void unpackData(IBlock *receiver, stencil::Direction dir, + mpi::RecvBuffer &buffer) { + const auto dataSize = size(dir, receiver); + unpack(dir, buffer.skip(dataSize), receiver); + } + + void communicateLocal(const IBlock *sender, IBlock *receiver, + 
stencil::Direction dir) { + mpi::SendBuffer sBuffer; + packData(sender, dir, sBuffer); + mpi::RecvBuffer rBuffer(sBuffer); + unpackData(receiver, stencil::inverseDir[dir], rBuffer); + } + + void packDataImpl(const IBlock *sender, stencil::Direction dir, + mpi::SendBuffer &outBuffer) const { + const auto dataSize = size(dir, sender); + pack(dir, outBuffer.forward(dataSize), const_cast<IBlock *>(sender)); + } + + void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; + void unpack(stencil::Direction dir, unsigned char *buffer, + IBlock *block) const; + uint_t size(stencil::Direction dir, const IBlock *block) const; + +private: + BlockDataID pdfsID; +}; + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp new file mode 100644 index 0000000000..5beb6eb918 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp @@ -0,0 +1,1358 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \\file PackInfoPdfSinglePrecision.cpp +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "PackInfoPdfSinglePrecision.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "stencil/Directions.h" + +#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +namespace walberla { +namespace pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_pack_SW { +static FUNC_PREFIX void pack_SW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_SW + +namespace internal_pack_BW { +static FUNC_PREFIX void pack_BW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 
= 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BW + +namespace internal_pack_W { +static FUNC_PREFIX void pack_W(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // namespace 
internal_pack_W + +namespace internal_pack_TW { +static FUNC_PREFIX void pack_TW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TW + +namespace internal_pack_NW { +static FUNC_PREFIX void pack_NW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_NW + +namespace internal_pack_BS { +static FUNC_PREFIX void pack_BS(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + 
for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BS + +namespace internal_pack_S { +static FUNC_PREFIX void pack_S(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_S + +namespace internal_pack_TS { +static FUNC_PREFIX void pack_TS(float *RESTRICT 
_data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TS + +namespace internal_pack_B { +static FUNC_PREFIX void pack_B(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 
* ctr_2 + 18 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_B + +namespace internal_pack_T { +static FUNC_PREFIX void pack_T(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_T + +namespace internal_pack_BN { +static FUNC_PREFIX void pack_BN(float *RESTRICT _data_buffer, float 
*RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BN + +namespace internal_pack_N { +static FUNC_PREFIX void pack_N(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * 
_stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_N + +namespace internal_pack_TN { +static FUNC_PREFIX void pack_TN(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TN + +namespace internal_pack_SE { +static FUNC_PREFIX void pack_SE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_SE + +namespace internal_pack_BE { +static FUNC_PREFIX void pack_BE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const 
_size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BE + +namespace internal_pack_E { +static FUNC_PREFIX void pack_E(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 
* ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_E + +namespace internal_pack_TE { +static FUNC_PREFIX void pack_TE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TE + +namespace internal_pack_NE { +static FUNC_PREFIX void pack_NE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_NE + +namespace internal_unpack_SW { +static FUNC_PREFIX void unpack_SW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, 
int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_SW + +namespace internal_unpack_BW { +static FUNC_PREFIX void unpack_BW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BW + +namespace internal_unpack_W { +static FUNC_PREFIX void unpack_W(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + 
_stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_W + +namespace internal_unpack_TW { +static FUNC_PREFIX void unpack_TW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TW + +namespace internal_unpack_NW { +static FUNC_PREFIX void unpack_NW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; 
ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_NW + +namespace internal_unpack_BS { +static FUNC_PREFIX void unpack_BS(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BS + +namespace internal_unpack_S { +static FUNC_PREFIX void unpack_S(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = 
_data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_S + +namespace internal_unpack_TS { +static FUNC_PREFIX void unpack_TS(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TS + +namespace internal_unpack_B { +static FUNC_PREFIX void unpack_B(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) 
{ + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_B + +namespace internal_unpack_T { +static FUNC_PREFIX void unpack_T(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = 
_data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_T + +namespace internal_unpack_BN { +static FUNC_PREFIX void unpack_BN(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BN + +namespace internal_unpack_N { +static FUNC_PREFIX void unpack_N(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) 
{ + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_N + +namespace internal_unpack_TN { +static FUNC_PREFIX void unpack_TN(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TN + +namespace internal_unpack_SE { +static FUNC_PREFIX void unpack_SE(float *RESTRICT 
const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_SE + +namespace internal_unpack_BE { +static FUNC_PREFIX void unpack_BE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BE + +namespace internal_unpack_E { +static FUNC_PREFIX void unpack_E(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + 
_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_E + +namespace internal_unpack_TE { +static FUNC_PREFIX void unpack_TE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TE + +namespace internal_unpack_NE { +static FUNC_PREFIX void unpack_NE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t 
const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_NE + +void PackInfoPdfSinglePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + float *buffer = reinterpret_cast(byte_buffer); + + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + internal_pack_SW::pack_SW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BW::pack_BW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::W: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = 
int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_W::pack_W(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TW::pack_TW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, 
_stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_NW::pack_NW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BS: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t 
_size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BS::pack_BS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::S: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_S::pack_S(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TS: { + float *RESTRICT _data_buffer = buffer; + 
WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TS::pack_TS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::B: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), 
int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_B::pack_B(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::T: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_T::pack_T(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BN: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), 
-int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BN::pack_BN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::N: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = 
int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_N::pack_N(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TN: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TN::pack_TN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::SE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs 
= pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_SE::pack_SE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const 
int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BE::pack_BE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::E: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_E::pack_E(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + 
const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TE::pack_TE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_NE::pack_NE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, 
_size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +void PackInfoPdfSinglePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + float *buffer = reinterpret_cast(byte_buffer); + + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch (communciationDirection) { + case stencil::SW: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_SW::unpack_SW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BW: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), 
-int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BW::unpack_BW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::W: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = 
int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_W::unpack_W(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TW: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TW::unpack_TW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NW: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT 
_data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_NW::unpack_NW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BS: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = 
int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BS::unpack_BS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::S: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_S::unpack_S(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TS: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + 
WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TS::unpack_TS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::B: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + internal_unpack_B::unpack_B(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::T: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_T::unpack_T(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BN: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = 
int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BN::unpack_BN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::N: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_N::unpack_N(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, 
_stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TN: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TN::unpack_TN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::SE: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) 
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_SE::unpack_SE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BE: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BE::unpack_BE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::E: { + float *RESTRICT const _data_buffer 
= buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_E::unpack_E(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TE: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), 
int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TE::unpack_TE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NE: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_NE::unpack_NE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +uint_t PackInfoPdfSinglePrecision::size(stencil::Direction dir, const IBlock *block) const { + auto pdfs = block->getData>(pdfsID); + + 
CellInterval ci; + pdfs->getGhostRegion(dir, ci, 1, false); + + uint_t elementsPerCell = 0; + + switch (dir) { + case stencil::SW: + elementsPerCell = 1; + break; + + case stencil::BW: + elementsPerCell = 1; + break; + + case stencil::W: + elementsPerCell = 5; + break; + + case stencil::TW: + elementsPerCell = 1; + break; + + case stencil::NW: + elementsPerCell = 1; + break; + + case stencil::BS: + elementsPerCell = 1; + break; + + case stencil::S: + elementsPerCell = 5; + break; + + case stencil::TS: + elementsPerCell = 1; + break; + + case stencil::B: + elementsPerCell = 5; + break; + + case stencil::T: + elementsPerCell = 5; + break; + + case stencil::BN: + elementsPerCell = 1; + break; + + case stencil::N: + elementsPerCell = 5; + break; + + case stencil::TN: + elementsPerCell = 1; + break; + + case stencil::SE: + elementsPerCell = 1; + break; + + case stencil::BE: + elementsPerCell = 1; + break; + + case stencil::E: + elementsPerCell = 5; + break; + + case stencil::TE: + elementsPerCell = 1; + break; + + case stencil::NE: + elementsPerCell = 1; + break; + + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof(float); +} + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.h new file mode 100644 index 0000000000..9dd84b0c62 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.h @@ -0,0 +1,84 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. 
waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoPdfSinglePrecision.h +//! \\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, +// lbmpy_walberla/pystencils_walberla from waLBerla commit +// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#pragma once +#include "communication/UniformPackInfo.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "domain_decomposition/IBlock.h" +#include "field/GhostLayerField.h" +#include "stencil/Directions.h" + +#define FUNC_PREFIX + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace pystencils { + +class PackInfoPdfSinglePrecision + : public ::walberla::communication::UniformPackInfo { +public: + PackInfoPdfSinglePrecision(BlockDataID pdfsID_) : pdfsID(pdfsID_){}; + virtual ~PackInfoPdfSinglePrecision() {} + + bool constantDataExchange() const { return true; } + bool threadsafeReceiving() const { return true; } + + void unpackData(IBlock *receiver, stencil::Direction dir, + mpi::RecvBuffer &buffer) { + const auto dataSize = size(dir, receiver); + unpack(dir, buffer.skip(dataSize), receiver); + } + + void communicateLocal(const IBlock *sender, IBlock *receiver, + 
stencil::Direction dir) { + mpi::SendBuffer sBuffer; + packData(sender, dir, sBuffer); + mpi::RecvBuffer rBuffer(sBuffer); + unpackData(receiver, stencil::inverseDir[dir], rBuffer); + } + + void packDataImpl(const IBlock *sender, stencil::Direction dir, + mpi::SendBuffer &outBuffer) const { + const auto dataSize = size(dir, sender); + pack(dir, outBuffer.forward(dataSize), const_cast(sender)); + } + + void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; + void unpack(stencil::Direction dir, unsigned char *buffer, + IBlock *block) const; + uint_t size(stencil::Direction dir, const IBlock *block) const; + +private: + BlockDataID pdfsID; +}; + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp new file mode 100644 index 0000000000..5e94631ed0 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp @@ -0,0 +1,212 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoVecDoublePrecision.cpp +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "PackInfoVecDoublePrecision.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "stencil/Directions.h" + +#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +namespace walberla { +namespace pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_05a1eb9a7382e5e7047cdb22e28b6556 { +static FUNC_PREFIX void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT _data_buffer, double *RESTRICT const _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_field_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_field_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_field_0; ctr_0 += 1) { + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2] = 
_data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3]; + } + } + } +} +} // namespace internal_05a1eb9a7382e5e7047cdb22e28b6556 + +namespace internal_1ccccad4ca561e07a0934cadb07d0fc1 { +static FUNC_PREFIX void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT const _data_buffer, double *RESTRICT _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_field_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_field_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_field_0; ctr_0 += 1) { + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2]; + } + } + } +} +} // namespace internal_1ccccad4ca561e07a0934cadb07d0fc1 + +void PackInfoVecDoublePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + double *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case 
stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + double *RESTRICT const _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + internal_05a1eb9a7382e5e7047cdb22e28b6556::pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +void PackInfoVecDoublePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + double *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch (communciationDirection) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case 
stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + double *RESTRICT _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + internal_1ccccad4ca561e07a0934cadb07d0fc1::unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +uint_t PackInfoVecDoublePrecision::size(stencil::Direction dir, const IBlock *block) const { + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + + uint_t elementsPerCell = 0; + + switch (dir) { + case stencil::SW: + case 
stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: + elementsPerCell = 3; + break; + + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof(double); +} + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.h new file mode 100644 index 0000000000..4cd1dc0869 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.h @@ -0,0 +1,84 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoVecDoublePrecision.h +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, +// lbmpy_walberla/pystencils_walberla from waLBerla commit +// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#pragma once +#include "communication/UniformPackInfo.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "domain_decomposition/IBlock.h" +#include "field/GhostLayerField.h" +#include "stencil/Directions.h" + +#define FUNC_PREFIX + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace pystencils { + +class PackInfoVecDoublePrecision + : public ::walberla::communication::UniformPackInfo {
public: + PackInfoVecDoublePrecision(BlockDataID fieldID_) : fieldID(fieldID_){}; + virtual ~PackInfoVecDoublePrecision() {} + + bool constantDataExchange() const { return true; } + bool threadsafeReceiving() const { return true; } + + void unpackData(IBlock *receiver, stencil::Direction dir, + mpi::RecvBuffer &buffer) { + const auto dataSize = size(dir, receiver); + unpack(dir, buffer.skip(dataSize), receiver); + } + + void communicateLocal(const IBlock *sender, IBlock *receiver, + stencil::Direction dir) { + mpi::SendBuffer sBuffer; + packData(sender, dir, sBuffer); + mpi::RecvBuffer rBuffer(sBuffer); + unpackData(receiver, stencil::inverseDir[dir], rBuffer); + } + + void packDataImpl(const IBlock *sender, stencil::Direction dir, + mpi::SendBuffer &outBuffer) const { + const auto dataSize = size(dir, sender); + pack(dir, outBuffer.forward(dataSize), const_cast<IBlock *>(sender)); + } + + void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; + void unpack(stencil::Direction dir, unsigned char *buffer, + IBlock *block) const; + uint_t size(stencil::Direction dir, const IBlock *block) const; + +private: + BlockDataID fieldID; +}; 
+ +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp new file mode 100644 index 0000000000..a9dea10421 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp @@ -0,0 +1,212 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoVecSinglePrecision.cpp +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "PackInfoVecSinglePrecision.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "stencil/Directions.h" + +#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +namespace walberla { +namespace pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_05a1eb9a7382e5e7047cdb22e28b6556 { +static FUNC_PREFIX void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(float *RESTRICT _data_buffer, float *RESTRICT const _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_field_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_field_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_field_0; ctr_0 += 1) { + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2] = 
_data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3]; + } + } + } +} +} // namespace internal_05a1eb9a7382e5e7047cdb22e28b6556 + +namespace internal_1ccccad4ca561e07a0934cadb07d0fc1 { +static FUNC_PREFIX void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(float *RESTRICT const _data_buffer, float *RESTRICT _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_field_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_field_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_field_0; ctr_0 += 1) { + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2]; + } + } + } +} +} // namespace internal_1ccccad4ca561e07a0934cadb07d0fc1 + +void PackInfoVecSinglePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + float *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case 
stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + float *RESTRICT const _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + internal_05a1eb9a7382e5e7047cdb22e28b6556::pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +void PackInfoVecSinglePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + float *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch (communciationDirection) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + 
case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + float *RESTRICT _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + internal_1ccccad4ca561e07a0934cadb07d0fc1::unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +uint_t PackInfoVecSinglePrecision::size(stencil::Direction dir, const IBlock *block) const { + auto field = block->getData<field::GhostLayerField<float, 3>>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + + uint_t elementsPerCell = 0; + + switch (dir) { + case stencil::SW: + case stencil::BW: + case 
stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: + elementsPerCell = 3; + break; + + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof(float); +} + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.h new file mode 100644 index 0000000000..1d0e7936f9 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.h @@ -0,0 +1,84 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoVecSinglePrecision.h +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, +// lbmpy_walberla/pystencils_walberla from waLBerla commit +// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#pragma once +#include "communication/UniformPackInfo.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "domain_decomposition/IBlock.h" +#include "field/GhostLayerField.h" +#include "stencil/Directions.h" + +#define FUNC_PREFIX + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace pystencils { + +class PackInfoVecSinglePrecision + : public ::walberla::communication::UniformPackInfo {
public: + PackInfoVecSinglePrecision(BlockDataID fieldID_) : fieldID(fieldID_){}; + virtual ~PackInfoVecSinglePrecision() {} + + bool constantDataExchange() const { return true; } + bool threadsafeReceiving() const { return true; } + + void unpackData(IBlock *receiver, stencil::Direction dir, + mpi::RecvBuffer &buffer) { + const auto dataSize = size(dir, receiver); + unpack(dir, buffer.skip(dataSize), receiver); + } + + void communicateLocal(const IBlock *sender, IBlock *receiver, + stencil::Direction dir) { + mpi::SendBuffer sBuffer; + packData(sender, dir, sBuffer); + mpi::RecvBuffer rBuffer(sBuffer); + unpackData(receiver, stencil::inverseDir[dir], rBuffer); + } + + void packDataImpl(const IBlock *sender, stencil::Direction dir, + mpi::SendBuffer &outBuffer) const { + const auto dataSize = size(dir, sender); + pack(dir, outBuffer.forward(dataSize), const_cast<IBlock *>(sender)); + } + + void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; + void unpack(stencil::Direction dir, unsigned char *buffer, + IBlock *block) const; + uint_t size(stencil::Direction dir, const IBlock *block) const; + +private: + BlockDataID fieldID; +}; 
+ +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp index 6d5d4fc79a..c6df7eb3ce 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp @@ -27,6 +27,10 @@ #include "generated_kernels/FieldAccessorsSinglePrecision.h" #include "generated_kernels/InitialPDFsSetterDoublePrecision.h" #include "generated_kernels/InitialPDFsSetterSinglePrecision.h" +#include "generated_kernels/PackInfoPdfDoublePrecision.h" +#include "generated_kernels/PackInfoPdfSinglePrecision.h" +#include "generated_kernels/PackInfoVecDoublePrecision.h" +#include "generated_kernels/PackInfoVecSinglePrecision.h" #ifdef __AVX2__ #include "generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.h" @@ -64,6 +68,8 @@ template struct KernelTrait { using StreamSweep = pystencils::StreamSweepDoublePrecision; #endif using InitialPDFsSetter = pystencils::InitialPDFsSetterDoublePrecision; + using PackInfoPdf = pystencils::PackInfoPdfDoublePrecision; + using PackInfoVec = pystencils::PackInfoVecDoublePrecision; }; template <> struct KernelTrait { @@ -81,6 +87,8 @@ template <> struct KernelTrait { using StreamSweep = pystencils::StreamSweepSinglePrecision; #endif using InitialPDFsSetter = pystencils::InitialPDFsSetterSinglePrecision; + using PackInfoPdf = pystencils::PackInfoPdfSinglePrecision; + using PackInfoVec = pystencils::PackInfoVecSinglePrecision; }; template