From 3fd170980bed30c430a9b0264e9504632b4b7326 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Fri, 12 May 2023 19:22:33 +0200
Subject: [PATCH] Integrate waLBerla library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the lattice-Boltzmann and electrokinetics features by a
new implementation based on waLBerla, using PyStencils and LbmPy.

Co-authored-by: Rudolf Weeber <weeber@icp.uni-stuttgart.de>
Co-authored-by: Jean-Noël Grad <jgrad@icp.uni-stuttgart.de>
Co-authored-by: Alexander Reinauer <areinauer@icp.uni-stuttgart.de>
Co-authored-by: Ingo Tischler <ingo.tischler@gmx.de>
Co-authored-by: Sebastian Bindgen <info@bindgen.net>
Co-authored-by: Christoph Lohrmann <clohrmann@icp.uni-stuttgart.de>
Co-authored-by: Patrick Kreissl <patrick.kreissl@pa-le.de>
Co-authored-by: RiccardoFrenner <riccardo.frenner@gmail.com>
Co-authored-by: stekajack <avantardejack@gmail.com>
Co-authored-by: Kai Szuttor <kai@icp.uni-stuttgart.de>
Co-authored-by: capomav <chinmaypabshettiwar@gmail.com>
---
 .codecov.yml                                  |    3 +
 .gitlab-ci.yml                                |   22 +-
 .pre-commit-config.yaml                       |    4 +-
 CMakeLists.txt                                |   84 +
 cmake/FindFFTW3.cmake                         |    9 +-
 cmake/espresso_cmake_config.cmakein           |    4 +
 cmake/espresso_enable_avx2_support.cmake      |   67 +
 doc/bibliography.bib                          |   56 +-
 doc/sphinx/advanced_methods.rst               |   17 +-
 doc/sphinx/constraints.rst                    |    4 +-
 doc/sphinx/ek.rst                             |  412 +-
 doc/sphinx/installation.rst                   |   47 +-
 doc/sphinx/integration.rst                    |    4 +-
 doc/sphinx/io.rst                             |   45 +-
 doc/sphinx/lb.rst                             |  397 +-
 doc/sphinx/particles.rst                      |    1 -
 .../active_matter/active_matter.ipynb         |   23 +-
 doc/tutorials/electrokinetics/CMakeLists.txt  |    3 +-
 .../electrokinetics/electrokinetics.ipynb     |  204 +-
 .../lattice_boltzmann_poiseuille_flow.ipynb   |   20 +-
 .../lattice_boltzmann_sedimentation.ipynb     |   16 +-
 .../lattice_boltzmann_theory.ipynb            |   50 +-
 doc/tutorials/polymers/polymers.ipynb         |    9 +-
 .../raspberry_electrophoresis.ipynb           |    6 +-
 maintainer/CI/build_cmake.sh                  |   23 +-
 maintainer/benchmarks/CMakeLists.txt          |   10 +
 maintainer/benchmarks/lb.py                   |   66 +-
 maintainer/benchmarks/runner.sh               |    2 +-
 maintainer/benchmarks/suite.sh                |    1 +
 maintainer/configs/maxset.hpp                 |    8 -
 maintainer/configs/no_rotation.hpp            |   12 -
 maintainer/walberla_kernels/Readme.md         |   59 +
 .../code_generation_context.py                |  146 +
 .../custom_additional_extensions.py           |  349 ++
 maintainer/walberla_kernels/ekin.py           |  214 +
 .../walberla_kernels/generate_ek_kernels.py   |  225 +
 .../walberla_kernels/generate_lb_kernels.py   |  205 +
 maintainer/walberla_kernels/lbmpy_espresso.py |   81 +
 maintainer/walberla_kernels/lees_edwards.py   |  129 +
 .../walberla_kernels/pystencils_espresso.py   |  162 +
 .../walberla_kernels/relaxation_rates.py      |   54 +
 .../templates/Boundary.tmpl.h                 |  306 ++
 .../templates/FieldAccessors.tmpl.h           |  437 ++
 .../templates/ReactionKernelSelector.tmpl.h   |  108 +
 .../walberla_lbm_generation.py                |  212 +
 requirements.txt                              |    7 +-
 samples/ekboundaries.py                       |   77 -
 .../sampleImmersedBoundary.py                 |   19 +-
 samples/lb_circular_couette.py                |  201 +
 samples/lb_four_roller_mill.py                |  189 +
 samples/lb_planar_couette.py                  |  108 +
 samples/lb_profile.py                         |   12 +-
 samples/lbf.py                                |    8 +-
 samples/object_in_fluid/motivation.py         |   66 +-
 samples/visualization_lbboundaries.py         |   10 +-
 samples/visualization_poiseuille.py           |   18 +-
 src/CMakeLists.txt                            |    4 +
 src/config/features.def                       |    9 +-
 src/config/myconfig-default.hpp               |    8 -
 src/core/CMakeLists.txt                       |   26 +-
 src/core/analysis/statistics.cpp              |    5 +-
 src/core/communication.cpp                    |   10 +
 src/core/cuda_utils.cuh                       |   13 -
 src/core/electrostatics/coulomb.cpp           |    7 -
 src/core/event.cpp                            |   44 +-
 src/core/event.hpp                            |    6 +-
 src/core/forces.cpp                           |    7 +-
 src/core/grid.cpp                             |   12 -
 src/core/grid.hpp                             |    3 -
 src/core/grid_based_algorithms/CMakeLists.txt |   18 +-
 .../grid_based_algorithms/EKReactions.hpp     |   60 +
 .../grid_based_algorithms/OptionalCounter.hpp |   58 -
 .../grid_based_algorithms/ek_container.cpp    |  110 +
 .../grid_based_algorithms/ek_container.hpp    |   48 +
 .../ek_reactions.cpp}                         |   34 +-
 .../grid_based_algorithms/ek_reactions.hpp}   |   30 +-
 .../grid_based_algorithms/electrokinetics.hpp |  192 -
 .../electrokinetics_cuda.cu                   | 3842 -----------------
 .../fd-electrostatics.cuh                     |   68 -
 .../fd-electrostatics_cuda.cu                 |  221 -
 src/core/grid_based_algorithms/halo.cpp       |  264 --
 src/core/grid_based_algorithms/halo.hpp       |  135 -
 src/core/grid_based_algorithms/lattice.cpp    |  126 -
 src/core/grid_based_algorithms/lattice.hpp    |  118 -
 src/core/grid_based_algorithms/lb-d3q19.hpp   |   99 -
 src/core/grid_based_algorithms/lb.cpp         | 1353 ------
 src/core/grid_based_algorithms/lb.hpp         |  268 --
 .../grid_based_algorithms/lb_boundaries.cpp   |  317 --
 .../grid_based_algorithms/lb_boundaries.hpp   |   69 -
 .../lb_collective_interface.cpp               |  180 -
 .../lb_collective_interface.hpp               |   50 -
 .../grid_based_algorithms/lb_constants.hpp    |   43 -
 .../grid_based_algorithms/lb_interface.cpp    | 1146 +----
 .../grid_based_algorithms/lb_interface.hpp    |  203 +-
 .../lb_interpolation.cpp                      |  130 +-
 .../lb_interpolation.hpp                      |   31 +-
 .../lb_particle_coupling.cpp                  |  310 +-
 .../lb_particle_coupling.hpp                  |  104 +-
 .../lb_walberla_instance.cpp                  |  108 +
 .../lb_walberla_instance.hpp                  |   58 +
 .../lbboundaries/LBBoundary.hpp               |  108 -
 src/core/grid_based_algorithms/lbgpu.cpp      |  214 -
 src/core/grid_based_algorithms/lbgpu.cuh      |  217 -
 src/core/grid_based_algorithms/lbgpu.hpp      |  231 -
 src/core/grid_based_algorithms/lbgpu_cuda.cu  | 2703 ------------
 src/core/integrate.cpp                        |   59 +-
 src/core/lees_edwards/lees_edwards.hpp        |    7 +-
 ...BFluxDensityProfileAtParticlePositions.cpp |    5 +-
 .../CylindricalLBVelocityProfile.cpp          |    4 +-
 ...alLBVelocityProfileAtParticlePositions.cpp |    3 +-
 .../observables/LBFluidPressureTensor.hpp     |    9 +-
 src/core/observables/LBVelocityProfile.cpp    |    3 +-
 src/core/thermostat.cpp                       |    9 +-
 src/core/unit_tests/CMakeLists.txt            |   13 +-
 .../EspressoSystemStandAlone_test.cpp         |   20 +-
 src/core/unit_tests/Lattice_test.cpp          |  125 -
 src/core/unit_tests/Verlet_list_test.cpp      |   20 +-
 src/core/unit_tests/ek_interface_test.cpp     |  145 +
 src/core/unit_tests/lb_exceptions.cpp         |  100 -
 .../unit_tests/lb_particle_coupling_test.cpp  |  612 +++
 src/core/unit_tests/particle_management.hpp   |   48 +
 src/core/virtual_sites/CMakeLists.txt         |    4 +-
 src/core/virtual_sites/VirtualSites.hpp       |    2 +-
 .../VirtualSitesInertialessTracers.cpp        |   92 +-
 .../VirtualSitesInertialessTracers.hpp        |    2 +-
 .../virtual_sites/lb_inertialess_tracers.cpp  |  309 --
 .../lb_inertialess_tracers_cuda.cu            |  408 --
 .../lb_inertialess_tracers_cuda_interface.cpp |  123 -
 .../lb_inertialess_tracers_cuda_interface.hpp |   61 -
 src/python/espressomd/CMakeLists.txt          |    1 +
 .../espressomd/detail}/CMakeLists.txt         |    6 +-
 src/python/espressomd/detail/__init__.py      |   18 +
 src/python/espressomd/detail/walberla.py      |  171 +
 src/python/espressomd/electrokinetics.pxd     |  166 -
 src/python/espressomd/electrokinetics.py      |  700 +++
 src/python/espressomd/electrokinetics.pyx     |  511 ---
 src/python/espressomd/io/CMakeLists.txt       |    1 +
 src/python/espressomd/io/vtk.py               |  135 +
 src/python/espressomd/lb.pxd                  |  217 -
 src/python/espressomd/lb.py                   |  765 ++++
 src/python/espressomd/lb.pyx                  |  809 ----
 src/python/espressomd/lbboundaries.py         |  104 -
 src/python/espressomd/script_interface.pyx    |   33 +-
 src/python/espressomd/shapes.py               |    2 +-
 src/python/espressomd/system.py               |   23 +-
 src/python/espressomd/thermostat.pxd          |   11 +
 src/python/espressomd/thermostat.pyx          |   28 +-
 src/python/espressomd/visualization.py        |   66 +-
 src/script_interface/CMakeLists.txt           |    2 +-
 src/script_interface/initialize.cpp           |    6 +-
 .../lbboundaries/LBBoundaries.hpp             |   54 -
 .../lbboundaries/LBBoundary.hpp               |   97 -
 .../lees_edwards/LeesEdwards.hpp              |    6 +
 src/script_interface/shapes/Shape.hpp         |   14 +
 src/script_interface/walberla/CMakeLists.txt  |   31 +
 src/script_interface/walberla/EKContainer.hpp |  159 +
 src/script_interface/walberla/EKFFT.hpp       |   85 +
 src/script_interface/walberla/EKNone.hpp      |   65 +
 .../walberla/EKPoissonSolver.hpp              |   43 +
 src/script_interface/walberla/EKReactant.hpp  |   65 +
 src/script_interface/walberla/EKReaction.hpp  |  177 +
 src/script_interface/walberla/EKReactions.hpp |   47 +
 src/script_interface/walberla/EKSpecies.cpp   |  292 ++
 src/script_interface/walberla/EKSpecies.hpp   |  139 +
 .../walberla/EKSpeciesNode.cpp                |  120 +
 .../walberla/EKSpeciesNode.hpp                |   73 +
 .../walberla/EKSpeciesSlice.cpp               |  100 +
 .../walberla/EKSpeciesSlice.hpp               |  147 +
 src/script_interface/walberla/LBFluid.cpp     |  373 ++
 src/script_interface/walberla/LBFluid.hpp     |  149 +
 src/script_interface/walberla/LBFluidNode.cpp |  147 +
 src/script_interface/walberla/LBFluidNode.hpp |   81 +
 .../walberla/LBFluidSlice.cpp                 |  122 +
 .../walberla/LBFluidSlice.hpp                 |  144 +
 .../walberla/LatticeIndices.hpp               |   68 +
 .../walberla/LatticeModel.hpp                 |   94 +
 .../walberla/LatticeSlice.hpp                 |   99 +
 .../walberla/LatticeSlice.impl.hpp            |  214 +
 .../walberla/LatticeWalberla.hpp              |   80 +
 src/script_interface/walberla/VTKHandle.hpp   |  225 +
 .../walberla/WalberlaCheckpoint.hpp           |  250 ++
 src/script_interface/walberla/initialize.cpp  |   78 +
 .../{lbboundaries => walberla}/initialize.hpp |   19 +-
 src/shapes/CMakeLists.txt                     |    4 +-
 src/shapes/include/shapes/Shape.hpp           |    9 +
 src/shapes/src/Shape.cpp                      |   45 +
 src/shapes/unit_tests/Wall_test.cpp           |   47 +
 src/utils/include/utils/Vector.hpp            |    1 -
 src/walberla_bridge/CMakeLists.txt            |   64 +
 .../include/walberla_bridge/Architecture.hpp  |   24 +
 .../include/walberla_bridge/BlockAndCell.hpp  |   88 +
 .../include/walberla_bridge/LatticeModel.hpp  |   87 +
 .../walberla_bridge/LatticeWalberla.hpp       |   80 +
 .../include/walberla_bridge/VTKHandle.hpp     |   73 +
 .../electrokinetics/EKContainer.hpp           |  135 +
 .../electrokinetics/EKWalberlaNodeState.hpp   |   39 +
 .../electrokinetics/EKinWalberlaBase.hpp      |  154 +
 .../electrokinetics/PoissonSolver/FFT.hpp     |  140 +
 .../electrokinetics/PoissonSolver/None.hpp    |   60 +
 .../PoissonSolver/PoissonSolver.hpp           |   60 +
 .../electrokinetics/ek_poisson_fft_init.hpp   |   30 +
 .../electrokinetics/ek_poisson_none_init.hpp  |   30 +
 .../electrokinetics/ek_walberla_init.hpp      |   34 +
 .../electrokinetics/reactions/EKReactant.hpp  |   60 +
 .../reactions/EKReactionBase.hpp              |   59 +
 .../lattice_boltzmann/LBWalberlaBase.hpp      |  260 ++
 .../lattice_boltzmann/LBWalberlaNodeState.hpp |   39 +
 .../lattice_boltzmann/LeesEdwardsPack.hpp     |   38 +
 .../lattice_boltzmann/lb_walberla_init.hpp    |   30 +
 .../walberla_bridge/utils/ResourceManager.hpp |   82 +
 .../walberla_bridge/utils/boundary_utils.hpp  |  118 +
 .../walberla_bridge/utils/walberla_utils.hpp  |   79 +
 .../include/walberla_bridge/walberla_init.hpp |   34 +
 src/walberla_bridge/src/BoundaryHandling.hpp  |  190 +
 src/walberla_bridge/src/CMakeLists.txt        |   24 +
 src/walberla_bridge/src/LatticeModel.cpp      |   89 +
 src/walberla_bridge/src/LatticeWalberla.cpp   |  115 +
 .../src/electrokinetics/CMakeLists.txt        |   27 +
 .../src/electrokinetics/EKinWalberlaImpl.hpp  |  770 ++++
 .../src/electrokinetics/ek_kernels.hpp        |   65 +
 .../electrokinetics/ek_poisson_fft_init.cpp   |   33 +
 .../electrokinetics/ek_poisson_none_init.cpp  |   33 +
 .../src/electrokinetics/ek_walberla_init.cpp  |   43 +
 .../AdvectiveFluxKernel_double_precision.cpp  | 1712 ++++++++
 .../AdvectiveFluxKernel_double_precision.h    |  104 +
 .../AdvectiveFluxKernel_single_precision.cpp  | 1712 ++++++++
 .../AdvectiveFluxKernel_single_precision.h    |  104 +
 .../generated_kernels/CMakeLists.txt          |   29 +
 .../ContinuityKernel_double_precision.cpp     |  179 +
 .../ContinuityKernel_double_precision.h       |  102 +
 .../ContinuityKernel_single_precision.cpp     |  179 +
 .../ContinuityKernel_single_precision.h       |  102 +
 .../DensityPackInfo_double_precision.cpp      | 1484 +++++++
 .../DensityPackInfo_double_precision.h        |   67 +
 .../DensityPackInfo_single_precision.cpp      | 1484 +++++++
 .../DensityPackInfo_single_precision.h        |   67 +
 ...rnelWithElectrostatic_double_precision.cpp | 1200 +++++
 ...KernelWithElectrostatic_double_precision.h |  114 +
 ...rnelWithElectrostatic_single_precision.cpp | 1218 ++++++
 ...KernelWithElectrostatic_single_precision.h |  114 +
 .../DiffusiveFluxKernel_double_precision.cpp  |  873 ++++
 .../DiffusiveFluxKernel_double_precision.h    |  104 +
 .../DiffusiveFluxKernel_single_precision.cpp  |  873 ++++
 .../DiffusiveFluxKernel_single_precision.h    |  104 +
 .../Dirichlet_double_precision.cpp            |  110 +
 .../Dirichlet_double_precision.h              |  190 +
 .../Dirichlet_single_precision.cpp            |  110 +
 .../Dirichlet_single_precision.h              |  190 +
 .../FixedFlux_double_precision.cpp            |  213 +
 .../FixedFlux_double_precision.h              |  737 ++++
 .../FixedFlux_single_precision.cpp            |  213 +
 .../FixedFlux_single_precision.h              |  737 ++++
 ...rictionCouplingKernel_double_precision.cpp |  191 +
 .../FrictionCouplingKernel_double_precision.h |  105 +
 ...rictionCouplingKernel_single_precision.cpp |  191 +
 .../FrictionCouplingKernel_single_precision.h |  105 +
 .../electrokinetics/reactions/CMakeLists.txt  |   23 +
 .../reactions/EKReactionImplBulk.cpp          |   42 +
 .../reactions/EKReactionImplBulk.hpp          |   46 +
 .../reactions/EKReactionImplIndexed.cpp       |  212 +
 .../reactions/EKReactionImplIndexed.hpp       |   69 +
 .../generated_kernels/CMakeLists.txt          |   27 +
 .../ReactionKernelBulk_1_double_precision.cpp |  124 +
 .../ReactionKernelBulk_1_double_precision.h   |  107 +
 .../ReactionKernelBulk_1_single_precision.cpp |  124 +
 .../ReactionKernelBulk_1_single_precision.h   |  106 +
 .../ReactionKernelBulk_2_double_precision.cpp |  146 +
 .../ReactionKernelBulk_2_double_precision.h   |  112 +
 .../ReactionKernelBulk_2_single_precision.cpp |  146 +
 .../ReactionKernelBulk_2_single_precision.h   |  112 +
 .../ReactionKernelBulk_3_double_precision.cpp |  168 +
 .../ReactionKernelBulk_3_double_precision.h   |  116 +
 .../ReactionKernelBulk_3_single_precision.cpp |  168 +
 .../ReactionKernelBulk_3_single_precision.h   |  118 +
 .../ReactionKernelBulk_4_double_precision.cpp |  190 +
 .../ReactionKernelBulk_4_double_precision.h   |  121 +
 .../ReactionKernelBulk_4_single_precision.cpp |  190 +
 .../ReactionKernelBulk_4_single_precision.h   |  121 +
 .../ReactionKernelBulk_5_double_precision.cpp |  212 +
 .../ReactionKernelBulk_5_double_precision.h   |  126 +
 .../ReactionKernelBulk_5_single_precision.cpp |  212 +
 .../ReactionKernelBulk_5_single_precision.h   |  126 +
 .../ReactionKernelBulk_all.h                  |  155 +
 ...actionKernelIndexed_1_double_precision.cpp |  108 +
 ...ReactionKernelIndexed_1_double_precision.h |  198 +
 ...actionKernelIndexed_1_single_precision.cpp |  108 +
 ...ReactionKernelIndexed_1_single_precision.h |  198 +
 ...actionKernelIndexed_2_double_precision.cpp |  118 +
 ...ReactionKernelIndexed_2_double_precision.h |  207 +
 ...actionKernelIndexed_2_single_precision.cpp |  118 +
 ...ReactionKernelIndexed_2_single_precision.h |  207 +
 ...actionKernelIndexed_3_double_precision.cpp |  128 +
 ...ReactionKernelIndexed_3_double_precision.h |  211 +
 ...actionKernelIndexed_3_single_precision.cpp |  128 +
 ...ReactionKernelIndexed_3_single_precision.h |  210 +
 ...actionKernelIndexed_4_double_precision.cpp |  138 +
 ...ReactionKernelIndexed_4_double_precision.h |  217 +
 ...actionKernelIndexed_4_single_precision.cpp |  138 +
 ...ReactionKernelIndexed_4_single_precision.h |  217 +
 ...actionKernelIndexed_5_double_precision.cpp |  148 +
 ...ReactionKernelIndexed_5_double_precision.h |  224 +
 ...actionKernelIndexed_5_single_precision.cpp |  148 +
 ...ReactionKernelIndexed_5_single_precision.h |  224 +
 .../ReactionKernelIndexed_all.h               |  166 +
 .../src/lattice_boltzmann/CMakeLists.txt      |   28 +
 .../InterpolateAndShiftAtBoundary.hpp         |  157 +
 .../src/lattice_boltzmann/LBWalberlaImpl.hpp  | 1330 ++++++
 .../src/lattice_boltzmann/ResetForce.hpp      |   73 +
 .../generated_kernels/CMakeLists.txt          |   40 +
 ...CollideSweepDoublePrecisionLeesEdwards.cpp |  290 ++
 .../CollideSweepDoublePrecisionLeesEdwards.h  |  108 +
 ...lideSweepDoublePrecisionLeesEdwardsAVX.cpp |  399 ++
 ...ollideSweepDoublePrecisionLeesEdwardsAVX.h |  109 +
 ...CollideSweepDoublePrecisionThermalized.cpp |  568 +++
 .../CollideSweepDoublePrecisionThermalized.h  |  123 +
 ...lideSweepDoublePrecisionThermalizedAVX.cpp |  927 ++++
 ...ollideSweepDoublePrecisionThermalizedAVX.h |  123 +
 ...CollideSweepSinglePrecisionLeesEdwards.cpp |  290 ++
 .../CollideSweepSinglePrecisionLeesEdwards.h  |  108 +
 ...lideSweepSinglePrecisionLeesEdwardsAVX.cpp |  399 ++
 ...ollideSweepSinglePrecisionLeesEdwardsAVX.h |  109 +
 ...CollideSweepSinglePrecisionThermalized.cpp |  552 +++
 .../CollideSweepSinglePrecisionThermalized.h  |  123 +
 ...lideSweepSinglePrecisionThermalizedAVX.cpp |  895 ++++
 ...ollideSweepSinglePrecisionThermalizedAVX.h |  123 +
 .../Dynamic_UBB_double_precision.cpp          |  118 +
 .../Dynamic_UBB_double_precision.h            |  569 +++
 .../Dynamic_UBB_single_precision.cpp          |  118 +
 .../Dynamic_UBB_single_precision.h            |  569 +++
 .../FieldAccessorsDoublePrecision.h           |  832 ++++
 .../FieldAccessorsSinglePrecision.h           |  834 ++++
 .../InitialPDFsSetterDoublePrecision.cpp      |  234 +
 .../InitialPDFsSetterDoublePrecision.h        |  106 +
 .../InitialPDFsSetterSinglePrecision.cpp      |  234 +
 .../InitialPDFsSetterSinglePrecision.h        |  106 +
 .../StreamSweepDoublePrecision.cpp            |  338 ++
 .../StreamSweepDoublePrecision.h              |  116 +
 .../StreamSweepDoublePrecisionAVX.cpp         |  401 ++
 .../StreamSweepDoublePrecisionAVX.h           |  115 +
 .../StreamSweepSinglePrecision.cpp            |  338 ++
 .../StreamSweepSinglePrecision.h              |  116 +
 .../StreamSweepSinglePrecisionAVX.cpp         |  401 ++
 .../StreamSweepSinglePrecisionAVX.h           |  115 +
 .../generated_kernels/myintrin.h              |  127 +
 .../generated_kernels/philox_rand.h           | 1299 ++++++
 .../src/lattice_boltzmann/lb_kernels.hpp      |   92 +
 .../lattice_boltzmann/lb_walberla_init.cpp    |   38 +
 .../lattice_boltzmann/lb_walberla_init.cu}    |    2 +-
 src/walberla_bridge/src/walberla_init.cpp     |   54 +
 src/walberla_bridge/tests/CMakeLists.txt      |   74 +
 .../tests/EKinWalberlaImpl_unit_tests.cpp     |  583 +++
 .../tests/LBWalberlaImpl_bspline_tests.cpp    |  169 +
 .../tests/LBWalberlaImpl_flow_tests.cpp       |  173 +
 .../tests/LBWalberlaImpl_lees_edwards.cpp     |  186 +
 .../LBWalberlaImpl_statistical_tests.cpp      |  145 +
 .../tests/LBWalberlaImpl_unit_tests.cpp       |  632 +++
 .../tests/LatticeWalberla_unit_tests.cpp      |  130 +
 .../tests/ResourceManager_test.cpp            |   77 +
 .../tests/kernels_unit_tests.cpp              |  194 +
 src/walberla_bridge/tests/tests_common.hpp    |   85 +
 src/walberla_bridge/tests/tests_common_ek.hpp |   96 +
 src/walberla_bridge/tests/tests_common_lb.hpp |   94 +
 testsuite/python/CMakeLists.txt               |   63 +-
 testsuite/python/actor.py                     |   98 +-
 testsuite/python/array_properties.py          |    5 +-
 testsuite/python/ek_boundary.py               |  172 +
 testsuite/python/ek_bulk_reactions.py         |  155 +
 testsuite/python/ek_charged_plate.py          |  183 -
 testsuite/python/ek_diffusion.py              |  126 +
 testsuite/python/ek_eof.py                    |  199 +
 testsuite/python/ek_eof_one_species.py        |  492 ---
 testsuite/python/ek_fixeddensity.py           |  109 +
 testsuite/python/ek_fixedflux.py              |  119 +
 testsuite/python/ek_indexed_reactions.py      |  167 +
 testsuite/python/ek_interface.py              |  433 ++
 testsuite/python/ek_noflux.py                 |  106 +
 testsuite/python/ek_slice.py                  |  161 +
 testsuite/python/engine_lb.py                 |  150 +-
 testsuite/python/h5md.py                      |    6 +
 testsuite/python/lattice.py                   |   69 +
 testsuite/python/lattice_vtk.py               |  371 ++
 testsuite/python/lb.py                        |  804 ++--
 testsuite/python/lb_boundary.py               |  154 +-
 testsuite/python/lb_boundary_velocity.py      |  319 +-
 testsuite/python/lb_boundary_volume_force.py  |   54 +-
 testsuite/python/lb_buoyancy_force.py         |   58 +-
 testsuite/python/lb_circular_couette.py       |  204 +-
 testsuite/python/lb_electrohydrodynamics.py   |   12 +-
 testsuite/python/lb_get_u_at_pos.py           |   91 -
 testsuite/python/lb_interpolation.py          |   72 +-
 testsuite/python/lb_lees_edwards.py           |  333 ++
 .../lb_lees_edwards_particle_coupling.py      |   94 +
 ...{lb_density.py => lb_mass_conservation.py} |   25 +-
 testsuite/python/lb_momentum_conservation.py  |  169 +-
 testsuite/python/lb_planar_couette.py         |  135 +
 testsuite/python/lb_poiseuille.py             |  120 +-
 testsuite/python/lb_poiseuille_cylinder.py    |   50 +-
 testsuite/python/lb_pressure_tensor.py        |   81 +-
 testsuite/python/lb_shear.py                  |   99 +-
 testsuite/python/lb_slice.py                  |  161 +-
 testsuite/python/lb_stats.py                  |   93 +-
 testsuite/python/lb_stokes_sphere.py          |   64 +-
 testsuite/python/lb_streaming.py              |  128 +-
 testsuite/python/lb_switch.py                 |   93 -
 testsuite/python/lb_thermo_virtual.py         |   14 +-
 testsuite/python/lb_thermostat.py             |   89 +-
 testsuite/python/lb_vtk.py                    |  209 -
 testsuite/python/lees_edwards.py              |    9 +-
 testsuite/python/linear_momentum_lb.py        |   40 +-
 testsuite/python/observable_cylindricalLB.py  |   37 +-
 testsuite/python/observable_profileLB.py      |   24 +-
 testsuite/python/save_checkpoint.py           |  206 +-
 testsuite/python/test_checkpoint.py           |  329 +-
 testsuite/python/tests_common.py              |   29 +-
 testsuite/python/thermostats_common.py        |    7 +-
 testsuite/python/unittest_generator.py        |   21 +
 testsuite/python/virtual_sites_tracers.py     |    4 +-
 .../python/virtual_sites_tracers_common.py    |  109 +-
 testsuite/python/virtual_sites_tracers_gpu.py |    5 +-
 testsuite/scripts/benchmarks/test_lb.py       |    2 +-
 testsuite/scripts/samples/CMakeLists.txt      |    7 +-
 .../samples/test_lb_circular_couette.py       |   71 +
 ...ndaries.py => test_lb_four_roller_mill.py} |   20 +-
 .../scripts/samples/test_lb_planar_couette.py |   34 +-
 testsuite/scripts/samples/test_lb_profile.py  |    2 +-
 testsuite/scripts/samples/test_lbf.py         |    2 +-
 .../test_object_in_fluid__motivation.py       |   19 +-
 testsuite/scripts/tutorials/CMakeLists.txt    |   10 +-
 .../scripts/tutorials/test_active_matter.py   |    9 +-
 .../scripts/tutorials/test_electrokinetics.py |    9 +-
 .../test_lattice_boltzmann_poiseuille_flow.py |    3 +-
 .../test_lattice_boltzmann_sedimentation.py   |   12 +-
 testsuite/scripts/tutorials/test_polymers.py  |    6 +-
 .../test_raspberry_electrophoresis.py         |    3 +-
 434 files changed, 59218 insertions(+), 19157 deletions(-)
 create mode 100644 cmake/espresso_enable_avx2_support.cmake
 create mode 100644 maintainer/walberla_kernels/Readme.md
 create mode 100644 maintainer/walberla_kernels/code_generation_context.py
 create mode 100644 maintainer/walberla_kernels/custom_additional_extensions.py
 create mode 100644 maintainer/walberla_kernels/ekin.py
 create mode 100644 maintainer/walberla_kernels/generate_ek_kernels.py
 create mode 100644 maintainer/walberla_kernels/generate_lb_kernels.py
 create mode 100644 maintainer/walberla_kernels/lbmpy_espresso.py
 create mode 100644 maintainer/walberla_kernels/lees_edwards.py
 create mode 100644 maintainer/walberla_kernels/pystencils_espresso.py
 create mode 100644 maintainer/walberla_kernels/relaxation_rates.py
 create mode 100644 maintainer/walberla_kernels/templates/Boundary.tmpl.h
 create mode 100644 maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h
 create mode 100644 maintainer/walberla_kernels/templates/ReactionKernelSelector.tmpl.h
 create mode 100644 maintainer/walberla_kernels/walberla_lbm_generation.py
 delete mode 100644 samples/ekboundaries.py
 create mode 100644 samples/lb_circular_couette.py
 create mode 100644 samples/lb_four_roller_mill.py
 create mode 100644 samples/lb_planar_couette.py
 create mode 100644 src/core/grid_based_algorithms/EKReactions.hpp
 delete mode 100644 src/core/grid_based_algorithms/OptionalCounter.hpp
 create mode 100644 src/core/grid_based_algorithms/ek_container.cpp
 create mode 100644 src/core/grid_based_algorithms/ek_container.hpp
 rename src/core/{virtual_sites/lb_inertialess_tracers.hpp => grid_based_algorithms/ek_reactions.cpp} (55%)
 rename src/{script_interface/lbboundaries/initialize.cpp => core/grid_based_algorithms/ek_reactions.hpp} (61%)
 delete mode 100644 src/core/grid_based_algorithms/electrokinetics.hpp
 delete mode 100644 src/core/grid_based_algorithms/electrokinetics_cuda.cu
 delete mode 100644 src/core/grid_based_algorithms/fd-electrostatics.cuh
 delete mode 100644 src/core/grid_based_algorithms/fd-electrostatics_cuda.cu
 delete mode 100644 src/core/grid_based_algorithms/halo.cpp
 delete mode 100644 src/core/grid_based_algorithms/halo.hpp
 delete mode 100644 src/core/grid_based_algorithms/lattice.cpp
 delete mode 100644 src/core/grid_based_algorithms/lattice.hpp
 delete mode 100644 src/core/grid_based_algorithms/lb-d3q19.hpp
 delete mode 100644 src/core/grid_based_algorithms/lb.cpp
 delete mode 100644 src/core/grid_based_algorithms/lb.hpp
 delete mode 100644 src/core/grid_based_algorithms/lb_boundaries.cpp
 delete mode 100644 src/core/grid_based_algorithms/lb_boundaries.hpp
 delete mode 100644 src/core/grid_based_algorithms/lb_collective_interface.cpp
 delete mode 100644 src/core/grid_based_algorithms/lb_collective_interface.hpp
 delete mode 100644 src/core/grid_based_algorithms/lb_constants.hpp
 create mode 100644 src/core/grid_based_algorithms/lb_walberla_instance.cpp
 create mode 100644 src/core/grid_based_algorithms/lb_walberla_instance.hpp
 delete mode 100644 src/core/grid_based_algorithms/lbboundaries/LBBoundary.hpp
 delete mode 100644 src/core/grid_based_algorithms/lbgpu.cpp
 delete mode 100644 src/core/grid_based_algorithms/lbgpu.cuh
 delete mode 100644 src/core/grid_based_algorithms/lbgpu.hpp
 delete mode 100644 src/core/grid_based_algorithms/lbgpu_cuda.cu
 delete mode 100644 src/core/unit_tests/Lattice_test.cpp
 create mode 100644 src/core/unit_tests/ek_interface_test.cpp
 delete mode 100644 src/core/unit_tests/lb_exceptions.cpp
 create mode 100644 src/core/unit_tests/lb_particle_coupling_test.cpp
 create mode 100644 src/core/unit_tests/particle_management.hpp
 delete mode 100644 src/core/virtual_sites/lb_inertialess_tracers.cpp
 delete mode 100644 src/core/virtual_sites/lb_inertialess_tracers_cuda.cu
 delete mode 100644 src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.cpp
 delete mode 100644 src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.hpp
 rename src/{script_interface/lbboundaries => python/espressomd/detail}/CMakeLists.txt (81%)
 create mode 100644 src/python/espressomd/detail/__init__.py
 create mode 100644 src/python/espressomd/detail/walberla.py
 delete mode 100644 src/python/espressomd/electrokinetics.pxd
 create mode 100644 src/python/espressomd/electrokinetics.py
 delete mode 100644 src/python/espressomd/electrokinetics.pyx
 create mode 100644 src/python/espressomd/io/vtk.py
 delete mode 100644 src/python/espressomd/lb.pxd
 create mode 100644 src/python/espressomd/lb.py
 delete mode 100644 src/python/espressomd/lb.pyx
 delete mode 100644 src/python/espressomd/lbboundaries.py
 delete mode 100644 src/script_interface/lbboundaries/LBBoundaries.hpp
 delete mode 100644 src/script_interface/lbboundaries/LBBoundary.hpp
 create mode 100644 src/script_interface/walberla/CMakeLists.txt
 create mode 100644 src/script_interface/walberla/EKContainer.hpp
 create mode 100644 src/script_interface/walberla/EKFFT.hpp
 create mode 100644 src/script_interface/walberla/EKNone.hpp
 create mode 100644 src/script_interface/walberla/EKPoissonSolver.hpp
 create mode 100644 src/script_interface/walberla/EKReactant.hpp
 create mode 100644 src/script_interface/walberla/EKReaction.hpp
 create mode 100644 src/script_interface/walberla/EKReactions.hpp
 create mode 100644 src/script_interface/walberla/EKSpecies.cpp
 create mode 100644 src/script_interface/walberla/EKSpecies.hpp
 create mode 100644 src/script_interface/walberla/EKSpeciesNode.cpp
 create mode 100644 src/script_interface/walberla/EKSpeciesNode.hpp
 create mode 100644 src/script_interface/walberla/EKSpeciesSlice.cpp
 create mode 100644 src/script_interface/walberla/EKSpeciesSlice.hpp
 create mode 100644 src/script_interface/walberla/LBFluid.cpp
 create mode 100644 src/script_interface/walberla/LBFluid.hpp
 create mode 100644 src/script_interface/walberla/LBFluidNode.cpp
 create mode 100644 src/script_interface/walberla/LBFluidNode.hpp
 create mode 100644 src/script_interface/walberla/LBFluidSlice.cpp
 create mode 100644 src/script_interface/walberla/LBFluidSlice.hpp
 create mode 100644 src/script_interface/walberla/LatticeIndices.hpp
 create mode 100644 src/script_interface/walberla/LatticeModel.hpp
 create mode 100644 src/script_interface/walberla/LatticeSlice.hpp
 create mode 100644 src/script_interface/walberla/LatticeSlice.impl.hpp
 create mode 100644 src/script_interface/walberla/LatticeWalberla.hpp
 create mode 100644 src/script_interface/walberla/VTKHandle.hpp
 create mode 100644 src/script_interface/walberla/WalberlaCheckpoint.hpp
 create mode 100644 src/script_interface/walberla/initialize.cpp
 rename src/script_interface/{lbboundaries => walberla}/initialize.hpp (67%)
 create mode 100644 src/shapes/src/Shape.cpp
 create mode 100644 src/walberla_bridge/CMakeLists.txt
 create mode 100644 src/walberla_bridge/include/walberla_bridge/Architecture.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/BlockAndCell.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/LatticeModel.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/LatticeWalberla.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/VTKHandle.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/electrokinetics/EKContainer.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/electrokinetics/EKWalberlaNodeState.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/electrokinetics/EKinWalberlaBase.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/FFT.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/None.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/PoissonSolver.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactant.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaNodeState.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LeesEdwardsPack.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/utils/ResourceManager.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/utils/walberla_utils.hpp
 create mode 100644 src/walberla_bridge/include/walberla_bridge/walberla_init.hpp
 create mode 100644 src/walberla_bridge/src/BoundaryHandling.hpp
 create mode 100644 src/walberla_bridge/src/CMakeLists.txt
 create mode 100644 src/walberla_bridge/src/LatticeModel.cpp
 create mode 100644 src/walberla_bridge/src/LatticeWalberla.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/CMakeLists.txt
 create mode 100644 src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/ek_kernels.hpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/CMakeLists.txt
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/CMakeLists.txt
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_all.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_double_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_single_precision.h
 create mode 100644 src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_all.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/CMakeLists.txt
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/InterpolateAndShiftAtBoundary.hpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwards.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwards.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalized.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalized.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalizedAVX.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalizedAVX.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwards.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwards.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwardsAVX.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwardsAVX.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalized.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalized.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalizedAVX.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalizedAVX.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterDoublePrecision.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterDoublePrecision.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterSinglePrecision.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterSinglePrecision.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecision.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecision.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecisionAVX.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecisionAVX.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecision.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecision.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecisionAVX.cpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecisionAVX.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/myintrin.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/philox_rand.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/lb_walberla_init.cpp
 rename src/{core/grid_based_algorithms/electrokinetics.cpp => walberla_bridge/src/lattice_boltzmann/lb_walberla_init.cu} (93%)
 create mode 100644 src/walberla_bridge/src/walberla_init.cpp
 create mode 100644 src/walberla_bridge/tests/CMakeLists.txt
 create mode 100644 src/walberla_bridge/tests/EKinWalberlaImpl_unit_tests.cpp
 create mode 100644 src/walberla_bridge/tests/LBWalberlaImpl_bspline_tests.cpp
 create mode 100644 src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp
 create mode 100644 src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards.cpp
 create mode 100644 src/walberla_bridge/tests/LBWalberlaImpl_statistical_tests.cpp
 create mode 100644 src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp
 create mode 100644 src/walberla_bridge/tests/LatticeWalberla_unit_tests.cpp
 create mode 100644 src/walberla_bridge/tests/ResourceManager_test.cpp
 create mode 100644 src/walberla_bridge/tests/kernels_unit_tests.cpp
 create mode 100644 src/walberla_bridge/tests/tests_common.hpp
 create mode 100644 src/walberla_bridge/tests/tests_common_ek.hpp
 create mode 100644 src/walberla_bridge/tests/tests_common_lb.hpp
 create mode 100644 testsuite/python/ek_boundary.py
 create mode 100644 testsuite/python/ek_bulk_reactions.py
 delete mode 100644 testsuite/python/ek_charged_plate.py
 create mode 100644 testsuite/python/ek_diffusion.py
 create mode 100644 testsuite/python/ek_eof.py
 delete mode 100644 testsuite/python/ek_eof_one_species.py
 create mode 100644 testsuite/python/ek_fixeddensity.py
 create mode 100644 testsuite/python/ek_fixedflux.py
 create mode 100644 testsuite/python/ek_indexed_reactions.py
 create mode 100644 testsuite/python/ek_interface.py
 create mode 100644 testsuite/python/ek_noflux.py
 create mode 100644 testsuite/python/ek_slice.py
 create mode 100644 testsuite/python/lattice.py
 create mode 100644 testsuite/python/lattice_vtk.py
 delete mode 100644 testsuite/python/lb_get_u_at_pos.py
 create mode 100644 testsuite/python/lb_lees_edwards.py
 create mode 100644 testsuite/python/lb_lees_edwards_particle_coupling.py
 rename testsuite/python/{lb_density.py => lb_mass_conservation.py} (74%)
 create mode 100644 testsuite/python/lb_planar_couette.py
 delete mode 100644 testsuite/python/lb_switch.py
 delete mode 100644 testsuite/python/lb_vtk.py
 create mode 100644 testsuite/scripts/samples/test_lb_circular_couette.py
 rename testsuite/scripts/samples/{test_ekboundaries.py => test_lb_four_roller_mill.py} (63%)
 rename src/python/espressomd/ekboundaries.py => testsuite/scripts/samples/test_lb_planar_couette.py (53%)

diff --git a/.codecov.yml b/.codecov.yml
index 50684dece5e..7635de48e9d 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -50,3 +50,6 @@ ignore:
   - "doc/tutorials"
   - "samples"
   - "maintainer"
+  - "src/walberla_bridge/**/generated_kernels/*"
+  - "src/walberla_bridge/myintrin.h"
+  - "src/walberla_bridge/philox_rand.h"
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4217ff87b07..5e1815f5f94 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -68,7 +68,7 @@ style_doxygen:
     - mkdir build
     - cd build
     - cp ../maintainer/configs/maxset.hpp myconfig.hpp
-    - cmake .. -D ESPRESSO_BUILD_WITH_CUDA=ON -D ESPRESSO_BUILD_WITH_GSL=ON -D ESPRESSO_BUILD_WITH_HDF5=ON -D ESPRESSO_BUILD_WITH_SCAFACOS=ON -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=ON
+    - cmake .. -D ESPRESSO_BUILD_WITH_CUDA=ON -D ESPRESSO_BUILD_WITH_GSL=ON -D ESPRESSO_BUILD_WITH_HDF5=ON -D ESPRESSO_BUILD_WITH_SCAFACOS=ON -D ESPRESSO_BUILD_WITH_WALBERLA=ON -D ESPRESSO_BUILD_WITH_WALBERLA_FFT=ON -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=ON
     - sh ../maintainer/CI/dox_warnings.sh
   tags:
     - espresso
@@ -87,6 +87,7 @@ default:
      myconfig: 'default'
      with_coverage: 'true'
      with_scafacos: 'true'
+     with_walberla: 'true'
      with_stokesian_dynamics: 'true'
      check_skip_long: 'true'
   script:
@@ -94,6 +95,7 @@ default:
   tags:
     - espresso
     - no-cuda
+    - numa
 
 maxset:
   <<: *global_job_definition
@@ -107,6 +109,7 @@ maxset:
      myconfig: 'maxset'
      with_coverage: 'true'
      with_scafacos: 'true'
+     with_walberla: 'true'
      with_stokesian_dynamics: 'true'
      check_skip_long: 'true'
      cmake_params: '-D ESPRESSO_TEST_NP=8'
@@ -116,6 +119,7 @@ maxset:
     - espresso
     - no-cuda
     - numa
+    - avx2
 
 no_rotation:
   <<: *global_job_definition
@@ -169,6 +173,7 @@ clang-sanitizer:
      with_asan: 'true'
      with_ubsan: 'true'
      with_scafacos: 'true'
+     with_walberla: 'true'
      with_stokesian_dynamics: 'true'
   script:
     - bash maintainer/CI/build_cmake.sh
@@ -210,6 +215,7 @@ cuda11-coverage:
      with_coverage: 'true'
      check_skip_long: 'true'
      with_scafacos: 'true'
+     with_walberla: 'true'
      with_stokesian_dynamics: 'true'
   script:
     - bash maintainer/CI/build_cmake.sh
@@ -232,6 +238,8 @@ cuda11-maxset:
      test_timeout: '900'
      srcdir: '${CI_PROJECT_DIR}'
      with_scafacos: 'true'
+     with_walberla: 'true'
+     with_walberla_avx: 'true'
      with_stokesian_dynamics: 'true'
   script:
     - bash maintainer/CI/build_cmake.sh
@@ -256,6 +264,8 @@ tutorials-samples-maxset:
      with_coverage: 'false'
      with_coverage_python: 'true'
      with_scafacos: 'true'
+     with_walberla: 'true'
+     with_walberla_avx: 'true'
      with_stokesian_dynamics: 'true'
      make_check_unit_tests: 'false'
      make_check_python: 'false'
@@ -281,6 +291,8 @@ tutorials-samples-default:
      with_cuda: 'true'
      with_coverage: 'false'
      with_scafacos: 'true'
+     with_walberla: 'true'
+     with_walberla_avx: 'true'
      make_check_unit_tests: 'false'
      make_check_python: 'false'
      make_check_tutorials: 'true'
@@ -306,14 +318,14 @@ tutorials-samples-empty:
      myconfig: 'empty'
      with_cuda: 'true'
      with_coverage: 'false'
-     with_scafacos: 'true'
+     with_scafacos: 'false'
+     with_walberla: 'false'
      make_check_unit_tests: 'false'
      make_check_python: 'false'
      make_check_tutorials: 'true'
      make_check_samples: 'true'
      make_check_benchmarks: 'true'
      test_timeout: '1200'
-     with_scafacos: 'false'
   script:
     - bash maintainer/CI/build_cmake.sh
   tags:
@@ -334,6 +346,8 @@ tutorials-samples-no-gpu:
      with_cuda: 'true'
      with_coverage: 'false'
      with_scafacos: 'true'
+     with_walberla: 'true'
+     with_walberla_avx: 'true'
      make_check_unit_tests: 'false'
      make_check_python: 'false'
      make_check_tutorials: 'true'
@@ -362,6 +376,7 @@ installation:
      make_check_unit_tests: 'false'
      make_check_python: 'false'
      with_scafacos: 'true'
+     with_walberla: 'true'
      with_stokesian_dynamics: 'true'
      srcdir: '${CI_PROJECT_DIR}'
      build_type: 'Release'
@@ -396,6 +411,7 @@ empty:
      with_cuda: 'true'
      with_static_analysis: 'true'
      with_scafacos: 'false'
+     with_walberla: 'false'
      with_stokesian_dynamics: 'false'
      with_coverage: 'false'
      with_coverage_python: 'true'
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c61e17f2ca3..9212797e37e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,8 +8,8 @@ repos:
         entry: sh maintainer/format/clang-format.sh
         language: system
         always_run: false
-        files: '.*\.(cpp|hpp|cu|cuh)'
-        exclude: '^libs/'
+        files: '.*\.(cpp|hpp|h|cu|cuh)'
+        exclude: '^libs/|^src/walberla_bridge/src/.*/generated_kernels/.*\.(cpp|cu)|^maintainer/walberla_kernels/templates/.*\.tmpl\.(cpp|hpp|h|cu|cuh)'
         args: ["-i", "-style=file"]
 
     -   id: autopep8
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 70c01263d12..b81dfea64f2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,6 +57,7 @@ include(FeatureSummary)
 project(ESPResSo)
 include(GNUInstallDirs)
 include(espresso_option_enum)
+include(espresso_enable_avx2_support)
 if(POLICY CMP0074)
   # make find_package() use <PackageName>_ROOT variables
   cmake_policy(SET CMP0074 NEW)
@@ -92,6 +93,11 @@ option(ESPRESSO_BUILD_TESTS "Enable tests" ON)
 option(ESPRESSO_BUILD_WITH_SCAFACOS "Build with ScaFaCoS support" OFF)
 option(ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS "Build with Stokesian Dynamics"
        OFF)
+option(ESPRESSO_BUILD_WITH_WALBERLA
+       "Build with waLBerla lattice-Boltzmann support" OFF)
+option(ESPRESSO_BUILD_WITH_WALBERLA_AVX
+       "Build waLBerla lattice-Boltzmann with AVX vectorization" OFF)
+option(ESPRESSO_BUILD_WITH_WALBERLA_FFT "Build waLBerla with FFT support" OFF)
 option(ESPRESSO_BUILD_BENCHMARKS "Enable benchmarks" OFF)
 option(ESPRESSO_BUILD_WITH_VALGRIND_MARKERS
        "Build with valgrind instrumentation markers" OFF)
@@ -171,6 +177,15 @@ foreach(func_name __PRETTY_FUNCTION__ __FUNCSIG__ __FUNCTION__)
   endif(result${func_name})
 endforeach()
 
+#
+# AVX2 support
+#
+
+include(CheckCXXCompilerFlag)
+
+add_library(espresso_avx_flags INTERFACE)
+add_library(espresso::avx_flags ALIAS espresso_avx_flags)
+
 #
 # Interface libraries
 #
@@ -573,6 +588,75 @@ if(ESPRESSO_BUILD_BENCHMARKS)
   add_subdirectory(maintainer/benchmarks)
 endif()
 
+#
+# waLBerla
+#
+
+if(ESPRESSO_BUILD_WITH_WALBERLA)
+  # cmake-format: off
+  include(FetchContent)
+  FetchContent_Declare(
+    walberla
+    GIT_REPOSITORY https://i10git.cs.fau.de/walberla/walberla.git
+    GIT_TAG        065ce5f311850371a97ac4766f47dbb5ca8424ba
+  )
+  # workaround for https://gitlab.kitware.com/cmake/cmake/-/issues/21146
+  if(NOT DEFINED walberla_SOURCE_DIR OR NOT EXISTS "${walberla_SOURCE_DIR}")
+    FetchContent_Populate(walberla)
+  endif()
+  # cmake-format: on
+  string(REGEX REPLACE "([/\\]walberla)-src$" "\\1-build" walberla_BINARY_DIR
+                       "${walberla_SOURCE_DIR}")
+  set(WALBERLA_BUILD_TESTS off CACHE BOOL "")
+  set(WALBERLA_BUILD_BENCHMARKS off CACHE BOOL "")
+  set(WALBERLA_BUILD_TOOLS off CACHE BOOL "")
+  set(WALBERLA_BUILD_TUTORIALS off CACHE BOOL "")
+  set(WALBERLA_BUILD_SHOWCASES off CACHE BOOL "")
+  set(WALBERLA_BUILD_DOC off CACHE BOOL "")
+  set(WALBERLA_LOGLEVEL "WARNING" CACHE STRING "")
+  set(CMAKE_POSITION_INDEPENDENT_CODE on CACHE BOOL "")
+  if(ESPRESSO_BUILD_WITH_CUDA)
+    set(WALBERLA_BUILD_WITH_CUDA "on" CACHE BOOL "")
+    if(CMAKE_VERSION VERSION_LESS 3.25 OR NOT ESPRESSO_CUDA_COMPILER STREQUAL
+                                          "clang")
+      if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+        set(CMAKE_CUDA_ARCHITECTURES 75)
+      endif()
+    endif()
+  endif()
+  if(ESPRESSO_BUILD_WITH_WALBERLA_FFT)
+    set(ESPRESSO_USE_WALBERLA_FFT 1)
+    set(WALBERLA_BUILD_WITH_FFTW on CACHE BOOL "")
+  else()
+    set(WALBERLA_BUILD_WITH_FFTW off CACHE BOOL "")
+  endif()
+  set(WALBERLA_BUILD_WITH_FASTMATH off CACHE BOOL "")
+  add_subdirectory("${walberla_SOURCE_DIR}" "${walberla_BINARY_DIR}")
+  set(WALBERLA_LIBS
+      walberla::core walberla::domain_decomposition walberla::blockforest
+      walberla::boundary walberla::field walberla::lbm walberla::timeloop
+      walberla::vtk)
+  if(ESPRESSO_BUILD_WITH_WALBERLA_FFT)
+    set(WALBERLA_LIBS ${WALBERLA_LIBS} walberla::fft)
+  endif()
+  if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA)
+    set(WALBERLA_LIBS ${WALBERLA_LIBS} walberla::cuda)
+  endif()
+  # workaround for https://gitlab.kitware.com/cmake/cmake/-/issues/21283
+  foreach(target_w_namespace IN LISTS WALBERLA_LIBS)
+    string(REPLACE "walberla::" "" target_wo_namespace ${target_w_namespace})
+    add_library(${target_w_namespace} ALIAS ${target_wo_namespace})
+  endforeach()
+  if(ESPRESSO_BUILD_WITH_WALBERLA_AVX)
+    function(espresso_avx_flags_callback COMPILER_AVX2_FLAG)
+      target_compile_options(
+        espresso_avx_flags INTERFACE "${COMPILER_AVX2_FLAG}"
+                                     "-DESPRESSO_BUILD_WITH_AVX_KERNELS")
+    endfunction()
+    espresso_enable_avx2_support(espresso_avx_flags_callback)
+  endif()
+endif()
+
 #
 # Subdirectories
 #
diff --git a/cmake/FindFFTW3.cmake b/cmake/FindFFTW3.cmake
index f7bae67947d..a4bdae4f110 100644
--- a/cmake/FindFFTW3.cmake
+++ b/cmake/FindFFTW3.cmake
@@ -32,14 +32,21 @@ endif(FFTW3_INCLUDE_DIR)
 
 find_path(FFTW3_INCLUDE_DIR fftw3.h)
 find_library(FFTW3_LIBRARIES NAMES fftw3)
+find_path(FFTW3_MPI_INCLUDE_DIR fftw3-mpi.h)
+find_library(FFTW3_MPI_LIBRARIES NAMES fftw3_mpi)
 
 # handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE if all
 # listed variables are TRUE
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(FFTW3 DEFAULT_MSG FFTW3_LIBRARIES
                                   FFTW3_INCLUDE_DIR)
+set(FPHSA_NAME_MISMATCHED 1)
+find_package_handle_standard_args(FFTW3_MPI DEFAULT_MSG FFTW3_MPI_LIBRARIES
+                                  FFTW3_MPI_INCLUDE_DIR)
+unset(FPHSA_NAME_MISMATCHED)
+
+mark_as_advanced(FFTW3_LIBRARIES FFTW3_INCLUDE_DIR FFTW3_MPI_LIBRARIES FFTW3_MPI_INCLUDE_DIR)
 
-mark_as_advanced(FFTW3_LIBRARIES FFTW3_INCLUDE_DIR)
 
 if(FFTW3_FOUND AND NOT TARGET FFTW3::FFTW3)
   add_library(FFTW3::FFTW3 INTERFACE IMPORTED)
diff --git a/cmake/espresso_cmake_config.cmakein b/cmake/espresso_cmake_config.cmakein
index 884c323187b..62091119c59 100644
--- a/cmake/espresso_cmake_config.cmakein
+++ b/cmake/espresso_cmake_config.cmakein
@@ -13,6 +13,10 @@
 
 #cmakedefine ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS
 
+#cmakedefine ESPRESSO_BUILD_WITH_WALBERLA
+
+#cmakedefine ESPRESSO_BUILD_WITH_WALBERLA_FFT
+
 #cmakedefine ESPRESSO_BUILD_WITH_VALGRIND_MARKERS
 
 #define PACKAGE_NAME "${PROJECT_NAME}"
diff --git a/cmake/espresso_enable_avx2_support.cmake b/cmake/espresso_enable_avx2_support.cmake
new file mode 100644
index 00000000000..3baab871465
--- /dev/null
+++ b/cmake/espresso_enable_avx2_support.cmake
@@ -0,0 +1,67 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+function(espresso_enable_avx2_support callback)
+  set(COMPILER_AVX2_FLAG "")
+  foreach(FLAG_NAME "-mavx2" "/arch:AVX2")
+    string(REGEX REPLACE "[^0-9A-Za-z_]" "_" FLAG_VARIABLE "${FLAG_NAME}")
+    check_cxx_compiler_flag("${flag_name}"
+                            COMPILER_HAS_${FLAG_VARIABLE}_FLAG_RESULT)
+    if(COMPILER_HAS_${FLAG_VARIABLE}_FLAG_RESULT)
+      set(COMPILER_AVX2_FLAG "${FLAG_NAME}")
+      cmake_language(CALL ${callback} "${COMPILER_AVX2_FLAG}")
+      break()
+    endif()
+  endforeach()
+  if(COMPILER_AVX2_FLAG STREQUAL "")
+    message(
+      FATAL_ERROR
+        "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION} doesn't support AVX2-specific compiler flags."
+    )
+  endif()
+  if(NOT COMPILER_AVX2_FLAG STREQUAL "/arch:AVX2")
+    execute_process(
+      COMMAND ${CMAKE_CXX_COMPILER} -march=native -E -v - INPUT_FILE /dev/null
+      OUTPUT_VARIABLE MARCH_NATIVE_OUTPUT_STRING
+      ERROR_VARIABLE MARCH_NATIVE_OUTPUT_STRING)
+    if(NOT "${MARCH_NATIVE_OUTPUT_STRING}" MATCHES "[ \n](\\+avx2|-mavx2|-D__AVX2__)[ \n]")
+      message(
+        FATAL_ERROR
+          "AVX2 not supported on this CPU architecture according to ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}. While ESPResSo will still compile, you will trigger SIGILL when calling AVX functions."
+      )
+    endif()
+  endif()
+  set(CMAKE_REQUIRED_FLAGS_BACKUP "${CMAKE_REQUIRED_FLAGS}")
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${COMPILER_AVX2_FLAG}")
+  check_cxx_source_compiles(
+    "#include <immintrin.h>
+       __m256i xi_i = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+       __m256  xi_s = _mm256_set_ps(0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f);
+       __m256d xi_d = _mm256_set_pd(0.0, 1.0, 2.0, 3.0);
+       int main() {}
+      " COMPILER_HAS_AVX2_SUPPORT)
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_BACKUP}")
+  if(NOT COMPILER_HAS_AVX2_SUPPORT)
+    message(
+      FATAL_ERROR
+        "Cannot execute a simple AVX2 program compiled by ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}."
+    )
+  endif()
+endfunction()
+
diff --git a/doc/bibliography.bib b/doc/bibliography.bib
index cb3b51c2821..abb48c90e6a 100644
--- a/doc/bibliography.bib
+++ b/doc/bibliography.bib
@@ -146,6 +146,40 @@ @Article{batle20a
   doi       = {10.1038/s41598-020-76029-x},
 }
 
+@InProceedings{bauer19a,
+  author = {Bauer, Martin and H\"{o}tzer, Johannes and Ernst, Dominik and Hammer, Julian and Seiz, Marco and Hierl, Henrik and H\"{o}nig, Jan and K\"{o}stler, Harald and Wellein, Gerhard and Nestler, Britta and R\"{u}de, Ulrich},
+  title = {Code Generation for Massively Parallel Phase-Field Simulations},
+  year = {2019},
+  publisher = {Association for Computing Machinery},
+  address = {New York},
+  doi = {10.1145/3295500.3356186},
+  isbn = {9781450362290},
+  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
+  location = {Denver, Colorado},
+}
+
+@Article{bauer21a,
+  author = {Martin Bauer and Sebastian Eibl and Christian Godenschwager and Nils Kohl and Michael Kuron and Christoph Rettinger and Florian Schornbaum and Christoph Schwarzmeier and Dominik Th\"{o}nnes and Harald K\"{o}stler and Ulrich R\"{u}de},
+  title = {{waLBerla}: A block-structured high-performance framework for multiphysics simulations},
+  journal = {Computers \& Mathematics with Applications},
+  year = {2021},
+  issn = {0898-1221},
+  doi = {10.1016/j.camwa.2020.01.007},
+  pages = {478--501},
+  volume = {81},
+}
+
+@Article{bauer21b,
+  author = {Bauer, Martin and K\"{o}stler, Harald and R\"{u}de, Ulrich},
+  title = {{lbmpy}: Automatic code generation for efficient parallel lattice {Boltzmann} methods},
+  journal = {Journal of Computational Science},
+  volume = {49},
+  pages = {101269},
+  year = {2021},
+  issn = {1877-7503},
+  doi = {10.1016/j.jocs.2020.101269},
+}
+
 @Article{bindgen21a,
   author = {Bindgen, Sebastian and Weik, Florian and Weeber, Rudolf and Koos, Erin and de Buyl, Pierre},
   title = {{L}ees--{E}dwards boundary conditions for translation invariant shear flow: {I}mplementation and transport properties},
@@ -469,6 +503,17 @@ @Article{gay81a
   doi = {10.1063/1.441483},
 }
 
+@InProceedings{godenschwager13a,
+  title     = {A framework for hybrid parallel flow simulations with a trillion cells in complex geometries},
+  author    = {Godenschwager, Christian and Schornbaum, Florian and Bauer, Martin and K{\"o}stler, Harald and R{\"u}de, Ulrich},
+  booktitle = {Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis},
+  year      = {2013},
+  publisher = {Association for Computing Machinery},
+  address   = {New York},
+  doi       = {10.1145/2503210.2503273},
+  isbn      = {9781450323789},
+}
+
 @Article{gompper96a,
   author    = {Gompper, G. and Kroll, D. M.},
   title     = {Random Surface Discretizations and the Renormalization of the Bending Rigidity},
@@ -845,17 +890,6 @@ @Article{reed92a
   publisher={AIP Publishing}
 }
 
-@Article{rohm12a,
-  author = {Roehm, D. and Arnold, A.},
-  title = {Lattice {B}oltzmann simulations on {GPU}s with {ESPResSo}},
-  journal = {European Physical Journal Special Topics},
-  year = {2012},
-  volume = {210},
-  number = {1},
-  pages = {89--100},
-  doi = {10.1140/epjst/e2012-01639-6},
-}
-
 @Book{rubinstein03a,
   title = {Polymer Physics},
   publisher = {Oxford University Press},
diff --git a/doc/sphinx/advanced_methods.rst b/doc/sphinx/advanced_methods.rst
index c5b78c88e6e..7676d831980 100644
--- a/doc/sphinx/advanced_methods.rst
+++ b/doc/sphinx/advanced_methods.rst
@@ -374,7 +374,6 @@ Description of sample script
 .. note::
 
     The following features are required:
-    ``LB_BOUNDARIES``,
     ``EXTERNAL_FORCES``,
     ``MASS``, ``SOFT_SPHERE``
 
@@ -459,17 +458,15 @@ Specification of fluid and movement
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 ::
 
-    lbf = espressomd.lb.LBFluid(agrid=1, dens=1.0, visc=1.5, fric=1.5,
-                                tau=time_step, ext_force_density=[0.002, 0.0, 0.0])
+    lbf = espressomd.lb.LBFluidWalberla(agrid=1, density=1.0, kinematic_viscosity=1.5,
+                                        tau=time_step, ext_force_density=[0.002, 0.0, 0.0])
     system.actors.add(lbf)
 
 This part of the script specifies the fluid that will get the system
 moving. Here ``agrid`` :math:`=\Delta x` is the spatial discretisation
 step, ``tau`` is the time step that will be the same as the time step
-for particles, viscosity ``visc`` and density ``dens`` of the fluid are
-physical parameters scaled to lattice units. ``fric`` is a
-(non-physical) friction parameter that enters the fluid-object
-interaction and has to be set carefully. Finally, ``ext_force_density`` sets the
+for particles, viscosity ``viscosity`` and density ``density`` of the fluid are
+physical parameters scaled to lattice units, ``ext_force_density`` sets the
 force-per-unit-volume vector that drives the fluid. Another option to
 add momentum to fluid is by specifying the velocity on the boundaries.
 
@@ -518,12 +515,12 @@ defined as follows. First we define the two shapes:
                                 direction=1)
 
 The ``direction=1`` determines that the fluid is on the *outside*. Next
-we create boundaries for the fluid:
+we mark the LB nodes within the shapes as boundaries:
 
 ::
 
-    system.lbboundaries.add(lbboundaries.LBBoundary(shape=boundary1))
-    system.lbboundaries.add(lbboundaries.LBBoundary(shape=boundary2))
+    lbf.add_boundary_from_shape(boundary1)
+    lbf.add_boundary_from_shape(boundary2)
 
 Followed by creating the constraints for cells:
 
diff --git a/doc/sphinx/constraints.rst b/doc/sphinx/constraints.rst
index b46a75f5bbb..6bc6bd13922 100644
--- a/doc/sphinx/constraints.rst
+++ b/doc/sphinx/constraints.rst
@@ -550,8 +550,8 @@ the exception of a planar wall. For this, there is no ``direction`` option, but
 the ``normal`` vector of the wall points in the direction that is considered to
 yield positive distances.  Outside their use in constraints, shapes can also be
 used as a way to define LB boundary nodes. In this case, negative distances
-define nodes which are part of a boundary (please refer to :ref:`Using shapes
-as lattice-Boltzmann boundary`).
+define nodes which are part of a boundary (please refer to :ref:`Shape-based
+LB boundary conditions`).
 
 
 .. _External Fields:
diff --git a/doc/sphinx/ek.rst b/doc/sphinx/ek.rst
index 2a586c3a75d..a2e52502eda 100644
--- a/doc/sphinx/ek.rst
+++ b/doc/sphinx/ek.rst
@@ -11,6 +11,19 @@ interpolated on the LB grid. In the following paragraph we briefly
 explain the electrokinetic model implemented in |es|, before we come to the
 description of the interface.
 
+.. note::
+    Please cite :cite:t:`godenschwager13a` and :cite:t:`bauer21a` (BibTeX keys
+    ``godenschwager13a`` and ``bauer21a`` in :file:`doc/bibliography.bib`) if
+    you use the LB fluid. When generating your own kernels with pystencils and
+    lbmpy, please also cite :cite:t:`bauer19a` and :cite:t:`bauer21b` (BibTeX
+    key ``bauer19a`` resp. ``bauer21b`` in :file:`doc/bibliography.bib`).
+
+.. note::
+
+    Requires external features ``WALBERLA`` and optionally ``WALBERLA_FFT``
+    (for the FFT-based Poisson solver), enabled with the CMake options
+    ``-D ESPRESSO_BUILD_WITH_WALBERLA=ON -D ESPRESSO_BUILD_WITH_WALBERLA_FFT=ON``.
+
 .. _Electrokinetic equations:
 
 Electrokinetic equations
@@ -129,203 +142,298 @@ The electrokinetic equations have the following properties:
    spectra at frequencies, high enough that they correspond to times
    faster than the diffusive time scales of the charged species.
 
-.. _Setup:
+.. _EK Setup:
 
 Setup
 -----
 
-.. _Initialization:
+.. _EK Initialization:
 
 Initialization
-~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^
 
-:class:`~espressomd.electrokinetics.Electrokinetics` is used to initialize
-the LB fluid of the EK method::
+Here is a minimal working example::
 
     import espressomd
     import espressomd.electrokinetics
-    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
-    system.time_step = 0.0
-    system.cell_system.skin = 0.4
-    ek = espressomd.electrokinetics.Electrokinetics(agrid=1.0, lb_density=1.0,
-        viscosity=1.0, ext_force_density = [1,0,0], friction=1.0, T=1.0, prefactor=1.0,
-        stencil='linkcentered', advection=True, fluid_coupling='friction')
-    system.actors.add(ek)
-
-.. note:: Features ``ELECTROKINETICS`` and ``CUDA`` required
-
-It is very similar to the lattice-Boltzmann command in set-up.
-We therefore refer the reader to chapter :ref:`Lattice-Boltzmann`
-for details on the implementation of LB in |es| and describe only
-the major differences here.
-
-The first major difference with the LB implementation is that the
-electrokinetics set-up is a GPU-only implementation. A CPU version
-will become available in the 4.3 line of |es|. To use the electrokinetics
-features it is therefore imperative that your computer contains
-a CUDA-capable GPU.
-
-To set up a proper LB fluid using this command, one has to specify at
-least the following options: ``agrid``, ``lb_density``, ``viscosity``,
-``friction``, ``T``, and ``prefactor``. The other options can be
-used to modify the behavior of the LB fluid. Note that the command does
-not allow the user to set the time step parameter as is the case for the
-lattice-Boltzmann command, this parameter is instead taken directly from
-the value set for :attr:`~espressomd.system.System.time_step`.
-The LB *mass density* is set independently from the
-electrokinetic *number densities*, since the LB fluid serves only as a
-medium through which hydrodynamic interactions are propagated, as will
-be explained further in the next paragraph. If no ``lb_density`` is specified, then our
-algorithm assumes ``lb_density= 1.0``. The two 'new' parameters are the temperature ``T`` at
-which the diffusive species are simulated and the ``prefactor``
-associated with the electrostatic properties of the medium. See the
-above description of the electrokinetic equations for an explanation of
-the introduction of a temperature, which does not come in directly via a
-thermostat that produces thermal fluctuations.
-
-``advection`` can be set to ``True`` or ``False``. It controls whether there should be an
-advective contribution to the diffusive species' fluxes. Default is
-``True``.
-
-``fluid_coupling`` can be set to ``"friction"`` or ``"estatics"``.
-This option determines the force term acting on the fluid.
-The former specifies the force term to be the
-sum of the species fluxes divided by their respective mobilities while
-the latter simply uses the electrostatic force density acting on all
-species. Note that this switching is only possible for the ``"linkcentered"``
-stencil. For all other stencils, this choice is hardcoded. The default
-is ``"friction"``.
-
-``es_coupling`` enables the action of the electrostatic potential due to the
-electrokinetics species and charged boundaries on the MD particles. The
-forces on the particles are calculated by interpolation from the
-electric field which is in turn calculated from the potential via finite
-differences. This only includes interactions between the species and
-boundaries and MD particles, not between MD particles and MD particles.
-To get complete electrostatic interactions a particles Coulomb method
-like Ewald or P3M has to be activated too.
-
-The fluctuation of the EK species can be turned on by the flag ``fluctuations``.
-This adds a white-noise term to the fluxes. The amplitude of this noise term
-can be controlled by ``fluctuation_amplitude``. To circumvent that these fluctuations
-lead to negative densities, they are modified by a smoothed Heaviside function,
-which decreases the magnitude of the fluctuation for densities close to 0.
-By default the fluctuations are turned off.
-
-Another difference with LB is that EK parameters are immutables,
-and the EK object cannot be checkpointed.
+
+    system = espressomd.System(box_l=3 * [6.0])
+    system.time_step = 0.01
+    system.cell_system.skin = 1.0
+
+    ek_lattice = espressomd.electrokinetics.LatticeWalberla(agrid=0.5, n_ghost_layers=1)
+    ek_solver = espressomd.electrokinetics.EKNone(lattice=ek_lattice)
+    system.ekcontainer.solver = ek_solver
+    system.ekcontainer.tau = system.time_step
+
+where ``system.ekcontainer`` is the EK system, ``ek_solver`` is the Poisson
+solver (here ``EKNone`` doesn't actually solve the electrostatic field, but
+instead imposes a zero field), and ``ek_lattice`` contains the grid parameters.
+In this setup, the EK system doesn't contain any species. The following
+sections will show how to add species that can diffuse, advect, react and/or
+electrostatically interact. An EK system can be set up at the same time as a
+LB system.
 
 .. _Diffusive species:
 
 Diffusive species
-~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^
 ::
 
-    species = espressomd.electrokinetics.Species(density=density, D=D, valency=valency,
-        ext_force_density=ext_force)
+    ek_species = espressomd.electrokinetics.EKSpecies(
+        lattice=ek_lattice,
+        single_precision=False,
+        kT=1.0,
+        density=0.85,
+        valency=0.0,
+        diffusion=0.1,
+        advection=False,
+        friction_coupling=False,
+        ext_efield=[0., 0., 0.]
+    )
 
-:class:`~espressomd.electrokinetics.Species` is used to initialize a diffusive species. Here the
-options specify: the number density ``density``, the diffusion coefficient ``D``, the
-valency of the particles of that species ``valency``, and an optional external
-(electric) force which is applied to the diffusive species. As mentioned
-before, the LB density is completely decoupled from the electrokinetic
-densities. This has the advantage that greater freedom can be achieved
-in matching the internal parameters to an experimental system. Moreover,
-it is possible to choose parameters for which the LB is more stable.
-The species can be added to a LB fluid::
+:class:`~espressomd.electrokinetics.EKSpecies` is used to initialize a diffusive
+species. Here the options specify: the electrokinetic *number densities*
+``density`` (independent from the LB ``density``), the diffusion coefficient
+``diffusion``, the valency of the particles of that species ``valency``,
+the optional external (electric) force ``ext_efield`` which is applied to
+the diffusive species, the thermal energy ``kT`` for thermal fluctuations,
+``friction_coupling`` to enable coupling of the diffusive species to the
+LB fluid force and ``advection`` to add an advective contribution to the
+diffusive species' fluxes from the LB fluid.
+Multiple species can be added to the EK system.
 
-    ek.add_species(species)
+To add species to the EK system::
 
-One can also add the species during the initialization step of the
-:class:`~espressomd.electrokinetics.Electrokinetics` class by defining
-the list variable ``species``::
+    system.ekcontainer.add(ek_species)
 
-    ek = espressomd.electrokinetics.Electrokinetics(species=[species], ...)
+To remove species from the EK system::
 
-The variables ``density``, ``D``, and
-``valency`` must be set to properly initialize the diffusive species; the
-``ext_force_density`` is optional.
+    system.ekcontainer.remove(ek_species)
 
-.. _EK boundaries:
+Individual nodes and slices of the species lattice can be accessed and
+modified using the syntax outlined in :ref:`Reading and setting properties
+of single lattice nodes`.
 
-EK boundaries
-~~~~~~~~~~~~~
+As mentioned before, the LB density is completely decoupled from the
+electrokinetic densities. This has the advantage that greater freedom can
+be achieved in matching the internal parameters to an experimental system.
+Moreover, it is possible to choose parameters for which the LB is more stable.
 
-:class:`~espressomd.ekboundaries.EKBoundary` is used to set up
-internal (or external) boundaries for the electrokinetics algorithm in much
-the same way as the :class:`~espressomd.lbboundaries.LBBoundary` class is
-used for the LB fluid::
+Performance considerations
+^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-    ek_boundary = espressomd.ekboundaries.EKBoundary(charge_density=1.0, shape=my_shape)
-    system.ekboundaries.add(ek_boundary)
+The CPU implementation of the EK has an extra flag ``single_precision`` to
+use single-precision floating point values. These are approximately 10%
+faster than double-precision, at the cost of a small loss in precision.
 
-.. note:: Feature ``EK_BOUNDARIES`` required
+.. _Checkpointing EK:
 
-The major difference with the LB class is the option ``charge_density``,
-with which a boundary can be endowed with a volume charge density.
-To create a surface charge density, a combination of two
-oppositely charged boundaries, one inside the other, can be used. However,
-care should be taken to maintain the surface charge density when the value of ``agrid``
-is changed. Examples for possible shapes are wall, sphere, ellipsoid, cylinder,
-rhomboid and hollow conical frustum. We refer to the documentation of the
-:class:`espressomd.shapes` module for more possible shapes and information on
-the options associated to these shapes. In order to properly set up the
-boundaries, the ``charge_density`` and ``shape`` must be specified.
+Checkpointing
+-------------
 
-.. _Output:
+::
 
-Output
-~~~~~~
+    ek.save_checkpoint(path, binary)
+    ek.load_checkpoint(path, binary)
+
+The first command saves all of the EK nodes' properties to an ASCII
+(``binary=False``) or binary (``binary=True``) format respectively.
+The second command loads the EK nodes' properties.
+In both cases ``path`` specifies the location of the
+checkpoint file. This is useful for restarting a simulation either on the same
+machine or a different machine. Some care should be taken when using the binary
+format as the format of doubles can depend on both the computer being used as
+well as the compiler.
+
+.. _EK VTK output:
+
+VTK output
+----------
+
+The waLBerla library implements a globally-accessible VTK registry.
+A VTK stream can be attached to an EK actor to periodically write
+one or multiple fluid field data into a single file using
+:class:`~espressomd.electrokinetics.VTKOutput`::
+
+    vtk_obs = ["density"]
+    # create a VTK callback that automatically writes every 10 EK steps
+    ek_vtk = espressomd.electrokinetics.VTKOutput(
+        identifier="ek_vtk_automatic", observables=vtk_obs, delta_N=10)
+    ek.add_vtk_writer(vtk=ek_vtk)
+    system.integrator.run(100)
+    # can be deactivated
+    ek_vtk.disable()
+    system.integrator.run(10)
+    ek_vtk.enable()
+    # create a VTK callback that writes only when explicitly called
+    ek_vtk_on_demand = espressomd.electrokinetics.VTKOutput(
+        identifier="ek_vtk_now", observables=vtk_obs)
+    ek.add_vtk_writer(vtk=ek_vtk_on_demand)
+    ek_vtk_on_demand.write()
+
+Currently only supports the species density.
+By default, the properties of the current state
+of the species are written to disk on demand. To add a stream that writes
+to disk continuously, use the optional argument ``delta_N`` to indicate
+the level of subsampling. Such a stream can be deactivated.
+
+The VTK format is readable by visualization software such as ParaView [5]_
+or Mayavi2 [6]_, as well as in |es| (see :ref:`Reading VTK files`).
+If you plan to use ParaView for visualization, note that also the particle
+positions can be exported using the VTK format
+(see :meth:`~espressomd.particle_data.ParticleList.writevtk`).
+
+Important: these VTK files are written in multi-piece format, i.e. each MPI
+rank writes its local domain to a new piece in the VTK uniform grid to avoid
+a MPI reduction. ParaView can handle the topology reconstruction natively.
+However, when reading the multi-piece file with the Python ``vtk`` package,
+the topology must be manually reconstructed. In particular, calling the XML
+reader ``GetOutput()`` method directly after the update step will erase all
+topology information. While this is not an issue for VTK files obtained from
+simulations that ran with 1 MPI rank, for parallel simulations this will lead
+to 3D grids with incorrectly ordered data. Automatic topology reconstruction
+is available through :class:`~espressomd.io.vtk.VTKReader`::
+
+    import pathlib
+    import tempfile
+    import numpy as np
+    import espressomd
+    import espressomd.electrokinetics
+    import espressomd.io.vtk
 
-.. _Fields:
+    system = espressomd.System(box_l=[12., 14., 10.])
+    system.cell_system.skin = 0.4
+    system.time_step = 0.1
 
-Fields
-""""""
+    lattice = espressomd.electrokinetics.LatticeWalberla(agrid=1.)
+    species = espressomd.electrokinetics.EKSpecies(
+            lattice=lattice, density=1., kT=1., diffusion=0.1, valency=0.,
+            advection=False, friction_coupling=False, tau=system.time_step)
+    system.ekcontainer.tau = species.tau
+    system.ekcontainer.add(species)
+    system.integrator.run(10)
 
-::
+    vtk_reader = espressomd.io.vtk.VTKReader()
+    label_density = "density"
 
-    ek.write_vtk_boundary(path)
-    ek.write_vtk_density(path)
-    ek.write_vtk_velocity(path)
-    ek.write_vtk_potential(path)
+    with tempfile.TemporaryDirectory() as tmp_directory:
+        path_vtk_root = pathlib.Path(tmp_directory)
+        label_vtk = "ek_vtk"
+        path_vtk = path_vtk_root / label_vtk / "simulation_step_0.vtu"
 
-A property of the fluid field can be exported into a file in one go.
-Currently supported fields are: density, velocity, potential and boundary,
-which give the LB fluid density, the LB fluid velocity,
-the electrostatic potential, and the location and type of the
-boundaries, respectively. The boundaries can only be printed when the
-``EK_BOUNDARIES`` is compiled in. The output is a vtk-file, which is readable by
-visualization software such as ParaView [5]_ and Mayavi2 [6]_.
+        # write VTK file
+        ek_vtk = espressomd.electrokinetics.VTKOutput(
+            identifier=label_vtk, delta_N=0,
+            observables=["density"],
+            base_folder=str(path_vtk_root))
+        species.add_vtk_writer(vtk=ek_vtk)
+        ek_vtk.write()
 
-::
+        # read VTK file
+        vtk_grids = vtk_reader.parse(path_vtk)
+        vtk_density = vtk_grids[label_density]
 
-    species.write_vtk_flux(path)
-    species.write_vtk_density(path)
+        # check VTK values match node values
+        ek_density = np.copy(lbf[:, :, :].density)
+        np.testing.assert_allclose(vtk_density, ek_density, rtol=1e-10, atol=0.)
 
-These commands are similar to the above. They enable the
-export of diffusive species properties, namely: ``density`` and ``flux``, which specify the
-number density and flux of species ``species``, respectively.
+.. _Setting up EK boundary conditions:
 
-.. _Local quantities:
+Setting up boundary conditions
+------------------------------
 
-Local quantities
-""""""""""""""""
+It is possible to impose a fixed density and a fixed flux on EK species.
 
-Local quantities like velocity or fluid density for single nodes can be accessed in the same way
-as for an LB fluid, see :ref:`Lattice-Boltzmann`. The only EK-specific quantity is the potential.
+Under the hood, a boundary field is added to the blockforest, which contains
+pre-calculated information for the streaming operations.
 
-::
+.. _Per-node EK boundary conditions:
 
-    ek[0, 0, 0].potential
-    ek[0, 0, 0].velocity
-    ek[0, 0, 0].boundary
+Per-node boundary conditions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The local ``density`` and ``flux`` of a species can be obtained in the same fashion:
+One can set (or update) the boundary conditions of individual nodes::
 
-::
+    import espressomd
+    import espressomd.electrokinetics
+    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
+    system.cell_system.skin = 0.1
+    system.time_step = 0.01
+    lattice = espressomd.electrokinetics.LatticeWalberla(agrid=0.5, n_ghost_layers=1)
+    ek_species = espressomd.electrokinetics.EKSpecies(
+        kT=1.5, lattice=self.lattice, density=0.85, valency=0., diffusion=0.1,
+        advection=False, friction_coupling=False, tau=system.time_step)
+    system.ekcontainer.tau = species.tau
+    system.ekcontainer.add(ek_species)
+    # set node fixed density boundary conditions
+    lbf[0, 0, 0].boundary = espressomd.electrokinetics.DensityBoundary(1.)
+    # update node fixed density boundary conditions
+    lbf[0, 0, 0].boundary = espressomd.electrokinetics.DensityBoundary(2.)
+    # remove node boundary conditions
+    lbf[0, 0, 0].boundary = None
+
+.. _Shape-based EK boundary conditions:
+
+Shape-based boundary conditions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Adding a shape-based boundary is straightforward::
+
+    import espressomd
+    import espressomd.electrokinetics
+    import espressomd.shapes
+    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
+    system.cell_system.skin = 0.1
+    system.time_step = 0.01
+    lattice = espressomd.electrokinetics.LatticeWalberla(agrid=0.5, n_ghost_layers=1)
+    ek_species = espressomd.electrokinetics.EKSpecies(
+        kT=1.5, lattice=self.lattice, density=0.85, valency=0.0, diffusion=0.1,
+        advection=False, friction_coupling=False, tau=system.time_step)
+    system.ekcontainer.tau = species.tau
+    system.ekcontainer.add(ek_species)
+    # set fixed density boundary conditions
+    wall = espressomd.shapes.Wall(normal=[1., 0., 0.], dist=2.5)
+    ek_species.add_boundary_from_shape(
+        shape=wall, value=1., boundary_type=espressomd.electrokinetics.DensityBoundary)
+    # clear fixed density boundary conditions
+    ek_species.clear_density_boundaries()
+
+For a position-dependent flux, the argument to ``value`` must be a 4D grid
+(the first three dimensions must match the EK grid shape, the fourth
+dimension has size 3 for the flux).
+
+For a complete description of all available shapes, refer to
+:mod:`espressomd.shapes`.
+
+.. _Prototyping new EK methods:
+
+Prototyping new EK methods
+--------------------------
+
+Start by installing the code generator dependencies:
+
+.. code-block:: bash
+
+    python3 -m pip install --user -c requirements.txt numpy sympy lbmpy pystencils islpy
+
+Next, edit the code generator script to configure new kernels, then execute it:
+
+.. code-block:: bash
+
+    python3 maintainer/walberla_kernels/generate_lb_kernels.py
+
+The script takes optional arguments to control the CPU or GPU architecture,
+as well as the floating-point precision. The generated source code files need
+to be written to :file:`src/walberla_bridge/src/electrokinetics/generated_kernels/`
+and :file:`src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/`.
+These steps can be automated with the convenience shell functions documented in
+:file:`maintainer/walberla_kernels/Readme.md`.
+Edit the :file:`CMakeLists.txt` file in the destination folders to include the
+new kernels in the build system.
+Then, adapt :file:`src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp`
+to use the new EK kernels.
 
-    species[0, 0, 0].density
-    species[0, 0, 0].flux
 
 .. [5]
    https://www.paraview.org/
diff --git a/doc/sphinx/installation.rst b/doc/sphinx/installation.rst
index 3c81315ae4f..1495331b0a2 100644
--- a/doc/sphinx/installation.rst
+++ b/doc/sphinx/installation.rst
@@ -498,19 +498,9 @@ Fluid dynamics and fluid structure interaction
 
    .. seealso:: :ref:`DPD interaction`
 
--  ``LB_BOUNDARIES`` Enables the construction of LB boundaries from shape-based constraints on the CPU.
-
--  ``LB_BOUNDARIES_GPU`` Enables the construction of LB boundaries from shape-based constraints on the GPU.
-
 -  ``LB_ELECTROHYDRODYNAMICS`` Enables the implicit calculation of electro-hydrodynamics for charged
    particles and salt ions in an electric field.
 
--  ``ELECTROKINETICS`` Enables the description of chemical species advected by a LB fluid on the GPU.
-
--  ``EK_BOUNDARIES`` Enables the construction of electrokinetic boundaries from shape-based constraints on the GPU.
-
--  ``EK_DEBUG`` Enables additional checks in electrokinetic simulations.
-
 
 .. _Interaction features:
 
@@ -769,6 +759,9 @@ The following options control features from external libraries:
 * ``ESPRESSO_BUILD_WITH_SCAFACOS``: Build with ScaFaCoS support.
 * ``ESPRESSO_BUILD_WITH_GSL``: Build with GSL support.
 * ``ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS`` Build with Stokesian Dynamics support.
+* ``ESPRESSO_BUILD_WITH_WALBERLA``: Build with waLBerla support.
+* ``ESPRESSO_BUILD_WITH_WALBERLA_FFT``: Build waLBerla with FFT and PFFT support, used in FFT-based electrokinetics.
+* ``ESPRESSO_BUILD_WITH_WALBERLA_AVX``: Build waLBerla with AVX kernels instead of regular kernels.
 * ``ESPRESSO_BUILD_WITH_PYTHON``: Build with the Python interface.
 
 The following options control code instrumentation:
@@ -863,7 +856,12 @@ Configuring without a network connection
 Several :ref:`external features <External features>` in |es| rely on
 external libraries that are downloaded automatically by CMake. When a
 network connection cannot be established due to firewall restrictions,
-the CMake logic needs editing:
+the CMake logic needs editing.
+
+.. _Git submodules without a network connection:
+
+Git submodules without a network connection
+"""""""""""""""""""""""""""""""""""""""""""
 
 * ``ESPRESSO_BUILD_WITH_HDF5``: when cloning |es|, the :file:`libs/h5xx` folder
   will be a git submodule containing a :file:`.git` subfolder. To prevent CMake
@@ -876,13 +874,26 @@ the CMake logic needs editing:
   When installing a release version of |es|, no network communication
   is needed for HDF5.
 
-* ``ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS``: this library is installed using
-  `FetchContent <https://cmake.org/cmake/help/latest/module/FetchContent.html>`__.
-  The repository URL can be found in the ``GIT_REPOSITORY`` field of the
-  corresponding ``FetchContent_Declare()`` command. The ``GIT_TAG`` field
-  provides the commit. Clone this repository locally next to the |es|
-  folder and edit the |es| build system such that ``GIT_REPOSITORY`` points
-  to the absolute path of the Stokesian Dynamics clone, for example with:
+.. _CMake subprojects without a network connection:
+
+CMake subprojects without a network connection
+""""""""""""""""""""""""""""""""""""""""""""""
+
+Several libraries are downloaded and included into the CMake project using
+`FetchContent <https://cmake.org/cmake/help/latest/module/FetchContent.html>`__.
+The repository URLs can be found in the ``GIT_REPOSITORY`` field of the
+corresponding ``FetchContent_Declare()`` commands. The ``GIT_TAG`` field
+provides the commit. Clone these repositories locally and edit the |es|
+build system such that ``GIT_REPOSITORY`` points to the absolute path of
+the clone. You can automate this task by adapting the following commands:
+
+* ``ESPRESSO_BUILD_WITH_WALBERLA``
+
+  .. code-block:: bash
+
+    sed -ri 's|GIT_REPOSITORY +.+/walberla.git|GIT_REPOSITORY /work/username/walberla|' CMakeLists.txt
+
+* ``ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS``
 
   .. code-block:: bash
 
diff --git a/doc/sphinx/integration.rst b/doc/sphinx/integration.rst
index e4547612fad..b7f0bd34eaf 100644
--- a/doc/sphinx/integration.rst
+++ b/doc/sphinx/integration.rst
@@ -649,7 +649,7 @@ The backcoupling of friction forces and noise to the fluid is also done by distr
 Details for both the interpolation and the force distribution can be found in :cite:`ahlrichs99a` and :cite:`dunweg09a`.
 
 The LB fluid can be used to thermalize particles, while also including their hydrodynamic interactions.
-The LB thermostat expects an instance of either :class:`espressomd.lb.LBFluid` or :class:`espressomd.lb.LBFluidGPU`.
+The LB thermostat expects an instance of either :class:`espressomd.lb.LBFluidWalberla` or :class:`espressomd.lb.LBFluidWalberlaGPU`.
 Temperature is set via the ``kT`` argument of the LB fluid.
 
 The magnitude of the frictional coupling can be adjusted by the
@@ -658,7 +658,7 @@ parameter ``gamma``. To enable the LB thermostat, use::
     import espressomd
     import espressomd.lb
     system = espressomd.System(box_l=[1, 1, 1])
-    lbf = espressomd.lb.LBFluid(agrid=1, dens=1, visc=1, tau=0.01)
+    lbf = espressomd.lb.LBFluidWalberla(agrid=1, density=1, kinematic_viscosity=1, tau=0.01)
     system.actors.add(lbf)
     system.thermostat.set_lb(LB_fluid=lbf, seed=123, gamma=1.5)
 
diff --git a/doc/sphinx/io.rst b/doc/sphinx/io.rst
index 520b06b6fe6..5c6ee7eafa3 100644
--- a/doc/sphinx/io.rst
+++ b/doc/sphinx/io.rst
@@ -111,14 +111,22 @@ Be aware of the following limitations:
   for a specific combination of features, please share your findings
   with the |es| community.
 
-* The active actors, i.e., the content of ``system.actors``, are checkpointed.
-  For lattice-Boltzmann fluids, this only includes the parameters such as the
-  lattice constant (``agrid``). The actual flow field has to be saved
-  separately with the lattice-Boltzmann specific methods
-  :meth:`espressomd.lb.HydrodynamicInteraction.save_checkpoint`
-  and loaded via :meth:`espressomd.lb.HydrodynamicInteraction.load_checkpoint`
+* The active actors, i.e., the content of ``system.actors`` resp.
+  ``system.ekcontainers``, are checkpointed. For lattice-based methods like
+  lattice-Boltzmann fluids and advection-diffusion-reaction models, this only
+  includes the parameters such as the lattice constant (``agrid``) and initial
+  densities.
+  The actual fields have to be saved separately with the lattice-specific
+  methods :meth:`espressomd.lb.LBFluidWalberla.save_checkpoint
+  <espressomd.detail.walberla.LatticeModel.save_checkpoint>` resp.
+  :meth:`espressomd.electrokinetics.EKSpecies.save_checkpoint
+  <espressomd.detail.walberla.LatticeModel.save_checkpoint>`
+  and loaded via :meth:`espressomd.lb.LBFluidWalberla.load_checkpoint
+  <espressomd.detail.walberla.LatticeModel.load_checkpoint>` resp.
+  :meth:`espressomd.electrokinetics.EKSpecies.load_checkpoint
+  <espressomd.detail.walberla.LatticeModel.load_checkpoint>`
   after restoring the checkpoint. See :ref:`LB checkpointing <Checkpointing LB>`
-  for more details.
+  resp. :ref:`EK checkpointing <Checkpointing EK>` for more details.
 
 * References between Python objects are not maintained during checkpointing.
   For example, if an instance of a shape and an instance of a constraint
@@ -501,3 +509,26 @@ requires increasing and continuous indexing. The |es| ``id`` can be used as *key
     vtf_index[3]
 
 Note that the |es| particles are ordered in increasing order, thus ``id=3`` corresponds to the zeroth VTF index.
+
+.. _Reading VTK files:
+
+Reading VTK files
+-----------------
+
+The waLBerla library writes VTK multi-piece uniform grids in XML format.
+Each piece contains information about its spatial extent, from which it is
+possible to deduce the grid dimensions. Each piece may contain one or more
+array, which are uniquely identified by name. While the Python package ``vtk``
+provides tools to read VTK files as numpy arrays, it doesn't automatically
+reconstruct the 3D grids using the topology information of each piece; this
+functionality is provided by the wrapper :class:`~espressomd.io.vtk.VTKReader`:
+
+.. code-block:: python
+
+    import espressomd.io.vtk
+    vtk_reader = espressomd.io.vtk.VTKReader()
+    vtk_grids = vtk_reader.parse("simulation_step_0.vtu")
+    vtk_density = vtk_grids["density"]
+    print(vtk_density.shape)
+
+For a self-contained example, please refer to :ref:`LB VTK output`.
diff --git a/doc/sphinx/lb.rst b/doc/sphinx/lb.rst
index 72e78d47975..68a2dc0e8a6 100644
--- a/doc/sphinx/lb.rst
+++ b/doc/sphinx/lb.rst
@@ -18,11 +18,16 @@ Here we restrict the documentation to the interface. For a more detailed
 description of the method, please refer to the literature.
 
 .. note::
+    Please cite :cite:t:`godenschwager13a` and :cite:t:`bauer21a` (BibTeX keys
+    ``godenschwager13a`` and ``bauer21a`` in :file:`doc/bibliography.bib`) if
+    you use the LB fluid. When generating your own kernels with pystencils and
+    lbmpy, please also cite :cite:t:`bauer19a` and :cite:t:`bauer21b` (BibTeX
+    key ``bauer19a`` resp. ``bauer21b`` in :file:`doc/bibliography.bib`).
 
-    Please cite :cite:`arnold13a` (BibTeX key ``arnold13a`` in
-    :file:`doc/bibliography.bib`) if you use the LB fluid and :cite:`rohm12a`
-    (BibTeX key ``rohm12a`` in :file:`doc/bibliography.bib`) if you use
-    the GPU implementation.
+.. note::
+
+    Requires external feature ``WALBERLA``, enabled with the CMake option
+    ``-D ESPRESSO_BUILD_WITH_WALBERLA=ON``.
 
 .. _Setting up a LB fluid:
 
@@ -36,18 +41,18 @@ The following minimal example illustrates how to use the LBM in |es|::
     system = espressomd.System(box_l=[10, 20, 30])
     system.time_step = 0.01
     system.cell_system.skin = 0.4
-    lb = espressomd.lb.LBFluid(agrid=1.0, dens=1.0, visc=1.0, tau=0.01)
+    lb = espressomd.lb.LBFluidWalberla(agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=0.01)
     system.actors.add(lb)
     system.integrator.run(100)
 
 To use the GPU-accelerated variant, replace line 6 in the example above by::
 
-    lb = espressomd.lb.LBFluidGPU(agrid=1.0, dens=1.0, visc=1.0, tau=0.01)
+    lb = espressomd.lb.LBFluidWalberlaGPU(agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=0.01)
 
 .. note:: Feature ``CUDA`` required for the GPU-accelerated variant
 
 To use the (much faster) GPU implementation of the LBM, use
-:class:`~espressomd.lb.LBFluidGPU` in place of :class:`~espressomd.lb.LBFluid`.
+:class:`~espressomd.lb.LBFluidWalberlaGPU` in place of :class:`~espressomd.lb.LBFluidWalberla`.
 Please note that the GPU implementation uses single precision floating point operations.
 This decreases the accuracy of calculations compared to the CPU implementation.
 In particular, due to rounding errors, the fluid density decreases over time,
@@ -62,12 +67,12 @@ lattice constant of the fluid, so the size of the box in every direction
 must be a multiple of ``agrid``.
 
 In the following, we discuss the parameters that can be supplied to the LBM in |es|.
-The detailed interface definition is available at :class:`~espressomd.lb.LBFluid`.
+The detailed interface definition is available at :class:`~espressomd.lb.LBFluidWalberla`.
 
 The LB scheme and the MD scheme are not synchronized: In one LB time
 step typically several MD steps are performed. This allows to speed up
 the simulations and is adjusted with the parameter ``tau``, the LB time step.
-The parameters ``dens`` and ``visc`` set up the density and (kinematic) viscosity of the
+The parameters ``density`` and ``viscosity`` set up the density and (kinematic) viscosity of the
 LB fluid in (usual) MD units. Internally the LB implementation works
 with a different set of units: all lengths are expressed in ``agrid``, all times
 in ``tau`` and so on.
@@ -85,26 +90,24 @@ Thermalization of the fluid (and particle coupling later on) can be activated by
 providing a non-zero value for the parameter ``kT``. Then, a seed has to be provided for
 the fluid thermalization::
 
-    lbfluid = espressomd.lb.LBFluid(kT=1.0, seed=134, ...)
+    lb = espressomd.lb.LBFluidWalberla(kT=1.0, seed=134, ...)
 
 The parameter ``ext_force_density`` takes a three dimensional vector as an
 array_like of :obj:`float`, representing a homogeneous external body force density in MD
-units to be applied to the fluid. The parameter ``bulk_visc`` allows one to
-tune the bulk viscosity of the fluid and is given in MD units. In the limit of
-low Mach number, the flow does not compress the fluid and the resulting flow
-field is therefore independent of the bulk viscosity. It is however known that
-the value of the viscosity does affect the quality of the implemented
-link-bounce-back method. ``gamma_even`` and ``gamma_odd`` are the relaxation
-parameters for the kinetic modes. These fluid parameters do not correspond to
-any macroscopic fluid properties, but do influence numerical properties of the
-algorithm, such as the magnitude of the error at boundaries. Unless you are an
-expert, leave their defaults unchanged. If you do change them, note that they
-are to be given in LB units.
+units to be applied to the fluid.
 
 Before running a simulation at least the following parameters must be
-set up: ``agrid``, ``tau``, ``visc``, ``dens``. For the other parameters,
-the following are taken: ``bulk_visc=0``, ``gamma_odd=0``, ``gamma_even=0``,
-``ext_force_density=[0, 0, 0]``.
+set up: ``agrid``, ``tau``, ``viscosity``, ``density``.
+
+Performance considerations
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The CPU implementation of the LB has an extra flag ``single_precision`` to
+use single-precision floating point values. These are approximately 10%
+faster than double-precision, at the cost of a small loss in precision.
+
+To enable vectorization, run ``cmake . -D ESPRESSO_BUILD_WITH_WALBERLA_AVX=ON``.
+An AVX2-capable microprocessor is required.
 
 .. _Checkpointing LB:
 
@@ -148,18 +151,8 @@ To get interpolated velocity values between lattice nodes, the function::
     lb.get_interpolated_velocity(pos=[1.1, 1.2, 1.3])
 
 with a single position  ``pos`` as an argument can be used.
-For the GPU fluid :class:`espressomd.lb.LBFluidGPU`, a method
-:py:meth:`~espressomd.lb.LBFluidGPU.get_interpolated_fluid_velocity_at_positions()`
-is also available, which expects a numpy array of positions as an argument.
-
-By default, the interpolation is done linearly between the nearest 8 LB nodes,
-but for the GPU implementation also a quadratic scheme involving 27 nodes is implemented
-(see eqs. 297 and 301 in :cite:`dunweg09a`).
-You can choose by calling
-one of::
 
-    lb.set_interpolation_order('linear')
-    lb.set_interpolation_order('quadratic')
+The interpolation is done linearly between the nearest 8 LB nodes.
 
 A note on boundaries:
 both interpolation schemes don't take into account the physical location of the boundaries
@@ -184,11 +177,31 @@ the :ref:`LB thermostat` (see more detailed description there). A short example
 
     system.thermostat.set_lb(LB_fluid=lbf, seed=123, gamma=1.5)
 
-where ``lbf`` is an instance of either :class:`~espressomd.lb.LBFluid` or
-:class:`~espressomd.lb.LBFluidGPU`, ``gamma`` the friction coefficient and
+where ``lbf`` is an instance of either :class:`~espressomd.lb.LBFluidWalberla` or
+:class:`~espressomd.lb.LBFluidWalberlaGPU`, ``gamma`` the friction coefficient and
 ``seed`` the seed for the random number generator involved
 in the thermalization.
 
+.. _LB and LEbc:
+
+LB and LEbc
+^^^^^^^^^^^
+
+:ref:`Lees-Edwards boundary conditions` (LEbc) are supported by both
+LB implementations, which follow the derivation in :cite:`wagner02a`.
+Note, that there is no extra python interface for the use of LEbc
+with the LB algorithm: all the necessary information is internally
+derived from the currently active MD LEbc protocol in
+``system.lees_edwards.protocol``.
+Therefore, the MD LEbc must be set before the LB actor is instantiated.
+Use the :class:`~espressomd.lees_edwards.Off` if the system should have
+no shearing initially; this action will initialize the shear axes, and
+when the LB actor is instantiated, the Lees-Edwards collision kernels
+will be used instead of the default ones.
+
+.. note::
+
+    At the moment, LB only supports the case ``shear_plane_normal="y"``.
 
 .. _Reading and setting properties of single lattice nodes:
 
@@ -201,14 +214,13 @@ the selected LB grid node and allows one to access all of its properties::
     lb[x, y, z].density              # fluid density (one scalar for LB and CUDA)
     lb[x, y, z].velocity             # fluid velocity (a numpy array of three floats)
     lb[x, y, z].pressure_tensor      # fluid pressure tensor (a symmetric 3x3 numpy array of floats)
-    lb[x, y, z].pressure_tensor_neq  # nonequilibrium part of the pressure tensor (as above)
-    lb[x, y, z].boundary             # flag indicating whether the node is fluid or boundary (fluid: boundary=0, boundary: boundary != 0)
+    lb[x, y, z].is_boundary          # flag indicating whether the node is fluid or boundary (fluid: boundary=0, boundary: boundary != 1)
     lb[x, y, z].population           # 19 LB populations (a numpy array of 19 floats, check order from the source code)
 
 All of these properties can be read and used in further calculations.
 Only the property ``population`` can be modified. The indices ``x, y, z``
 are integers and enumerate the LB nodes in the three Cartesian directions,
-starting at 0. To modify ``boundary``, refer to :ref:`Setting up boundary conditions`.
+starting at 0. To modify ``is_boundary``, refer to :ref:`Setting up LB boundary conditions`.
 
 Example::
 
@@ -217,6 +229,7 @@ Example::
 
 The first line prints the fluid velocity at node (0 0 0) to the screen.
 The second line sets this fluid node's density to the value ``1.2``.
+Use negative indices to get nodes starting from the end of the lattice.
 
 The nodes can be read and modified using slices. Example::
 
@@ -230,43 +243,103 @@ a value that matches the length of the slice (which sets each node
 individually), or a single value that will be copied to every node
 (e.g. a scalar for density, or an array of length 3 for the velocity).
 
-.. _Output for visualization:
-
-Output for visualization
-------------------------
-
-|es| implements a number of commands to output fluid field data of the whole fluid into a file at once. ::
-
-    lb.write_vtk_velocity(path)
-    lb.write_vtk_boundary(path)
-    lb.write_velocity(path)
-    lb.write_boundary(path)
-
-Currently supported fluid properties are the velocity, and boundary flag in ASCII VTK as well as Gnuplot compatible ASCII output.
+.. _LB VTK output:
+
+VTK output
+----------
+
+The waLBerla library implements a globally-accessible VTK registry.
+A VTK stream can be attached to a LB actor to periodically write
+one or multiple fluid field data into a single file using
+:class:`~espressomd.lb.VTKOutput`::
+
+    vtk_obs = ["density", "velocity_vector"]
+    # create a VTK callback that automatically writes every 10 LB steps
+    lb_vtk = espressomd.lb.VTKOutput(
+        identifier="lb_vtk_automatic", observables=vtk_obs, delta_N=10)
+    lb.add_vtk_writer(vtk=lb_vtk)
+    self.system.integrator.run(100)
+    # can be deactivated
+    lb_vtk.disable()
+    self.system.integrator.run(10)
+    lb_vtk.enable()
+    # create a VTK callback that writes only when explicitly called
+    lb_vtk_on_demand = espressomd.lb.VTKOutput(
+        identifier="lb_vtk_now", observables=vtk_obs)
+    lb.add_vtk_writer(vtk=lb_vtk_on_demand)
+    lb_vtk_on_demand.write()
+
+Currently supported fluid properties are the density, velocity vector
+and pressure tensor. By default, the properties of the current state
+of the fluid are written to disk on demand. To add a stream that writes
+to disk continuously, use the optional argument ``delta_N`` to indicate
+the level of subsampling. Such a stream can be deactivated.
 
 The VTK format is readable by visualization software such as ParaView [1]_
-or Mayavi2 [2]_. If you plan to use ParaView for visualization, note that also the particle
-positions can be exported using the VTK format (see :meth:`~espressomd.particle_data.ParticleList.writevtk`).
-
-The variant
-
-::
-
-   lb.write_vtk_velocity(path, bb1, bb2)
-
-allows you to only output part of the flow field by specifying an axis aligned
-bounding box through the coordinates ``bb1`` and ``bb1`` (lists of three ints) of two of its corners. This
-bounding box can be used to output a slice of the flow field. As an
-example, executing
-
-::
-
-    lb.write_vtk_velocity(path, [0, 0, 5], [10, 10, 5])
-
-will output the cross-section of the velocity field in a plane
-perpendicular to the :math:`z`-axis at :math:`z = 5` (assuming the box
-size is 10 in the :math:`x`- and :math:`y`-direction).
+or Mayavi2 [2]_, as well as in |es| (see :ref:`Reading VTK files`).
+If you plan to use ParaView for visualization, note that also the particle
+positions can be exported using the VTK format
+(see :meth:`~espressomd.particle_data.ParticleList.writevtk`).
+
+Important: these VTK files are written in multi-piece format, i.e. each MPI
+rank writes its local domain to a new piece in the VTK uniform grid to avoid
+a MPI reduction. ParaView can handle the topology reconstruction natively.
+However, when reading the multi-piece file with the Python ``vtk`` package,
+the topology must be manually reconstructed. In particular, calling the XML
+reader ``GetOutput()`` method directly after the update step will erase all
+topology information. While this is not an issue for VTK files obtained from
+simulations that ran with 1 MPI rank, for parallel simulations this will lead
+to 3D grids with incorrectly ordered data. Automatic topology reconstruction
+is available through :class:`~espressomd.io.vtk.VTKReader`::
+
+    import pathlib
+    import tempfile
+    import numpy as np
+    import espressomd
+    import espressomd.lb
+    import espressomd.io.vtk
 
+    system = espressomd.System(box_l=[12., 14., 10.])
+    system.cell_system.skin = 0.4
+    system.time_step = 0.1
+
+    lbf = espressomd.lb.LBFluidWalberla(
+        agrid=1., tau=0.1, density=1., kinematic_viscosity=1.)
+    system.actors.add(lbf)
+    system.integrator.run(10)
+
+    vtk_reader = espressomd.io.vtk.VTKReader()
+    label_density = "density"
+    label_velocity = "velocity_vector"
+    label_pressure = "pressure_tensor"
+
+    with tempfile.TemporaryDirectory() as tmp_directory:
+        path_vtk_root = pathlib.Path(tmp_directory)
+        label_vtk = "lb_vtk"
+        path_vtk = path_vtk_root / label_vtk / "simulation_step_0.vtu"
+
+        # write VTK file
+        lb_vtk = espressomd.lb.VTKOutput(
+            identifier=label_vtk, delta_N=0,
+            observables=["density", "velocity_vector", "pressure_tensor"],
+            base_folder=str(path_vtk_root))
+        lbf.add_vtk_writer(vtk=lb_vtk)
+        lb_vtk.write()
+
+        # read VTK file
+        vtk_grids = vtk_reader.parse(path_vtk)
+        vtk_density = vtk_grids[label_density]
+        vtk_velocity = vtk_grids[label_velocity]
+        vtk_pressure = vtk_grids[label_pressure]
+        vtk_pressure = vtk_pressure.reshape(vtk_pressure.shape[:-1] + (3, 3))
+
+        # check VTK values match node values
+        lb_density = np.copy(lbf[:, :, :].density)
+        lb_velocity = np.copy(lbf[:, :, :].velocity)
+        lb_pressure = np.copy(lbf[:, :, :].pressure_tensor)
+        np.testing.assert_allclose(vtk_density, lb_density, rtol=1e-10, atol=0.)
+        np.testing.assert_allclose(vtk_velocity, lb_velocity, rtol=1e-7, atol=0.)
+        np.testing.assert_allclose(vtk_pressure, lb_pressure, rtol=1e-7, atol=0.)
 
 .. _Choosing between the GPU and CPU implementations:
 
@@ -276,8 +349,8 @@ Choosing between the GPU and CPU implementations
 |es| contains an implementation of the LBM for NVIDIA
 GPUs using the CUDA framework. On CUDA-supporting machines this can be
 activated by compiling with the feature ``CUDA``. Within the
-Python script, the :class:`~espressomd.lb.LBFluid` object can be substituted
-with the :class:`~espressomd.lb.LBFluidGPU` object to switch from CPU based
+Python script, the :class:`~espressomd.lb.LBFluidWalberla` object can be substituted
+with the :class:`~espressomd.lb.LBFluidWalberlaGPU` object to switch from CPU based
 to GPU based execution. For further
 information on CUDA support see section :ref:`CUDA acceleration`.
 
@@ -289,15 +362,10 @@ of the LBM in analogy to the example for the CPU given in section
     system = espressomd.System(box_l=[10, 20, 30])
     system.time_step = 0.01
     system.cell_system.skin = 0.4
-    lb = espressomd.lb.LBFluidGPU(agrid=1.0, dens=1.0, visc=1.0, tau=0.01)
+    lb = espressomd.lb.LBFluidWalberlaGPU(agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=0.01)
     system.actors.add(lb)
     system.integrator.run(100)
 
-For boundary conditions analogous to the CPU
-implementation, the feature ``LB_BOUNDARIES_GPU`` has to be activated.
-:ref:`Lees-Edwards boundary conditions` are not supported by either
-LB implementation.
-
 .. _Electrohydrodynamics:
 
 Electrohydrodynamics
@@ -316,130 +384,103 @@ particles that should be subject to the field. This effectively acts
 as a velocity offset between the particle and the LB fluid.
 
 For more information on this method and how it works, read the
-publication :cite:`hickey10a`.
-
-
-.. _Using shapes as lattice-Boltzmann boundary:
-
-Using shapes as lattice-Boltzmann boundary
-------------------------------------------
+publication :cite:t:`hickey10a`.
 
-.. note::
-    Feature ``LB_BOUNDARIES`` required
-
-Lattice-Boltzmann boundaries are implemented in the module
-:mod:`espressomd.lbboundaries`. You might want to take a look
-at the classes :class:`~espressomd.lbboundaries.LBBoundary`
-and :class:`~espressomd.lbboundaries.LBBoundaries` for more information.
-
-Adding a shape-based boundary is straightforward::
+.. _Setting up LB boundary conditions:
 
-    lbb = espressomd.lbboundaries.LBBoundary(shape=my_shape, velocity=[0, 0, 0])
-    system.lbboundaries.add(lbb)
-
-or::
+Setting up boundary conditions
+------------------------------
 
-    lbb = espressomd.lbboundaries.LBBoundary()
-    lbb.shape = my_shape
-    lbb.velocity = [0, 0, 0]
-    system.lbboundaries.add(lbb)
+Currently, only the so-called "link-bounce-back" algorithm for boundary
+nodes is available. This creates a boundary that is located
+approximately midway between lattice nodes. With no-slip boundary conditions,
+populations are reflected back. With slip velocities, the reflection is
+followed by a velocity interpolation. This allows to create shear flow and
+boundaries "moving" relative to each other.
 
-.. _Minimal usage example:
+Under the hood, a boundary field is added to the blockforest, which contains
+pre-calculated information for the reflection and interpolation operations.
 
-Minimal usage example
-~~~~~~~~~~~~~~~~~~~~~
+.. _Per-node LB boundary conditions:
 
-.. note:: Feature ``LB_BOUNDARIES`` or ``LB_BOUNDARIES_GPU`` required
+Per-node boundary conditions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-In order to add a wall as boundary for a lattice-Boltzmann fluid
-you could do the following::
+One can set (or update) the slip velocity of individual nodes::
 
-    wall = espressomd.shapes.Wall(dist=5, normal=[1, 0, 0])
-    lbb = espressomd.lbboundaries.LBBoundary(shape=wall, velocity=[0, 0, 0])
-    system.lbboundaries.add(lbb)
+    import espressomd.lb
+    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
+    system.cell_system.skin = 0.1
+    system.time_step = 0.01
+    lbf = espressomd.lb.LBFluidWalberla(agrid=0.5, density=1.0, kinematic_viscosity=1.0, tau=0.01)
+    system.actors.add(lbf)
+    # make one node a boundary node with a slip velocity
+    lbf[0, 0, 0].boundary = espressomd.lb.VelocityBounceBack([0, 0, 1])
+    # update node for no-slip boundary conditions
+    lbf[0, 0, 0].boundary = espressomd.lb.VelocityBounceBack([0, 0, 0])
+    # remove boundary conditions
+    lbf[0, 0, 0].boundary = None
 
-.. _Setting up boundary conditions:
+.. _Shape-based LB boundary conditions:
 
-Setting up boundary conditions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Shape-based boundary conditions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The following example sets up a system consisting of a spherical boundary
-in the center of the simulation box acting as a no-slip boundary for the
-LB fluid that is driven by 4 walls with a slip velocity::
+Adding a shape-based boundary is straightforward::
 
-    import espressomd
     import espressomd.lb
-    import espressomd.lbboundaries
     import espressomd.shapes
-
-    system = espressomd.System(box_l=[64, 64, 64])
+    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
+    system.cell_system.skin = 0.1
     system.time_step = 0.01
-    system.cell_system.skin = 0.4
-
-    lb = espressomd.lb.LBFluid(agrid=1.0, dens=1.0, visc=1.0, tau=0.01)
-    system.actors.add(lb)
-
-    v = [0, 0, 0.01]  # the boundary slip
-    walls = [None] * 4
-
-    wall_shape = espressomd.shapes.Wall(normal=[1, 0, 0], dist=1)
-    walls[0] = espressomd.lbboundaries.LBBoundary(shape=wall_shape, velocity=v)
-
-    wall_shape = espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-63)
-    walls[1] = espressomd.lbboundaries.LBBoundary(shape=wall_shape, velocity=v)
+    lbf = espressomd.lb.LBFluidWalberla(agrid=0.5, density=1.0, kinematic_viscosity=1.0, tau=0.01)
+    system.actors.add(lbf)
+    # set up shear flow between two sliding walls
+    wall1 = espressomd.shapes.Wall(normal=[+1., 0., 0.], dist=2.5)
+    lbf.add_boundary_from_shape(shape=wall1, velocity=[0., +0.05, 0.])
+    wall2 = espressomd.shapes.Wall(normal=[-1., 0., 0.], dist=-(system.box_l[0] - 2.5))
+    lbf.add_boundary_from_shape(shape=wall2, velocity=[0., -0.05, 0.])
+
+The ``velocity`` argument is optional, in which case the no-slip boundary
+conditions are used. For a position-dependent slip velocity, the argument
+to ``velocity`` must be a 4D grid (the first three dimensions must match
+the LB grid shape, the fourth dimension has size 3 for the velocity).
 
-    wall_shape = espressomd.shapes.Wall(normal=[0, 1, 0], dist=1)
-    walls[2] = espressomd.lbboundaries.LBBoundary(shape=wall_shape, velocity=v)
+The LB boundaries use the same :mod:`~espressomd.shapes` objects to specify
+their geometry as :mod:`~espressomd.constraints` do for particles.
+This allows the user to quickly set up a system with boundary conditions
+that simultaneously act on the fluid and particles. For a complete
+description of all available shapes, refer to :mod:`espressomd.shapes`.
 
-    wall_shape = espressomd.shapes.Wall(normal=[0, -1, 0], dist=-63)
-    walls[3] = espressomd.lbboundaries.LBBoundary(shape=wall_shape, velocity=v)
+.. _Prototyping new LB methods:
 
-    for wall in walls:
-        system.lbboundaries.add(wall)
+Prototyping new LB methods
+--------------------------
 
-    sphere_shape = espressomd.shapes.Sphere(radius=5.5, center=[33, 33, 33], direction=1)
-    sphere = espressomd.lbboundaries.LBBoundary(shape=sphere_shape)
-    system.lbboundaries.add(sphere)
+Start by installing the code generator dependencies:
 
-    system.integrator.run(4000)
+.. code-block:: bash
 
-    print(sphere.get_force())
+    python3 -m pip install --user -c requirements.txt numpy sympy lbmpy pystencils islpy
 
-After integrating the system for a sufficient time to reach the steady state,
-the hydrodynamic drag force exerted on the sphere is evaluated.
+Next, edit the code generator script to configure new kernels, then execute it:
 
-The LB boundaries use the same :mod:`~espressomd.shapes` objects to specify
-their geometry as :mod:`~espressomd.constraints` do for particles.
-This allows the user to quickly set up a system with boundary conditions
-that simultaneously act on the fluid and particles. For a complete
-description of all available shapes, refer to :mod:`espressomd.shapes`.
+.. code-block:: bash
 
-Intersecting boundaries are in principle possible but must be treated
-with care. In the current implementation, all nodes that are
-within at least one boundary are treated as boundary nodes.
+    python3 maintainer/walberla_kernels/generate_lb_kernels.py
 
-Currently, only the so-called "link-bounce-back" algorithm for wall
-nodes is available. This creates a boundary that is located
-approximately midway between the lattice nodes, so in the above example ``wall[0]``
-corresponds to a boundary at :math:`x=1.5`. Note that the
-location of the boundary is unfortunately not entirely independent of
-the viscosity. This can be seen when using the sample script with a high
-viscosity.
-
-The bounce back boundary conditions permit it to set the velocity at the boundary
-to a non-zero value via the ``v`` property of an ``LBBoundary`` object.
-This allows to create shear flow and boundaries
-moving relative to each other. The velocity boundary conditions are
-implemented according to :cite:`succi01a` eq. 12.58. Using
-this implementation as a blueprint for the boundary treatment, an
-implementation of the Ladd-Coupling should be relatively
-straightforward. The ``LBBoundary`` object furthermore possesses
-a property ``force``, which keeps track of the hydrodynamic drag
-force exerted onto the boundary by the moving fluid.
+The script takes optional arguments to control the CPU or GPU architecture,
+as well as the floating-point precision. The generated source code files need
+to be written to :file:`src/walberla_bridge/src/lattice_boltzmann/generated_kernels/`.
+These steps can be automated with the convenience shell functions documented in
+:file:`maintainer/walberla_kernels/Readme.md`.
+Edit the :file:`CMakeLists.txt` file in the destination folder to include the
+new kernels in the build system.
+Then, adapt :file:`src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp`
+to use the new LB kernels.
 
 
 .. [1]
    https://www.paraview.org/
-
 .. [2]
    http://code.enthought.com/projects/mayavi/
diff --git a/doc/sphinx/particles.rst b/doc/sphinx/particles.rst
index 7bbb7b46052..b1da59f0d94 100644
--- a/doc/sphinx/particles.rst
+++ b/doc/sphinx/particles.rst
@@ -386,7 +386,6 @@ For correct results, the LB thermostat has to be deactivated for virtual sites::
    system.thermostat.set_lb(kT=0, act_on_virtual=False)
 
 Please note that the velocity attribute of the virtual particles does not carry valid information for this virtual sites scheme.
-With the LB GPU implementation, inertialess tracers only work on 1 MPI rank.
 
 .. _Interacting with groups of particles:
 
diff --git a/doc/tutorials/active_matter/active_matter.ipynb b/doc/tutorials/active_matter/active_matter.ipynb
index 290cd845e85..2055485ab38 100644
--- a/doc/tutorials/active_matter/active_matter.ipynb
+++ b/doc/tutorials/active_matter/active_matter.ipynb
@@ -124,7 +124,7 @@
     "import espressomd.accumulators\n",
     "\n",
     "espressomd.assert_features(\n",
-    "    [\"ENGINE\", \"ROTATION\", \"MASS\", \"ROTATIONAL_INERTIA\", \"CUDA\"])"
+    "    [\"ENGINE\", \"ROTATION\", \"MASS\", \"ROTATIONAL_INERTIA\", \"WALBERLA\"])"
    ]
   },
   {
@@ -891,8 +891,10 @@
    },
    "source": [
     "```python\n",
-    "lbf = espressomd.lb.LBFluidGPU(agrid=HYDRO_PARAMS['agrid'], dens=HYDRO_PARAMS['dens'],\n",
-    "                               visc=HYDRO_PARAMS['visc'], tau=HYDRO_PARAMS['time_step'])\n",
+    "lbf = espressomd.lb.LBFluidWalberla(agrid=HYDRO_PARAMS['agrid'],\n",
+    "                                    density=HYDRO_PARAMS['dens'],\n",
+    "                                    kinematic_viscosity=HYDRO_PARAMS['visc'],\n",
+    "                                    tau=HYDRO_PARAMS['time_step'])\n",
     "system.actors.add(lbf)\n",
     "system.thermostat.set_lb(LB_fluid=lbf, gamma=HYDRO_PARAMS['gamma'], seed=42)\n",
     "```"
@@ -997,8 +999,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "lbf.write_vtk_velocity('./fluid.vtk')\n",
-    "system.part.writevtk('./particle.vtk')"
+    "import os\n",
+    "vtk_base_dir = os.path.join('vtk_out', 'RESULTS_FLOW_FIELD')\n",
+    "vtk_identifier = f'T_{HYDRO_PARAMS[\"mode\"]}_P_{pos[2]}'\n",
+    "vtk_outdir = os.path.join(vtk_base_dir, vtk_identifier)\n",
+    "lb_vtk = espressomd.lb.VTKOutput(identifier=vtk_identifier,\n",
+    "                                 observables=[\"velocity_vector\"],\n",
+    "                                 base_folder=vtk_base_dir,\n",
+    "                                 prefix=\"lb_velocity\")\n",
+    "lbf.add_vtk_writer(vtk=lb_vtk)\n",
+    "for i in range(HYDRO_N_STEPS // 100):\n",
+    "    system.integrator.run(100)\n",
+    "    system.part.writevtk(os.path.join(vtk_outdir, f'position_{i}.vtk'), types=[0])\n",
+    "    lb_vtk.write()"
    ]
   },
   {
diff --git a/doc/tutorials/electrokinetics/CMakeLists.txt b/doc/tutorials/electrokinetics/CMakeLists.txt
index 32a271d67a4..a3c419cb6fc 100644
--- a/doc/tutorials/electrokinetics/CMakeLists.txt
+++ b/doc/tutorials/electrokinetics/CMakeLists.txt
@@ -20,5 +20,4 @@
 configure_tutorial_target(TARGET tutorial_ek DEPENDS electrokinetics.ipynb
                           figures/schlitzpore_3d.png scripts/eof_analytical.py)
 
-nb_export(TARGET tutorial_ek SUFFIX "" FILE "electrokinetics.ipynb" HTML_RUN
-          VAR_SUBST "integration_length=600;dt=0.5")
+nb_export(TARGET tutorial_ek SUFFIX "" FILE "electrokinetics.ipynb" HTML_RUN)
diff --git a/doc/tutorials/electrokinetics/electrokinetics.ipynb b/doc/tutorials/electrokinetics/electrokinetics.ipynb
index dc709d5c7d4..a20ec75b40e 100644
--- a/doc/tutorials/electrokinetics/electrokinetics.ipynb
+++ b/doc/tutorials/electrokinetics/electrokinetics.ipynb
@@ -17,9 +17,8 @@
     "   1. [The Electrokinetic Equations](#The-Electrokinetic-Equations)\n",
     "   2. [EOF in the Slit Pore Geometry](#EOF-in-the-Slit-Pore-Geometry)\n",
     "3. [Simulation using ESPResSo](#Simulation-using-ESPResSo)\n",
-    "   1. [Setting up ESPResSo](#Setting-up-ESPResSo)\n",
-    "   2. [Mapping SI and Simulation Units](#Mapping-SI-and-Simulation-Units)\n",
-    "   3. [Setting up the slit pore system](#Setting-up-the-slit-pore-system)\n",
+    "   1. [Mapping SI and Simulation Units](#Mapping-SI-and-Simulation-Units)\n",
+    "   2. [Setting up the slit pore system](#Setting-up-the-slit-pore-system)\n",
     "4. [References](#References)\n",
     "    "
    ]
@@ -31,18 +30,15 @@
     "## Introduction\n",
     "\n",
     "In recent years the lattice-Boltzmann method (LBM) has proven itself to be a viable way to introduce hydrodynamic interactions into coarse-grained MD simulations with moderate computational cost.\n",
-    "The success of the GPU LBM implementation in ESPResSo and similar developments in other software packages created demand for further developments in this area.\n",
-    "ESPResSo features two such algorithms, namely ELECTROHYDRODYNAMICS, and ELECTROKINETICS (EK).\n",
-    "Both of these make use of the LBM and extend it to coarse-grain not only the solvent molecules but also ionic solutes.\n",
-    "ELECTROHYDRODYNAMICS does so using a slip layer coupling for charged particles valid in the thin Debye layer (large salt concentration) limit [1], while EK explicitly treats the ionic solutes in a continuum fashion and is valid for a wide range of salt concentrations [2-4].\n",
+    "ESPResSo features such an algorithm, which can make use of the LBM and extend it to coarse-grain not only the solvent molecules but also ionic solutes. It is called EK and explicitly treats the ionic solutes in a continuum fashion and is valid for a wide range of salt concentrations [1-3].\n",
     "\n",
     "### Tutorial Outline\n",
     "\n",
     "To make our first steps using ELECTROKINETICS we will work on one of the few systems for which analytic solutions for the electrokinetic equations exist: the slip pore geometry with a counterion-only electrolyte.\n",
     "The same slit pore system is also treated in the LBM tutorial, but there, the ionic species were modeled as explicit particles.\n",
-    "For this system, the two approaches lead to exactly the same results [5].\n",
+    "For this system, the two approaches lead to exactly the same results [4].\n",
     "Differences became significant for multivalent ions, very high salt concentrations, and very high surface charge, since then the mean-field approach the EK employs, is basically solving the Poisson-Nernst-Planck formalism plus the Navier-Stokes equation on a lattice.\n",
-    "This leads to significantly different results from explicit ion approaches [6-8].\n",
+    "This leads to significantly different results from explicit ion approaches [5-7].\n",
     "This tutorial is mainly divided into two sections.\n",
     "* **Theoretical Background** introduces the electrokinetic equations and the analytical solution for the slit pore system.\n",
     "* **Simulation using ESPResSo** deals exclusively with the simulation. \n",
@@ -64,7 +60,7 @@
     "### The Electrokinetic Equations\n",
     "\n",
     "In the following, we will derive the equations modeling the time evolution of the concentrations of dissolved species as well as the solvent in the standard electrokinetic model.\n",
-    "We do so, neglecting the salt ions' contribution to the overall mass density, which allows us to treat the dynamics of the ions and the fluid separately [8].\n",
+    "We do so, neglecting the salt ions' contribution to the overall mass density, which allows us to treat the dynamics of the ions and the fluid separately [7].\n",
     "The solvent fluid will be modeled using the Navier-Stokes equations while we use a set of diffusion-migration-advection equations for the ionic species.\n"
    ]
   },
@@ -105,7 +101,7 @@
     "This free-energy density consists of only an ideal-gas and an electrostatic contribution.\n",
     "The same assumptions form the basis of Poisson-Boltzmann (PB) theory.\n",
     "Hence, the limitations of this model are the same as those of PB.\n",
-    "That means this model applies to monovalent ions at low to intermediate densities and surface charges [6,7,11,12].\n",
+    "That means this model applies to monovalent ions at low to intermediate densities and surface charges [5,6,10,11].\n",
     "\n",
     "The species' chemical potentials $\\mu_{k}$ implied by the free-energy density read\n",
     "\n",
@@ -120,7 +116,7 @@
     "&= -D_{k} \\nabla c_{k} - \\xi_{k} z_{k} e c_{k} \\nabla \\Phi .\n",
     "\\end{aligned}\n",
     "\n",
-    "Here, $\\xi_{k}$ and $D_{k}$ denote the mobility and the diffusion coefficient of species $k$, which are related by the Einstein-Smoluchowski relation $D_{k} / \\xi_{k} = k_{\\mathrm{B}}T$ [12,13].\n",
+    "Here, $\\xi_{k}$ and $D_{k}$ denote the mobility and the diffusion coefficient of species $k$, which are related by the Einstein-Smoluchowski relation $D_{k} / \\xi_{k} = k_{\\mathrm{B}}T$ [11,12].\n",
     "\n",
     "Finally, the total number density flux combining effects of diffusion and advection reads\n",
     "\n",
@@ -219,7 +215,7 @@
     "\\begin{equation}\n",
     "\\Phi(x) = -\\frac{k_\\mathrm{B}T}{ze} \\cdot \\log \\left[ \\frac{C^2}{8 \\pi \\, k_\\mathrm{B}T \\, l_\\mathrm{B}} \\cdot \\cos^{-2}\\left( \\frac{zeC}{2 k_\\mathrm{B}T} \\cdot x\\right) \\right], \\quad \\left| \\frac{zeC}{2 k_\\mathrm{B}T} \\cdot x \\right| < \\frac \\pi 2\\; .\n",
     "\\end{equation}\n",
-    "Refer to [5] for details on this calculation.\n",
+    "Refer to [4] for details on this calculation.\n",
     "Knowing that the counterion density $c$ resembles a Boltzmann distribution in the potential $ze \\Phi$ leads to the expression\n",
     "\\begin{equation}\n",
     "c(x) = \\frac{C^2}{8 \\pi \\, k_\\mathrm{B}T \\, l_\\mathrm{B}} \\cdot \\cos^{-2} \\left( \\frac{zeC}{2 k_\\mathrm{B}T} \\cdot x \\right) \\; .\n",
@@ -251,15 +247,6 @@
     "## Simulation using ESPResSo"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Setting up ESPResSo\n",
-    "\n",
-    "To use the electrokinetics solver in ESPResSo enable the features <tt>ELECTROKINETICS</tt> and <tt>EK_BOUNDARIES</tt> during the build process."
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -368,10 +355,9 @@
     "# Initializing espresso modules and the numpy package\n",
     "import espressomd\n",
     "import espressomd.electrokinetics\n",
-    "import espressomd.ekboundaries\n",
     "import espressomd.shapes\n",
     "\n",
-    "espressomd.assert_features(['CUDA', 'ELECTROKINETICS'])\n",
+    "espressomd.assert_features([\"WALBERLA\", \"WALBERLA_FFT\"])\n",
     "\n",
     "import tqdm\n",
     "import numpy as np\n",
@@ -379,10 +365,6 @@
     "import matplotlib.pyplot as plt\n",
     "plt.rcParams.update({'font.size': 16})\n",
     "\n",
-    "# Set the slit pore geometry where the width is the non-periodic part of the geometry\n",
-    "# the padding is used to ensure that there is no field outside the slit since the\n",
-    "# electrostatics is used with a 3D periodic FFT solver.\n",
-    "\n",
     "box_y = 6\n",
     "box_z = 6\n",
     "width = 50\n",
@@ -411,15 +393,18 @@
     "# Set the electrokinetic parameters\n",
     "\n",
     "agrid = 1.0\n",
-    "dt = 0.2\n",
-    "kT = 1.0\n",
+    "dt = 0.5\n",
+    "kT = 4.0\n",
     "bjerrum_length = 0.7095\n",
+    "permittivity = 1. / (4 * np.pi * bjerrum_length)\n",
     "D = 0.006075\n",
     "valency = 1.0\n",
     "viscosity_dynamic = 79.53\n",
     "density_water = 26.15\n",
     "sigma = -0.05\n",
-    "ext_force_density = [0.0, 0.1, 0.0]"
+    "ext_force_density = [0.0, 0.1, 0.0]\n",
+    "\n",
+    "single_precision = False"
    ]
   },
   {
@@ -440,17 +425,23 @@
     "system.time_step = dt\n",
     "system.cell_system.skin = 0.2\n",
     "system.thermostat.turn_off()\n",
-    "integration_length = int(2e4)"
+    "integration_length = 600"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can now set up the electrokinetics algorithm.\n",
-    "All functionality pertaining to this algorithm is available through the <tt>electrokinetics</tt> submodule of <tt>espressomd</tt>.\n",
-    "Please note that the fluid viscosity is specified as a kinematic viscosity, which is the dynamic viscosity divided by the fluid density.\n",
-    "The kinematic viscosity is also required if you initialize the pure lattice-Boltzmann method.\n"
+    "We can now set up the electrokinetics algorithm."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lattice = espressomd.lb.LatticeWalberla(agrid=agrid, n_ghost_layers=1)"
    ]
   },
   {
@@ -459,26 +450,21 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Set up the (LB) electrokinetics fluid\n",
     "viscosity_kinematic = viscosity_dynamic / density_water\n",
-    "ek = espressomd.electrokinetics.Electrokinetics(agrid=agrid,\n",
-    "                                                lb_density=density_water,\n",
-    "                                                viscosity=viscosity_kinematic,\n",
-    "                                                friction=1.0,\n",
-    "                                                T=kT,\n",
-    "                                                prefactor=bjerrum_length)"
+    "lbf = espressomd.lb.LBFluidWalberla(lattice=lattice, density=density_water, kinematic_viscosity=viscosity_kinematic, tau=dt, single_precision=single_precision)\n",
+    "system.actors.add(lbf)"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
-    "The value of the friction parameter in the previous setup command is irrelevant, since we don't include any explicit particles in our simulation, but it's needed to pass the sanity check of the LB.\n",
+    "eksolver = espressomd.electrokinetics.EKFFT(lattice=lattice, permittivity=permittivity, single_precision=single_precision)\n",
     "\n",
-    "Next, we set up the individual ionic species.\n",
-    "In this case, we only set up one species of positively charged counterions.\n",
-    "The charge density is chosen in such a way, that it will cancel out the charges of the walls which are being inserted in the step afterwards.\n",
-    "After setting up the species, we have to add it to the electrokinetics instance. "
+    "system.ekcontainer.solver = eksolver\n",
+    "system.ekcontainer.tau = dt"
    ]
   },
   {
@@ -487,25 +473,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Set up the charged and neutral species\n",
     "density_counterions = -2.0 * sigma / width\n",
-    "counterions = espressomd.electrokinetics.Species(density=density_counterions,\n",
-    "                                                 D=D,\n",
-    "                                                 valency=valency,\n",
-    "                                                 ext_force_density=ext_force_density)\n",
-    "\n",
-    "ek.add_species(counterions)"
+    "ekspecies = espressomd.electrokinetics.EKSpecies(lattice=lattice, density=0.0, kT=kT, diffusion=D, valency=valency, advection=True, friction_coupling=True, ext_efield=ext_force_density, single_precision=single_precision, tau=dt)\n",
+    "system.ekcontainer.add(ekspecies)"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
-    "The <tt>EKBoundary</tt> command takes the keyword <tt>charge_density</tt> and the numerical charge density in simulation units as arguments.\n",
-    "The <tt>shape</tt> keyword takes an instance of a shape, which is provided by the <tt>shapes</tt> submodule and is the same as for the <tt>LBBoundary</tt> command.\n",
-    "Here we initialize two charged <tt>Wall</tt> boundaries.\n",
-    "To initialize the boundaries, we have to add them to the <tt>ekboundaries</tt> instance of the system class.\n",
-    "Finally, we initialize the electrokinetics algorithm with our setup by adding the electrokinetics instance as an actor to the system."
+    "ekwallcharge = espressomd.electrokinetics.EKSpecies(lattice=lattice, density=0.0, kT=kT, diffusion=0., valency=-valency, advection=False, friction_coupling=False, ext_efield=[0, 0, 0], single_precision=single_precision, tau=dt)\n",
+    "system.ekcontainer.add(ekwallcharge)"
    ]
   },
   {
@@ -514,28 +494,42 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Set up the walls confining the fluid\n",
-    "ek_wall_left = espressomd.ekboundaries.EKBoundary(charge_density=sigma / agrid,\n",
-    "                                                  shape=espressomd.shapes.Wall(normal=[1, 0, 0], dist=padding))\n",
-    "ek_wall_right = espressomd.ekboundaries.EKBoundary(charge_density=sigma / agrid,\n",
-    "                                                   shape=espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-(padding + width)))\n",
-    "\n",
-    "system.ekboundaries.add(ek_wall_left)\n",
-    "system.ekboundaries.add(ek_wall_right)\n",
-    "\n",
-    "system.actors.add(ek)"
+    "wall_left = espressomd.shapes.Wall(normal=[1, 0, 0], dist=padding)\n",
+    "wall_right = espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-(padding + width))"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "ekspecies[padding:-padding, :, :].density = density_counterions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
-    "After setting up the system, we integrate a sufficient number of time steps to relax the system into the stationary state and output the counterion density profile, the velocity profile, and the shear stress.\n",
-    "Since this system has translational symmetry in the x- and y-direction, we iterate over a line in the z direction and use the <tt>species[node].quantity</tt> command, to output local quantities.\n",
-    "You can instead also use the <tt>electrokinetics.write_vtk_quantity</tt> command to output the whole field at once in a ParaView-compatible format.\n",
+    "ekspecies[:padding, :, :].density = 0.0\n",
+    "ekspecies[-padding:, :, :].density = 0.0\n",
     "\n",
-    "Density and velocity are not the only fields available for output.\n",
-    "Please refer to the User's Guide for all available options."
+    "ekwallcharge[:padding, :, :].density = -sigma / valency / padding\n",
+    "ekwallcharge[-padding:, :, :].density = -sigma / valency / padding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for shape_obj in (wall_left, wall_right):\n",
+    "    ekspecies.add_boundary_from_shape(shape=shape_obj, value=[0., 0., 0.], boundary_type=espressomd.electrokinetics.FluxBoundary)\n",
+    "    ekspecies.add_boundary_from_shape(shape=shape_obj, value=0.0, boundary_type=espressomd.electrokinetics.DensityBoundary)\n",
+    "    lbf.add_boundary_from_shape(shape=shape_obj, velocity=[0., 0., 0.])"
    ]
   },
   {
@@ -562,13 +556,13 @@
     "        node_idxs = (i, int(box_y / (2 * agrid)), int(box_z / (2 * agrid)))\n",
     "\n",
     "        # density\n",
-    "        density_list.append(counterions[node_idxs].density)\n",
+    "        density_list.append(ekspecies[node_idxs].density)\n",
     "\n",
     "        # velocity\n",
-    "        velocity_list.append(ek[node_idxs].velocity[1])\n",
+    "        velocity_list.append(lbf[node_idxs].velocity[1])\n",
     "\n",
     "        # xz component pressure tensor\n",
-    "        pressure_xy_list.append(ek[node_idxs].pressure_tensor[0, 1])\n",
+    "        pressure_xy_list.append(lbf[node_idxs].pressure_tensor[0, 1])\n",
     "\n",
     "np.savetxt(\"eof_simulation.dat\",\n",
     "           np.column_stack((position_list,\n",
@@ -578,18 +572,12 @@
     "           header=\"#position calculated_density calculated_velocity calculated_pressure_xy\")"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We will now plot the counterion density, fluid velocity, and fluid shear stress\n",
-    "profiles along the direction perpendicular to the slit pore walls."
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "from scripts import eof_analytical # executes automatically upon import\n",
@@ -633,25 +621,31 @@
    "source": [
     "## References\n",
     "\n",
-    "[1] O. A. Hickey, C. Holm, J. L. Harden and G. W. Slater *Implicit Method for Simulating Electrohydrodynamics of Polyelectrolytes* Physical Review Letters, 2010  \n",
-    "[2] F. Capuani, I. Pagonabarraga and D. Frenkel *Discrete solution of the electrokinetic equations* The Journal of Chemical Physics, 2004  \n",
-    "[3] G. Rempfer *A Lattice based Model for Electrokinetics* Master's thesis, University of Stuttgart, 2013  \n",
-    "[4] G. Rempfer, G. B. Davies, C. Holm and J. de Graaf *Reducing spurious flow in simulations of electrokinetic phenomena* The Journal of Chemical Physics, 2016  \n",
-    "[5] G. Rempfer *Lattice-Boltzmann simulations in complex geometries* Bachelor's thesis, University of Stuttgart, Institute for Computational Physics, 2010  \n",
-    "[6] M. Deserno and C. Holm and S. May, *Fraction of Condensed Counterions around a Charged Rod: Comparison of Poisson-Boltzmann Theory and Computer Simulations* Macromolecules, 2000  \n",
-    "[7] C. Holm, P. K&eacute;kicheff and R. Podgornik *Electrostatic Effects in Soft Matter and Biophysics* Kluwer Academic Publishers, 2001  \n",
-    "[8] M. Deserno and C. Holm *Cell-model and Poisson-Boltzmann-theory: A brief introduction* Electrostatic Effects in Soft Matter and Biophysics, Kluwer Academic Publishers, 2001  \n",
-    "[9] J de Graaf., G. Rempfer and C. Holm *Diffusiophoretic Self-Propulsion for Partially Catalytic Spherical Colloids* IEEE T. Nanobiosci., 2014  \n",
-    "[10] M. Deserno *Counterion condensation for rigid linear polyelectrolytes* Universit&auml;t Mainz, 2000  \n",
-    "[11] J. de Graaf, N Boon, M Dijkstra and R. van Roij *Electrostatic interactions between Janus particles* The Journal of Chemical Physics, 2012   \n",
-    "[12] A. Einstein *&Uuml;ber die von der molekularkinetischen Theorie der W&auml;rme geforderte Bewegung von in ruhenden Fl&uuml;ssigkeiten suspendierten Teilchen* Annalen der Physik, 1905  \n",
-    "[13] M. von Smoluchowski *Zur kinetischen Theorie der Brownschen Molekularbewegung und der Suspensionen* Annalen der Physik, 1906  \n"
+    "[1] F. Capuani, I. Pagonabarraga and D. Frenkel *Discrete solution of the electrokinetic equations* The Journal of Chemical Physics, 2004  \n",
+    "[2] G. Rempfer *A Lattice based Model for Electrokinetics* Master's thesis, University of Stuttgart, 2013  \n",
+    "[3] G. Rempfer, G. B. Davies, C. Holm and J. de Graaf *Reducing spurious flow in simulations of electrokinetic phenomena* The Journal of Chemical Physics, 2016  \n",
+    "[4] G. Rempfer *Lattice-Boltzmann simulations in complex geometries* Bachelor's thesis, University of Stuttgart, Institute for Computational Physics, 2010  \n",
+    "[5] M. Deserno and C. Holm and S. May, *Fraction of Condensed Counterions around a Charged Rod: Comparison of Poisson-Boltzmann Theory and Computer Simulations* Macromolecules, 2000  \n",
+    "[6] C. Holm, P. K&eacute;kicheff and R. Podgornik *Electrostatic Effects in Soft Matter and Biophysics* Kluwer Academic Publishers, 2001  \n",
+    "[7] M. Deserno and C. Holm *Cell-model and Poisson-Boltzmann-theory: A brief introduction* Electrostatic Effects in Soft Matter and Biophysics, Kluwer Academic Publishers, 2001  \n",
+    "[8] J de Graaf., G. Rempfer and C. Holm *Diffusiophoretic Self-Propulsion for Partially Catalytic Spherical Colloids* IEEE T. Nanobiosci., 2014  \n",
+    "[9] M. Deserno *Counterion condensation for rigid linear polyelectrolytes* Universit&auml;t Mainz, 2000  \n",
+    "[10] J. de Graaf, N Boon, M Dijkstra and R. van Roij *Electrostatic interactions between Janus particles* The Journal of Chemical Physics, 2012   \n",
+    "[11] A. Einstein *&Uuml;ber die von der molekularkinetischen Theorie der W&auml;rme geforderte Bewegung von in ruhenden Fl&uuml;ssigkeiten suspendierten Teilchen* Annalen der Physik, 1905  \n",
+    "[12] M. von Smoluchowski *Zur kinetischen Theorie der Brownschen Molekularbewegung und der Suspensionen* Annalen der Physik, 1906  \n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -665,7 +659,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.10.6"
   }
  },
  "nbformat": 4,
diff --git a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_poiseuille_flow.ipynb b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_poiseuille_flow.ipynb
index b984fe56e46..b0d51f19add 100644
--- a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_poiseuille_flow.ipynb
+++ b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_poiseuille_flow.ipynb
@@ -58,12 +58,11 @@
     "\n",
     "import espressomd\n",
     "import espressomd.lb\n",
-    "import espressomd.lbboundaries\n",
     "import espressomd.shapes\n",
     "\n",
     "logging.basicConfig(level=logging.INFO, stream=sys.stdout)\n",
     "\n",
-    "espressomd.assert_features(['LB_BOUNDARIES_GPU'])\n",
+    "espressomd.assert_features(['WALBERLA'])\n",
     "\n",
     "# System constants\n",
     "BOX_L = 16.0\n",
@@ -120,8 +119,10 @@
    "source": [
     "```python\n",
     "logging.info(\"Setup LB fluid.\")\n",
-    "lbf = espressomd.lb.LBFluidGPU(agrid=AGRID, dens=DENSITY, visc=VISCOSITY, tau=TIME_STEP,\n",
-    "                               ext_force_density=FORCE_DENSITY)\n",
+    "lbf = espressomd.lb.LBFluidWalberla(agrid=AGRID, density=DENSITY,\n",
+    "                                    kinematic_viscosity=VISCOSITY,\n",
+    "                                    tau=TIME_STEP,\n",
+    "                                    ext_force_density=FORCE_DENSITY)\n",
     "system.actors.add(lbf)\n",
     "```"
    ]
@@ -140,9 +141,7 @@
     "solution2_first": true
    },
    "source": [
-    "Create a LB boundary and append it to the list of system LB boundaries.\n",
-    "\n",
-    "You can refer to section [using shapes as lattice-Boltzmann boundary](https://espressomd.github.io/doc/lb.html#using-shapes-as-lattice-boltzmann-boundary) in the user guide."
+    "Use the convenience function ``add_boundary_from_shape`` of the LB actor to mark nodes within a shape as boundaries.\n"
    ]
   },
   {
@@ -156,11 +155,8 @@
     "top_wall = espressomd.shapes.Wall(normal=[1, 0, 0], dist=WALL_OFFSET)\n",
     "bottom_wall = espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-(BOX_L - WALL_OFFSET))\n",
     "\n",
-    "top_boundary = espressomd.lbboundaries.LBBoundary(shape=top_wall)\n",
-    "bottom_boundary = espressomd.lbboundaries.LBBoundary(shape=bottom_wall)\n",
-    "\n",
-    "system.lbboundaries.add(top_boundary)\n",
-    "system.lbboundaries.add(bottom_boundary)\n",
+    "lbf.add_boundary_from_shape(top_wall)\n",
+    "lbf.add_boundary_from_shape(bottom_wall)\n",
     "```"
    ]
   },
diff --git a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_sedimentation.ipynb b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_sedimentation.ipynb
index 9b8d73f9a05..a8100e9cbdb 100644
--- a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_sedimentation.ipynb
+++ b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_sedimentation.ipynb
@@ -72,12 +72,11 @@
    "source": [
     "import espressomd\n",
     "import espressomd.lb\n",
-    "import espressomd.lbboundaries\n",
     "import espressomd.shapes\n",
     "import espressomd.observables\n",
     "import espressomd.accumulators\n",
     "\n",
-    "espressomd.assert_features([\"LENNARD_JONES\", \"LB_BOUNDARIES\"])\n",
+    "espressomd.assert_features([\"LENNARD_JONES\", \"WALBERLA\"])\n",
     "\n",
     "# imports for data handling, plotting, and progress bar\n",
     "import numpy as np\n",
@@ -133,7 +132,7 @@
     "n_rows = 10\n",
     "\n",
     "# system size in units of lattice spacing\n",
-    "n_height = 40\n",
+    "n_height = 50\n",
     "n_width = 20\n",
     "n_depth = 2\n",
     "\n",
@@ -343,7 +342,10 @@
    },
    "source": [
     "```python\n",
-    "lbf = espressomd.lb.LBFluid(agrid=spacing, dens=1., visc=1., tau=system.time_step, kT=0.)\n",
+    "lbf = espressomd.lb.LBFluidWalberla(agrid=spacing,\n",
+    "                                    density=1.,\n",
+    "                                    kinematic_viscosity=1.,\n",
+    "                                    tau=system.time_step, kT=0.)\n",
     "system.actors.add(lbf)\n",
     "system.thermostat.set_lb(LB_fluid=lbf, gamma=15., seed=0)\n",
     "```"
@@ -367,7 +369,7 @@
     "\n",
     "**Exercise:**\n",
     "* convert the wall shapes to LB boundaries and add them to the system list of LB boundaries\n",
-    "  ([user guide](https://espressomd.github.io/doc/lb.html#using-shapes-as-lattice-boltzmann-boundary))"
+    "  ([user guide](https://espressomd.github.io/doc/lb.html#setting-up-boundary-conditions))"
    ]
   },
   {
@@ -379,9 +381,7 @@
     "```python\n",
     "# add LB boundaries\n",
     "for wall_shape in [wall_shape_b, wall_shape_t]:\n",
-    "    no_slip_wall = espressomd.lbboundaries.LBBoundary(\n",
-    "        shape=wall_shape, velocity=[0, 0, 0])\n",
-    "    system.lbboundaries.add(no_slip_wall)\n",
+    "    lbf.add_boundary_from_shape(wall_shape)\n",
     "```"
    ]
   },
diff --git a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_theory.ipynb b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_theory.ipynb
index ba296175ff5..c4ffcef9d42 100644
--- a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_theory.ipynb
+++ b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_theory.ipynb
@@ -43,8 +43,6 @@
     "\n",
     "For the tutorial you will have to compile in the following features:\n",
     "```c++\n",
-    "#define LB_BOUNDARIES\n",
-    "#define LB_BOUNDARIES_GPU\n",
     "#define LENNARD_JONES\n",
     "```\n",
     "Please uncomment the features in the <tt>myconfig.hpp</tt> and compile **ESPResSo** using this <tt>myconfig.hpp</tt>. This is not necessary if you do not use a custom <tt>myconfig.hpp</tt>, since the features are activated by default. For more information on configuring **ESPResSo** and how to activate CUDA (for GPU computation), refer to the [documentation](https://espressomd.github.io/doc/installation.html). "
@@ -212,11 +210,9 @@
     "## 3 The LB interface in ESPResSo\n",
     "\n",
     "**ESPResSo** features two virtually independent implementations of LB. One implementation uses CPUs and one uses a GPU to perform the computational work. For this, we provide two actor classes\n",
-    "[<tt>LBFluid</tt>](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluid) and\n",
-    "[<tt>LBFluidGPU</tt>](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluidGPU) in the module\n",
-    "[<tt>espressomd.lb</tt>](https://espressomd.github.io/doc/espressomd.html#module-espressomd.lb), as well as the optional\n",
-    "[<tt>LBBoundary</tt>](https://espressomd.github.io/doc/espressomd.html#espressomd.lbboundaries.LBBoundary) class found in\n",
-    "[<tt>espressomd.lbboundaries</tt>](https://espressomd.github.io/doc/espressomd.html#module-espressomd.lbboundaries).\n",
+    "[<tt>LBFluidWalberla</tt>](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluidWalberla) and\n",
+    "[<tt>LBFluidWalberlaGPU</tt>](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluidWalberlaGPU) in the module\n",
+    "[<tt>espressomd.lb</tt>](https://espressomd.github.io/doc/espressomd.html#module-espressomd.lb).\n",
     "\n",
     "The LB lattice is a cubic lattice, with a lattice constant <tt>agrid</tt> that\n",
     "is the same in all spatial directions. The chosen box length must be an integer multiple\n",
@@ -251,9 +247,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### The <tt>LBFluid</tt> class\n",
+    "### The <tt>LBFluidWalberla</tt> class\n",
     "\n",
-    "The <tt>LBFluid</tt> class provides an interface to the LB-Method in the **ESPResSo** core. When initializing an object, one can pass the aforementioned parameters as keyword arguments. Parameters are given in MD units. The available keyword arguments are:\n",
+    "The <tt>LBFluidWalberla</tt> class provides an interface to the LB-Method in the **ESPResSo** core. When initializing an object, one can pass the aforementioned parameters as keyword arguments. Parameters are given in MD units. The available keyword arguments are:\n",
     "\n",
     "+ <tt>dens</tt>: The density of the fluid.\n",
     "+ <tt>agrid</tt>: The lattice constant of the fluid. It is used to determine the number of LB nodes per direction from <tt>box_l</tt>. *They have to be compatible.*\n",
@@ -263,7 +259,7 @@
     "+ <tt>seed</tt>: The random number generator seed, only relevant for thermalized fluids (i.e. <tt>kT</tt> \\> 0).\n",
     "+ <tt>ext_force_density</tt>: An external force density applied to every node. This is given as a list, tuple or array with three components.\n",
     "\n",
-    "Using these arguments, one can initialize an <tt>LBFluid</tt> object. This object then needs to be added to the system's actor list. The code below provides a minimal example.\n",
+    "Using these arguments, one can initialize an <tt>LBFluidWalberla</tt> object. This object then needs to be added to the system's actor list. The code below provides a minimal example.\n",
     "\n",
     "```python\n",
     "import espressomd\n",
@@ -274,8 +270,8 @@
     "system.time_step = 0.01\n",
     "system.cell_system.skin = 0.4\n",
     "\n",
-    "# Initialize an LBFluid with the minimum set of valid parameters.\n",
-    "lbf = lb.LBFluidGPU(agrid=1, dens=10, visc=.1, tau=0.01)\n",
+    "# Initialize an LBFluidWalberla with the minimum set of valid parameters.\n",
+    "lbf = espressomd.lb.LBFluidWalberla(agrid=1, density=10, kinematic_viscosity=.1, tau=0.01)\n",
     "# Activate the LB by adding it to the System's actor list.\n",
     "system.actors.add(lbf)\n",
     "```"
@@ -287,16 +283,15 @@
    "source": [
     "### Sampling data from a node\n",
     "\n",
-    "The <tt>LBFluid</tt> class also provides a set of methods which can be used to sample data from\n",
+    "The <tt>LBFluidWalberla</tt> class also provides a set of methods which can be used to sample data from\n",
     "the fluid nodes. For example <tt>lbf[X ,Y ,Z].quantity</tt> returns the quantity of the node\n",
     "with $(X, Y, Z)$ coordinates. Note that the indexing in every direction starts with 0.\n",
     "The possible properties are:\n",
     "\n",
     "+ <tt>velocity</tt>: the fluid velocity (list of three floats)\n",
     "+ <tt>pressure_tensor</tt>: the pressure tensor (3x3 matrix)\n",
-    "+ <tt>pressure_tensor_neq</tt>: the nonequilibrium part of the pressure tensor (3x3 matrix).\n",
     "+ <tt>population</tt>: the 19 populations of the D3Q19 lattice.\n",
-    "+ <tt>boundary</tt>: the boundary flag.\n",
+    "+ <tt>is_boundary</tt>: the boundary flag.\n",
     "+ <tt>density</tt>: the local density.\n",
     "\n",
     "Slicing is supported, e.g. to obtain all velocity vectors in the LB fluid as a Numpy array, use <tt>lbf[:,:,:].velocity</tt>."
@@ -306,26 +301,29 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### The <tt>LBBoundary</tt> class\n",
+    "### Setting up boundaries\n",
     "\n",
-    "The [<tt>LBBoundary</tt>](https://espressomd.github.io/doc/espressomd.html#espressomd.lbboundaries.LBBoundary) class represents a boundary on the\n",
-    "[<tt>LBFluid</tt>](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluid) lattice.\n",
-    "It depends on the classes of the module <tt>espressomd.shapes</tt> as it derives its geometry from them. For the initialization, the arguments <tt>shape</tt> and <tt>velocity</tt> are supported. The <tt>shape</tt> argument takes an object from the <tt>shapes</tt> module and the <tt>velocity</tt> argument expects a list, tuple or array containing 3 floats. Setting the <tt>velocity</tt> will result in a slip boundary condition.\n",
+    "Boundary conditions for the fluid are set on the\n",
+    "[<tt>LBFluidWalberla</tt>](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluidWalberla) lattice by marking the nodes at which the boundary condition should hold as boundary nodes.\n",
+    "There are several ways to access individual nodes, please refer to the documentation for a complete list. Once they are gathered, a boundary condition e.g. of the type [<tt>VelocityBounceBack</tt>](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.VelocityBounceBack) can be assigned to them, as shown in the following example: \n",
+    "```python\n",
+    "node = lbf[0,0,0]\n",
+    "node.boundary = VelocityBounceBack(velocity=[0,0,0])\n",
+    "```\n",
+    "In order to mark several nodes as boundaries at once, there are a some convenience functions that make it possible, for example, to mark all nodes within a <tt>espressomd.shapes</tt> as a boundary.\n",
     "\n",
-    "Note that the boundaries are not constructed through the periodic boundary. If, for example, one would set a sphere with its center in one of the corner of the boxes, a sphere fragment will be generated. To avoid this, make sure the sphere, or any other boundary, fits inside the central box.\n",
+    "Note that nodes are not marked as boundaries through the periodic boundary if the shape exceeds the edges of the box. If, for example, one would set a sphere with its center in one of the corner of the boxes, only nodes within the sphere fragment will be boundary nodes. To avoid this, make sure the sphere, or any other shape, fits inside the central box.\n",
     "\n",
-    "Boundaries are instantiated by passing a shape object to the <tt>LBBoundary</tt> class. Here is one way to construct a wall and add it to an existing `system` instance:\n",
+    "Here is an example of how to use shapes to mark nodes as boundaries:\n",
     "\n",
     "```python\n",
-    "import espressomd.lbboundaries\n",
     "import espressomd.shapes\n",
     "\n",
-    "wall = espressomd.lbboundaries.LBBoundary(shape=espressomd.shapes.Wall(normal=[1, 0, 0], dist=1),\n",
-    "                                          velocity=[0, 0, 0.01])\n",
-    "system.lbboundaries.add(wall)\n",
+    "wall_shape = espressomd.shapes.Wall(normal=[1, 0, 0], dist=1)\n",
+    "lbf.add_boundary_from_shape(wall_shape, velocity=[0, 0, 0.01])\n",
     "```\n",
     "\n",
-    "This will create a wall with a surface normal of $(1, 0, 0)$ at a distance of 1 from the origin of the coordinate system in direction of the normal vector. The wall exhibits a slip boundary condition with a velocity of $(0, 0, 0.01)$. For a no-slip boundary condition, leave out the velocity argument or set it to zero. Please refer to the user guide for a complete list of constraints.\n",
+    "This will create a wall shape with a surface normal of $(1, 0, 0)$ at a distance of 1 from the origin of the coordinate system in direction of the normal vector and mark all LB nodes within as boundaries. Additionally, a boundary condition with a velocity of $(0, 0, 0.01)$ is set using the optional `velocity` argument. For a no-slip boundary condition, leave out the velocity argument, as this will set it to zero by default.\n",
     "\n",
     "In **ESPResSo** the so-called *link bounce back* method is implemented, where the effective hydrodynamic boundary is located midway between boundary and fluid node."
    ]
diff --git a/doc/tutorials/polymers/polymers.ipynb b/doc/tutorials/polymers/polymers.ipynb
index 7e6a4b51534..40d4d3bbaa2 100644
--- a/doc/tutorials/polymers/polymers.ipynb
+++ b/doc/tutorials/polymers/polymers.ipynb
@@ -297,8 +297,9 @@
     "    '''\n",
     "    Lattice-based solvation model based on the LBM (Zimm model).\n",
     "    '''\n",
-    "    lbf = espressomd.lb.LBFluidGPU(kT=kT, seed=42, agrid=1, dens=1,\n",
-    "                                   visc=5, tau=system.time_step)\n",
+    "    lbf = espressomd.lb.LBFluidWalberla(kT=kT, seed=42, agrid=1, density=1,\n",
+    "                                        kinematic_viscosity=5, tau=system.time_step,\n",
+    "                                        single_precision=True)\n",
     "    system.actors.add(lbf)\n",
     "    system.thermostat.set_lb(LB_fluid=lbf, gamma=gamma, seed=42)"
    ]
@@ -343,7 +344,7 @@
     "POLYMER_MODEL = 'Rouse'\n",
     "assert POLYMER_MODEL in ('Rouse', 'Zimm')\n",
     "if POLYMER_MODEL == 'Zimm':\n",
-    "    espressomd.assert_features(['CUDA'])\n",
+    "    espressomd.assert_features(['WALBERLA'])\n",
     "    import espressomd.lb\n",
     "\n",
     "# System setup\n",
@@ -439,8 +440,8 @@
     "\n",
     "    # reset system\n",
     "    system.part.clear()\n",
-    "    system.thermostat.turn_off()\n",
     "    system.actors.clear()\n",
+    "    system.thermostat.turn_off()\n",
     "    system.auto_update_accumulators.clear()\n",
     "\n",
     "rh_results = np.array(rh_results)\n",
diff --git a/doc/tutorials/raspberry_electrophoresis/raspberry_electrophoresis.ipynb b/doc/tutorials/raspberry_electrophoresis/raspberry_electrophoresis.ipynb
index 6ae77d8cf57..78535da6197 100644
--- a/doc/tutorials/raspberry_electrophoresis/raspberry_electrophoresis.ipynb
+++ b/doc/tutorials/raspberry_electrophoresis/raspberry_electrophoresis.ipynb
@@ -74,7 +74,7 @@
     "logging.basicConfig(level=logging.INFO, stream=sys.stdout)\n",
     "\n",
     "espressomd.assert_features([\"ELECTROSTATICS\", \"ROTATION\", \"ROTATIONAL_INERTIA\", \"EXTERNAL_FORCES\",\n",
-    "                            \"MASS\", \"VIRTUAL_SITES_RELATIVE\", \"CUDA\", \"LENNARD_JONES\"])\n",
+    "                            \"MASS\", \"VIRTUAL_SITES_RELATIVE\", \"LENNARD_JONES\"])\n",
     "\n",
     "import numpy as np\n",
     "%matplotlib inline\n",
@@ -643,7 +643,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "lb = espressomd.lb.LBFluidGPU(kT=temperature, seed=42, dens=1., visc=3., agrid=1., tau=system.time_step)\n"
+    "lb = espressomd.lb.LBFluidWalberla(kT=temperature, seed=42,\n",
+    "                                   density=1., kinematic_viscosity=3.,\n",
+    "                                   agrid=1., tau=system.time_step)"
    ]
   },
   {
diff --git a/maintainer/CI/build_cmake.sh b/maintainer/CI/build_cmake.sh
index 43652147be1..0c9bbc07fc5 100755
--- a/maintainer/CI/build_cmake.sh
+++ b/maintainer/CI/build_cmake.sh
@@ -126,9 +126,12 @@ set_default_value with_hdf5 true
 set_default_value with_fftw true
 set_default_value with_gsl true
 set_default_value with_scafacos false
+set_default_value with_walberla false
+set_default_value with_walberla_avx false
 set_default_value with_stokesian_dynamics false
 set_default_value test_timeout 300
 set_default_value hide_gpu false
+set_default_value mpiexec_preflags ""
 
 if [ "${make_check_unit_tests}" = true ] || [ "${make_check_python}" = true ] || [ "${make_check_tutorials}" = true ] || [ "${make_check_samples}" = true ] || [ "${make_check_benchmarks}" = true ]; then
     run_checks=true
@@ -186,6 +189,16 @@ else
     cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=OFF"
 fi
 
+if [ "${with_walberla}" = true ]; then
+  cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA=ON -D ESPRESSO_BUILD_WITH_WALBERLA_FFT=ON"
+  if [ "${with_walberla_avx}" = true ]; then
+    cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA_AVX=ON"
+  fi
+  # disable default OpenMPI CPU binding mechanism to avoid stale references to
+  # waLBerla objects when multiple LB python tests run in parallel on NUMA archs
+  mpiexec_preflags="${mpiexec_preflags:+$mpiexec_preflags;}--bind-to;none"
+fi
+
 if [ "${with_coverage}" = true ]; then
     cmake_params="-D ESPRESSO_BUILD_WITH_COVERAGE=ON ${cmake_params}"
 fi
@@ -248,7 +261,7 @@ cd "${builddir}"
 if [ -f "/etc/os-release" ]; then
     grep -q suse /etc/os-release && . /etc/profile.d/modules.sh && module load gnu-openmpi
     grep -q 'rhel\|fedora' /etc/os-release && for f in /etc/profile.d/*module*.sh; do . "${f}"; done && module load mpi
-    grep -q "Ubuntu 22.04" /etc/os-release && export MPIEXEC_PREFLAGS="--mca;btl_vader_single_copy_mechanism;none"
+    grep -q "Ubuntu 22.04" /etc/os-release && export MPIEXEC_PREFLAGS="--mca;btl_vader_single_copy_mechanism;none${mpiexec_preflags:+;$mpiexec_preflags}"
 fi
 
 # CONFIGURE
@@ -288,8 +301,9 @@ end "BUILD"
 # library. See details in https://github.com/espressomd/espresso/issues/2249
 # Can't do this check on CUDA though because nvcc creates a host function
 # that just calls exit() for each device function, and can't do this with
-# coverage because gcov 9.0 adds code that calls exit().
-if [[ "${with_coverage}" == false && ( "${with_cuda}" == false || "${with_cuda_compiler}" != "nvcc" ) ]]; then
+# coverage because gcov 9.0 adds code that calls exit(), and can't do this
+# with walberla because the library calls exit() in assertions.
+if [[ "${with_coverage}" == false && ( "${with_cuda}" == false || "${with_cuda_compiler}" != "nvcc" ) && "${with_walberla}" != "true" ]]; then
     if nm -o -C $(find . -name '*.so') | grep '[^a-z]exit@@GLIBC'; then
         echo "Found calls to exit() function in shared libraries."
         exit 1
@@ -384,6 +398,9 @@ if [ "${with_coverage}" = true ] || [ "${with_coverage_python}" = true ]; then
         lcov --gcov-tool "${GCOV:-gcov}" -q --directory . --ignore-errors graph --capture --output-file coverage.info # capture coverage info
         lcov --gcov-tool "${GCOV:-gcov}" -q --remove coverage.info '/usr/*' --output-file coverage.info # filter out system
         lcov --gcov-tool "${GCOV:-gcov}" -q --remove coverage.info '*/doc/*' --output-file coverage.info # filter out docs
+        if [ -d _deps/ ]; then
+          lcov --gcov-tool "${GCOV:-gcov}" -q --remove coverage.info $(realpath _deps/)'/*' --output-file coverage.info # filter out docs
+        fi
     fi
     if [ "${with_coverage_python}" = true ]; then
         echo "Running python3-coverage..."
diff --git a/maintainer/benchmarks/CMakeLists.txt b/maintainer/benchmarks/CMakeLists.txt
index 5d1941db143..887cae6ab81 100644
--- a/maintainer/benchmarks/CMakeLists.txt
+++ b/maintainer/benchmarks/CMakeLists.txt
@@ -130,6 +130,16 @@ python_benchmark(
 python_benchmark(FILE ferrofluid.py ARGUMENTS "--particles_per_core=400")
 python_benchmark(FILE mc_acid_base_reservoir.py ARGUMENTS
                  "--particles_per_core=500" RUN_WITH_MPI FALSE)
+python_benchmark(FILE lb.py ARGUMENTS "--box_l=32;--single_precision")
+python_benchmark(FILE lb.py ARGUMENTS "--box_l=32")
+python_benchmark(FILE lb.py ARGUMENTS "--box_l=64;--single_precision")
+python_benchmark(FILE lb.py ARGUMENTS "--box_l=64")
+python_benchmark(FILE lb.py ARGUMENTS "--box_l=128;--single_precision")
+python_benchmark(FILE lb.py ARGUMENTS "--box_l=128")
+if(NOT ESPRESSO_BUILD_WITH_WALBERLA_USE_AVX)
+  python_benchmark(FILE lb.py ARGUMENTS "--box_l=196;--single_precision")
+  python_benchmark(FILE lb.py ARGUMENTS "--box_l=196")
+endif()
 
 add_custom_target(
   benchmarks_data
diff --git a/maintainer/benchmarks/lb.py b/maintainer/benchmarks/lb.py
index 6aede3a3239..44252d6ab62 100644
--- a/maintainer/benchmarks/lb.py
+++ b/maintainer/benchmarks/lb.py
@@ -31,6 +31,9 @@
 parser.add_argument("--particles_per_core", metavar="N", action="store",
                     type=int, default=125, required=False,
                     help="Number of particles per core")
+parser.add_argument("--box_l", action="store",
+                    type=int, default=32, required=False,
+                    help="Number of particles per core")
 parser.add_argument("--lb_sites_per_particle", metavar="N_LB", action="store",
                     type=float, default=28, required=False,
                     help="Number of LB sites per particle")
@@ -38,6 +41,8 @@
                     type=float, default=0.03, required=False,
                     help="Fraction of the simulation box volume occupied by "
                     "particles (range: [0.01-0.74], default: 0.50)")
+parser.add_argument("--single_precision", action="store_true", required=False,
+                    help="Using single-precision floating point accuracy")
 parser.add_argument("--output", metavar="FILEPATH", action="store",
                     type=str, required=False, default="benchmarks.csv",
                     help="Output file (default: benchmarks.csv)")
@@ -50,7 +55,7 @@
 assert args.volume_fraction < np.pi / (3 * np.sqrt(2)), \
     "volume_fraction exceeds the physical limit of sphere packing (~0.74)"
 
-required_features = ["LENNARD_JONES"]
+required_features = ["LENNARD_JONES", "WALBERLA"]
 espressomd.assert_features(required_features)
 
 # System
@@ -68,13 +73,10 @@
 #############################################################
 
 n_proc = system.cell_system.get_state()['n_nodes']
-n_part = n_proc * args.particles_per_core
-# volume of N spheres with radius r: N * (4/3*pi*r^3)
-box_l = (n_part * 4. / 3. * np.pi * (lj_sig / 2.)**3
-         / args.volume_fraction)**(1. / 3.)
-lb_grid = int(2 * round(np.cbrt(n_part * args.lb_sites_per_particle) / 2.))
-agrid = box_l / lb_grid
-measurement_steps = int(max(120**3 / lb_grid**3, 50))
+box_l = args.box_l
+lb_grid = box_l
+agrid = 1.
+measurement_steps = 80
 
 # System
 #############################################################
@@ -86,51 +88,13 @@
 system.cell_system.skin = 0.5
 system.thermostat.turn_off()
 
-# Interaction setup
-#############################################################
-system.non_bonded_inter[0, 0].lennard_jones.set_params(
-    epsilon=lj_eps, sigma=lj_sig, cutoff=lj_cut, shift="auto")
-
-# Particle setup
-#############################################################
-
-system.part.add(pos=np.random.random((n_part, 3)) * system.box_l)
-
-#  Warmup Integration
-#############################################################
-
-# warmup
-benchmarks.minimize(system, n_part / 10.)
-
-system.integrator.set_vv()
-system.thermostat.set_langevin(kT=1.0, gamma=1.0, seed=42)
-
-# tuning and equilibration
-min_skin = 0.2
-max_skin = 1.0
-print("Tune skin: {:.3f}".format(system.cell_system.tune_skin(
-    min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=100)))
-print("Equilibration")
-system.integrator.run(500)
-print("Tune skin: {:.3f}".format(system.cell_system.tune_skin(
-    min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=100)))
-print("Equilibration")
-system.integrator.run(500)
-
-
-system.thermostat.turn_off()
 print(f"LB shape: [{lb_grid}, {lb_grid}, {lb_grid}]")
 print(f"LB agrid: {agrid:.3f}")
-if hasattr(espressomd.lb, "LBFluid"):
-    LBClass = espressomd.lb.LBFluid
-elif hasattr(espressomd.lb, "LBFluidWalberla"):
-    LBClass = espressomd.lb.LBFluidWalberla
-else: 
-    raise Exception("LB not built in")
-
-lbf = LBClass(agrid=agrid, dens=1, visc=1, tau=system.time_step, kT=1, seed=1)
+
+lbf = espressomd.lb.LBFluidWalberla(agrid=agrid, tau=system.time_step,
+                                    density=1., kinematic_viscosity=1.,
+                                    single_precision=args.single_precision)
 system.actors.add(lbf)
-system.thermostat.set_lb(gamma=10, LB_fluid=lbf, seed=2)
 
 
 # time integration loop
@@ -138,7 +102,7 @@
 
 # average time
 avg, ci = benchmarks.get_average_time(timings)
-print(f"average: {avg:.3e} +/- {ci:.3e} (95% C.I.)")
+print(f"average: {1000 * avg:.1f} +/- {1000 * ci:.1f} ms (95% C.I.)")
 
 # write report
 benchmarks.write_report(args.output, n_proc, timings, measurement_steps)
diff --git a/maintainer/benchmarks/runner.sh b/maintainer/benchmarks/runner.sh
index 1c1f694cf37..4258a12fcf3 100644
--- a/maintainer/benchmarks/runner.sh
+++ b/maintainer/benchmarks/runner.sh
@@ -50,7 +50,7 @@ for config in ${configs}; do
   echo "### ${config}" | tee -a benchmarks.log
   cp ${config} myconfig.hpp
   rm -rf src/ maintainer/
-  cmake -D ESPRESSO_BUILD_BENCHMARKS=ON -D ESPRESSO_TEST_TIMEOUT=1200 -D ESPRESSO_BUILD_WITH_CUDA=OFF -D ESPRESSO_BUILD_WITH_CCACHE=OFF ..
+  cmake -D ESPRESSO_BUILD_BENCHMARKS=ON -D ESPRESSO_TEST_TIMEOUT=1200 -D ESPRESSO_BUILD_WITH_CUDA=OFF -D ESPRESSO_BUILD_WITH_WALBERLA=ON -D ESPRESSO_BUILD_WITH_CCACHE=OFF ..
   make -j$(nproc)
   rm -f benchmarks.csv.part
   touch benchmarks.csv.part
diff --git a/maintainer/benchmarks/suite.sh b/maintainer/benchmarks/suite.sh
index 5371cce82c3..df04e82f6f3 100644
--- a/maintainer/benchmarks/suite.sh
+++ b/maintainer/benchmarks/suite.sh
@@ -72,6 +72,7 @@ EOF
 for commit in ${commits}; do
   echo "### commit ${commit}" >> benchmarks.log
   git checkout ${commit} -- ${directories}
+  rm -rf _deps # commits might rely on a different version of dependencies
   bash ../maintainer/benchmarks/runner.sh
   sed -ri "s/^/\"${commit}\",/" benchmarks.csv
   tail -n +2 benchmarks.csv >> benchmarks_suite.csv
diff --git a/maintainer/configs/maxset.hpp b/maintainer/configs/maxset.hpp
index 351edd0dcb0..29c531428f9 100644
--- a/maintainer/configs/maxset.hpp
+++ b/maintainer/configs/maxset.hpp
@@ -37,15 +37,7 @@
 #endif
 
 #define ENGINE
-
 #define LB_ELECTROHYDRODYNAMICS
-#define LB_BOUNDARIES
-#ifdef CUDA
-#define LB_BOUNDARIES_GPU
-#define ELECTROKINETICS
-#define EK_BOUNDARIES
-#define EK_DEBUG
-#endif
 
 #define EXCLUSIONS
 
diff --git a/maintainer/configs/no_rotation.hpp b/maintainer/configs/no_rotation.hpp
index 31461bbbac1..d1a6f418c35 100644
--- a/maintainer/configs/no_rotation.hpp
+++ b/maintainer/configs/no_rotation.hpp
@@ -41,18 +41,6 @@
 #define MMM1D_GPU
 #endif
 
-// Hydrodynamics
-#define LB_BOUNDARIES
-#ifdef CUDA
-#define LB_BOUNDARIES_GPU
-#endif
-
-// Electrokinetics
-#ifdef CUDA
-#define ELECTROKINETICS
-#define EK_BOUNDARIES
-#endif
-
 // Force/energy calculation
 #define EXCLUSIONS
 
diff --git a/maintainer/walberla_kernels/Readme.md b/maintainer/walberla_kernels/Readme.md
new file mode 100644
index 00000000000..37ecd2a7cea
--- /dev/null
+++ b/maintainer/walberla_kernels/Readme.md
@@ -0,0 +1,59 @@
+# Automated kernel generation with waLBerla
+
+The scripts in this directory generate the kernels for lattice-based algorithms.
+
+The following dependencies need to be in the Python path:
+
+* pystencils (https://i10git.cs.fau.de/pycodegen/pystencils)
+* lbmpy (https://i10git.cs.fau.de/pycodegen/lbmpy/)
+* waLBerla's Python components. Here the same version should be used as
+  the one used to build ESPResSo. One way is to use the copy fetched in
+  ESPResSo's `build/_deps/walberla-src/python/` directory.
+
+The Python dependencies can be pip installed locally with the following command:
+
+```sh
+python3 -m pip install --user -c requirements.txt numpy sympy lbmpy pystencils islpy
+```
+
+The kernels can be regenerated with this shell script:
+
+```sh
+# adapt these paths to the build environment
+export VERSION=1.2
+export DEPS="${HOME}/walberla_deps"
+export PYTHONPATH="${DEPS}/${VERSION}/lbmpy:${DEPS}/${VERSION}/pystencils:${DEPS}/devel/walberla/python/"
+
+# convenience functions
+function generate_lb_kernels {
+  python3 $(git rev-parse --show-toplevel)/maintainer/walberla_kernels/generate_lb_kernels.py $@
+}
+function generate_ek_kernels {
+  python3 $(git rev-parse --show-toplevel)/maintainer/walberla_kernels/generate_ek_kernels.py $@
+}
+function format_lb_kernels {
+  $(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.h
+  $(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.cpp -style "{Language: Cpp, ColumnLimit: 0}"
+}
+function format_ek_kernels {
+  $(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.h
+  $(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.cpp -style "{Language: Cpp, ColumnLimit: 0}"
+}
+
+# LB kernels
+cd $(git rev-parse --show-toplevel)/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/
+generate_lb_kernels
+generate_lb_kernels --single-precision
+format_lb_kernels
+
+# EK kernels
+cd $(git rev-parse --show-toplevel)/src/walberla_bridge/src/electrokinetics/generated_kernels/
+generate_ek_kernels
+generate_ek_kernels --single-precision
+format_ek_kernels
+mv ReactionKernel*.{cpp,h} $(git rev-parse --show-toplevel)/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/
+```
+
+WARNING: The code generation sorts the arguments alphabetically by symbol name.
+If you rename something, you may have to adapt the order of arguments in the
+calling code!
diff --git a/maintainer/walberla_kernels/code_generation_context.py b/maintainer/walberla_kernels/code_generation_context.py
new file mode 100644
index 00000000000..583a3fca578
--- /dev/null
+++ b/maintainer/walberla_kernels/code_generation_context.py
@@ -0,0 +1,146 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import re
+import jinja2
+import hashlib
+import lbmpy
+import lbmpy_walberla
+import pystencils
+import pystencils_walberla
+
+
+def adapt_pystencils():
+    """
+    Adapt pystencils to the SFINAE method (add the block offset lambda
+    callback and the time_step increment).
+    """
+    old_add_pystencils_filters_to_jinja_env = pystencils_walberla.codegen.add_pystencils_filters_to_jinja_env
+
+    def new_add_pystencils_filters_to_jinja_env(jinja_env):
+        # save original pystencils to adapt
+        old_add_pystencils_filters_to_jinja_env(jinja_env)
+        old_generate_members = jinja_env.filters["generate_members"]
+        old_generate_refs_for_kernel_parameters = jinja_env.filters[
+            "generate_refs_for_kernel_parameters"]
+
+        @jinja2.pass_context
+        def new_generate_members(*args, **kwargs):
+            output = old_generate_members(*args, **kwargs)
+            token = " block_offset_0_;"
+            if token in output:
+                i = output.index(token)
+                vartype = output[:i].split("\n")[-1].strip()
+                output += f"\nstd::function<void(IBlock *, {vartype}&, {vartype}&, {vartype}&)> block_offset_generator = [](IBlock * const, {vartype}&, {vartype}&, {vartype}&) {{ }};"
+            return output
+
+        def new_generate_refs_for_kernel_parameters(*args, **kwargs):
+            output = old_generate_refs_for_kernel_parameters(*args, **kwargs)
+            if "block_offset_0" in output:
+                old_token = "auto & block_offset_"
+                new_token = "auto block_offset_"
+                assert output.count(old_token) == 3, \
+                    f"could not find '{old_token}' in '''\n{output}\n'''"
+                output = output.replace(old_token, new_token)
+                output += "\nblock_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);"
+            return output
+
+        # replace pystencils
+        jinja_env.filters["generate_members"] = new_generate_members
+        jinja_env.filters["generate_refs_for_kernel_parameters"] = new_generate_refs_for_kernel_parameters
+
+    pystencils_walberla.codegen.add_pystencils_filters_to_jinja_env = new_add_pystencils_filters_to_jinja_env
+
+
+def earmark_generated_kernels():
+    """
+    Add an earmark at the beginning of generated kernels to document the
+    pystencils/lbmpy toolchain that was used to create them.
+    """
+    walberla_root = lbmpy_walberla.__file__.split("/python/lbmpy_walberla/")[0]
+    with open(os.path.join(walberla_root, ".git/HEAD")) as f:
+        walberla_commit = f.read()
+    if walberla_commit.startswith("ref: refs/heads/master"):
+        ref = walberla_commit.split()[1]
+        with open(os.path.join(walberla_root, f".git/{ref}")) as f:
+            walberla_commit = f.read()
+    token = "// kernel generated with"
+    earmark = (
+        f"{token} pystencils v{pystencils.__version__}, lbmpy v{lbmpy.__version__}, "
+        f"lbmpy_walberla/pystencils_walberla from waLBerla commit {walberla_commit}"
+    )
+    for filename in os.listdir("."):
+        if not filename.endswith(
+                ".tmpl.h") and filename.endswith((".h", ".cpp", ".cu")):
+            with open(filename, "r+") as f:
+                content = f.read()
+                if token not in content:
+                    pos = 0
+                    if content.startswith("/*"):
+                        pos = content.find("*/")
+                        pos = content.find("\n", pos) + 1
+                    elif content.startswith("//====="):
+                        pos = content.find("//=====", 5)
+                        pos = content.find("\n", pos) + 1
+                    f.seek(pos)
+                    f.write(f"\n{earmark}\n{content[pos:]}")
+
+
+def guard_generated_kernels_clang_format():
+    """
+    Some namespaces are too long and will break ``clang-format`` versions
+    9 and 10. Replace them with a unique string of reasonable size.
+    """
+    for filename in os.listdir("."):
+        if filename.endswith(".cpp"):
+            with open(filename, "r") as f:
+                content = f.read()
+            all_ns = re.findall(r"^namespace (internal_[a-zA-Z0-9_]{54,}) \{$",
+                                content, flags=re.MULTILINE)
+            if not all_ns:
+                continue
+            for ns in all_ns:
+                content = re.sub(rf"(?<=[^a-zA-Z0-9_]){ns}(?=[^a-zA-Z0-9_])",
+                                 f"internal_{hashlib.md5(ns.encode('utf-8')).hexdigest()}",
+                                 content)
+            with open(filename, "w") as f:
+                f.write(content)
+
+
+class CodeGeneration(pystencils_walberla.CodeGeneration):
+    """
+    This is a patched version of ``CodeGeneration`` that elides parameters
+    passed to the command line when running the argument parser, and then
+    restores them. It also patches the Jinja templates and earmarks the
+    generated kernels.
+    """
+
+    def __init__(self):
+        import sys
+        old_sys_argv = sys.argv
+        sys.argv = sys.argv[:1]
+        super().__init__()
+        sys.argv = old_sys_argv
+        adapt_pystencils()
+
+    def __exit__(self, *args, **kwargs):
+        super().__exit__(*args, **kwargs)
+        earmark_generated_kernels()
+        guard_generated_kernels_clang_format()
diff --git a/maintainer/walberla_kernels/custom_additional_extensions.py b/maintainer/walberla_kernels/custom_additional_extensions.py
new file mode 100644
index 00000000000..3ff0b83cdd2
--- /dev/null
+++ b/maintainer/walberla_kernels/custom_additional_extensions.py
@@ -0,0 +1,349 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import pathlib
+
+import jinja2
+import numpy as np
+import pystencils as ps
+import pystencils_walberla
+import sympy as sp
+
+
+class Dirichlet_Custom(ps.boundaries.Dirichlet):
+    inner_or_boundary = False
+    single_link = False  # this is the hacky solution
+
+    def __init__(self, value, name=None, data_type="double"):
+        super().__init__(value=value, name=name)
+        self.data_type = data_type
+
+    @property
+    def additional_data(self):
+        if callable(self._value):
+            return [('value', ps.typing.BasicType(self.data_type))]
+        else:
+            return []
+
+
+class Flux(ps.boundaries.boundaryconditions.Boundary):
+    inner_or_boundary = True  # call the boundary condition with the fluid cell
+    single_link = False  # needs to be called for all directional fluxes
+
+    def __init__(self, stencil, value=None, dim=None, data_type='double'):
+        self.stencil = stencil
+        self.value = value
+        if callable(self.value) and not dim:
+            raise ValueError(
+                "When using a flux callback the dimension has to be specified")
+        elif not callable(self.value):
+            dim = len(value)
+        self.dim = dim
+        self.data_type = data_type
+
+    @property
+    def value_is_callable(self):
+        return callable(self.value)
+
+    @property
+    def additional_data(self):
+        if self.value_is_callable:
+            return [(f'flux_{i}', ps.typing.BasicType(
+                self.data_type)) for i in range(self.dim)]
+        else:
+            return []
+
+    @property
+    def additional_data_init_callback(self):
+        if self.value_is_callable:
+            return self.value
+
+    def __call__(self, field, direction_symbol, index_field, **kwargs):
+        assert ps.FieldType.is_staggered(field)
+
+        value = [index_field(f'flux_{i}') for i in range(
+            self.dim)] if self.value_is_callable else self.value
+        value = sp.Matrix(value)
+
+        assert all([s == 0 for s in self.stencil[0]])
+        accesses = [field.staggered_access(
+            ps.stencil.offset_to_direction_string(d)) for d in self.stencil[1:]]
+
+        conds = [
+            sp.Equality(
+                direction_symbol,
+                ps.typing.CastFunc(
+                    d + 1,
+                    np.int32)) for d in range(
+                len(accesses))]
+
+        # use conditional
+        conditional = None
+        for access, condition, direction in zip(
+                accesses, conds, self.stencil[1:]):
+            d = sp.Matrix(direction)
+
+            local_value = value
+
+            # make sure the vector-access is non-negative
+            if isinstance(access, sp.Mul):
+                access *= -1
+                local_value *= -1
+
+            assignment = [
+                ps.Assignment(
+                    access,
+                    local_value.dot(d) /
+                    self.stencil.D ** 2)]
+
+            # build stacked if-conditions for directions
+            conditional = ps.astnodes.Conditional(
+                condition, ps.astnodes.Block(assignment), conditional)
+
+        return [conditional]
+
+    def __hash__(self):
+        return hash((Flux, self.stencil, self.value))
+
+    def __eq__(self, other):
+        return isinstance(
+            other, Flux) and other.stencil == self.stencil and self.value == other.value
+
+
+class DirichletAdditionalDataHandler(
+        pystencils_walberla.additional_data_handler.AdditionalDataHandler):
+    def __init__(self, stencil, boundary_object):
+        assert isinstance(boundary_object, ps.boundaries.Dirichlet)
+        self._boundary_object = boundary_object
+        assert boundary_object.data_type in ("float32", "float64", "double")
+        self.data_type = "float" if boundary_object.data_type == "float32" else "double"
+        super().__init__(stencil=stencil)
+
+    @property
+    def constructor_arguments(self):
+        return f", std::function<{self.data_type}(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)>& " \
+               "dirichletCallback "
+
+    @property
+    def initialiser_list(self):
+        return "elementInitaliser(dirichletCallback),"
+
+    @property
+    def additional_arguments_for_fill_function(self):
+        return "blocks, "
+
+    @property
+    def additional_parameters_for_fill_function(self):
+        return " const shared_ptr<StructuredBlockForest> &blocks, "
+
+    def data_initialisation(self, _):
+        init_list = [f"{self.data_type} InitialisatonAdditionalData = elementInitaliser(Cell(it.x(), it.y(), it.z()), "
+                     "blocks, *block);", "element.value = InitialisatonAdditionalData;"]
+
+        return "\n".join(init_list)
+
+    @property
+    def additional_member_variable(self):
+        return f"std::function<{self.data_type}(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)> " \
+               "elementInitaliser; "
+
+
+class FluxAdditionalDataHandler(
+        pystencils_walberla.additional_data_handler.AdditionalDataHandler):
+    def __init__(self, stencil, boundary_object):
+        self._boundary_object = boundary_object
+        assert boundary_object.data_type in ("float32", "float64", "double")
+        self.data_type = "float" if boundary_object.data_type == "float32" else "double"
+        super().__init__(stencil=stencil)
+
+    @property
+    def constructor_arguments(self):
+        return f", std::function<Vector3<{self.data_type}>(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)>& " \
+               "fluxCallback "
+
+    @property
+    def initialiser_list(self):
+        return "elementInitaliser(fluxCallback),"
+
+    @property
+    def additional_arguments_for_fill_function(self):
+        return "blocks, "
+
+    @property
+    def additional_parameters_for_fill_function(self):
+        return " const shared_ptr<StructuredBlockForest> &blocks, "
+
+    def data_initialisation(self, direction):
+        dirVec = self.stencil_info[direction][1]
+
+        init_list = [
+            f"Vector3<{self.data_type}> InitialisatonAdditionalData = elementInitaliser(Cell(it.x() + {dirVec[0]}, it.y() + {dirVec[1]}, it.z() + {dirVec[2]}), "
+            "blocks, *block);", "element.flux_0 = InitialisatonAdditionalData[0];",
+            "element.flux_1 = InitialisatonAdditionalData[1];"]
+        if self._dim == 3:
+            init_list.append(
+                "element.flux_2 = InitialisatonAdditionalData[2];")
+
+        return "\n".join(init_list)
+
+    @property
+    def additional_member_variable(self):
+        return f"std::function<Vector3<{self.data_type}>(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)> " \
+               "elementInitaliser; "
+
+
+# this custom boundary generator is necessary because our boundary
+# condition writes to several fields at once which is impossible with the
+# shipped one
+def generate_boundary(
+        generation_context,
+        stencil,
+        class_name,
+        dim: int,
+        assignment,
+        target=ps.enums.Target.CPU,
+        data_type=None,
+        cpu_openmp=None,
+        namespace="pystencils",
+        interface_mappings=(),
+        generate_functor=True,
+        **create_kernel_params,
+):
+    struct_name = "IndexInfo"
+
+    config = pystencils_walberla.codegen.config_from_context(
+        generation_context,
+        target=target,
+        data_type=data_type,
+        cpu_openmp=cpu_openmp,
+        **create_kernel_params,
+    )
+    create_kernel_params = config.__dict__
+    del create_kernel_params["target"]
+    del create_kernel_params["index_fields"]
+
+    coordinate_names = ("x", "y", "z")[:dim]
+
+    index_struct_dtype = np.dtype(
+        [(name, np.int32) for name in coordinate_names], align=True
+    )
+
+    index_field = ps.Field(
+        "indexVector",
+        ps.FieldType.INDEXED,
+        index_struct_dtype,
+        layout=[0],
+        shape=(
+            ps.typing.TypedSymbol(
+                "indexVectorSize", ps.typing.BasicType(np.int32)
+            ),
+            1,
+        ),
+        strides=(1, 1),
+    )
+
+    kernel_config = ps.CreateKernelConfig(
+        index_fields=[index_field], target=target, **create_kernel_params
+    )
+
+    kernel = ps.kernelcreation.create_kernel(assignment, config=kernel_config)
+
+    if isinstance(kernel, ps.astnodes.KernelFunction):
+        kernel.function_name = f"boundary_{class_name}"
+        selection_tree = pystencils_walberla.kernel_selection.KernelCallNode(
+            kernel)
+    elif isinstance(kernel, pystencils_walberla.kernel_selection.AbstractKernelSelectionNode):
+        selection_tree = kernel
+    else:
+        raise ValueError(
+            f"kernel_creation_function returned wrong type: {kernel.__class__}"
+        )
+
+    kernel_family = pystencils_walberla.kernel_selection.KernelFamily(
+        selection_tree, class_name)
+    interface_spec = pystencils_walberla.kernel_selection.HighLevelInterfaceSpec(
+        kernel_family.kernel_selection_parameters, interface_mappings
+    )
+
+    additional_data_handler = pystencils_walberla.additional_data_handler.AdditionalDataHandler(
+        stencil=stencil)
+
+    context = {
+        "kernel": kernel_family,
+        "class_name": class_name,
+        "interface_spec": interface_spec,
+        "generate_functor": generate_functor,
+        "StructName": struct_name,
+        "StructDeclaration": pystencils_walberla.boundary.struct_from_numpy_dtype(struct_name, index_struct_dtype),
+        "dim": dim,
+        "target": target.name.lower(),
+        "namespace": namespace,
+        "inner_or_boundary": False,
+        "single_link": False,
+        "additional_data_handler": additional_data_handler,
+    }
+
+    env = jinja2.Environment(
+        loader=jinja2.PackageLoader("pystencils_walberla"), undefined=jinja2.StrictUndefined
+    )
+    pystencils_walberla.jinja_filters.add_pystencils_filters_to_jinja_env(env)
+    custom_env = jinja2.Environment(
+        loader=jinja2.FileSystemLoader(pathlib.Path(__file__).parent), undefined=jinja2.StrictUndefined
+    )
+    pystencils_walberla.jinja_filters.add_pystencils_filters_to_jinja_env(
+        custom_env)
+
+    header = custom_env.get_template(
+        "templates/Boundary.tmpl.h").render(**context)
+    source = env.get_template("Boundary.tmpl.cpp").render(**context)
+
+    source_extension = "cpp" if target == ps.enums.Target.CPU else "cu"
+    generation_context.write_file(f"{class_name}.h", header)
+    generation_context.write_file(f"{class_name}.{source_extension}", source)
+
+
+def generate_kernel_selector(
+        generation_context,
+        class_name,
+        namespace="pystencils",
+        max_num_reactants=None,
+        precision_suffix=None,
+):
+    """
+    Generate helper functions to select a kernel with the appropriate
+    floating-point precision and number of ek species for the currently
+    active ek reaction and ek lattice.
+    """
+
+    context = {
+        "namespace": namespace,
+        "class_name": class_name,
+        "precision_suffix": precision_suffix,
+        "max_num_reactants": max_num_reactants,
+    }
+
+    custom_env = jinja2.Environment(
+        loader=jinja2.FileSystemLoader(pathlib.Path(__file__).parent),
+        undefined=jinja2.StrictUndefined
+    )
+
+    header = custom_env.get_template(
+        "templates/ReactionKernelSelector.tmpl.h").render(**context)
+
+    generation_context.write_file(f"{class_name}_all.h", header)
diff --git a/maintainer/walberla_kernels/ekin.py b/maintainer/walberla_kernels/ekin.py
new file mode 100644
index 00000000000..a5f0c90b5d8
--- /dev/null
+++ b/maintainer/walberla_kernels/ekin.py
@@ -0,0 +1,214 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import pystencils as ps
+import sympy as sp
+import numpy as np
+import typing
+
+from pystencils.fd.derivation import FiniteDifferenceStaggeredStencilDerivation
+from pystencils.fd.finitevolumes import get_access_and_direction
+from pystencils.rng import random_symbol
+from pystencils.stencil import inverse_direction_string
+
+
+# this is from ps.fd.FVM1stOrder.discrete_flux.discretize
+def discretize(term, neighbor):
+    if isinstance(term, sp.Matrix):
+        nw = term.applyfunc(lambda t: discretize(t, neighbor))
+        return nw
+    elif isinstance(term, ps.field.Field.Access):
+        avg = (term.get_shifted(*neighbor) + term) * sp.Rational(1, 2)
+        return avg
+    elif isinstance(term, ps.fd.Diff):
+        access, direction = get_access_and_direction(term)
+
+        fds = FiniteDifferenceStaggeredStencilDerivation(
+            neighbor, access.field.spatial_dimensions, direction)
+        return fds.apply(access)
+
+    if term.args:
+        new_args = [discretize(a, neighbor) for a in term.args]
+        return term.func(*new_args)
+    else:
+        return term
+
+
+class EK:
+    def __init__(self, dim, density_field, flux_field, diffusion, kT=None, velocity_field=None,
+                 force_field=None, potential_field=None, valency=None, ext_efield=None):
+        assert not ps.FieldType.is_staggered(density_field)
+
+        if velocity_field is not None:
+            assert not ps.FieldType.is_staggered(velocity_field)
+
+        if force_field is not None:
+            assert not ps.FieldType.is_staggered(force_field)
+
+        if potential_field is not None:
+            assert not ps.FieldType.is_staggered(potential_field)
+
+        assert ps.FieldType.is_staggered(flux_field)
+
+        self.dim = dim
+        self.density_field = density_field
+        self.velocity_field = velocity_field
+        self.flux_field = flux_field
+        self.diffusion = diffusion
+        self.kT = kT
+        self.force_field = force_field
+        self.potential_field = potential_field
+        self.valency = valency
+        self.ext_efield = ext_efield
+
+        full_stencil = ["C"] + self.flux_field.staggered_stencil + list(
+            map(inverse_direction_string, self.flux_field.staggered_stencil))
+        self.stencil = tuple(map(lambda d: tuple(
+            ps.stencil.direction_string_to_offset(d, self.dim)), full_stencil))
+
+        flux_expression = -self.diffusion * sp.Matrix(
+            [ps.fd.diff(self.density_field, i) for i in range(self.density_field.spatial_dimensions)])
+
+        if self.potential_field is not None and self.valency is not None:
+            if ext_efield is not None:
+                field = sp.Matrix([ps.fd.diff(self.potential_field, i) - ext_efield[i]
+                                   for i in range(self.density_field.spatial_dimensions)])
+            else:
+                field = sp.Matrix([ps.fd.diff(self.potential_field, i)
+                                   for i in range(self.density_field.spatial_dimensions)])
+
+            flux_expression += - self.diffusion / self.kT * \
+                self.density_field.center * self.valency * field
+
+        self.disc = ps.fd.FVM1stOrder(
+            self.density_field, flux=flux_expression, source=0)
+
+        if self.velocity_field is not None:
+            self.vof = ps.fd.VOF(
+                self.flux_field,
+                self.velocity_field,
+                self.density_field)
+
+    def flux_advection(self):
+        if self.velocity_field is not None:
+            return [ps.Assignment(j_adv.lhs, j_adv.lhs + j_adv.rhs)
+                    for j_adv in self.vof]
+
+    def flux(self, include_vof: bool = False,
+             include_fluctuations: bool = False,
+             rng_node: typing.Optional[ps.rng.RNGBase] = None):
+
+        _flux_collection = ps.AssignmentCollection(
+            [self.disc.discrete_flux(self.flux_field)])
+
+        if include_fluctuations:
+            if rng_node is None:
+                raise ValueError(
+                    "rng_node not provided but fluctuations requested")
+
+            block_offsets = tuple(
+                ps.TypedSymbol(
+                    "block_offset_{}".format(i),
+                    np.uint32) for i in range(
+                    self.dim))
+
+            rng_symbol_gen = random_symbol(_flux_collection.subexpressions,
+                                           dim=self.dim,
+                                           rng_node=rng_node,
+                                           seed=ps.TypedSymbol(
+                                               "seed", np.uint32),
+                                           offsets=block_offsets)
+
+            stencil = self.flux_field.staggered_stencil
+            stencil_offsets = list(
+                map(lambda d: ps.stencil.direction_string_to_offset(d), stencil))
+
+            for i, (val, d, rng_symb) in enumerate(
+                    zip(stencil, stencil_offsets, rng_symbol_gen)):
+                assert _flux_collection.main_assignments[i].lhs == self.flux_field.staggered_access(
+                    val)
+                _flux_collection.main_assignments[i] = ps.Assignment(
+                    self.flux_field.staggered_access(val),
+                    _flux_collection.main_assignments[i].rhs + sp.sqrt(
+                        2 * self.diffusion * discretize(self.density_field.center, d)) / sp.Matrix(
+                        d).norm() * rng_symb * sp.sqrt(
+                        3) / 4)
+
+        if include_vof:
+            assert self.velocity_field is not None, "velocity field is not provided!"
+
+            for i, j_adv in enumerate(self.vof):
+                assert _flux_collection.main_assignments[i].lhs == j_adv.lhs
+                _flux_collection.main_assignments[i] = ps.Assignment(
+                    j_adv.lhs,
+                    _flux_collection.main_assignments[i].rhs + j_adv.rhs)
+
+        return _flux_collection
+
+    def continuity(self):
+        return self.disc.discrete_continuity(self.flux_field)
+
+    def friction_coupling(self):
+        if self.kT is None or self.force_field is None:
+            raise RuntimeError("kT or f is not provided!")
+
+        stencil = self.flux_field.staggered_stencil + \
+            [ps.stencil.inverse_direction_string(
+                d) for d in self.flux_field.staggered_stencil]
+
+        return ps.AssignmentCollection([ps.Assignment(self.force_field.center_vector, self.kT / (2 * self.diffusion) * sum([self.flux_field.staggered_access(val) * sp.Matrix(
+            ps.stencil.direction_string_to_offset(val)) for val in stencil[1:]], self.flux_field.staggered_access(stencil[0]) * sp.Matrix(ps.stencil.direction_string_to_offset(stencil[0]))))])
+
+
+class Reaction:
+    def __init__(self, species, orders, stoechom_coefs, rate_coef):
+        self.species = species
+        self.orders = orders
+        self.stoechom_coefs = stoechom_coefs
+        self.rate_coef = rate_coef
+
+    def generate_reaction(self, num_reactants: int) -> ps.AssignmentCollection:
+        if num_reactants > len(self.species):
+            raise ValueError(
+                "Not enough species defined for number of requested reactants")
+
+        # read density fields into subexpressions
+        rho_symbols = sp.symbols(f"local_rho_:{num_reactants}")
+        rate_symbol = sp.Symbol("rate_factor")
+
+        subexpressions = [
+            ps.Assignment(
+                rho_symbols[i],
+                self.species[i].center) for i in range(num_reactants)]
+
+        rate = self.rate_coef
+        for i in range(num_reactants):
+            rate *= sp.Pow(rho_symbols[i], self.orders[i])
+
+        subexpressions.append(ps.Assignment(rate_symbol, rate))
+
+        main_assignments = []
+        for i in range(num_reactants):
+            main_assignments.append(ps.Assignment(self.species[i].center,
+                                                  rho_symbols[i] + rate_symbol * self.stoechom_coefs[i]))
+
+        collection = ps.AssignmentCollection(subexpressions=subexpressions,
+                                             main_assignments=main_assignments)
+
+        return collection
diff --git a/maintainer/walberla_kernels/generate_ek_kernels.py b/maintainer/walberla_kernels/generate_ek_kernels.py
new file mode 100644
index 00000000000..fbf9dc747f5
--- /dev/null
+++ b/maintainer/walberla_kernels/generate_ek_kernels.py
@@ -0,0 +1,225 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import pystencils as ps
+import pystencils_walberla
+import sympy as sp
+import lbmpy
+import argparse
+
+import pystencils_espresso
+import code_generation_context
+
+import ekin
+import custom_additional_extensions
+
+
+parser = argparse.ArgumentParser(description='Generate the waLBerla kernels.')
+parser.add_argument('--single-precision', action='store_true', required=False,
+                    help='Use single-precision')
+args = parser.parse_args()
+
+double_precision: bool = not args.single_precision
+
+data_type_cpp = "double" if double_precision else "float"
+data_type_np = pystencils_espresso.data_type_np[data_type_cpp]
+precision_suffix = pystencils_espresso.precision_suffix[double_precision]
+precision_rng = pystencils_espresso.precision_rng[double_precision]
+
+
+def replace_getData_with_uncheckedFastGetData(filename: str) -> None:
+    with open(filename, "r+") as f:
+        content = f.read()
+        f.seek(0)
+        f.truncate(0)
+        content = content.replace("block->getData<IndexVectors>(indexVectorID);",
+                                  "block->uncheckedFastGetData<IndexVectors>(indexVectorID);")
+        f.write(content)
+
+
+dim: int = 3
+target: ps.enums.Target = ps.enums.Target.CPU
+flux_count: int = 3 ** dim // 2
+
+diffusion = ps.TypedSymbol("D", data_type_np)
+kT = ps.TypedSymbol("kT", data_type_np)
+valency = ps.TypedSymbol("z", data_type_np)
+ext_efield = [ps.TypedSymbol(f"f_ext_{i}", data_type_np) for i in range(dim)]
+
+density_field, potential_field, velocity_field, force_field = ps.fields(
+    f"rho, phi, u(#), f(#): {data_type_np}[#D]".replace("#", str(dim)), layout='zyxf')
+flux_field = ps.fields(
+    f"j({flux_count}): {data_type_np}[{dim}D]",
+    layout='zyxf',
+    field_type=ps.FieldType.STAGGERED_FLUX)
+
+ek = ekin.EK(
+    dim=dim,
+    density_field=density_field,
+    flux_field=flux_field,
+    diffusion=diffusion,
+    kT=kT,
+    velocity_field=velocity_field,
+    force_field=force_field,
+    potential_field=None,
+    valency=None,
+    ext_efield=None)
+ek_electrostatic = ekin.EK(
+    dim=dim,
+    density_field=density_field,
+    flux_field=flux_field,
+    diffusion=diffusion,
+    kT=kT,
+    velocity_field=velocity_field,
+    force_field=force_field,
+    potential_field=potential_field,
+    valency=valency,
+    ext_efield=sp.Matrix(ext_efield))
+
+max_num_reactants: int = 5
+
+react_rhos, orders, stoechom_coefs = [], [], []
+for i in range(max_num_reactants):
+    react_rhos.append(
+        ps.fields(f"rho_{i}: {data_type_np}[#D]".replace("#", str(dim)),
+                  layout="zyxf"))
+    orders.append(ps.TypedSymbol(f"order_{i}", data_type_np))
+    stoechom_coefs.append(ps.TypedSymbol(f"stoech_{i}", data_type_np))
+rate_coef = sp.Symbol("rate_coefficient")
+
+reaction_obj = ekin.Reaction(
+    species=react_rhos,
+    orders=orders,
+    stoechom_coefs=stoechom_coefs,
+    rate_coef=rate_coef,
+)
+
+params = {
+    "target": target,
+    "cpu_vectorize_info": {"assume_inner_stride_one": False}}
+
+with code_generation_context.CodeGeneration() as ctx:
+    ctx.double_accuracy = double_precision
+
+    # codegen configuration
+    config = pystencils_espresso.generate_config(ctx, params)
+
+    pystencils_walberla.generate_sweep(
+        ctx,
+        f"DiffusiveFluxKernel_{precision_suffix}",
+        ek.flux(include_vof=False, include_fluctuations=False,
+                rng_node=precision_rng),
+        staggered=True,
+        **params)
+    pystencils_walberla.generate_sweep(
+        ctx,
+        f"DiffusiveFluxKernelWithElectrostatic_{precision_suffix}",
+        ek_electrostatic.flux(include_vof=False, include_fluctuations=False,
+                              rng_node=precision_rng),
+        staggered=True,
+        **params)
+    pystencils_walberla.generate_sweep(
+        ctx,
+        f"AdvectiveFluxKernel_{precision_suffix}",
+        ek.flux_advection(),
+        staggered=True,
+        **params)
+    pystencils_walberla.generate_sweep(
+        ctx,
+        f"ContinuityKernel_{precision_suffix}",
+        ek.continuity(),
+        **params)
+
+    pystencils_walberla.generate_sweep(
+        ctx,
+        f"FrictionCouplingKernel_{precision_suffix}",
+        ek.friction_coupling(),
+        **params)
+
+    # generate dynamic fixed flux
+    stencil = lbmpy.LBStencil(stencil="D3Q27")
+    dynamic_flux = custom_additional_extensions.Flux(
+        stencil, lambda *args: None, dim=3, data_type=data_type_np)
+    dynamic_flux_additional_data = custom_additional_extensions.FluxAdditionalDataHandler(
+        stencil=stencil, boundary_object=dynamic_flux)
+
+    pystencils_walberla.generate_staggered_flux_boundary(
+        generation_context=ctx,
+        class_name=f"FixedFlux_{precision_suffix}",
+        boundary_object=dynamic_flux,
+        dim=dim,
+        neighbor_stencil=stencil,
+        index_shape=flux_field.index_shape,
+        target=target,
+        additional_data_handler=dynamic_flux_additional_data)
+
+    # generate dynamic fixed density
+    dirichlet_stencil = lbmpy.stencils.LBStencil(stencil=((0, 0, 0),))
+    dirichlet = custom_additional_extensions.Dirichlet_Custom(
+        lambda *args: None, data_type=data_type_np)
+    dirichlet_additional_data = custom_additional_extensions.DirichletAdditionalDataHandler(
+        dirichlet_stencil, dirichlet)
+
+    pystencils_walberla.boundary.generate_boundary(
+        generation_context=ctx,
+        class_name=f"Dirichlet_{precision_suffix}",
+        boundary_object=dirichlet,
+        additional_data_handler=dirichlet_additional_data,
+        field_name="field",
+        neighbor_stencil=stencil,
+        index_shape=density_field.index_shape,
+        target=target)
+
+    pystencils_walberla.generate_pack_info_from_kernel(
+        ctx,
+        f"DensityPackInfo_{precision_suffix}",
+        ek_electrostatic.continuity(),
+        target=target)
+
+    # ek reactions
+    for i in range(1, max_num_reactants + 1):
+        assignments = list(reaction_obj.generate_reaction(num_reactants=i))
+        filename_stem: str = f"ReactionKernelBulk_{i}_{precision_suffix}"
+        pystencils_walberla.generate_sweep(
+            ctx,
+            filename_stem,
+            assignments)
+
+        filename_stem: str = f"ReactionKernelIndexed_{i}_{precision_suffix}"
+        custom_additional_extensions.generate_boundary(
+            generation_context=ctx,
+            stencil=dirichlet_stencil,
+            class_name=filename_stem,
+            dim=dim,
+            target=target,
+            assignment=assignments)
+        replace_getData_with_uncheckedFastGetData(
+            filename=f"{filename_stem}.cpp")
+
+    # ek reactions helper functions
+    custom_additional_extensions.generate_kernel_selector(
+        generation_context=ctx,
+        class_name="ReactionKernelBulk",
+        max_num_reactants=max_num_reactants,
+        precision_suffix=pystencils_espresso.precision_suffix)
+    custom_additional_extensions.generate_kernel_selector(
+        generation_context=ctx,
+        class_name="ReactionKernelIndexed",
+        max_num_reactants=max_num_reactants,
+        precision_suffix=pystencils_espresso.precision_suffix)
diff --git a/maintainer/walberla_kernels/generate_lb_kernels.py b/maintainer/walberla_kernels/generate_lb_kernels.py
new file mode 100644
index 00000000000..dc3083450b4
--- /dev/null
+++ b/maintainer/walberla_kernels/generate_lb_kernels.py
@@ -0,0 +1,205 @@
+#
+# Copyright (C) 2020-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import argparse
+import pkg_resources
+
+import sympy as sp
+
+import pystencils as ps
+import pystencils_walberla
+import pystencils_espresso
+
+import lbmpy
+import lbmpy.creationfunctions
+import lbmpy.forcemodels
+import lbmpy.stencils
+import lbmpy.enums
+
+import lbmpy_walberla
+import lbmpy_espresso
+
+import lees_edwards
+import relaxation_rates
+import walberla_lbm_generation
+import code_generation_context
+
+parser = argparse.ArgumentParser(description="Generate the waLBerla kernels.")
+parser.add_argument("--single-precision", action="store_true", required=False,
+                    help="Use single-precision")
+parser.add_argument("--gpu", action="store_true")
+args = parser.parse_args()
+
+if args.gpu:
+    target = ps.Target.GPU
+else:
+    target = ps.Target.CPU
+
+# Make sure we have the correct versions of the required dependencies
+for module, requirement in [(ps, "==1.2"), (lbmpy, "==1.2")]:
+    assert pkg_resources.packaging.specifiers.SpecifierSet(requirement).contains(module.__version__), \
+        f"{module.__name__} version {module.__version__} doesn't match requirement {requirement}"
+
+
+def paramlist(parameters, keys):
+    for key in keys:
+        if key in parameters:
+            yield parameters[key]
+
+
+with code_generation_context.CodeGeneration() as ctx:
+    ctx.double_accuracy = not args.single_precision
+    if target == ps.Target.GPU:
+        ctx.cuda = True
+
+    # vectorization parameters
+    parameters = {}
+    if target == ps.Target.GPU:
+        default_key = "GPU"
+        parameters["GPU"] = ({"target": target}, "CUDA")
+    else:
+        default_key = "CPU"
+        cpu_vectorize_info = {
+            "instruction_set": "avx",
+            "assume_inner_stride_one": True,
+            "assume_aligned": True,
+            "assume_sufficient_line_padding": False}
+        parameters["CPU"] = ({"target": target}, "")
+        parameters["AVX"] = ({"target": target,
+                             "cpu_vectorize_info": cpu_vectorize_info}, "AVX")
+
+    # codegen configuration
+    config = pystencils_espresso.generate_config(
+        ctx, parameters[default_key][0])
+
+    precision_prefix = pystencils_espresso.precision_prefix[ctx.double_accuracy]
+    precision_suffix = pystencils_espresso.precision_suffix[ctx.double_accuracy]
+    precision_rng = pystencils_espresso.precision_rng[ctx.double_accuracy]
+    kT = sp.symbols("kT")
+    stencil = lbmpy.stencils.LBStencil(lbmpy.enums.Stencil.D3Q19)
+    fields = pystencils_espresso.generate_fields(config, stencil)
+    force_field = fields["force"]
+
+    # LB Method definition
+    method = lbmpy.creationfunctions.create_mrt_orthogonal(
+        stencil=stencil,
+        compressible=True,
+        weighted=True,
+        relaxation_rates=relaxation_rates.rr_getter,
+        force_model=lbmpy.forcemodels.Schiller(force_field.center_vector)
+    )
+
+    # generate stream kernels
+    for params, target_suffix in paramlist(parameters, ("GPU", "CPU", "AVX")):
+        pystencils_espresso.generate_stream_sweep(
+            ctx,
+            method,
+            f"StreamSweep{precision_prefix}{target_suffix}",
+            params)
+
+    # generate initial densities
+    for params, target_suffix in paramlist(parameters, (default_key,)):
+        pystencils_walberla.codegen.generate_sweep(
+            ctx,
+            f"InitialPDFsSetter{precision_prefix}{target_suffix}",
+            pystencils_espresso.generate_setters(ctx, method, params),
+            **params)
+
+    # generate unthermalized Lees-Edwards collision rule
+    le_config = lbmpy.LBMConfig(stencil=stencil,
+                                method=lbmpy.Method.TRT,
+                                relaxation_rate=sp.Symbol("omega_shear"),
+                                compressible=True,
+                                zero_centered=False,
+                                force_model=lbmpy.ForceModel.GUO,
+                                force=force_field.center_vector,
+                                kernel_type="collide_only")
+    lbm_opt = lbmpy.LBMOptimisation(symbolic_field=fields["pdfs"])
+    le_collision_rule_unthermalized = lbmpy.create_lb_update_rule(
+        lbm_config=le_config,
+        lbm_optimisation=lbm_opt)
+    le_collision_rule_unthermalized = lees_edwards.add_lees_edwards_to_collision(
+        config, le_collision_rule_unthermalized,
+        fields["pdfs"], stencil, 1)  # shear_dir_normal y
+    for params, target_suffix in paramlist(parameters, ("GPU", "CPU", "AVX")):
+        pystencils_espresso.generate_collision_sweep(
+            ctx,
+            le_config,
+            le_collision_rule_unthermalized,
+            f"CollideSweep{precision_prefix}LeesEdwards{target_suffix}",
+            params
+        )
+
+    # generate thermalized LB
+    collision_rule_thermalized = lbmpy.creationfunctions.create_lb_collision_rule(
+        method,
+        zero_centered=False,
+        fluctuating={
+            "temperature": kT,
+            "block_offsets": "walberla",
+            "rng_node": precision_rng
+        },
+        optimization={"cse_global": True,
+                      "double_precision": ctx.double_accuracy}
+    )
+    for params, target_suffix in paramlist(parameters, ("GPU", "CPU", "AVX")):
+        pystencils_espresso.generate_collision_sweep(
+            ctx,
+            method,
+            collision_rule_thermalized,
+            f"CollideSweep{precision_prefix}Thermalized{target_suffix}",
+            params
+        )
+
+    # generate accessors
+    for _, target_suffix in paramlist(parameters, ("GPU", "CPU")):
+        filename = f"FieldAccessors{precision_prefix}{target_suffix}"
+        if target == ps.Target.GPU:
+            templates = {
+                f"{filename}.h": "templates/FieldAccessors.tmpl.cuh",
+                f"{filename}.cu": "templates/FieldAccessors.tmpl.cu",
+            }
+        else:
+            templates = {
+                f"{filename}.h": "templates/FieldAccessors.tmpl.h",
+            }
+        walberla_lbm_generation.generate_macroscopic_values_accessors(
+            ctx, config, method, templates
+        )
+
+    # boundary conditions
+    ubb_dynamic = lbmpy_espresso.UBB(
+        lambda *args: None, dim=3, data_type=config.data_type.default_factory())
+    ubb_data_handler = lbmpy_espresso.BounceBackSlipVelocityUBB(
+        method.stencil, ubb_dynamic)
+
+    for _, target_suffix in paramlist(parameters, ("GPU", "CPU")):
+        lbmpy_walberla.generate_boundary(
+            ctx, f"Dynamic_UBB_{precision_suffix}{target_suffix}", ubb_dynamic,
+            method, additional_data_handler=ubb_data_handler,
+            streaming_pattern="push", target=target)
+
+        with open(f"Dynamic_UBB_{precision_suffix}{target_suffix}.h", "r+") as f:
+            content = f.read()
+            f.seek(0)
+            f.truncate(0)
+            # patch for floating point accuracy
+            content = content.replace("real_t",
+                                      config.data_type.default_factory().c_name)
+            f.write(content)
diff --git a/maintainer/walberla_kernels/lbmpy_espresso.py b/maintainer/walberla_kernels/lbmpy_espresso.py
new file mode 100644
index 00000000000..5055fac308c
--- /dev/null
+++ b/maintainer/walberla_kernels/lbmpy_espresso.py
@@ -0,0 +1,81 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import pystencils as ps
+
+import lbmpy.advanced_streaming.indexing
+import lbmpy.boundaries
+
+import lbmpy_walberla.additional_data_handler
+
+
+class BounceBackSlipVelocityUBB(
+        lbmpy_walberla.additional_data_handler.UBBAdditionalDataHandler):
+    '''
+    Dynamic UBB that implements the bounce-back method with slip velocity.
+    '''
+
+    def data_initialisation(self, direction):
+        '''
+        Modified ``indexVector`` initialiser. The "classical" dynamic UBB
+        uses the velocity callback as a velocity flow profile generator.
+        Here we use that callback as a bounce-back slip velocity generator.
+        This way, the dynamic UBB can be used to implement a LB boundary.
+        '''
+        code = super().data_initialisation(direction)
+        dirVec = self.stencil_info[direction][1]
+        token = ' = elementInitaliser(Cell(it.x(){}, it.y(){}, it.z(){}),'
+        old_initialiser = token.format('', '', '')
+        assert old_initialiser in code
+        new_initialiser = token.format(
+            '+' + str(dirVec[0]),
+            '+' + str(dirVec[1]),
+            '+' + str(dirVec[2])).replace('+-', '-')
+        return code.replace(old_initialiser, new_initialiser)
+
+
+class UBB(lbmpy.boundaries.UBB):
+    '''
+    Velocity bounce back boundary condition, enforcing specified velocity at
+    obstacle. This is a patched version of ``lbmpy.boundaries.UBB``, which
+    currently doesn't support the bounce back scheme we need.
+    '''
+
+    def __call__(self, f_out, f_in, dir_symbol,
+                 inv_dir, lb_method, index_field):
+        '''
+        Modify the assignments such that the source and target pdfs are swapped.
+        '''
+        assignments = super().__call__(
+            f_out, f_in, dir_symbol, inv_dir, lb_method, index_field)
+
+        assert len(assignments) > 0
+
+        out = []
+        if len(assignments) > 1:
+            out.extend(assignments[:-1])
+
+        neighbor_offset = lbmpy.advanced_streaming.indexing.NeighbourOffsetArrays.neighbour_offset(
+            dir_symbol, lb_method.stencil)
+
+        assignment = assignments[-1]
+        assert assignment.lhs.field == f_in
+        out.append(ps.Assignment(assignment.lhs.get_shifted(*neighbor_offset),
+                                 assignment.rhs - f_out(dir_symbol) + f_in(dir_symbol)))
+        return out
diff --git a/maintainer/walberla_kernels/lees_edwards.py b/maintainer/walberla_kernels/lees_edwards.py
new file mode 100644
index 00000000000..041162e7068
--- /dev/null
+++ b/maintainer/walberla_kernels/lees_edwards.py
@@ -0,0 +1,129 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+from pystencils.astnodes import LoopOverCoordinate
+from pystencils.typing.typed_sympy import TypedSymbol
+from pystencils.typing import CastFunc
+from pystencils import Assignment
+
+from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
+
+import sympy as sp
+
+
+def type_all_numbers(expr, dtype):
+    # originally from file pystencils/data_types.py in pycodegen/lbmpy@942c7d96
+    substitutions = {a: CastFunc(a, dtype) for a in expr.atoms(sp.Number)}
+    return expr.subs(substitutions)
+
+
+def velocity_offset_eqs(config, method, pdfs, shear_dir_normal, stencil):
+    """Calculates the difference between quilibrium pdf distributions
+    with (rho, u) and (rho, u+v) and applies them to out-flowing
+    populations in the boundary layer. Returns an AssignmentCollection
+    with one Assignment per stencil direction.
+    """
+    dim = len(stencil[0])
+    default_dtype = config.data_type.default_factory()
+
+    # Placeholders indicating a population flows up or down.
+    # Will be replaced later using the component of the stencil direction
+    # along the shear_dir_normal.
+    points_up = sp.Symbol('points_up')
+    points_down = sp.Symbol('points_down')
+
+    # Symbol for the coordinate index within the field,
+    # used to identify boundary layers
+    counters = [LoopOverCoordinate.get_loop_counter_symbol(
+        i) for i in range(dim)]
+
+    grid_size = TypedSymbol("grid_size", dtype=default_dtype)
+
+    # +,-1 for upper/lower boundary layers, 0 otherwise.
+    # Based on symbolic counters defined above. Only becomes
+    # non-zero if the corresponding points_up/down flags
+    # are engaged (which is only done for out-flowing populations)
+    layer_prefactor = sp.Piecewise(
+        (-1,
+         sp.And(type_all_numbers(counters[1] <= 0, default_dtype),
+                points_down)),
+        (+1,
+         sp.And(type_all_numbers(counters[1] >= grid_size - 1, default_dtype),
+                points_up)),
+        (0, True)
+    )
+
+    # Start with an equilibrium distribution for a given density and velocity
+    delta_pdf_eqs = macroscopic_values_setter(
+        method, sp.Symbol("dens"), [
+            sp.Symbol("v_0"), sp.Symbol("v_1"), sp.Symbol("v_2")], pdfs)
+
+    # Replace the assignments of (rho,u) by (rho, u+v) - (rho,u)
+    ma = []
+    for a, c in zip(delta_pdf_eqs.main_assignments, method.stencil):
+        # Determine direction of the stencil component in the
+        # shear_dir_normal
+        if c[shear_dir_normal] == 1:
+            up = True
+            down = False
+        elif c[shear_dir_normal] == -1:
+            up = False
+            down = True
+        else:
+            up = False
+            down = False
+
+        # Replace (rho,u) by (rho,u+v) in boundary layers
+        rhs = sp.simplify(
+            a.rhs -
+            a.rhs.replace(
+                sp.Symbol("u_0"),
+                sp.Symbol("u_0") +
+                layer_prefactor *
+                sp.Symbol("v_s")))
+
+        # Only engage if the population is outflowing. See layer_prefactor
+        rhs = rhs.replace(points_up, up)
+        rhs = rhs.replace(points_down, down)
+        new_a = Assignment(a.lhs, rhs)
+
+        ma.append(new_a)
+        print(c, ma[-1])
+    # Plug in modified assignments
+    delta_pdf_eqs.main_assignments = ma
+    return delta_pdf_eqs.main_assignments
+
+
+def add_lees_edwards_to_collision(
+        config, collision, pdfs, stencil, shear_dir_normal):
+    # Get population shift for outflowing populations at the boundaries
+    offset = velocity_offset_eqs(
+        config,
+        collision.method,
+        pdfs,
+        shear_dir_normal,
+        stencil)
+
+    ma = []
+    for i, a in enumerate(collision.main_assignments):
+        # Add Lees-Edwards-shift to collision main assignments
+        new_a = Assignment(a.lhs, a.rhs + offset[i].rhs)
+        ma.append(new_a)
+    collision.main_assignments = ma
+    return collision
diff --git a/maintainer/walberla_kernels/pystencils_espresso.py b/maintainer/walberla_kernels/pystencils_espresso.py
new file mode 100644
index 00000000000..1980ba14387
--- /dev/null
+++ b/maintainer/walberla_kernels/pystencils_espresso.py
@@ -0,0 +1,162 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import sympy as sp
+import lbmpy.fieldaccess
+import lbmpy.macroscopic_value_kernels
+import lbmpy.updatekernels
+import pystencils as ps
+import pystencils_walberla
+
+
+def skip_philox_unthermalized(code, result_symbols, rng_name):
+    for r in result_symbols:
+        statement = f" {r.name};"
+        assert statement in code, f"no declaration for variable '{r.name}' in '{code}'"
+        code = code.replace(statement, f" {r.name}{{}};", 1)
+    statement = f"{rng_name}("
+    assert code.count(statement) == 1, f"need 1 '{rng_name}' call in '{code}'"
+    lines = code.rstrip().split("\n")
+    assert lines[-1].startswith(rng_name), f"'{rng_name}' not in '{lines[-1]}'"
+    lines[-1] = f"if (kT > 0.) {{  \n{lines[-1]}\n}}"
+    return "\n".join(lines)
+
+
+class PhiloxTwoDoubles(ps.rng.PhiloxTwoDoubles):
+    def get_code(self, *args, **kwargs):
+        code = super().get_code(*args, **kwargs)
+        return skip_philox_unthermalized(code, self.result_symbols, self._name)
+
+
+class PhiloxFourFloats(ps.rng.PhiloxFourFloats):
+    def get_code(self, *args, **kwargs):
+        code = super().get_code(*args, **kwargs)
+        return skip_philox_unthermalized(code, self.result_symbols, self._name)
+
+
+precision_prefix = {
+    True: 'DoublePrecision',
+    False: 'SinglePrecision'}
+precision_suffix = {
+    True: 'double_precision',
+    False: 'single_precision'}
+precision_rng = {
+    True: PhiloxTwoDoubles,
+    False: PhiloxFourFloats}
+data_type_np = {'double': 'float64', 'float': 'float32'}
+
+
+def generate_fields(config, stencil):
+    dtype = data_type_np[config.data_type.default_factory().c_name]
+    field_layout = 'fzyx'
+    q = len(stencil)
+    dim = len(stencil[0])
+
+    fields = {}
+    # Symbols for PDF (twice, due to double buffering)
+    fields['pdfs'] = ps.Field.create_generic(
+        'pdfs',
+        dim,
+        dtype,
+        index_dimensions=1,
+        layout=field_layout,
+        index_shape=(q,)
+    )
+    fields['pdfs_tmp'] = ps.Field.create_generic(
+        'pdfs_tmp',
+        dim,
+        dtype,
+        index_dimensions=1,
+        layout=field_layout,
+        index_shape=(q,)
+    )
+    fields['velocity'] = ps.Field.create_generic(
+        'velocity',
+        dim,
+        dtype,
+        index_dimensions=1,
+        layout=field_layout,
+        index_shape=(dim,)
+    )
+    fields['force'] = ps.Field.create_generic(
+        'force',
+        dim,
+        dtype,
+        index_dimensions=1,
+        layout=field_layout,
+        index_shape=(dim,)
+    )
+
+    return fields
+
+
+def generate_config(ctx, params):
+    return pystencils_walberla.codegen.config_from_context(ctx, **params)
+
+
+def generate_collision_sweep(
+        ctx, lb_method, collision_rule, class_name, params):
+    config = generate_config(ctx, params)
+
+    # Symbols for PDF (twice, due to double buffering)
+    fields = generate_fields(config, lb_method.stencil)
+
+    # Generate collision kernel
+    collide_update_rule = lbmpy.updatekernels.create_lbm_kernel(
+        collision_rule,
+        fields['pdfs'],
+        fields['pdfs_tmp'],
+        lbmpy.fieldaccess.CollideOnlyInplaceAccessor())
+    collide_ast = ps.create_kernel(
+        collide_update_rule, config=config, **params)
+    collide_ast.function_name = 'kernel_collide'
+    collide_ast.assumed_inner_stride_one = True
+    pystencils_walberla.codegen.generate_sweep(
+        ctx, class_name, collide_ast, **params)
+
+
+def generate_stream_sweep(ctx, lb_method, class_name, params):
+    config = generate_config(ctx, params)
+
+    # Symbols for PDF (twice, due to double buffering)
+    fields = generate_fields(config, lb_method.stencil)
+
+    # Generate stream kernel
+    stream_update_rule = lbmpy.updatekernels.create_stream_pull_with_output_kernel(
+        lb_method, fields['pdfs'], fields['pdfs_tmp'],
+        output={'velocity': fields['velocity']})
+    stream_ast = ps.create_kernel(stream_update_rule, config=config, **params)
+    stream_ast.function_name = 'kernel_stream'
+    stream_ast.assumed_inner_stride_one = True
+    pystencils_walberla.codegen.generate_sweep(
+        ctx, class_name, stream_ast,
+        field_swaps=[(fields['pdfs'], fields['pdfs_tmp'])], **params)
+
+
+def generate_setters(ctx, lb_method, params):
+    config = generate_config(ctx, params)
+    fields = generate_fields(config, lb_method.stencil)
+
+    initial_rho = sp.Symbol('rho_0')
+    pdfs_setter = lbmpy.macroscopic_value_kernels.macroscopic_values_setter(
+        lb_method,
+        initial_rho,
+        fields['velocity'].center_vector,
+        fields['pdfs'].center_vector)
+    return pdfs_setter
diff --git a/maintainer/walberla_kernels/relaxation_rates.py b/maintainer/walberla_kernels/relaxation_rates.py
new file mode 100644
index 00000000000..14d02fdcb22
--- /dev/null
+++ b/maintainer/walberla_kernels/relaxation_rates.py
@@ -0,0 +1,54 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+# Copyright (C) 2019-2021 The waLBerla project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# This describes the mapping between LB moments and the corresponding relaxation rates
+# There are 4 relaxation rates for shear, bulk, even and odd modes,
+# respectively.
+
+# Original source:
+# https://i10git.cs.fau.de/pycodegen/lbmpy/-/blob/0e7962be84613466e6842f37111c571db8183b3d/lbmpy_tests/test_fluctuating_lb.py#L25-47
+
+from lbmpy.moments import is_bulk_moment, is_shear_moment, get_order
+import sympy as sp
+
+
+def rr_getter(moment_group):
+    """Maps a group of moments to a relaxation rate (shear, bulk, even, odd)
+    in the 4 relaxation time thermalized LB model or 0 for conserved modes.
+    """
+    is_shear = [is_shear_moment(m, 3) for m in moment_group]
+    is_bulk = [is_bulk_moment(m, 3) for m in moment_group]
+    order = [get_order(m) for m in moment_group]
+    assert min(order) == max(order)
+    order = order[0]
+
+    if order < 2:
+        return [0] * len(moment_group)
+    elif any(is_bulk):
+        assert all(is_bulk)
+        return [sp.Symbol("omega_bulk")] * len(moment_group)
+    elif any(is_shear):
+        assert all(is_shear)
+        return [sp.Symbol("omega_shear")] * len(moment_group)
+    elif order % 2 == 0:
+        assert order > 2
+        return [sp.Symbol("omega_even")] * len(moment_group)
+    else:
+        return [sp.Symbol("omega_odd")] * len(moment_group)
diff --git a/maintainer/walberla_kernels/templates/Boundary.tmpl.h b/maintainer/walberla_kernels/templates/Boundary.tmpl.h
new file mode 100644
index 00000000000..6bda8f86e06
--- /dev/null
+++ b/maintainer/walberla_kernels/templates/Boundary.tmpl.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2020-2023 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Boundary class.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+{% if target is equalto 'cpu' -%}
+#include <field/GhostLayerField.h>
+{%- elif target is equalto 'gpu' -%}
+#include <cuda/GPUField.h>
+#include <cuda/FieldCopy.h>
+{%- endif %}
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <blockforest/StructuredBlockForest.h>
+#include <field/FlagField.h>
+#include <core/debug/Debug.h>
+
+#include <functional>
+#include <set>
+#include <vector>
+
+{% for header in interface_spec.headers %}
+#include {{header}}
+{% endfor %}
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace {{namespace}} {
+
+
+class {{class_name}}
+{
+public:
+    {{StructDeclaration|indent(4)}}
+
+
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<{{StructName}}>;
+
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+
+        {% if target == 'gpu' -%}
+        ~IndexVectors() {
+            for( auto & gpuVec: gpuVectors_)
+                cudaFree( gpuVec );
+        }
+        {% endif -%}
+
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        {{StructName}} * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+
+        {% if target == 'gpu' -%}
+        {{StructName}} * pointerGpu(Type t)  { return gpuVectors_[t]; }
+        {% endif -%}
+
+        void syncGPU()
+        {
+            {% if target == 'gpu' -%}
+            for( auto & gpuVec: gpuVectors_)
+                cudaFree( gpuVec );
+            gpuVectors_.resize( cpuVectors_.size() );
+
+            WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
+            for(size_t i=0; i < cpuVectors_.size(); ++i )
+            {
+                auto & gpuVec = gpuVectors_[i];
+                auto & cpuVec = cpuVectors_[i];
+                cudaMalloc( &gpuVec, sizeof({{StructName}}) * cpuVec.size() );
+                cudaMemcpy( gpuVec, &cpuVec[0], sizeof({{StructName}}) * cpuVec.size(), cudaMemcpyHostToDevice );
+            }
+            {%- endif %}
+        }
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+
+        {% if target == 'gpu' -%}
+        using GpuIndexVector = {{StructName}} *;
+        std::vector<GpuIndexVector> gpuVectors_;
+        {%- endif %}
+    };
+
+    {{class_name}}( const shared_ptr<StructuredBlockForest> & blocks,
+                   {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}}{{additional_data_handler.constructor_arguments}})
+        :{{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }}
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_{{class_name}}");
+    };
+
+    {{class_name}}({{kernel|generate_constructor_parameters(['indexVectorSize'])}}{{additional_data_handler.constructor_arguments}})
+        : {{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVectorSize']) }}
+    {};
+
+    void run (
+        {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
+    );
+
+    {% if generate_functor -%}
+    void operator() (
+        {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
+    )
+    {
+        run( {{- ["block", kernel.kernel_selection_parameters, ["stream"] if target == 'gpu' else []] | identifier_list -}} );
+    }
+    {%- endif %}
+
+    void inner (
+        {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
+    );
+
+    void outer (
+        {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
+    );
+
+    std::function<void (IBlock *)> getSweep( {{- [interface_spec.high_level_args, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
+    {
+        return [ {{- ["this", interface_spec.high_level_args, ["stream"] if target == 'gpu' else []] | identifier_list -}} ]
+               (IBlock * b)
+               { this->run( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); };
+    }
+
+    std::function<void (IBlock *)> getInnerSweep( {{- [interface_spec.high_level_args, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
+    {
+        return [ {{- [ ['this'], interface_spec.high_level_args, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ]
+               (IBlock * b)
+               { this->inner( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); };
+    }
+
+    std::function<void (IBlock *)> getOuterSweep( {{- [interface_spec.high_level_args, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
+    {
+        return [ {{- [ ['this'], interface_spec.high_level_args, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ]
+               (IBlock * b)
+               { this->outer( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); };
+    }
+
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+            fillFromFlagField<FlagField_T>({{additional_data_handler.additional_arguments_for_fill_function}}&*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    template<typename FlagField_T>
+    void fillFromFlagField({{additional_data_handler.additional_parameters_for_fill_function}}IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        {{additional_data_handler.additional_field_data|indent(4)}}
+
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        {% if inner_or_boundary -%}
+        for( auto it = flagField->begin(); it != flagField->end(); ++it )
+        {
+            if( ! isFlagSet(it, domainFlag) )
+                continue;
+            {%- for dirIdx, dirVec, offset in additional_data_handler.stencil_info %}
+            if ( isFlagSet( it.neighbor({{offset}} {%if dim == 3%}, 0 {%endif %}), boundaryFlag ) )
+            {
+                auto element = {{StructName}}(it.x(), it.y(), {%if dim == 3%} it.z(), {%endif %} {{dirIdx}} );
+                {{additional_data_handler.data_initialisation(dirIdx)|indent(16)}}
+                indexVectorAll.push_back( element );
+                if( inner.contains( it.x(), it.y(), it.z() ) )
+                    indexVectorInner.push_back( element );
+                else
+                    indexVectorOuter.push_back( element );
+            }
+            {% endfor %}
+        }
+        {%else%}
+        auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+        {% if single_link %}
+        {{dtype}} dot = 0.0; {{dtype}} maxn = 0.0;
+        cell_idx_t calculated_idx = 0;
+        cell_idx_t dx = 0; cell_idx_t dy = 0; {%if dim == 3%}  cell_idx_t dz = 0; {% endif %}
+        cell_idx_t sum_x = 0; cell_idx_t sum_y = 0; {%if dim == 3%} cell_idx_t sum_z = 0; {%endif %}
+        {% endif -%}
+        for( auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end(); ++it )
+        {
+            {% if single_link -%}
+            sum_x = 0; sum_y = 0; {%if dim == 3%} sum_z = 0; {%endif %}
+            {% endif %}
+            if( ! isFlagSet(it, boundaryFlag) )
+                continue;
+            {%- for dirIdx, dirVec, offset in additional_data_handler.stencil_info %}
+            if ( flagWithGLayers.contains(it.x() + cell_idx_c({{dirVec[0]}}), it.y() + cell_idx_c({{dirVec[1]}}), it.z() + cell_idx_c({{dirVec[2]}})) && isFlagSet( it.neighbor({{offset}} {%if dim == 3%}, 0 {%endif %}), domainFlag ) )
+            {
+                {% if single_link -%}
+                sum_x += cell_idx_c({{dirVec[0]}}); sum_y += cell_idx_c({{dirVec[1]}}); {%if dim == 3%} sum_z += cell_idx_c({{dirVec[2]}}); {%endif %}
+                {% else %}
+                auto element = {{StructName}}(it.x(), it.y(), {%if dim == 3%} it.z(), {%endif %} {{dirIdx}} );
+                {{additional_data_handler.data_initialisation(dirIdx)|indent(16)}}
+                indexVectorAll.push_back( element );
+                if( inner.contains( it.x(), it.y(), it.z() ) )
+                    indexVectorInner.push_back( element );
+                else
+                    indexVectorOuter.push_back( element );
+                {% endif %}
+            }
+            {% endfor %}
+
+        {% if single_link %}
+            dot = 0.0; maxn = 0.0; calculated_idx = 0;
+            if(sum_x != 0 or sum_y !=0 {%if dim == 3%} or sum_z !=0 {%endif %})
+            {
+            {%- for dirIdx, dirVec, offset in additional_data_handler.stencil_info %}
+                dx = {{dirVec[0]}}; dy = {{dirVec[1]}}; {%if dim == 3%} dz = {{dirVec[2]}}; {% endif %}
+                dot = numeric_cast< {{dtype}} >( dx*sum_x + dy*sum_y {%if dim == 3%} + dz*sum_z {% endif %});
+                if (dot > maxn)
+                {
+                    maxn = dot;
+                    calculated_idx = {{dirIdx}};
+                }
+            {% endfor %}
+                auto element = {{StructName}}(it.x(), it.y(), {%if dim == 3%} it.z(), {%endif %} calculated_idx );
+                {{additional_data_handler.data_initialisation(dirIdx)|indent(16)}}
+                indexVectorAll.push_back( element );
+                if( inner.contains( it.x(), it.y(), it.z() ) )
+                indexVectorInner.push_back( element );
+                else
+                indexVectorOuter.push_back( element );
+            }
+        {% endif -%}
+
+        }
+        {% endif %}
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(
+        {{- ["IBlock * block", "IndexVectors::Type type",
+             kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []]
+            | type_identifier_list -}}
+   );
+
+    BlockDataID indexVectorID;
+    {{additional_data_handler.additional_member_variable|indent(4)}}
+public:
+    {{kernel|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}}
+};
+
+} // namespace {{namespace}}
+} // namespace walberla
diff --git a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h
new file mode 100644
index 00000000000..37e1edcf9cd
--- /dev/null
+++ b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h
@@ -0,0 +1,437 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ * Copyright (C) 2020 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Lattice field accessors.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/a16141524c58ab88386e2a0f8fdd7c63c5edd704/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+#include <core/cell/Cell.h>
+#include <core/cell/CellInterval.h>
+#include <core/math/Matrix{{D}}.h>
+#include <core/math/Vector{{D}}.h>
+
+#include <field/GhostLayerField.h>
+#include <stencil/{{stencil_name}}.h>
+
+#include <array>
+#include <cassert>
+#include <tuple>
+#include <vector>
+
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#endif
+
+namespace walberla {
+namespace {{namespace}} {
+namespace accessor {
+
+namespace Population
+{
+    inline std::array<{{dtype}}, {{Q}}u>
+    get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
+         Cell const & cell )
+    {
+        {{dtype}} const & xyz0 = pdf_field->get(cell, uint_t{ 0u });
+        std::array<{{dtype}}, {{Q}}u> pop;
+        {% for i in range(Q) -%}
+            pop[{{i}}u] = pdf_field->getF( &xyz0, uint_t{ {{i}}u });
+        {% endfor -%}
+        return pop;
+    }
+
+    inline void
+    set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+         std::array<{{dtype}}, {{Q}}u> const & pop,
+         Cell const & cell )
+    {
+        {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u });
+        {% for i in range(Q) -%}
+            pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = pop[{{i}}u];
+        {% endfor -%}
+    }
+
+    inline void
+    broadcast( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+               std::array<{{dtype}}, {{Q}}u> const & pop)
+     {
+         WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(pdf_field, {
+             {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
+             {% for i in range(Q) -%}
+                 pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = pop[{{i}}u];
+             {% endfor -%}
+         });
+     }
+
+    inline std::vector< {{dtype}} >
+    get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
+         CellInterval const & ci )
+    {
+        std::vector< {{dtype}} > out;
+        out.reserve(ci.numCells() * uint_t({{Q}}u));
+        for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+            for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+                for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+                    {{dtype}} const & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
+                    {% for i in range(Q) -%}
+                        out.emplace_back(pdf_field->getF( &xyz0, uint_t{ {{i}}u }));
+                    {% endfor -%}
+                }
+            }
+        }
+        return out;
+    }
+
+    inline void
+    set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+         std::vector< {{dtype}} > const & values,
+         CellInterval const & ci )
+    {
+        assert(uint_c(values.size()) == ci.numCells() * uint_t({{Q}}u));
+        auto values_ptr = values.data();
+        for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+            for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+                for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+                    {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
+                    {% for i in range(Q) -%}
+                        pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = values_ptr[{{i}}u];
+                    {% endfor -%}
+                    values_ptr += {{Q}}u;
+                }
+            }
+        }
+    }
+} // namespace Population
+
+namespace Vector
+{
+    inline Vector{{D}}< {{dtype}} >
+    get( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * vec_field,
+         Cell const & cell )
+    {
+        const {{dtype}} & xyz0 = vec_field->get(cell, uint_t{ 0u });
+        Vector{{D}}< {{dtype}} > vec;
+        {% for i in range(D) -%}
+            vec[{{i}}] = vec_field->getF( &xyz0, uint_t{ {{i}}u });
+        {% endfor -%}
+        return vec;
+    }
+
+    inline void
+    set( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field,
+         Vector{{D}}< {{dtype}} > const & vec,
+         Cell const & cell )
+    {
+        {{dtype}} & xyz0 = vec_field->get(cell, uint_t{ 0u });
+        {% for i in range(D) -%}
+            vec_field->getF( &xyz0, uint_t{ {{i}}u }) = vec[{{i}}u];
+        {% endfor -%}
+    }
+
+    inline void
+    add( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field,
+         Vector{{D}}< {{dtype}} > const & vec,
+         Cell const & cell )
+    {
+        {{dtype}} & xyz0 = vec_field->get(cell, uint_t{ 0u });
+        {% for i in range(D) -%}
+            vec_field->getF( &xyz0, uint_t{ {{i}}u }) += vec[{{i}}u];
+        {% endfor -%}
+    }
+
+    inline void
+    broadcast( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field,
+               Vector{{D}}< {{dtype}} > const & vec)
+     {
+         WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(vec_field, {
+             {{dtype}} & xyz0 = vec_field->get(x, y, z, uint_t{ 0u });
+             {% for i in range(D) -%}
+                 vec_field->getF( &xyz0, uint_t{ {{i}}u }) = vec[{{i}}u];
+             {% endfor -%}
+         });
+     }
+
+    inline void
+    add_to_all( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field,
+                Vector{{D}}< {{dtype}} > const & vec)
+     {
+         WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(vec_field, {
+             {{dtype}} & xyz0 = vec_field->get(x, y, z, uint_t{ 0u });
+             {% for i in range(D) -%}
+                 vec_field->getF( &xyz0, uint_t{ {{i}}u }) += vec[{{i}}u];
+             {% endfor -%}
+         });
+     }
+
+    inline std::vector< {{dtype}} >
+    get( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * vec_field,
+         CellInterval const & ci )
+    {
+        std::vector< {{dtype}} > out;
+        out.reserve(ci.numCells() * uint_t({{D}}u));
+        for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+            for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+                for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+                    const {{dtype}} & xyz0 = vec_field->get(x, y, z, uint_t{ 0u });
+                    {% for i in range(D) -%}
+                      out.emplace_back(vec_field->getF( &xyz0, uint_t{ {{i}}u }));
+                    {% endfor -%}
+                }
+            }
+        }
+        return out;
+    }
+
+    inline void
+    set( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field,
+         std::vector< {{dtype}} > const & values,
+         CellInterval const & ci )
+    {
+        assert(uint_c(values.size()) == ci.numCells() * uint_t({{D}}u));
+        auto values_ptr = values.data();
+        for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+            for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+                for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+                    {{dtype}} & xyz0 = vec_field->get(x, y, z, uint_t{ 0u });
+                    {% for i in range(D) -%}
+                        vec_field->getF( &xyz0, uint_t{ {{i}}u }) = values_ptr[{{i}}u];
+                    {% endfor -%}
+                    values_ptr += {{D}}u;
+                }
+            }
+        }
+    }
+} // namespace Vector
+
+namespace EquilibriumDistribution
+{
+    inline {{dtype}}
+    get( stencil::Direction const direction,
+         Vector{{D}}< {{dtype}} > const & u = Vector{{D}}< {{dtype}} >( {{dtype}}(0.0) ),
+         {{dtype}} rho = {{dtype}}(1.0) )
+    {
+        {% if not compressible %}
+        rho -= {{dtype}}(1.0);
+        {% endif %}
+        {{equilibrium_from_direction}}
+    }
+} // namespace EquilibriumDistribution
+
+namespace Equilibrium
+{
+    inline void
+    set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+         Vector{{D}}< {{dtype}} > const & u,
+         {{dtype}} const rho,
+         Cell const & cell )
+    {
+        {%if not compressible %}
+        rho -= {{dtype}}(1.0);
+        {%endif %}
+
+        {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u });
+        {% for eqTerm in equilibrium -%}
+            pdf_field->getF( &xyz0, uint_t{ {{ loop.index0 }}u }) = {{eqTerm}};
+        {% endfor -%}
+    }
+} // namespace Equilibrium
+
+namespace Density
+{
+    inline {{dtype}}
+    get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
+         Cell const & cell )
+    {
+        const {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u });
+        {% for i in range(Q) -%}
+            const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u });
+        {% endfor -%}
+        {{density_getters | indent(8)}}
+        return rho;
+    }
+
+    inline void
+    set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+         {{dtype}} const rho_in,
+         Cell const & cell )
+    {
+        const {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u });
+        {% for i in range(Q) -%}
+            const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u });
+        {% endfor -%}
+
+        {{unshifted_momentum_density_getter | indent(8)}}
+
+        // calculate current velocity (before density change)
+        const {{dtype}} conversion = {{dtype}}(1) / rho;
+        Vector{{D}}< {{dtype}} > velocity;
+        {% for i in range(D) -%}
+            velocity[{{i}}u] = momdensity_{{i}} * conversion;
+        {% endfor %}
+
+        Equilibrium::set(pdf_field, velocity, rho_in {%if not compressible %} + {{dtype}}(1) {%endif%}, cell);
+    }
+
+    inline std::vector< {{dtype}} >
+    get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
+         CellInterval const & ci )
+    {
+        std::vector< {{dtype}} > out;
+        out.reserve(ci.numCells());
+        for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+            for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+                for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+                    const {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
+                    {% for i in range(Q) -%}
+                        const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u });
+                    {% endfor -%}
+                    {{density_getters | indent(12)}}
+                    out.emplace_back(rho);
+                }
+            }
+        }
+        return out;
+    }
+
+    inline void
+    set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+         std::vector< {{dtype}} > const & values,
+         CellInterval const & ci )
+    {
+        assert(uint_c(values.size()) == ci.numCells());
+        auto values_it = values.begin();
+        for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+            for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+                for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+                    const {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
+                    {% for i in range(Q) -%}
+                        const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u });
+                    {% endfor -%}
+
+                    {{unshifted_momentum_density_getter | indent(12)}}
+
+                    // calculate current velocity (before density change)
+                    const {{dtype}} conversion = {{dtype}}(1) / rho;
+                    Vector{{D}}< {{dtype}} > velocity;
+                    {% for i in range(D) -%}
+                        velocity[{{i}}u] = momdensity_{{i}} * conversion;
+                    {% endfor %}
+
+                    Equilibrium::set(pdf_field, velocity, *values_it {%if not compressible %} + {{dtype}}(1) {%endif%}, Cell{x, y, z});
+                    ++values_it;
+                }
+            }
+        }
+    }
+} // namespace Density
+
+namespace Velocity
+{
+    inline void
+    set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+         GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field,
+         Vector{{D}}< {{dtype}} > const & u,
+         Cell const & cell )
+    {
+        const {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u });
+        {% for i in range(Q) -%}
+            const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u });
+        {% endfor -%}
+        {{density_getters | indent(8)}}
+
+        {% for c in "xyz" -%}
+            const auto {{c}} = cell.{{c}}();
+        {% endfor -%}
+        {{density_velocity_setter_macroscopic_values | substitute_force_getter_cpp | indent(8)}}
+
+        Equilibrium::set(pdf_field, Vector{{D}}<{{dtype}}>({% for i in range(D) %}u_{{i}}{% if not loop.last %}, {% endif %}{% endfor %}), rho {%if not compressible %} + {{dtype}}(1) {%endif%}, cell);
+    }
+} // namespace Velocity
+
+namespace MomentumDensity
+{
+    inline Vector{{D}}< {{dtype}} >
+    reduce( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
+            GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field )
+    {
+        Vector{{D}}< {{dtype}} > momentumDensity({{dtype}} {0});
+        WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, {
+            const {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
+            {% for i in range(Q) -%}
+                const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u });
+            {% endfor -%}
+
+            {{momentum_density_getter | substitute_force_getter_cpp | indent(8) }}
+
+            {% for i in range(D) -%}
+                momentumDensity[{{i}}u] += md_{{i}};
+            {% endfor %}
+        });
+        return momentumDensity;
+    }
+} // namespace MomentumDensity
+
+namespace PressureTensor
+{
+    inline Matrix{{D}}< {{dtype}} >
+    get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
+         Cell const & cell )
+   {
+        const {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u });
+        {% for i in range(Q) -%}
+            const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u });
+        {% endfor -%}
+
+        {{second_momentum_getter | indent(8) }}
+
+        Matrix{{D}}< {{dtype}} > pressureTensor;
+        {% for i in range(D) -%}
+            {% for j in range(D) -%}
+                pressureTensor[{{i*D+j}}u] = p_{{i*D+j}};
+            {% endfor %}
+        {% endfor %}
+        return pressureTensor;
+   }
+} // namespace PressureTensor
+
+} // namespace accessor
+} // namespace {{namespace}}
+} // namespace walberla
+
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic pop
+#endif
diff --git a/maintainer/walberla_kernels/templates/ReactionKernelSelector.tmpl.h b/maintainer/walberla_kernels/templates/ReactionKernelSelector.tmpl.h
new file mode 100644
index 00000000000..7ec4666d7b9
--- /dev/null
+++ b/maintainer/walberla_kernels/templates/ReactionKernelSelector.tmpl.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+{% for i in range(1, max_num_reactants + 1) %}
+#include "{{class_name}}_{{i}}_{{precision_suffix[True]}}.h"
+#include "{{class_name}}_{{i}}_{{precision_suffix[False]}}.h"
+{% endfor %}
+
+#include <domain_decomposition/BlockDataID.h>
+
+#include <cstddef>
+#include <memory>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+namespace walberla {
+namespace detail {
+namespace {{class_name}}Selector {
+
+template <typename FloatType = double, std::size_t N = 1> struct KernelTrait {
+  using {{class_name}} =
+      {{namespace}}::{{class_name}}_1_{{precision_suffix[True]}};
+};
+{% for i in range(2, max_num_reactants + 1) %}
+template <> struct KernelTrait<double, {{i}}> {
+  using {{class_name}} =
+      {{namespace}}::{{class_name}}_{{i}}_{{precision_suffix[True]}};
+};
+{% endfor %}
+{% for i in range(1, max_num_reactants + 1) %}
+template <> struct KernelTrait<float, {{i}}> {
+  using {{class_name}} =
+      {{namespace}}::{{class_name}}_{{i}}_{{precision_suffix[False]}};
+};
+{% endfor %}
+
+template <typename FloatType, class Reactant, std::size_t... ints>
+auto get_kernel_impl(const std::vector<std::shared_ptr<Reactant>> &reactants,
+                     const double coefficient,
+                     {% if class_name == 'ReactionKernelIndexed' -%}
+                     const BlockDataID &indexFieldID,
+                     {% endif -%}
+                     std::index_sequence<ints...> int_seq) {
+  auto kernel = std::make_shared<
+      typename KernelTrait<FloatType, int_seq.size()>::{{class_name}}>(
+      {% if class_name == 'ReactionKernelIndexed' -%}
+      indexFieldID,
+      {% endif -%}
+      walberla::BlockDataID(
+          reactants[ints]->get_species()->get_density_id())...,
+      numeric_cast<FloatType>(reactants[ints]->get_order())...,
+      numeric_cast<FloatType>(coefficient),
+      numeric_cast<FloatType>(reactants[ints]->get_stoech_coeff())...);
+
+  std::function<void(IBlock *)> sweep = [kernel](IBlock * b) { kernel->run(b); };
+  return sweep;
+}
+
+template <typename FloatType, class Reactant, class... Args>
+auto get_kernel_impl(const std::vector<std::shared_ptr<Reactant>> &reactants,
+                     Args... args) {
+  switch (reactants.size()) {
+{% for i in range(1, max_num_reactants + 1) %}
+  case {{i}}:
+    return get_kernel_impl<FloatType>(reactants, args...,
+                                      std::make_index_sequence<{{i}}>{});
+{% endfor %}
+  default:
+    throw std::runtime_error("reactions of this size are not implemented!");
+  }
+}
+
+template <class Reactant, class... Args>
+auto get_kernel(const std::vector<std::shared_ptr<Reactant>> &reactants,
+                Args... args) {
+
+  const auto is_double_precision =
+      reactants[0]->get_species()->is_double_precision();
+
+  if (is_double_precision) {
+    return get_kernel_impl<double>(reactants, args...);
+  }
+
+  return get_kernel_impl<float>(reactants, args...);
+}
+
+} // namespace {{class_name}}Selector
+} // namespace detail
+} // namespace walberla
diff --git a/maintainer/walberla_kernels/walberla_lbm_generation.py b/maintainer/walberla_kernels/walberla_lbm_generation.py
new file mode 100644
index 00000000000..72f5ffdfec4
--- /dev/null
+++ b/maintainer/walberla_kernels/walberla_lbm_generation.py
@@ -0,0 +1,212 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+# Copyright (C) 2020-2022 The waLBerla project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import sympy as sp
+import pystencils as ps
+import lbmpy_walberla
+from pystencils.typing.typed_sympy import TypedSymbol
+from pystencils.typing import BasicType, CastFunc, TypedSymbol
+
+# File derived from lbmpy_walberla.walberla_lbm_generation in the
+# walberla project, commit 3455bf3eebc64efa9beaecd74ebde3459b98991d
+
+
+def __type_equilibrium_assignments(assignments, config, subs_dict):
+    # Function derived from lbmpy_walberla.walberla_lbm_generation.__type_equilibrium_assignments()
+    # in the walberla project, commit 9dcd0dd90f50f7b64b0a38bb06327854463fdafd
+    from pystencils.node_collection import NodeCollection
+    from pystencils.typing.transformations import add_types
+    result = assignments.new_with_substitutions(subs_dict)
+    result = NodeCollection(result.main_assignments)
+    result.evaluate_terms()
+    result = add_types(result.all_assignments, config)
+    return result
+
+
+def type_expr(eq, dtype):
+    # manually cast floats to dtype since this is not done automatically
+    repl = ((rational := sp.Rational(1, i), CastFunc(rational, dtype))
+            for i in (2, 3, 4, 6, 8, 9, 12, 24, 18, 36, 72))
+    eq = eq.subs(repl)
+    return eq.subs({s: TypedSymbol(s.name, dtype)
+                   for s in eq.atoms(sp.Symbol)})
+
+
+def pow_to_mul(eq):
+    keep_processing = True
+    while keep_processing:
+        for expr in sp.preorder_traversal(eq):
+            if expr.is_Pow:
+                if expr.args[0].is_Symbol and expr.args[1].is_Integer:
+                    power = expr.args[1].p
+                    if power >= 1:
+                        chained_product = expr.args[1].p * [expr.args[0]]
+                        expr_mul = sp.Mul(*chained_product, evaluate=False)
+                        print(f"folding '{expr}' to '{expr_mul}'")
+                        eq = eq.subs(expr, sp.UnevaluatedExpr(expr_mul))
+                        break
+        else:
+            keep_processing = False
+    return eq
+
+
+def make_velocity_getters(cqc, rho_sym, vel_arr_symbols):
+    velocity_getter = cqc.equilibrium_input_equations_from_init_values(
+        rho_sym, vel_arr_symbols)
+    eq = velocity_getter.main_assignments.pop(0)
+    assert eq.lhs == rho_sym and eq.rhs == rho_sym
+    eq = velocity_getter.main_assignments.pop(0)
+    assert eq.lhs.name == f"delta_{rho_sym.name}"
+    return velocity_getter
+
+
+def equations_to_code(equations, variable_prefix="",
+                      variables_without_prefix=None, dtype=None, backend=None):
+    if dtype is None:
+        dtype = BasicType("float64")
+
+    if variables_without_prefix is None:
+        variables_without_prefix = []
+    if isinstance(equations, ps.AssignmentCollection):
+        equations = equations.all_assignments
+
+    variables_without_prefix = list(variables_without_prefix)
+
+    result = []
+    left_hand_side_names = [eq.lhs.name for eq in equations]
+    for eq in equations:
+        lhs, rhs = eq.lhs, eq.rhs
+        rhs = lbmpy_walberla.walberla_lbm_generation.field_and_symbol_substitute(
+            rhs, variable_prefix, variables_without_prefix + left_hand_side_names)
+        lhs = type_expr(lhs, dtype=dtype)
+        rhs = type_expr(rhs, dtype=dtype)
+        rhs = pow_to_mul(rhs)
+        assignment = ps.astnodes.SympyAssignment(lhs, rhs)
+        result.append(backend(assignment))
+    return "\n".join(result)
+
+
+def substitute_force_getter_cpp(code):
+    field_getter = "force->"
+    assert field_getter in code is not None, f"pattern '{field_getter} not found in '''\n{code}\n'''"
+    return code.replace(field_getter, "force_field->")
+
+
+def add_espresso_filters_to_jinja_env(jinja_env):
+    jinja_env.filters["substitute_force_getter_cpp"] = substitute_force_getter_cpp
+
+
+def generate_macroscopic_values_accessors(ctx, config, lb_method, templates):
+
+    # Function derived from lbmpy_walberla.walberla_lbm_generation.__lattice_model()
+    # in the walberla project, commit 3455bf3eebc64efa9beaecd74ebde3459b98991d
+    # with backports from commit de6b00071233a9a1f45d7a6773988363e058f1a0
+
+    from jinja2 import Environment, FileSystemLoader, StrictUndefined
+    from sympy.tensor import IndexedBase
+    from pystencils.backends.cbackend import CustomSympyPrinter
+    from pystencils.backends.cbackend import CBackend
+    from pystencils.backends.cuda_backend import CudaBackend
+    from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+    from lbmpy_walberla.walberla_lbm_generation import stencil_switch_statement
+
+    cpp_printer = CustomSympyPrinter()
+    stencil_name = lb_method.stencil.name
+    if not stencil_name:
+        raise ValueError(
+            "lb_method uses a stencil that is not supported in waLBerla")
+
+    default_dtype = config.data_type.default_factory()
+    if config.target == ps.Target.GPU:
+        backend = CudaBackend()
+    else:
+        backend = CBackend()
+    kwargs = {
+        "backend": backend,
+        "variable_prefix": "",
+        "dtype": default_dtype}
+
+    cqc = lb_method.conserved_quantity_computation
+    vel_symbols = cqc.velocity_symbols
+    rho_sym = sp.Symbol("rho")
+    pdfs_sym = sp.symbols(f"f_:{lb_method.stencil.Q}")
+    vel_arr_symbols = [
+        IndexedBase(TypedSymbol("u", dtype=default_dtype), shape=(1,))[i]
+        for i in range(len(vel_symbols))]
+    momentum_density_symbols = sp.symbols(f"md_:{len(vel_symbols)}")
+    second_momentum_symbols = sp.symbols(f"p_:{len(vel_symbols)**2}")
+
+    equilibrium_subs_dict = dict(zip(vel_symbols, vel_arr_symbols))
+    equilibrium = lb_method.get_equilibrium()
+    lhs_list = [a.lhs for a in equilibrium.main_assignments]
+    equilibrium_matrix = sp.Matrix(
+        [e.rhs for e in equilibrium.main_assignments])
+    equilibrium = ps.AssignmentCollection([ps.Assignment(lhs, rhs)
+                                           for lhs, rhs in zip(lhs_list, equilibrium_matrix)])
+    equilibrium = __type_equilibrium_assignments(
+        equilibrium, config, equilibrium_subs_dict)
+
+    velocity_getters = make_velocity_getters(cqc, rho_sym, vel_arr_symbols)
+    density_velocity_setter_macroscopic_values = equations_to_code(
+        velocity_getters, variables_without_prefix=["rho", "u"], **kwargs)
+    momentum_density_getter = cqc.output_equations_from_pdfs(
+        pdfs_sym, {"density": rho_sym, "momentum_density": momentum_density_symbols})
+    unshifted_momentum_density_getter = cqc.output_equations_from_pdfs(
+        pdfs_sym, {"density": rho_sym, "momentum_density": momentum_density_symbols})
+    for i, eq in reversed(
+            list(enumerate(unshifted_momentum_density_getter.main_assignments))):
+        if eq.lhs.name.startswith("md_"):
+            del unshifted_momentum_density_getter.main_assignments[i]
+    second_momentum_getter = cqc.output_equations_from_pdfs(
+        pdfs_sym, {"moment2": second_momentum_symbols})
+
+    jinja_context = {
+        "stencil_name": stencil_name,
+        "D": lb_method.stencil.D,
+        "Q": lb_method.stencil.Q,
+        "compressible": cqc.compressible,
+        "zero_centered": cqc.zero_centered_pdfs,
+        "dtype": default_dtype,
+
+        "equilibrium_from_direction": stencil_switch_statement(lb_method.stencil, equilibrium),
+        "equilibrium": [cpp_printer.doprint(e.rhs) for e in equilibrium],
+
+        "density_getters": equations_to_code(
+            cqc.output_equations_from_pdfs(pdfs_sym, {"density": rho_sym}),
+            variables_without_prefix=[e.name for e in pdfs_sym], **kwargs),
+        "momentum_density_getter": equations_to_code(
+            momentum_density_getter, variables_without_prefix=pdfs_sym, **kwargs),
+        "second_momentum_getter": equations_to_code(
+            second_momentum_getter, variables_without_prefix=pdfs_sym, **kwargs),
+        "density_velocity_setter_macroscopic_values": density_velocity_setter_macroscopic_values,
+        "unshifted_momentum_density_getter": equations_to_code(unshifted_momentum_density_getter, variables_without_prefix=pdfs_sym, **kwargs),
+
+        "namespace": "lbm",
+    }
+
+    env = Environment(loader=FileSystemLoader(os.path.dirname(__file__)),
+                      undefined=StrictUndefined)
+    add_pystencils_filters_to_jinja_env(env)
+    add_espresso_filters_to_jinja_env(env)
+
+    for filename, template in templates.items():
+        source = env.get_template(template).render(**jinja_context)
+        ctx.write_file(filename, source)
diff --git a/requirements.txt b/requirements.txt
index 8493b9872ab..111ff05ecd0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@
 cython>=0.29.21,<3.0
 setuptools>=59.6.0
 # required scientific packages
-numpy>=1.21.5
+numpy>=1.23
 h5py>=3.6.0
 # optional scientific packages
 scipy>=1.8.0
@@ -12,6 +12,11 @@ matplotlib>=3.5.1
 vtk>=9.1.0
 PyOpenGL>=3.1.5
 pygame>=2.1.2
+# waLBerla dependencies
+pystencils==1.2
+lbmpy==1.2
+sympy==1.9
+islpy==2022.2.1
 # CI-related
 requests>=2.25.1
 lxml>=4.8.0
diff --git a/samples/ekboundaries.py b/samples/ekboundaries.py
deleted file mode 100644
index e8b825070cb..00000000000
--- a/samples/ekboundaries.py
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# Copyright (C) 2010-2022 The ESPResSo project
-#
-# This file is part of ESPResSo.
-#
-# ESPResSo is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# ESPResSo is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-"""
-Set up an electrokinetics (LB) fluid confined between charged walls.
-"""
-
-import os
-
-import espressomd
-import espressomd.shapes
-import espressomd.electrokinetics
-import espressomd.ekboundaries
-
-required_features = ["ELECTROKINETICS", "EK_BOUNDARIES", "EXTERNAL_FORCES"]
-espressomd.assert_features(required_features)
-
-system = espressomd.System(box_l=[10, 10, 10])
-system.cell_system.skin = 0.4
-system.time_step = 0.1
-
-ek = espressomd.electrokinetics.Electrokinetics(
-    lb_density=1, friction=1, agrid=1, viscosity=1, T=1, prefactor=1)
-
-pos = espressomd.electrokinetics.Species(
-    density=0.05, D=0.1, valency=1, ext_force_density=[0, 0, 1.])
-neg = espressomd.electrokinetics.Species(
-    density=0.05, D=0.1, valency=-1, ext_force_density=[0, 0, -1.])
-ek.add_species(pos)
-ek.add_species(neg)
-system.actors.add(ek)
-
-print(ek.get_params())
-print(pos.get_params())
-print(neg.get_params())
-print(pos[5, 5, 5].density)
-
-
-ek_wall_left = espressomd.ekboundaries.EKBoundary(
-    shape=espressomd.shapes.Wall(dist=1, normal=[1, 0, 0]), charge_density=-0.01)
-ek_wall_right = espressomd.ekboundaries.EKBoundary(
-    shape=espressomd.shapes.Wall(dist=-9, normal=[-1, 0, 0]), charge_density=0.01)
-system.ekboundaries.add(ek_wall_left)
-system.ekboundaries.add(ek_wall_right)
-
-
-if not os.path.isdir("ek"):
-    os.makedirs("ek")
-
-
-n_int_cycles = 1000
-for i in range(n_int_cycles):
-    system.integrator.run(100)
-    print("\rIntegrating: %03i" % i, end='', flush=True)
-
-    pos.write_vtk_density("ek/pos_dens_%i.vtk" % i)
-    neg.write_vtk_density("ek/neg_dens_%i.vtk" % i)
-    pos.write_vtk_flux("ek/pos_flux_%i.vtk" % i)
-    neg.write_vtk_flux("ek/neg_flux_%i.vtk" % i)
-    ek.write_vtk_velocity("ek/ekv_%i.vtk" % i)
-    ek.write_vtk_boundary("ek/ekb_%i.vtk" % i)
diff --git a/samples/immersed_boundary/sampleImmersedBoundary.py b/samples/immersed_boundary/sampleImmersedBoundary.py
index 5e5ef28cbd1..11b63a45753 100644
--- a/samples/immersed_boundary/sampleImmersedBoundary.py
+++ b/samples/immersed_boundary/sampleImmersedBoundary.py
@@ -26,10 +26,9 @@
 import espressomd
 import espressomd.lb
 import espressomd.shapes
-import espressomd.lbboundaries
 import espressomd.virtual_sites
 
-required_features = ["LB_BOUNDARIES", "VIRTUAL_SITES_INERTIALESS_TRACERS"]
+required_features = ["VIRTUAL_SITES_INERTIALESS_TRACERS", "WALBERLA"]
 espressomd.assert_features(required_features)
 
 parser = argparse.ArgumentParser()
@@ -76,20 +75,20 @@
     outputDir = "outputVolParaCUDA"
 
 # Add LB Fluid
-lbf = espressomd.lb.LBFluid(agrid=1, dens=1, visc=1, tau=system.time_step,
-                            ext_force_density=[force, 0, 0])
+lbf = espressomd.lb.LBFluidWalberla(
+    agrid=1, density=1, kinematic_viscosity=1, tau=system.time_step,
+    ext_force_density=[force, 0, 0])
 system.actors.add(lbf)
 
 system.thermostat.set_lb(LB_fluid=lbf, gamma=1.0, act_on_virtual=False)
 
 # Setup boundaries
-walls = [espressomd.lbboundaries.LBBoundary() for k in range(2)]
-walls[0].set_params(shape=espressomd.shapes.Wall(normal=[0, 0, 1], dist=0.5))
-walls[1].set_params(shape=espressomd.shapes.Wall(
-    normal=[0, 0, -1], dist=-boxZ + 0.5))
+wall_shapes = [None] * 2
+wall_shapes[0] = espressomd.shapes.Wall(normal=[0, 0, 1], dist=0.5)
+wall_shapes[1] = espressomd.shapes.Wall(normal=[0, 0, -1], dist=-boxZ + 0.5)
 
-for wall in walls:
-    system.lbboundaries.add(wall)
+for wall_shape in wall_shapes:
+    lbf.add_boundary_from_shape(wall_shape)
 
 # make directory
 os.makedirs(outputDir)
diff --git a/samples/lb_circular_couette.py b/samples/lb_circular_couette.py
new file mode 100644
index 00000000000..9b30fbac048
--- /dev/null
+++ b/samples/lb_circular_couette.py
@@ -0,0 +1,201 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+"""
+Simulate a rotating cylinder in a fluid via slip velocity boundary conditions.
+"""
+
+import espressomd.lb
+import espressomd.shapes
+import espressomd.constraints
+import espressomd.observables
+import espressomd.math
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.colors
+import matplotlib.ticker
+import itertools
+import argparse
+
+espressomd.assert_features(["WALBERLA"])
+
+parser = argparse.ArgumentParser(epilog=__doc__)
+parser.add_argument("--visualizer", action="store_true", dest="visualizer",
+                    help="Run the visualizer")
+args = parser.parse_args()
+
+# set up LB system
+agrid = 0.5
+grid_size = np.array([31, 31, 4])
+system = espressomd.System(box_l=grid_size * agrid)
+system.time_step = 0.1
+if args.visualizer:
+    system.time_step = 0.001
+system.cell_system.skin = 0.1
+system.periodicity = [False, False, True]
+lb_fluid = espressomd.lb.LBFluidWalberla(
+    agrid=agrid, density=0.5, kinematic_viscosity=3.2, tau=system.time_step)
+system.actors.add(lb_fluid)
+
+# set up cylinders
+cyl_center = agrid * (grid_size // 2 + 0.5) * [1, 1, 0]
+cylinder_in = espressomd.shapes.Cylinder(
+    center=cyl_center, axis=[0, 0, 1], length=3 * system.box_l[2],
+    radius=8.1 * agrid, direction=1)
+cylinder_out = espressomd.shapes.Cylinder(
+    center=cyl_center, axis=[0, 0, 1], length=3 * system.box_l[2],
+    radius=14.5 * agrid, direction=-1)
+lb_fluid.add_boundary_from_shape(cylinder_in)
+lb_fluid.add_boundary_from_shape(cylinder_out)
+
+# the system needs to be fully symmetric
+mask = np.copy(lb_fluid[:, :, :].is_boundary.astype(int))
+np.testing.assert_array_equal(mask, np.flip(mask, axis=0))
+np.testing.assert_array_equal(mask, np.flip(mask, axis=1))
+np.testing.assert_array_equal(mask, np.flip(mask, axis=2))
+
+# the system needs to be closed in the x and y directions
+np.testing.assert_array_equal(mask[0, :, :], 1)
+np.testing.assert_array_equal(mask[-1, :, :], 1)
+np.testing.assert_array_equal(mask[:, 0, :], 1)
+np.testing.assert_array_equal(mask[:, -1, :], 1)
+
+# add tangential slip velocity to the inner cylinder
+velocity_magnitude = 0.01
+surface_nodes = espressomd.lb.edge_detection(
+    lb_fluid.get_shape_bitmask(cylinder_in), system.periodicity)
+tangents = espressomd.lb.calc_cylinder_tangential_vectors(
+    cylinder_in.center, lb_fluid.agrid, 0.5, surface_nodes)
+for node, tangent in zip(surface_nodes, tangents):
+    lb_fluid[node].boundary = espressomd.lb.VelocityBounceBack(
+        velocity_magnitude * tangent)
+
+if args.visualizer:
+    import espressomd.visualization
+    visualizer = espressomd.visualization.openGLLive(
+        system,
+        LB_draw_velocity_plane=True,
+        LB_plane_dist=0,
+        LB_plane_axis=2,
+        LB_vel_scale=80,
+        LB_vel_radius_scale=0.05,
+        LB_plane_ngrid=15,
+        quality_constraints=128,
+        camera_position=[8, 8, 30],
+        background_color=[1, 1, 1],
+        velocity_arrows_type_colors=[[0, 1, 0]]
+    )
+    system.constraints.add(shape=cylinder_in)
+    system.constraints.add(shape=cylinder_out)
+    system.integrator.run(100)
+    visualizer.run(1)
+
+# add observable for the fluid velocity in cylindrical coordinates
+cyl_transform_params = espressomd.math.CylindricalTransformationParameters(
+    center=cyl_center, axis=[0, 0, 1], orientation=[1, 0, 0])
+observable = espressomd.observables.CylindricalLBVelocityProfile(
+    transform_params=cyl_transform_params,
+    n_r_bins=grid_size[0] // 2,
+    n_phi_bins=1,
+    n_z_bins=1,
+    min_r=0.0,
+    max_r=system.box_l[0] / 2,
+    min_phi=0.,
+    max_phi=2 * np.pi,
+    min_z=-system.box_l[2] / 2,
+    max_z=+system.box_l[2] / 2,
+    axis=[0.0, 0.0, 1.0],
+    sampling_density=1
+)
+obs_data_baseline = observable.calculate()
+
+# equilibrate the fluid
+system.integrator.run(100)
+obs_data = observable.calculate()
+
+# fetch fluid and slip velocities
+boundary_mask = np.squeeze(lb_fluid[:, :, 0].is_boundary.astype(bool))
+quivers_boundary = []
+quivers_fluid = []
+for i, j in itertools.product(range(boundary_mask.shape[0]),
+                              range(boundary_mask.shape[1])):
+    v_fluid = lb_fluid[i, j, 0].velocity
+    if boundary_mask[i, j]:
+        quivers_boundary.append([i, j, v_fluid[0], v_fluid[1]])
+    else:
+        quivers_fluid.append([i, j, v_fluid[0], v_fluid[1]])
+
+# prepare canvas
+plt.rcParams.update({'font.size': 16})
+fig1 = plt.figure()
+fig2 = plt.figure()
+ax1 = fig1.add_subplot(111)
+ax2 = fig2.add_subplot(111)
+
+# plot velocity as a function of distance
+profile_r = observable.bin_centers().reshape([-1, 3])[:, 0]
+profile_v = (obs_data - obs_data_baseline).reshape([-1, 3])
+ax1.plot(profile_r, profile_v[:, 1])
+y_formatter = matplotlib.ticker.ScalarFormatter()
+y_formatter.set_powerlimits((-1e-2, 1e-2))
+ax1.yaxis.set_major_formatter(y_formatter)
+ax1.set(xlabel='Distance from cylinder center', ylabel='Fluid velocity')
+
+# plot boundary geometry
+cmap = matplotlib.colors.ListedColormap(['white', 'silver', 'silver'])
+cmap_bounds = [0, 1, 2]
+cmap_norm = matplotlib.colors.BoundaryNorm(cmap_bounds, cmap.N)
+ax2.imshow(boundary_mask.T, origin='lower', interpolation='nearest', cmap=cmap,
+           norm=cmap_norm)
+
+# add grid lines based on minor ticks
+minor_locator = matplotlib.ticker.FixedLocator(np.arange(0.5, grid_size[0], 1))
+ax2.xaxis.set_minor_locator(minor_locator)
+ax2.yaxis.set_minor_locator(minor_locator)
+ax2.tick_params(axis='both', which='minor', length=0)
+ax2.grid(which='minor', color='w', linestyle='-', linewidth=1.2, zorder=2)
+
+# remove major ticks
+ax2.set_xticks([])
+ax2.set_yticks([])
+
+# add cylinder radii
+# circle_in = plt.Circle(
+#    cyl_center[:2] / agrid - agrid, cylinder_in.radius / agrid,
+#    color='r', fill=False, zorder=3)
+# circle_out = plt.Circle(
+#    cyl_center[:2] / agrid - agrid, cylinder_out.radius / agrid,
+#    color='r', fill=False, zorder=3)
+# ax2.add_patch(circle_in)
+# ax2.add_patch(circle_out)
+
+# plot velocity field
+quivers_boundary = np.array(quivers_boundary)
+quivers_fluid = np.array(quivers_fluid)
+ax2.quiver(quivers_boundary[:, 0], quivers_boundary[:, 1], quivers_boundary[:, 2],
+           quivers_boundary[:, 3], scale=.25, width=0.003, color='black',
+           zorder=4, label='slip velocity')
+ax2.quiver(quivers_fluid[:, 0], quivers_fluid[:, 1], quivers_fluid[:, 2],
+           quivers_fluid[:, 3], scale=.25, width=0.003, color='royalblue',
+           zorder=4, label='fluid velocity')
+ax2.set(xlabel='x-axis', ylabel='y-axis')
+ax2.legend(framealpha=1, loc='upper right')
+
+plt.tight_layout()
+plt.show()
diff --git a/samples/lb_four_roller_mill.py b/samples/lb_four_roller_mill.py
new file mode 100644
index 00000000000..dc7c33cbc23
--- /dev/null
+++ b/samples/lb_four_roller_mill.py
@@ -0,0 +1,189 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+"""
+Simulate a four-roller mill via slip velocity boundary conditions.
+"""
+
+import espressomd.lb
+import espressomd.shapes
+import espressomd.constraints
+import espressomd.observables
+import espressomd.math
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.colors
+import matplotlib.ticker
+import itertools
+import argparse
+import logging
+import tqdm
+import sys
+
+espressomd.assert_features(["WALBERLA"])
+logging.basicConfig(level=logging.INFO, stream=sys.stdout)
+
+parser = argparse.ArgumentParser(epilog=__doc__)
+parser.add_argument("--visualizer", action="store_true", dest="visualizer",
+                    help="Run the visualizer")
+args = parser.parse_args()
+
+# set up LB system
+logging.info('Setting up the lattice-Boltzmann fluid')
+agrid = 0.5
+grid_size = np.array([64, 64, 2])
+system = espressomd.System(box_l=grid_size * agrid)
+system.time_step = 0.1
+if args.visualizer:
+    system.time_step = 0.001
+system.cell_system.skin = 0.1
+lb_fluid = espressomd.lb.LBFluidWalberla(
+    agrid=agrid, density=0.5, kinematic_viscosity=3.2, tau=system.time_step)
+system.actors.add(lb_fluid)
+
+# set up rollers by adding tangential slip velocities to cylinders
+logging.info('Setting up the rollers')
+cyl_center = agrid * (grid_size // 2 + 0.5) * [1, 1, 0]
+for i, j in itertools.product(range(2), range(2)):
+    cyl_offset = np.array([1 + i * 0.99 - 0.51, 1 + j * 0.99 - 0.51, 0])
+    cyl = espressomd.shapes.Cylinder(
+        center=agrid * (grid_size // 2 + 0.5) * cyl_offset, axis=[0, 0, 1],
+        length=3 * system.box_l[2], radius=14.1 * agrid, direction=1)
+    if args.visualizer:
+        system.constraints.add(shape=cyl)
+    lb_fluid.add_boundary_from_shape(cyl)
+    surface_nodes = espressomd.lb.edge_detection(
+        lb_fluid.get_shape_bitmask(cyl), system.periodicity)
+    tangents = espressomd.lb.calc_cylinder_tangential_vectors(
+        cyl.center, lb_fluid.agrid, 0.5, surface_nodes)
+    direction = 1 if (i + j) % 2 == 0 else -1
+    for node, tangent in zip(surface_nodes, tangents):
+        vbb = espressomd.lb.VelocityBounceBack(0.01 * direction * tangent)
+        lb_fluid[node].boundary = vbb
+
+# the system needs to be fully symmetric
+mask = np.copy(lb_fluid[:, :, :].is_boundary.astype(int))
+np.testing.assert_array_equal(mask, np.flip(mask, axis=0))
+np.testing.assert_array_equal(mask, np.flip(mask, axis=1))
+np.testing.assert_array_equal(mask, np.flip(mask, axis=2))
+
+if args.visualizer:
+    import espressomd.visualization
+    visualizer = espressomd.visualization.openGLLive(
+        system,
+        LB_draw_velocity_plane=True,
+        LB_plane_dist=0,
+        LB_plane_axis=2,
+        LB_vel_scale=80,
+        LB_vel_radius_scale=0.05,
+        LB_plane_ngrid=24,
+        LB_arrow_quality=6,
+        quality_constraints=48,
+        camera_position=[4, 4, 50],
+        background_color=[1, 1, 1],
+        velocity_arrows_type_colors=[[0, 1, 0]]
+    )
+    visualizer.run(1)
+
+# equilibrate the fluid
+logging.info('Integration loop')
+for _ in tqdm.tqdm(range(40)):
+    system.integrator.run(20)
+
+# fetch fluid and slip velocities
+boundary_mask = np.squeeze(lb_fluid[:, :, 0].is_boundary.astype(bool))
+quivers_boundary = []
+quivers_fluid = []
+for i, j in itertools.product(range(boundary_mask.shape[0]),
+                              range(boundary_mask.shape[1])):
+    v_fluid = lb_fluid[i, j, 0].velocity
+    if boundary_mask[i, j]:
+        if np.linalg.norm(v_fluid) > 1e-10:
+            quivers_boundary.append([i, j, v_fluid[0], v_fluid[1]])
+    else:
+        quivers_fluid.append([i, j, v_fluid[0], v_fluid[1]])
+
+# prepare canvas
+logging.info('Plotting')
+plt.rcParams.update({'font.size': 16})
+fig1 = plt.figure()
+fig2 = plt.figure()
+fig3 = plt.figure()
+ax1 = fig1.add_subplot(111)
+ax2 = fig2.add_subplot(111)
+ax3 = fig3.add_subplot(111)
+
+# plot fluid velocity
+fluid_vel = np.mean(np.linalg.norm(
+    lb_fluid[:, :, :].velocity, axis=-1), axis=-1)
+mask = np.ones(fluid_vel.shape) * np.nan
+mask[np.nonzero(np.squeeze(lb_fluid[:, :, 0].is_boundary))] = 0
+img = ax1.imshow(fluid_vel.T, origin='lower', interpolation='bilinear')
+cbar = plt.colorbar(img, ax=ax1)
+cbar.set_label('Fluid velocity (MD units)', rotation=90, labelpad=10)
+ax1.imshow(mask.T, origin='lower', interpolation='nearest')
+ax1.set_xticks([])
+ax1.set_yticks([])
+ax1.set(xlabel='x-axis', ylabel='y-axis')
+
+# plot fluid velocity between the rollers
+ax2.plot(agrid * np.arange(fluid_vel.shape[1]),
+         np.mean(fluid_vel[31:33, :], axis=0), label='$V(x, y=L / 2)$')
+ax2.set_xticks(np.arange(0, system.box_l[1] + 1, 4.0))
+ax2.set(xlabel='x-axis (MD units)', ylabel='Fluid velocity (MD units)')
+ax2.legend()
+
+# plot boundary geometry
+cmap = matplotlib.colors.ListedColormap(['white', 'silver', 'silver'])
+cmap_bounds = [0, 1, 2]
+cmap_norm = matplotlib.colors.BoundaryNorm(cmap_bounds, cmap.N)
+ax3.imshow(boundary_mask.T, origin='lower', interpolation='nearest', cmap=cmap,
+           norm=cmap_norm)
+
+# add grid lines based on minor ticks
+minor_locator = matplotlib.ticker.FixedLocator(np.arange(0.5, grid_size[0], 1))
+ax3.xaxis.set_minor_locator(minor_locator)
+ax3.yaxis.set_minor_locator(minor_locator)
+ax3.tick_params(axis='both', which='minor', length=0)
+ax3.grid(which='minor', color='w', linestyle='-', linewidth=1.2, zorder=2)
+
+# remove major ticks
+ax3.set_xticks([])
+ax3.set_yticks([])
+
+# add cylinder radii
+# for cyl in rollers:
+#     circle = plt.Circle(
+#         cyl.center[:2] / agrid - agrid, cyl.radius / agrid,
+#         color='r', fill=False, zorder=3)
+#     ax3.add_patch(circle)
+
+# plot velocity field
+quivers_boundary = np.array(quivers_boundary)
+quivers_fluid = np.array(quivers_fluid)
+ax3.quiver(quivers_boundary[:, 0], quivers_boundary[:, 1], quivers_boundary[:, 2],
+           quivers_boundary[:, 3], scale=.44, width=0.002, color='black',
+           zorder=4, label='slip velocity')
+ax3.quiver(quivers_fluid[:, 0], quivers_fluid[:, 1], quivers_fluid[:, 2],
+           quivers_fluid[:, 3], scale=.44, width=0.002, color='royalblue',
+           zorder=4, label='fluid velocity')
+ax3.set(xlabel='x-axis', ylabel='y-axis')
+ax3.legend(framealpha=1, loc='upper right')
+
+plt.show()
diff --git a/samples/lb_planar_couette.py b/samples/lb_planar_couette.py
new file mode 100644
index 00000000000..8ffbc8f0680
--- /dev/null
+++ b/samples/lb_planar_couette.py
@@ -0,0 +1,108 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+"""
+Simulate the flow profile of a lattice-Boltzmann fluid between two
+shear planes with Lees-Edwards boundary conditions and compare it
+to the analytical solution.
+"""
+
+import espressomd
+import espressomd.lb
+import espressomd.lees_edwards
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+required_features = ["WALBERLA"]
+espressomd.assert_features(required_features)
+
+
+def analytical(x, t, nu, v, h, k_max):
+    """
+    Analytical solution with Fourier series of the Navier-Stokes equation.
+
+    Parameters
+    ----------
+    x : :obj:`float`
+        Height within the channel
+    t : :obj:`float`
+        Time since the start up of the shear flow
+    nu: :obj:`float`
+        Kinematic viscosity
+    v: :obj:`float`
+        Shearing velocity
+    h : :obj:`float`
+        Distance between shear planes
+    k_max : :obj:`int`
+        Upper limit of sums for sinus series
+    """
+    u = x / h - 0.5
+    for k in np.arange(1, k_max + 1):
+        wave = 2 * np.pi * k / h
+        u += np.exp(-nu * wave ** 2 * t) * np.sin(wave * x) / (np.pi * k)
+    return v * u
+
+
+# LB and LE parameters
+nu = 1. / 6.
+h = 64.0
+v = 0.02
+k_max = 100
+
+system = espressomd.System(box_l=[h, 64, 1])
+system.time_step = 1.
+system.cell_system.skin = 0.1
+system.cell_system.set_n_square()
+
+system.lees_edwards.set_boundary_conditions(
+    shear_direction="x", shear_plane_normal="y",
+    protocol=espressomd.lees_edwards.LinearShear(
+        shear_velocity=v, initial_pos_offset=0.0, time_0=0.0))
+
+lbf = espressomd.lb.LBFluidWalberla(
+    agrid=1., density=1., kinematic_viscosity=nu, tau=1.)
+system.actors.add(lbf)
+
+# sampling
+time_breakpoints = [50, 200, 500, 2000]
+pos_breakpoints = 256
+for steps in time_breakpoints:
+    steps -= int(system.time) - 1
+    system.integrator.run(steps)
+    time = system.time - 1.
+    position_ref = np.linspace(0.5, 63.5, pos_breakpoints)
+    position_lbf = np.linspace(0.5, 63.5, 64)
+    velocity_ref = analytical(position_ref, time, nu, v, h, k_max)
+    velocity_lbf = np.copy(lbf[5, :, 0].velocity[:, 0].reshape([-1]))
+    ax = plt.gca()
+    color = next(ax._get_lines.prop_cycler)['color']
+    plt.plot(velocity_ref, position_ref, '-', color=color,
+             label=f"Analytical solution at t={time:.0f}")
+    plt.plot(velocity_lbf, position_lbf, 'o', color=color,
+             label=f"Simulated profile at t={time:.0f}")
+
+plt.xlabel('shear velocity')
+plt.ylabel('y-position')
+# format legend in 2 columns
+ax = plt.gca()
+handles, labels = ax.get_legend_handles_labels()
+handles, labels = zip(*sorted(zip(handles, labels), key=lambda x: x[1][0]))
+ax.legend(handles, labels, ncol=2)
+plt.show()
diff --git a/samples/lb_profile.py b/samples/lb_profile.py
index 63ebad94ecc..6043a649fce 100644
--- a/samples/lb_profile.py
+++ b/samples/lb_profile.py
@@ -28,11 +28,10 @@
 import espressomd.lb
 import espressomd.observables
 import espressomd.shapes
-import espressomd.lbboundaries
 import espressomd.accumulators
 import espressomd.math
 
-required_features = ["LB_BOUNDARIES"]
+required_features = ["WALBERLA"]
 espressomd.assert_features(required_features)
 
 system = espressomd.System(box_l=[10.0, 10.0, 5.0])
@@ -42,9 +41,9 @@
 n_steps_warmup = 1000
 n_steps = 800
 
-lb_fluid = espressomd.lb.LBFluid(
-    agrid=1.0, dens=1.0, visc=1.0, tau=0.01,
-    ext_force_density=[0, 0, 0.15], kT=1.0, seed=32)
+lb_fluid = espressomd.lb.LBFluidWalberla(
+    agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=0.01,
+    ext_force_density=[0, 0, 0.15], kT=0.0)
 system.actors.add(lb_fluid)
 system.thermostat.set_lb(LB_fluid=lb_fluid, seed=23)
 ctp = espressomd.math.CylindricalTransformationParameters(
@@ -64,8 +63,7 @@
     direction=-1,
     radius=radius,
     length=20.0)
-cylinder_boundary = espressomd.lbboundaries.LBBoundary(shape=cylinder_shape)
-system.lbboundaries.add(cylinder_boundary)
+lb_fluid.add_boundary_from_shape(cylinder_shape)
 
 # equilibrate fluid
 system.integrator.run(n_steps_warmup)
diff --git a/samples/lbf.py b/samples/lbf.py
index 2792d681ead..643fb172469 100644
--- a/samples/lbf.py
+++ b/samples/lbf.py
@@ -38,7 +38,7 @@
 =======================================================
 """)
 
-required_features = ["EXTERNAL_FORCES"]
+required_features = ["WALBERLA", "EXTERNAL_FORCES"]
 if args.gpu:
     print("Using GPU implementation")
     required_features.append("CUDA")
@@ -59,13 +59,13 @@
 particle = system.part.add(pos=[box_l / 2.0] * 3, fix=[True, True, True])
 
 
-lb_params = {'agrid': 1, 'dens': 1, 'visc': 1, 'tau': 0.01,
+lb_params = {'agrid': 1, 'density': 1, 'kinematic_viscosity': 1, 'tau': 0.01,
              'ext_force_density': [0, 0, -1.0 / (box_l**3)]}
 
 if args.gpu:
-    lbf = espressomd.lb.LBFluidGPU(**lb_params)
+    lbf = espressomd.lb.LBFluidWalberlaGPU(**lb_params)
 else:
-    lbf = espressomd.lb.LBFluid(**lb_params)
+    lbf = espressomd.lb.LBFluidWalberla(**lb_params)
 system.actors.add(lbf)
 system.thermostat.set_lb(LB_fluid=lbf, gamma=1.0)
 print(lbf.get_params())
diff --git a/samples/object_in_fluid/motivation.py b/samples/object_in_fluid/motivation.py
index 5cf2d5653dd..52f5316ae82 100644
--- a/samples/object_in_fluid/motivation.py
+++ b/samples/object_in_fluid/motivation.py
@@ -20,14 +20,13 @@
 """
 
 import espressomd
-import espressomd.lbboundaries
 import espressomd.shapes
 
-required_features = ["LB_BOUNDARIES", "EXTERNAL_FORCES", "SOFT_SPHERE",
-                     "MASS"]
+required_features = ["WALBERLA", "EXTERNAL_FORCES", "SOFT_SPHERE", "MASS"]
 espressomd.assert_features(required_features)
 
 import os
+import tqdm
 import argparse
 import warnings
 
@@ -48,8 +47,8 @@
 
 boxX = 22.0
 boxY = 14.0
-boxZ = 15.0
-time_step = 0.1
+boxZ = 6.0
+time_step = 0.05
 
 system = espressomd.System(box_l=(boxX, boxY, boxZ))
 system.time_step = time_step
@@ -65,92 +64,89 @@
 # creating the RBCs
 cell0 = oif.OifCell(cell_type=cell_type,
                     particle_type=0, origin=[5.0, 5.0, 3.0])
-cell1 = oif.OifCell(cell_type=cell_type,
-                    particle_type=1, origin=[5.0, 5.0, 7.0])
 
 # cell-wall interactions
-system.non_bonded_inter[0, 10].soft_sphere.set_params(
-    a=0.0001, n=1.2, cutoff=0.1, offset=0.0)
-system.non_bonded_inter[1, 10].soft_sphere.set_params(
+system.non_bonded_inter[cell0.particle_type, 10].soft_sphere.set_params(
     a=0.0001, n=1.2, cutoff=0.1, offset=0.0)
 
 # fluid
-lbf = espressomd.lb.LBFluid(agrid=1, dens=1.0, visc=1.5, tau=0.1,
-                            ext_force_density=[0.002, 0.0, 0.0])
+lbf = espressomd.lb.LBFluidWalberla(
+    agrid=1., density=1., kinematic_viscosity=1.5, tau=system.time_step,
+    ext_force_density=[0.025, 0., 0.], single_precision=True)
 system.actors.add(lbf)
 system.thermostat.set_lb(LB_fluid=lbf, gamma=1.5)
 
 # creating boundaries and obstacles in the channel
 # OutputVtk writes a file
-# lbboundaries created boundaries for fluid
-# constraints created boundaries for the cells
+# boundaries for the fluid are set up by marking LB nodes as boundaries, here with the help of shapes
+# boundaries for the cells are created by creating constraints from the shapes
 
-boundaries = []
+boundary_shapes = []
 
 # bottom of the channel
 bottom_shape = espressomd.shapes.Rhomboid(corner=[0.0, 0.0, 0.0], a=[boxX, 0.0, 0.0],
                                           b=[0.0, boxY, 0.0], c=[0.0, 0.0, 1.0],
                                           direction=1)
-boundaries.append(bottom_shape)
+boundary_shapes.append(bottom_shape)
 output_vtk_rhomboid(
     bottom_shape, out_file=os.path.join(output_path, "wallBottom.vtk"))
 
 # top of the channel
 top_shape = espressomd.shapes.Rhomboid(corner=[0.0, 0.0, boxZ - 1], a=[boxX, 0.0, 0.0],
                                        b=[0.0, boxY, 0.0], c=[0.0, 0.0, 1.0], direction=1)
-boundaries.append(top_shape)
+boundary_shapes.append(top_shape)
 output_vtk_rhomboid(
     top_shape, out_file=os.path.join(output_path, "wallTop.vtk"))
 
 # front wall of the channel
 front_shape = espressomd.shapes.Rhomboid(corner=[0.0, 0.0, 0.0], a=[boxX, 0.0, 0.0],
                                          b=[0.0, 1.0, 0.0], c=[0.0, 0.0, boxZ], direction=1)
-boundaries.append(front_shape)
+boundary_shapes.append(front_shape)
 output_vtk_rhomboid(
     front_shape, out_file=os.path.join(output_path, "wallFront.vtk"))
 
 # back wall of the channel
 back_shape = espressomd.shapes.Rhomboid(corner=[0.0, boxY - 1.0, 0.0], a=[boxX, 0.0, 0.0],
                                         b=[0.0, 1.0, 0.0], c=[0.0, 0.0, boxZ], direction=1)
-boundaries.append(back_shape)
+boundary_shapes.append(back_shape)
 output_vtk_rhomboid(
     back_shape, out_file=os.path.join(output_path, "wallBack.vtk"))
 
 # obstacle - cylinder A
 cylA_shape = espressomd.shapes.Cylinder(center=[11.0, 2.0, boxZ / 2.], axis=[0.0, 0.0, 1.0],
                                         length=boxZ, radius=2.0, direction=1)
-boundaries.append(cylA_shape)
+boundary_shapes.append(cylA_shape)
 output_vtk_cylinder(
     cylA_shape, n=20, out_file=os.path.join(output_path, "cylinderA.vtk"))
 
 # obstacle - cylinder B
 cylB_shape = espressomd.shapes.Cylinder(center=[16.0, 8.0, boxZ / 2.], axis=[0.0, 0.0, 1.0],
                                         length=boxZ, radius=2.0, direction=1)
-boundaries.append(cylB_shape)
+boundary_shapes.append(cylB_shape)
 output_vtk_cylinder(
     cylB_shape, n=20, out_file=os.path.join(output_path, "cylinderB.vtk"))
 
 # obstacle - cylinder C
 cylC_shape = espressomd.shapes.Cylinder(center=[11.0, 12.0, boxZ / 2.], axis=[0.0, 0.0, 1.0],
                                         length=boxZ, radius=2.0, direction=1)
-boundaries.append(cylC_shape)
+boundary_shapes.append(cylC_shape)
 output_vtk_cylinder(
     cylC_shape, n=20, out_file=os.path.join(output_path, "cylinderC.vtk"))
 
-for boundary in boundaries:
-    system.lbboundaries.add(espressomd.lbboundaries.LBBoundary(shape=boundary))
-    system.constraints.add(shape=boundary, particle_type=10)
+for shape in boundary_shapes:
+    lbf.add_boundary_from_shape(shape)
+    system.constraints.add(shape=shape, particle_type=10)
+
+
+def write_cells_vtk(i):
+    filepath = os.path.join(output_path, "cell{cell_id}_{index}.vtk")
+    cell0.output_vtk_pos_folded(file_name=filepath.format(cell_id=0, index=i))
 
 
-maxCycle = 50
+maxCycle = 100
 # main integration loop
-cell0.output_vtk_pos_folded(file_name=os.path.join(output_path, "cell0_0.vtk"))
-cell1.output_vtk_pos_folded(file_name=os.path.join(output_path, "cell1_0.vtk"))
-for i in range(1, maxCycle):
-    system.integrator.run(steps=500)
-    cell0.output_vtk_pos_folded(
-        file_name=os.path.join(output_path, f"cell0_{i}.vtk"))
-    cell1.output_vtk_pos_folded(
-        file_name=os.path.join(output_path, f"cell1_{i}.vtk"))
-    print(f"time: {i * time_step:.1f}")
+for i in tqdm.tqdm(range(maxCycle)):
+    write_cells_vtk(i)
+    system.integrator.run(steps=100)
+write_cells_vtk(maxCycle)
 print("Simulation completed.")
diff --git a/samples/visualization_lbboundaries.py b/samples/visualization_lbboundaries.py
index 77b96fbd565..92ca3b59b7f 100644
--- a/samples/visualization_lbboundaries.py
+++ b/samples/visualization_lbboundaries.py
@@ -24,18 +24,17 @@
 import espressomd
 import espressomd.lb
 import espressomd.shapes
-import espressomd.lbboundaries
 import espressomd.visualization
 
-required_features = ["LB_BOUNDARIES"]
+required_features = ["WALBERLA"]
 espressomd.assert_features(required_features)
 
 system = espressomd.System(box_l=[10.0, 10.0, 5.0])
 system.time_step = 0.01
 system.cell_system.skin = 0.4
 
-lb_fluid = espressomd.lb.LBFluid(
-    agrid=1.0, dens=1.0, visc=1.0, tau=0.01, ext_force_density=[0, 0, 0.15])
+lb_fluid = espressomd.lb.LBFluidWalberla(
+    agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=0.01, ext_force_density=[0, 0, 0.15])
 system.actors.add(lb_fluid)
 
 cylinder_shape = espressomd.shapes.Cylinder(
@@ -44,8 +43,7 @@
     direction=-1,
     radius=4.0,
     length=20.0)
-cylinder_boundary = espressomd.lbboundaries.LBBoundary(shape=cylinder_shape)
-system.lbboundaries.add(cylinder_boundary)
+lb_fluid.add_boundary_from_shape(cylinder_shape)
 
 visualizer = espressomd.visualization.openGLLive(
     system,
diff --git a/samples/visualization_poiseuille.py b/samples/visualization_poiseuille.py
index 4cf9eda33b6..94c7f614cc2 100644
--- a/samples/visualization_poiseuille.py
+++ b/samples/visualization_poiseuille.py
@@ -24,12 +24,11 @@
 
 import espressomd
 import espressomd.lb
-import espressomd.lbboundaries
 import espressomd.shapes
 import espressomd.visualization
 import numpy as np
 
-required_features = ["LB_BOUNDARIES", "EXTERNAL_FORCES"]
+required_features = ["WALBERLA", "EXTERNAL_FORCES"]
 espressomd.assert_features(required_features)
 
 # System setup
@@ -54,21 +53,20 @@
     velocity_arrows_type_radii=[0.1],
     velocity_arrows_type_colors=[[0, 1, 0]])
 
-lbf = espressomd.lb.LBFluid(kT=0, agrid=1.0, dens=1.0, visc=1.0, tau=0.1,
-                            ext_force_density=[0, 0.003, 0])
+lbf = espressomd.lb.LBFluidWalberla(kT=0, agrid=1.0, density=1.0, kinematic_viscosity=1.0,
+                                    tau=0.1, ext_force_density=[0, 0.003, 0])
 system.actors.add(lbf)
 system.thermostat.set_lb(LB_fluid=lbf, gamma=1.5)
 
 # Setup boundaries
-walls = [espressomd.lbboundaries.LBBoundary() for k in range(2)]
-walls[0].set_params(shape=espressomd.shapes.Wall(normal=[1, 0, 0], dist=1.5))
-walls[1].set_params(shape=espressomd.shapes.Wall(
-    normal=[-1, 0, 0], dist=-14.5))
+wall_shapes = [None] * 2
+wall_shapes[0] = espressomd.shapes.Wall(normal=[1, 0, 0], dist=1.5)
+wall_shapes[1] = espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-14.5)
 
 for i in range(100):
     system.part.add(pos=np.random.random(3) * system.box_l)
 
-for wall in walls:
-    system.lbboundaries.add(wall)
+for wall_shape in wall_shapes:
+    lbf.add_boundary_from_shape(wall_shape)
 
 visualizer.run(1)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 14175bfd6fb..40e23972f3c 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -40,6 +40,10 @@ if(ESPRESSO_BUILD_WITH_SCAFACOS)
   add_subdirectory(scafacos)
 endif()
 
+if(ESPRESSO_BUILD_WITH_WALBERLA)
+  add_subdirectory(walberla_bridge)
+endif()
+
 if(ESPRESSO_BUILD_WITH_PYTHON)
   add_subdirectory(script_interface)
   add_subdirectory(python)
diff --git a/src/config/features.def b/src/config/features.def
index 10e462e3615..4e27c01ae43 100644
--- a/src/config/features.def
+++ b/src/config/features.def
@@ -75,14 +75,7 @@ VIRTUAL_SITES_INERTIALESS_TRACERS implies VIRTUAL_SITES
 DPD
 
 /* Lattice-Boltzmann features */
-LB_BOUNDARIES
-LB_BOUNDARIES_GPU               requires CUDA
 LB_ELECTROHYDRODYNAMICS
-ELECTROKINETICS                 implies EXTERNAL_FORCES, ELECTROSTATICS
-ELECTROKINETICS                 requires CUDA
-EK_BOUNDARIES                   implies ELECTROKINETICS, LB_BOUNDARIES_GPU, EXTERNAL_FORCES, ELECTROSTATICS
-EK_BOUNDARIES                   requires CUDA
-EK_DEBUG                        requires ELECTROKINETICS
 
 /* Interaction features */
 TABULATED
@@ -120,4 +113,6 @@ HDF5 external
 SCAFACOS external
 GSL external
 STOKESIAN_DYNAMICS external
+WALBERLA external
+WALBERLA_FFT external
 VALGRIND_MARKERS external
diff --git a/src/config/myconfig-default.hpp b/src/config/myconfig-default.hpp
index 5b3b0ad410f..4e8c8df611a 100644
--- a/src/config/myconfig-default.hpp
+++ b/src/config/myconfig-default.hpp
@@ -48,14 +48,6 @@
 // Active matter
 #define ENGINE
 
-// Hydrodynamics, Electrokinetics
-#define LB_BOUNDARIES
-#ifdef CUDA
-#define LB_BOUNDARIES_GPU
-#define ELECTROKINETICS
-#define EK_BOUNDARIES
-#endif
-
 // Force/energy calculation
 #define EXCLUSIONS
 
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 04465777b7d..72830217e30 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -61,27 +61,13 @@ set_target_properties(espresso_core PROPERTIES CXX_CLANG_TIDY
                                                "${ESPRESSO_CXX_CLANG_TIDY}")
 
 if(ESPRESSO_BUILD_WITH_CUDA)
-  target_sources(
-    espresso_core
-    PRIVATE cuda_init.cpp cuda_interface.cpp
-            grid_based_algorithms/electrokinetics.cpp
-            grid_based_algorithms/lbgpu.cpp)
+  target_sources(espresso_core PRIVATE cuda_init.cpp cuda_interface.cpp)
   espresso_add_gpu_library(
-    espresso_cuda
-    SHARED
-    cuda_common_cuda.cu
-    cuda_init_cuda.cu
-    CudaHostAllocator.cu
-    magnetostatics/barnes_hut_gpu_cuda.cu
+    espresso_cuda SHARED cuda_common_cuda.cu cuda_init_cuda.cu
+    CudaHostAllocator.cu magnetostatics/barnes_hut_gpu_cuda.cu
     magnetostatics/dipolar_direct_sum_gpu_cuda.cu
-    electrostatics/mmm1d_gpu_cuda.cu
-    electrostatics/p3m_gpu_cuda.cu
-    electrostatics/p3m_gpu_error_cuda.cu
-    EspressoSystemInterface_cuda.cu
-    grid_based_algorithms/electrokinetics_cuda.cu
-    grid_based_algorithms/lbgpu_cuda.cu
-    grid_based_algorithms/fd-electrostatics_cuda.cu
-    virtual_sites/lb_inertialess_tracers_cuda.cu)
+    electrostatics/mmm1d_gpu_cuda.cu electrostatics/p3m_gpu_cuda.cu
+    electrostatics/p3m_gpu_error_cuda.cu EspressoSystemInterface_cuda.cu)
   add_library(espresso::cuda ALIAS espresso_cuda)
   target_link_libraries(
     espresso_cuda PRIVATE CUDA::cuda_driver CUDA::cudart CUDA::cufft
@@ -89,8 +75,6 @@ if(ESPRESSO_BUILD_WITH_CUDA)
   target_include_directories(
     espresso_cuda
     PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
-            ${CMAKE_CURRENT_SOURCE_DIR}/virtual_sites
-            ${CMAKE_CURRENT_SOURCE_DIR}/grid_based_algorithms
             ${CMAKE_CURRENT_SOURCE_DIR}/electrostatics
             ${CMAKE_CURRENT_SOURCE_DIR}/magnetostatics)
   set_target_properties(espresso_cuda PROPERTIES CUDA_CLANG_TIDY
diff --git a/src/core/analysis/statistics.cpp b/src/core/analysis/statistics.cpp
index 27205a69f84..b48c0619676 100644
--- a/src/core/analysis/statistics.cpp
+++ b/src/core/analysis/statistics.cpp
@@ -28,7 +28,6 @@
 
 #include "Particle.hpp"
 #include "cells.hpp"
-#include "communication.hpp"
 #include "errorhandling.hpp"
 #include "grid.hpp"
 #include "grid_based_algorithms/lb_interface.hpp"
@@ -84,8 +83,8 @@ Utils::Vector3d calc_linear_momentum(bool include_particles,
                           return m + p.mass() * p.v();
                         });
   }
-  if (include_lbfluid) {
-    momentum += lb_lbfluid_calc_fluid_momentum();
+  if (include_lbfluid and lattice_switch != ActiveLB::NONE) {
+    momentum += LB::calc_fluid_momentum() * LB::get_lattice_speed();
   }
   return momentum;
 }
diff --git a/src/core/communication.cpp b/src/core/communication.cpp
index 2cee973cc02..7c4a430306a 100644
--- a/src/core/communication.cpp
+++ b/src/core/communication.cpp
@@ -19,12 +19,18 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "config/config.hpp"
+
 #include "communication.hpp"
 
 #include "errorhandling.hpp"
 #include "event.hpp"
 #include "grid.hpp"
 
+#ifdef WALBERLA
+#include <walberla_bridge/walberla_init.hpp>
+#endif
+
 #include <utils/mpi/cart_comm.hpp>
 
 #include <boost/mpi.hpp>
@@ -73,6 +79,10 @@ void init(std::shared_ptr<boost::mpi::environment> mpi_env) {
 
   ErrorHandling::init_error_handling(mpiCallbacks());
 
+#ifdef WALBERLA
+  walberla::mpi_init();
+#endif
+
   on_program_start();
 }
 } // namespace Communication
diff --git a/src/core/cuda_utils.cuh b/src/core/cuda_utils.cuh
index a72be413dd6..6bdafb96a6e 100644
--- a/src/core/cuda_utils.cuh
+++ b/src/core/cuda_utils.cuh
@@ -27,7 +27,6 @@
 
 #include <cuda.h>
 
-#include <cassert>
 #include <string>
 
 class cuda_runtime_error_cuda : public cuda_runtime_error {
@@ -71,18 +70,6 @@ void cuda_check_errors_exit(const dim3 &block, const dim3 &grid,
 
 #define cuda_safe_mem(a) cuda_safe_mem_exit((a), __FILE__, __LINE__)
 
-/** Calculate @c dim_grid for CUDA kernel calls. */
-inline dim3 calculate_dim_grid(unsigned const threads_x,
-                               unsigned const blocks_per_grid_y,
-                               unsigned const threads_per_block) {
-  assert(threads_x >= 1);
-  assert(blocks_per_grid_y >= 1);
-  assert(threads_per_block >= 1);
-  auto const threads_y = threads_per_block * blocks_per_grid_y;
-  auto const blocks_per_grid_x = (threads_x + threads_y - 1) / threads_y;
-  return make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1);
-}
-
 #define KERNELCALL_shared(_function, _grid, _block, _stream, ...)              \
   _function<<<_grid, _block, _stream, stream[0]>>>(__VA_ARGS__);               \
   cuda_check_errors_exit(_grid, _block, #_function, __FILE__, __LINE__);
diff --git a/src/core/electrostatics/coulomb.cpp b/src/core/electrostatics/coulomb.cpp
index c6073e34791..0f4a5860306 100644
--- a/src/core/electrostatics/coulomb.cpp
+++ b/src/core/electrostatics/coulomb.cpp
@@ -30,7 +30,6 @@
 #include "communication.hpp"
 #include "electrostatics/icc.hpp"
 #include "errorhandling.hpp"
-#include "grid_based_algorithms/electrokinetics.hpp"
 #include "integrate.hpp"
 #include "npt.hpp"
 #include "partCfg_global.hpp"
@@ -284,12 +283,6 @@ void calc_long_range_force(ParticleRange const &particles) {
   if (electrostatics_actor) {
     boost::apply_visitor(LongRangeForce(particles), *electrostatics_actor);
   }
-#ifdef ELECTROKINETICS
-  /* Add fields from EK if enabled */
-  if (this_node == 0) {
-    ek_calculate_electrostatic_coupling();
-  }
-#endif
 }
 
 double calc_energy_long_range(ParticleRange const &particles) {
diff --git a/src/core/event.cpp b/src/core/event.cpp
index bf1a3588d16..18dc0ef34bc 100644
--- a/src/core/event.cpp
+++ b/src/core/event.cpp
@@ -38,8 +38,6 @@
 #include "electrostatics/icc.hpp"
 #include "errorhandling.hpp"
 #include "grid.hpp"
-#include "grid_based_algorithms/electrokinetics.hpp"
-#include "grid_based_algorithms/lb_boundaries.hpp"
 #include "grid_based_algorithms/lb_interface.hpp"
 #include "immersed_boundaries.hpp"
 #include "integrate.hpp"
@@ -97,14 +95,12 @@ void on_integration_start(double time_step) {
   integrator_npt_sanity_checks();
 #endif
   long_range_interactions_sanity_checks();
-  lb_lbfluid_sanity_checks(time_step);
+  LB::sanity_checks(time_step);
 
   /********************************************/
   /* end sanity checks                        */
   /********************************************/
 
-  lb_lbfluid_on_integration_start();
-
 #ifdef CUDA
   MPI_Bcast(gpu_get_global_particle_vars_pointer_host(),
             sizeof(CUDA_global_part_vars), MPI_BYTE, 0, comm_cart);
@@ -168,12 +164,6 @@ void on_observable_calc() {
   }
 #endif /* DIPOLES */
 
-#ifdef ELECTROKINETICS
-  if (ek_initialized) {
-    ek_integrate_electrostatics();
-  }
-#endif /* ELECTROKINETICS */
-
   clear_particle_node();
 }
 
@@ -248,13 +238,7 @@ void on_short_range_ia_change() {
 
 void on_constraint_change() { recalc_forces = true; }
 
-void on_lbboundary_change() {
-#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU)
-  LBBoundaries::lb_init_boundaries();
-
-  recalc_forces = true;
-#endif
-}
+void on_lb_boundary_conditions_change() { recalc_forces = true; }
 
 void on_boxl_change(bool skip_method_adaption) {
   grid_changed_box_l(box_geo);
@@ -272,16 +256,20 @@ void on_boxl_change(bool skip_method_adaption) {
     Dipoles::on_boxl_change();
 #endif
 
-    lb_lbfluid_init();
-#ifdef LB_BOUNDARIES
-    LBBoundaries::lb_init_boundaries();
-#endif
+    LB::init();
   }
 }
 
 void on_cell_structure_change() {
   clear_particle_node();
 
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+    throw std::runtime_error(
+        "LB does not currently support handling changes of the MD cell "
+        "geometry. Setup the cell system, skin and interactions before "
+        "activating the CPU LB.");
+  }
+
   /* Now give methods a chance to react to the change in cell structure.
    * Most ES methods need to reinitialize, as they depend on skin,
    * node grid and so on. */
@@ -294,7 +282,11 @@ void on_cell_structure_change() {
 #endif
 }
 
-void on_temperature_change() { lb_lbfluid_reinit_parameters(); }
+void on_temperature_change() {
+  if (lattice_switch != ActiveLB::NONE) {
+    throw std::runtime_error("Temperature change not supported by LB");
+  }
+}
 
 void on_periodicity_change() {
 #ifdef ELECTROSTATICS
@@ -323,7 +315,9 @@ void on_skin_change() {
 void on_thermostat_param_change() { reinit_thermo = true; }
 
 void on_timestep_change() {
-  lb_lbfluid_reinit_parameters();
+  if (lattice_switch != ActiveLB::NONE) {
+    throw std::runtime_error("Time step change not supported by LB");
+  }
   on_thermostat_param_change();
 }
 
@@ -349,7 +343,7 @@ unsigned global_ghost_flags() {
   /* Position and Properties are always requested. */
   unsigned data_parts = Cells::DATA_PART_POSITION | Cells::DATA_PART_PROPERTIES;
 
-  if (lattice_switch == ActiveLB::CPU)
+  if (lattice_switch == ActiveLB::WALBERLA_LB)
     data_parts |= Cells::DATA_PART_MOMENTUM;
 
   if (thermo_switch & THERMO_DPD)
diff --git a/src/core/event.hpp b/src/core/event.hpp
index 8f1f56c6af6..3fda162f9ad 100644
--- a/src/core/event.hpp
+++ b/src/core/event.hpp
@@ -130,8 +130,10 @@ void on_node_grid_change();
 
 unsigned global_ghost_flags();
 
-/** called every time the walls for the lb fluid are changed */
-void on_lbboundary_change();
+/** @brief Called when the LB boundary conditions are changed
+ *  (geometry, slip velocity, or both).
+ */
+void on_lb_boundary_conditions_change();
 
 /** @brief Update particles with properties depending on other particles,
  *  namely virtual sites and ICC charges.
diff --git a/src/core/forces.cpp b/src/core/forces.cpp
index 7a553d389a2..b2db558000e 100644
--- a/src/core/forces.cpp
+++ b/src/core/forces.cpp
@@ -37,7 +37,6 @@
 #include "forcecap.hpp"
 #include "forces_inline.hpp"
 #include "galilei/ComFixed.hpp"
-#include "grid_based_algorithms/electrokinetics.hpp"
 #include "grid_based_algorithms/lb_interface.hpp"
 #include "grid_based_algorithms/lb_particle_coupling.hpp"
 #include "immersed_boundaries.hpp"
@@ -225,8 +224,10 @@ void force_calc(CellStructure &cell_structure, double time_step, double kT) {
   // Must be done here. Forces need to be ghost-communicated
   immersed_boundaries.volume_conservation(cell_structure);
 
-  lb_lbcoupling_calc_particle_lattice_ia(thermo_virtual, particles,
-                                         ghost_particles, time_step);
+  if (lattice_switch != ActiveLB::NONE) {
+    lb_lbcoupling_calc_particle_lattice_ia(thermo_virtual, particles,
+                                           ghost_particles, time_step);
+  }
 
 #ifdef CUDA
   copy_forces_from_GPU(particles, this_node);
diff --git a/src/core/grid.cpp b/src/core/grid.cpp
index 17ef5a4df28..5f8aca36b72 100644
--- a/src/core/grid.cpp
+++ b/src/core/grid.cpp
@@ -44,18 +44,6 @@ Utils::Vector3i node_grid{};
 
 void init_node_grid() { grid_changed_n_nodes(); }
 
-int map_position_node_array(const Utils::Vector3d &pos) {
-  auto const f_pos = folded_position(pos, box_geo);
-
-  Utils::Vector3i im;
-  for (unsigned int i = 0; i < 3; i++) {
-    im[i] = static_cast<int>(std::floor(f_pos[i] / local_geo.length()[i]));
-    im[i] = std::clamp(im[i], 0, node_grid[i] - 1);
-  }
-
-  return Utils::Mpi::cart_rank(comm_cart, im);
-}
-
 Utils::Vector3i calc_node_pos(const boost::mpi::communicator &comm) {
   return Utils::Mpi::cart_coords<3>(comm, comm.rank());
 }
diff --git a/src/core/grid.hpp b/src/core/grid.hpp
index a690695d7f6..2b8a17693e6 100644
--- a/src/core/grid.hpp
+++ b/src/core/grid.hpp
@@ -49,9 +49,6 @@ extern Utils::Vector3i node_grid;
  */
 void init_node_grid();
 
-/** @brief Map a spatial position to the node grid */
-int map_position_node_array(const Utils::Vector3d &pos);
-
 /** @brief Fill neighbor lists of node.
  *
  * Calculates the numbers of the nearest neighbors for a node.
diff --git a/src/core/grid_based_algorithms/CMakeLists.txt b/src/core/grid_based_algorithms/CMakeLists.txt
index a568c73a9bc..1f9c29d3a76 100644
--- a/src/core/grid_based_algorithms/CMakeLists.txt
+++ b/src/core/grid_based_algorithms/CMakeLists.txt
@@ -19,11 +19,15 @@
 
 target_sources(
   espresso_core
-  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/halo.cpp
-          ${CMAKE_CURRENT_SOURCE_DIR}/lattice.cpp
-          ${CMAKE_CURRENT_SOURCE_DIR}/lb_boundaries.cpp
-          ${CMAKE_CURRENT_SOURCE_DIR}/lb_collective_interface.cpp
-          ${CMAKE_CURRENT_SOURCE_DIR}/lb.cpp
-          ${CMAKE_CURRENT_SOURCE_DIR}/lb_interface.cpp
+  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/lb_interface.cpp
           ${CMAKE_CURRENT_SOURCE_DIR}/lb_interpolation.cpp
-          ${CMAKE_CURRENT_SOURCE_DIR}/lb_particle_coupling.cpp)
+          ${CMAKE_CURRENT_SOURCE_DIR}/lb_particle_coupling.cpp
+          ${CMAKE_CURRENT_SOURCE_DIR}/ek_container.cpp
+          ${CMAKE_CURRENT_SOURCE_DIR}/ek_reactions.cpp)
+
+if(ESPRESSO_BUILD_WITH_WALBERLA)
+  target_link_libraries(espresso_core PRIVATE espresso::walberla
+                                              ${WALBERLA_LIBS})
+  target_sources(espresso_core
+                 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/lb_walberla_instance.cpp)
+endif()
diff --git a/src/core/grid_based_algorithms/EKReactions.hpp b/src/core/grid_based_algorithms/EKReactions.hpp
new file mode 100644
index 00000000000..b1b7905e920
--- /dev/null
+++ b/src/core/grid_based_algorithms/EKReactions.hpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef ESPRESSO_EKREACTIONS_HPP
+#define ESPRESSO_EKREACTIONS_HPP
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+template <class EKReaction> class EKReactions {
+  using container_type = std::vector<std::shared_ptr<EKReaction>>;
+
+public:
+  using value_type = typename container_type::value_type;
+  using iterator = typename container_type::iterator;
+  using const_iterator = typename container_type::const_iterator;
+
+private:
+  container_type m_ekreactions;
+
+public:
+  void add(std::shared_ptr<EKReaction> const &c) {
+    assert(std::find(m_ekreactions.begin(), m_ekreactions.end(), c) ==
+           m_ekreactions.end());
+
+    m_ekreactions.emplace_back(c);
+  }
+  void remove(std::shared_ptr<EKReaction> const &c) {
+    assert(std::find(m_ekreactions.begin(), m_ekreactions.end(), c) !=
+           m_ekreactions.end());
+    m_ekreactions.erase(
+        std::remove(m_ekreactions.begin(), m_ekreactions.end(), c),
+        m_ekreactions.end());
+  }
+
+  iterator begin() { return m_ekreactions.begin(); }
+  iterator end() { return m_ekreactions.end(); }
+  const_iterator begin() const { return m_ekreactions.begin(); }
+  const_iterator end() const { return m_ekreactions.end(); }
+  [[nodiscard]] bool empty() const { return m_ekreactions.empty(); }
+};
+
+#endif
diff --git a/src/core/grid_based_algorithms/OptionalCounter.hpp b/src/core/grid_based_algorithms/OptionalCounter.hpp
deleted file mode 100644
index 1404e6481da..00000000000
--- a/src/core/grid_based_algorithms/OptionalCounter.hpp
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2020-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef OPTIONAL_COUNTER_HPP
-#define OPTIONAL_COUNTER_HPP
-
-#include <utils/Counter.hpp>
-
-#include <cstdint>
-#include <utility>
-
-/** Re-implementation of a boost::optional for a RNG counter.
- *
- *  Workaround for a compiler error with Clang 9.0, boost 1.71
- *  and CUDA 10.1 (see espressomd/espresso#3650).
- */
-class OptionalCounter {
-private:
-  Utils::Counter<uint64_t> m_counter;
-  bool m_initialized;
-
-public:
-  OptionalCounter() : m_counter{}, m_initialized(false) {}
-  OptionalCounter(Utils::Counter<uint64_t> const &counter)
-      : m_counter(counter), m_initialized(true) {}
-  OptionalCounter &operator=(Utils::Counter<uint64_t> counter) {
-    m_counter = std::move(counter);
-    m_initialized = true;
-    return *this;
-  }
-  template <class Archive>
-  void serialize(Archive &ar, const unsigned int /* version */) {
-    ar &m_counter;
-    ar &m_initialized;
-  }
-  bool is_initialized() noexcept { return m_initialized; }
-  explicit operator bool() const noexcept { return m_initialized; }
-  bool operator!() const noexcept { return !m_initialized; }
-  Utils::Counter<uint64_t> &operator*() { return m_counter; }
-  Utils::Counter<uint64_t> *operator->() { return &m_counter; }
-};
-
-#endif
diff --git a/src/core/grid_based_algorithms/ek_container.cpp b/src/core/grid_based_algorithms/ek_container.cpp
new file mode 100644
index 00000000000..780a8eee574
--- /dev/null
+++ b/src/core/grid_based_algorithms/ek_container.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config/config.hpp"
+
+#include "ek_container.hpp"
+#include "ek_reactions.hpp"
+#include "errorhandling.hpp"
+#include "lb_interface.hpp"
+#include "lb_walberla_instance.hpp"
+
+#ifdef WALBERLA
+#include <walberla_bridge/electrokinetics/EKContainer.hpp>
+#endif // WALBERLA
+
+#include <cmath>
+
+#ifdef WALBERLA
+#include <algorithm>
+#include <cstddef>
+#include <stdexcept>
+#endif // WALBERLA
+
+namespace EK {
+
+#ifdef WALBERLA
+EKContainer<EKinWalberlaBase> ek_container;
+#endif // WALBERLA
+
+double get_tau() {
+#ifdef WALBERLA
+  return ek_container.get_tau();
+#else
+  throw NoEKActive();
+#endif // WALBERLA
+}
+
+int get_steps_per_md_step(double md_timestep) {
+  return static_cast<int>(std::round(get_tau() / md_timestep));
+}
+
+void propagate() {
+#ifdef WALBERLA
+  // first calculate the charge for the potential, for that get all the
+  // field-ids from the ekspecies pass the potential-field-id to the
+  // flux-kernels of the eks for this the integrate function has to be split
+  // with a public interface to diffusive and advective-flux this should also
+  // allow the back-coupling to the LB with a field-id
+
+  if (ek_container.empty()) {
+    return;
+  }
+
+  if (!ek_container.is_poisson_solver_set()) {
+    runtimeErrorMsg() << "EK requires a Poisson solver.";
+    return;
+  }
+
+  ek_container.reset_charge();
+  std::for_each(ek_container.begin(), ek_container.end(), [](auto const &ek) {
+    ek_container.add_charge(ek->get_density_id(), ek->get_valency(),
+                            ek->is_double_precision());
+  });
+
+  ek_container.solve_poisson();
+
+  auto velocity_field_id = std::size_t{};
+  auto force_field_id = std::size_t{};
+  try {
+    auto const lbf = ::lb_walberla();
+    velocity_field_id = lbf->get_velocity_field_id();
+    force_field_id = lbf->get_force_field_id();
+  } catch (std::runtime_error const &) {
+  }
+
+  std::for_each(ek_container.begin(), ek_container.end(),
+                [velocity_field_id, force_field_id](auto const &ek) {
+                  try {
+                    ek->integrate(ek_container.get_potential_field_id(),
+                                  velocity_field_id, force_field_id);
+                  } catch (std::runtime_error const &e) {
+                    runtimeErrorMsg() << e.what();
+                  }
+                });
+
+  EK::perform_reactions();
+
+  for (auto const &species : ek_container) {
+    species->ghost_communication();
+  }
+#endif // WALBERLA
+}
+
+} // namespace EK
diff --git a/src/core/grid_based_algorithms/ek_container.hpp b/src/core/grid_based_algorithms/ek_container.hpp
new file mode 100644
index 00000000000..1892bb5adda
--- /dev/null
+++ b/src/core/grid_based_algorithms/ek_container.hpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef ESPRESSO_EK_CONTAINER_HPP
+#define ESPRESSO_EK_CONTAINER_HPP
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+#include <walberla_bridge/electrokinetics/EKContainer.hpp>
+#include <walberla_bridge/electrokinetics/EKinWalberlaBase.hpp>
+#endif // WALBERLA
+
+#include <stdexcept>
+
+struct NoEKActive : public std::exception {
+  const char *what() const noexcept override { return "EK not activated"; }
+};
+
+namespace EK {
+
+#ifdef WALBERLA
+extern EKContainer<EKinWalberlaBase> ek_container;
+#endif // WALBERLA
+
+double get_tau();
+int get_steps_per_md_step(double md_timestep);
+void propagate();
+
+} // namespace EK
+
+#endif
diff --git a/src/core/virtual_sites/lb_inertialess_tracers.hpp b/src/core/grid_based_algorithms/ek_reactions.cpp
similarity index 55%
rename from src/core/virtual_sites/lb_inertialess_tracers.hpp
rename to src/core/grid_based_algorithms/ek_reactions.cpp
index 666cafa0432..d65fc813984 100644
--- a/src/core/virtual_sites/lb_inertialess_tracers.hpp
+++ b/src/core/grid_based_algorithms/ek_reactions.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 The ESPResSo project
+ * Copyright (C) 2022 The ESPResSo project
  *
  * This file is part of ESPResSo.
  *
@@ -16,22 +16,28 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-/// \file
-/// \brief Main of the Bayreuth Immersed-Boundary implementation
-
-#ifndef VIRTUAL_SITES_LB_INERTIALESS_TRACERS_HPP
-#define VIRTUAL_SITES_LB_INERTIALESS_TRACERS_HPP
 
 #include "config/config.hpp"
 
-#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS
+#ifdef WALBERLA
+
+#include "ek_reactions.hpp"
+
+#include <algorithm>
+
+namespace EK {
+
+EKReactions<walberla::EKReactionBase> ek_reactions;
+
+void perform_reactions() {
+  if (ek_reactions.empty()) {
+    return;
+  }
 
-#include "ParticleRange.hpp"
+  std::for_each(ek_reactions.begin(), ek_reactions.end(),
+                [](auto const &reaction) { reaction->perform_reaction(); });
+}
 
-void IBM_UpdateParticlePositions(ParticleRange const &particles,
-                                 double time_step, int this_node);
-void IBM_ForcesIntoFluid_CPU();
-void IBM_ForcesIntoFluid_GPU(ParticleRange const &particles, int this_node);
+} // namespace EK
 
-#endif // VIRTUAL_SITES_INERTIALESS_TRACERS
-#endif
+#endif // WALBERLA
diff --git a/src/script_interface/lbboundaries/initialize.cpp b/src/core/grid_based_algorithms/ek_reactions.hpp
similarity index 61%
rename from src/script_interface/lbboundaries/initialize.cpp
rename to src/core/grid_based_algorithms/ek_reactions.hpp
index df2d93b5c23..91958140dcc 100644
--- a/src/script_interface/lbboundaries/initialize.cpp
+++ b/src/core/grid_based_algorithms/ek_reactions.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015-2022 The ESPResSo project
+ * Copyright (C) 2022 The ESPResSo project
  *
  * This file is part of ESPResSo.
  *
@@ -17,17 +17,23 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "initialize.hpp"
+#ifndef ESPRESSO_EK_REACTIONS_HPP
+#define ESPRESSO_EK_REACTIONS_HPP
 
-#include "LBBoundaries.hpp"
-#include "LBBoundary.hpp"
+#include "config/config.hpp"
 
-namespace ScriptInterface {
-namespace LBBoundaries {
+#ifdef WALBERLA
 
-void initialize(Utils::Factory<ObjectHandle> *om) {
-  om->register_new<LBBoundaries>("LBBoundaries::LBBoundaries");
-  om->register_new<LBBoundary>("LBBoundaries::LBBoundary");
-}
-} /* namespace LBBoundaries */
-} /* namespace ScriptInterface */
+#include "EKReactions.hpp"
+#include "walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp"
+
+namespace EK {
+
+extern EKReactions<walberla::EKReactionBase> ek_reactions;
+
+void perform_reactions();
+
+} // namespace EK
+
+#endif // WALBERLA
+#endif
diff --git a/src/core/grid_based_algorithms/electrokinetics.hpp b/src/core/grid_based_algorithms/electrokinetics.hpp
deleted file mode 100644
index b9f611c2148..00000000000
--- a/src/core/grid_based_algorithms/electrokinetics.hpp
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef CORE_GRID_BASED_ALGORITHMS_ELECTROKINETICS_HPP
-#define CORE_GRID_BASED_ALGORITHMS_ELECTROKINETICS_HPP
-
-#include "config/config.hpp"
-#include "grid_based_algorithms/lb_boundaries.hpp"
-
-// note that we need to declare the ek_parameters struct and instantiate it for
-// LB_GPU to compile when electrokinetics is not compiled in. This seemed more
-// elegant than ifdeffing multiple versions of the kernel integrate.
-#ifdef CUDA
-
-#define MAX_NUMBER_OF_SPECIES 10
-
-/* Data structure holding parameters and memory pointers for the link flux
- * system. */
-struct EKParameters {
-  float agrid;
-  float time_step; // MD time step
-  float lb_density;
-  unsigned int dim_x;
-  unsigned int dim_x_padded;
-  unsigned int dim_y;
-  unsigned int dim_z;
-  unsigned int number_of_nodes;
-  float viscosity;
-  float bulk_viscosity;
-  float gamma_odd;
-  float gamma_even;
-  float friction;
-  float T;
-  float prefactor;
-  float lb_ext_force_density[3];
-  unsigned int number_of_species;
-  int reaction_species[3];
-  float rho_reactant_reservoir;
-  float rho_product0_reservoir;
-  float rho_product1_reservoir;
-  float reaction_ct_rate;
-  float reaction_fraction_0;
-  float reaction_fraction_1;
-  float mass_reactant;
-  float mass_product0;
-  float mass_product1;
-  int stencil;
-  int number_of_boundary_nodes;
-  float fluctuation_amplitude;
-  bool fluctuations;
-  bool advection;
-  bool fluidcoupling_ideal_contribution;
-  bool es_coupling;
-  float *charge_potential_buffer;
-  float *electric_field;
-  float *charge_potential;
-  float *j;
-  float *lb_force_density_previous;
-#ifdef EK_DEBUG
-  float *j_fluc;
-#endif
-  float *rho[MAX_NUMBER_OF_SPECIES];
-  int species_index[MAX_NUMBER_OF_SPECIES];
-  float density[MAX_NUMBER_OF_SPECIES];
-  float D[MAX_NUMBER_OF_SPECIES];
-  float d[MAX_NUMBER_OF_SPECIES];
-  float valency[MAX_NUMBER_OF_SPECIES];
-  float ext_force_density[3][MAX_NUMBER_OF_SPECIES];
-  char *node_is_catalyst;
-};
-
-#endif
-
-#ifdef ELECTROKINETICS
-
-/* Constants enumerating the links of a node in the link flux system EK_LINK_xyz
-   is the number of the link in direction (x, y, z), where x, y and z can be 0,
-   U or D representing 0 and one agrid in direction of or against the x, y or z
-   axis. The numbering differs from the one used in the LB since the LB
-   velocities are directed but the links are not. Links 0 - 8 represent
-   the odd LB velocities and numbers 13 - 21 represent the even LB velocities
-   (without the 0). In between there are the links connecting the corners, which
-   represent the 3rd shell not used in the LB but in the advection. The
-   following 13 constants are only defined for the sake of completeness.*/
-
-#define EK_LINK_U00 0
-#define EK_LINK_0U0 1
-#define EK_LINK_00U 2
-#define EK_LINK_UU0 3
-#define EK_LINK_UD0 4
-#define EK_LINK_U0U 5
-#define EK_LINK_U0D 6
-#define EK_LINK_0UU 7
-#define EK_LINK_0UD 8
-
-#define EK_LINK_UUU 9
-#define EK_LINK_UUD 10
-#define EK_LINK_UDU 11
-#define EK_LINK_UDD 12
-
-#define EK_LINK_D00 13
-#define EK_LINK_0D0 14
-#define EK_LINK_00D 15
-#define EK_LINK_DD0 16
-#define EK_LINK_DU0 17
-#define EK_LINK_D0D 18
-#define EK_LINK_D0U 19
-#define EK_LINK_0DD 20
-#define EK_LINK_0DU 21
-
-#define EK_LINK_DDD 22
-#define EK_LINK_DDU 23
-#define EK_LINK_DUD 24
-#define EK_LINK_DUU 25
-
-extern EKParameters ek_parameters;
-extern bool ek_initialized;
-
-void ek_integrate();
-void ek_integrate_electrostatics();
-void ek_print_parameters();
-void ek_print_lbpar();
-unsigned int ek_calculate_boundary_mass();
-int ek_print_vtk_density(int species, char *filename);
-int ek_print_vtk_flux(int species, char *filename);
-int ek_print_vtk_flux_fluc(int species, char *filename);
-int ek_print_vtk_flux_link(int species, char *filename);
-int ek_print_vtk_potential(char *filename);
-int ek_print_vtk_particle_potential(char *filename);
-int ek_print_vtk_lbforce_density(char *filename);
-int ek_lb_print_vtk_density(char *filename);
-int ek_lb_print_vtk_velocity(char *filename);
-int ek_init();
-void ek_set_agrid(float agrid);
-void ek_set_lb_density(float lb_density);
-void ek_set_viscosity(float viscosity);
-void ek_set_lb_ext_force_density(float lb_ext_force_dens_x,
-                                 float lb_ext_force_dens_y,
-                                 float lb_ext_force_dens_z);
-void ek_set_friction(float friction);
-void ek_set_T(float T);
-void ek_set_prefactor(float prefactor);
-void ek_set_electrostatics_coupling(bool electrostatics_coupling);
-void ek_calculate_electrostatic_coupling();
-void ek_set_bulk_viscosity(float bulk_viscosity);
-void ek_set_gamma_odd(float gamma_odd);
-void ek_set_gamma_even(float gamma_even);
-void ek_set_density(int species, float density);
-void ek_set_D(int species, float D);
-void ek_set_valency(int species, float valency);
-void ek_set_ext_force_density(int species, float ext_force_density_x,
-                              float ext_force_density_y,
-                              float ext_force_density_z);
-void ek_set_stencil(int stencil);
-void ek_set_advection(bool advection);
-void ek_set_fluidcoupling(bool ideal_contribution);
-void ek_set_fluctuations(bool fluctuations);
-void ek_set_fluctuation_amplitude(float fluctuation_amplitude);
-void ek_set_rng_state(uint64_t counter);
-int ek_node_get_density(int species, int x, int y, int z, double *density);
-int ek_node_get_flux(int species, int x, int y, int z, double *flux);
-int ek_node_get_potential(int x, int y, int z, double *potential);
-int ek_node_set_density(int species, int x, int y, int z, double density);
-float ek_calculate_net_charge();
-int ek_neutralize_system(int species);
-
-#ifdef EK_BOUNDARIES
-void ek_gather_wallcharge_species_density(float *wallcharge_species_density,
-                                          int wallcharge_species);
-void ek_init_species_density_wallcharge(float *wallcharge_species_density,
-                                        int wallcharge_species);
-#endif
-
-#endif /* CUDA */
-
-#endif /* CORE_GRID_BASED_ALGORITHMS_ELECTROKINETICS_HPP */
diff --git a/src/core/grid_based_algorithms/electrokinetics_cuda.cu b/src/core/grid_based_algorithms/electrokinetics_cuda.cu
deleted file mode 100644
index 8a401bf9688..00000000000
--- a/src/core/grid_based_algorithms/electrokinetics_cuda.cu
+++ /dev/null
@@ -1,3842 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "config/config.hpp"
-
-#ifdef CUDA            /* Terminates at end of file */
-#ifdef ELECTROKINETICS /* Terminates at end of file */
-
-#include "grid_based_algorithms/electrokinetics.hpp"
-
-#include "cuda_interface.hpp"
-#include "cuda_utils.cuh"
-#include "errorhandling.hpp"
-#include "fd-electrostatics.cuh"
-#include "grid_based_algorithms/lb_boundaries.hpp"
-#include "grid_based_algorithms/lb_interface.hpp"
-#include "grid_based_algorithms/lb_particle_coupling.hpp"
-#include "grid_based_algorithms/lbgpu.cuh"
-#include "grid_based_algorithms/lbgpu.hpp"
-#include "integrate.hpp"
-
-#include <utils/math/int_pow.hpp>
-#include <utils/math/sqr.hpp>
-
-#include <thrust/device_ptr.h>
-#include <thrust/functional.h>
-#include <thrust/transform_reduce.h>
-
-#include <cuda.h>
-#include <cufft.h>
-
-#include <cstddef>
-#include <cstdio>
-#include <fstream>
-#include <iostream>
-#include <limits>
-#include <sstream>
-#include <stdexcept>
-#include <string>
-#include <vector>
-
-#if defined(OMPI_MPI_H) || defined(_MPI_H)
-#error CU-file includes mpi.h! This should not happen!
-#endif
-
-extern ActiveLB lattice_switch;
-extern bool ek_initialized;
-
-// Used to limit register use for the pressure calculation
-#define EK_LINK_U00_pressure 0
-#define EK_LINK_0U0_pressure 1
-#define EK_LINK_00U_pressure 2
-#define EK_LINK_D00_pressure 3
-#define EK_LINK_0D0_pressure 4
-#define EK_LINK_00D_pressure 5
-
-#ifdef EK_BOUNDARIES
-void LBBoundaries::lb_init_boundaries();
-#endif
-
-static constexpr unsigned int threads_per_block = 64;
-
-EKParameters ek_parameters = {
-    // agrid
-    -1.0,
-    // time_step
-    -1.0,
-    // lb_density
-    -1.0,
-    // dim_x
-    0,
-    // dim_x_padded
-    0,
-    // dim_y
-    0,
-    // dim_z
-    0,
-    // number_of_nodes
-    0,
-    // viscosity
-    -1.0,
-    // bulk_viscosity
-    -1.0,
-    // gamma_odd
-    0.0,
-    // gamma_even
-    0.0,
-    // friction
-    0.0,
-    // T
-    -1.0,
-    // prefactor
-    -1.0,
-    // lb_ext_force_density
-    {0.0, 0.0, 0.0},
-    // number_of_species
-    0,
-    // reaction_species
-    {-1, -1, -1},
-    // rho_reactant_reservoir
-    -1.0,
-    // rho_product0_reservoir
-    -1.0,
-    // rho_product1_reservoir
-    -1.0,
-    // reaction_ct_rate
-    -1.0,
-    // reaction_fraction_0
-    -1.0,
-    // reaction_fraction_1
-    -1.0,
-    // mass_reactant
-    -1.0,
-    // mass_product0
-    -1.0,
-    // mass_product1
-    -1.0,
-    // stencil
-    0,
-    // number_of_boundary_nodes
-    -1,
-    // fluctuation_amplitude
-    -1.0,
-    // fluctuation
-    false,
-    // advection
-    true,
-    // fluidcoupling_ideal_contribution
-    true,
-    // es_coupling
-    false,
-    // charge_potential_buffer
-    nullptr,
-    // electric_field
-    nullptr,
-    // charge_potential
-    nullptr,
-    // j
-    nullptr,
-    // lb_force_density_previous
-    nullptr,
-#ifdef EK_DEBUG
-    // j_fluc
-    nullptr,
-#endif
-    // rho
-    {},
-    // species_index
-    {-1},
-    // density
-    {},
-    // D
-    {},
-    // d
-    {},
-    // valency
-    {},
-    // ext_force_density
-    {},
-    // node_is_catalyst
-    nullptr,
-};
-
-__device__ __constant__ EKParameters ek_parameters_gpu[1];
-float *charge_gpu;
-LB_parameters_gpu *ek_lbparameters_gpu;
-CUDA_particle_data *particle_data_gpu;
-float *ek_lb_boundary_force;
-char *ek_node_is_catalyst;
-unsigned int old_number_of_species = 0;
-unsigned int old_number_of_boundaries = 0;
-Utils::Counter<uint64_t> philox_counter = Utils::Counter<uint64_t>(0);
-
-FdElectrostatics *electrostatics = nullptr;
-
-extern LB_parameters_gpu lbpar_gpu;
-extern LB_node_force_density_gpu node_f, node_f_buf;
-extern LB_nodes_gpu *current_nodes;
-extern EKParameters *lb_ek_parameters;
-
-__device__ cufftReal ek_getNode(unsigned x, unsigned y, unsigned z) {
-  auto *field =
-      reinterpret_cast<cufftReal *>(ek_parameters_gpu->charge_potential);
-  return field[ek_parameters_gpu->dim_y * ek_parameters_gpu->dim_x_padded * z +
-               ek_parameters_gpu->dim_x_padded * y + x];
-}
-
-__device__ void ek_setNode(unsigned x, unsigned y, unsigned z,
-                           cufftReal value) {
-  auto *field =
-      reinterpret_cast<cufftReal *>(ek_parameters_gpu->charge_potential);
-  field[ek_parameters_gpu->dim_y * ek_parameters_gpu->dim_x_padded * z +
-        ek_parameters_gpu->dim_x_padded * y + x] = value;
-}
-
-__device__ cufftReal ek_getNode(unsigned i) {
-  auto const x = i % ek_parameters_gpu->dim_x;
-  i /= ek_parameters_gpu->dim_x;
-  auto const y = i % ek_parameters_gpu->dim_y;
-  auto const z = i / ek_parameters_gpu->dim_y;
-  return ek_getNode(x, y, z);
-}
-
-__device__ void ek_setNode(unsigned i, cufftReal value) {
-  auto const x = i % ek_parameters_gpu->dim_x;
-  i /= ek_parameters_gpu->dim_x;
-  auto const y = i % ek_parameters_gpu->dim_y;
-  auto const z = i / ek_parameters_gpu->dim_y;
-  ek_setNode(x, y, z, value);
-}
-
-__device__ unsigned int ek_getThreadIndex() {
-
-  return blockIdx.y * gridDim.x * blockDim.x + blockDim.x * blockIdx.x +
-         threadIdx.x;
-}
-
-__device__ void rhoindex_linear2cartesian(unsigned int index,
-                                          unsigned int *coord) {
-
-  coord[0] = index % ek_parameters_gpu->dim_x;
-  index /= ek_parameters_gpu->dim_x;
-  coord[1] = index % ek_parameters_gpu->dim_y;
-  coord[2] = index / ek_parameters_gpu->dim_y;
-}
-
-__device__ unsigned int
-rhoindex_cartesian2linear(unsigned int x, unsigned int y, unsigned int z) {
-
-  return z * ek_parameters_gpu->dim_y * ek_parameters_gpu->dim_x +
-         y * ek_parameters_gpu->dim_x + x;
-}
-
-__device__ unsigned int rhoindex_cartesian2linear_padded(unsigned int x,
-                                                         unsigned int y,
-                                                         unsigned int z) {
-
-  return z * ek_parameters_gpu->dim_y * ek_parameters_gpu->dim_x_padded +
-         y * ek_parameters_gpu->dim_x_padded + x;
-}
-
-// TODO fluxindex fastest running might improve caching
-__device__ unsigned int jindex_getByRhoLinear(unsigned int rho_index,
-                                              unsigned int c) {
-
-  return c * ek_parameters_gpu->number_of_nodes + rho_index;
-}
-
-__device__ void ek_displacement(float *dx, LB_nodes_gpu n,
-                                unsigned int node_index,
-                                LB_parameters_gpu *ek_lbparameters_gpu) {
-
-  float rho = ek_lbparameters_gpu->rho * ek_lbparameters_gpu->agrid *
-              ek_lbparameters_gpu->agrid * ek_lbparameters_gpu->agrid;
-
-  float mode[19];
-
-  for (unsigned i = 0; i < 19; i++) {
-    mode[i] = n.populations[node_index][i];
-  }
-
-  rho += mode[0] + mode[1] + mode[2] + mode[3] + mode[4] + mode[5] + mode[6] +
-         mode[7] + mode[8] + mode[9] + mode[10] + mode[11] + mode[12] +
-         mode[13] + mode[14] + mode[15] + mode[16] + mode[17] + mode[18];
-
-  dx[0] = (mode[1] - mode[2]) + (mode[7] - mode[8]) + (mode[9] - mode[10]) +
-          (mode[11] - mode[12]) + (mode[13] - mode[14]);
-
-  dx[1] = (mode[3] - mode[4]) + (mode[7] - mode[8]) - (mode[9] - mode[10]) +
-          (mode[15] - mode[16]) + (mode[17] - mode[18]);
-
-  dx[2] = (mode[5] - mode[6]) + (mode[11] - mode[12]) - (mode[13] - mode[14]) +
-          (mode[15] - mode[16]) - (mode[17] - mode[18]);
-
-  // Velocity requires half the force_density in the previous time step
-
-  dx[0] += 0.5f * ek_parameters_gpu->lb_force_density_previous[node_index];
-  dx[1] += 0.5f *
-           ek_parameters_gpu
-               ->lb_force_density_previous[ek_parameters_gpu->number_of_nodes +
-                                           node_index];
-  dx[2] +=
-      0.5f *
-      ek_parameters_gpu
-          ->lb_force_density_previous[2 * ek_parameters_gpu->number_of_nodes +
-                                      node_index];
-
-  dx[0] *= 1.0f / rho;
-  dx[1] *= 1.0f / rho;
-  dx[2] *= 1.0f / rho;
-}
-
-__device__ void ek_diffusion_migration_lbforce_linkcentered_stencil(
-    unsigned int index, unsigned int index_padded,
-    unsigned int const *neighborindex, unsigned int const *neighborindex_padded,
-    unsigned int species_index, LB_node_force_density_gpu node_f,
-    LB_nodes_gpu lb_node) {
-  float flux, force;
-
-  float agrid_inv = 1.0f / ek_parameters_gpu->agrid;
-  float sqrt2agrid_inv = 1.0f / (sqrtf(2.0f) * ek_parameters_gpu->agrid);
-  float sqrt2_inv = 1.0f / sqrtf(2.0f);
-  float twoT_inv = 1.0f / (2.0f * ek_parameters_gpu->T);
-  float D_inv = 1.0f / ek_parameters_gpu->D[species_index];
-  float force_conv =
-      agrid_inv * ek_parameters_gpu->time_step * ek_parameters_gpu->time_step;
-
-  // face in x
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U00]]) *
-         agrid_inv;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_U00]]) *
-           agrid_inv +
-       ek_parameters_gpu->ext_force_density[0][species_index]);
-
-  flux += force *
-          (ek_parameters_gpu->rho[species_index][index] +
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U00]]) *
-          twoT_inv;
-
-  flux *= ek_parameters_gpu->d[species_index] * agrid_inv;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_U00]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U00)],
-            flux * ek_parameters_gpu->time_step);
-
-  if (ek_parameters_gpu->fluidcoupling_ideal_contribution) {
-    force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv;
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][0], force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U00]][0],
-              force * 0.5f);
-  } else {
-    force = -1.0f * ek_parameters_gpu->valency[species_index] *
-            (((cufftReal *)ek_parameters_gpu
-                  ->charge_potential)[neighborindex_padded[EK_LINK_U00]] -
-             ((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded]) *
-            agrid_inv;
-
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][0],
-              ek_parameters_gpu->rho[species_index][index] *
-                  (force * 0.5f +
-                   ek_parameters_gpu->ext_force_density[0][species_index] *
-                       force_conv));
-  }
-
-  // face in y
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0U0]]) *
-         agrid_inv;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_0U0]]) *
-           agrid_inv +
-       ek_parameters_gpu->ext_force_density[1][species_index]);
-
-  flux += force *
-          (ek_parameters_gpu->rho[species_index][index] +
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0U0]]) *
-          twoT_inv;
-
-  flux *= ek_parameters_gpu->d[species_index] * agrid_inv;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_0U0]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0U0)],
-            flux * ek_parameters_gpu->time_step);
-
-  if (ek_parameters_gpu->fluidcoupling_ideal_contribution) {
-    force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv;
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][1], force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0U0]][1],
-              force * 0.5f);
-  } else {
-    force = -1.0f * ek_parameters_gpu->valency[species_index] *
-            (((cufftReal *)ek_parameters_gpu
-                  ->charge_potential)[neighborindex_padded[EK_LINK_0U0]] -
-             ((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded]) *
-            agrid_inv;
-
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][1],
-              ek_parameters_gpu->rho[species_index][index] *
-                  (force * 0.5f +
-                   ek_parameters_gpu->ext_force_density[1][species_index] *
-                       force_conv));
-
-    atomicAdd(
-        &node_f.force_density[neighborindex[EK_LINK_0U0]][1],
-        ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0U0]] *
-            force * 0.5f);
-  }
-
-  // face in z
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_00U]]) *
-         agrid_inv;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_00U]]) *
-           agrid_inv +
-       ek_parameters_gpu->ext_force_density[2][species_index]);
-
-  flux += force *
-          (ek_parameters_gpu->rho[species_index][index] +
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_00U]]) *
-          twoT_inv;
-
-  flux *= ek_parameters_gpu->d[species_index] * agrid_inv;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_00U]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_00U)],
-            flux * ek_parameters_gpu->time_step);
-
-  if (ek_parameters_gpu->fluidcoupling_ideal_contribution) {
-    force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv;
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][2], force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_00U]][2],
-              force * 0.5f);
-  } else {
-    force = -1.0f * ek_parameters_gpu->valency[species_index] *
-            (((cufftReal *)ek_parameters_gpu
-                  ->charge_potential)[neighborindex_padded[EK_LINK_00U]] -
-             ((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded]) *
-            agrid_inv;
-
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][2],
-              ek_parameters_gpu->rho[species_index][index] *
-                  (force * 0.5f +
-                   ek_parameters_gpu->ext_force_density[2][species_index] *
-                       force_conv));
-
-    atomicAdd(
-        &node_f.force_density[neighborindex[EK_LINK_00U]][2],
-        ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_00U]] *
-            force * 0.5f);
-  }
-
-  // edge in z
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UU0]]) *
-         sqrt2agrid_inv;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_UU0]]) *
-           sqrt2agrid_inv +
-       (ek_parameters_gpu->ext_force_density[0][species_index] +
-        ek_parameters_gpu->ext_force_density[1][species_index]) *
-           sqrt2_inv);
-
-  flux += force *
-          (ek_parameters_gpu->rho[species_index][index] +
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UU0]]) *
-          twoT_inv;
-
-  flux *= ek_parameters_gpu->d[species_index] * agrid_inv;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_UU0]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UU0)],
-            flux * ek_parameters_gpu->time_step);
-
-  if (ek_parameters_gpu->fluidcoupling_ideal_contribution) {
-    force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv;
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][0], force * 0.5f);
-    atomicAdd(&node_f.force_density[index][1], force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UU0]][0],
-              force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UU0]][1],
-              force * 0.5f);
-  }
-
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UD0]]) *
-         sqrt2agrid_inv;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_UD0]]) *
-           sqrt2agrid_inv +
-       (ek_parameters_gpu->ext_force_density[0][species_index] -
-        ek_parameters_gpu->ext_force_density[1][species_index]) *
-           sqrt2_inv);
-
-  flux += force *
-          (ek_parameters_gpu->rho[species_index][index] +
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UD0]]) *
-          twoT_inv;
-
-  flux *= ek_parameters_gpu->d[species_index] * agrid_inv;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_UD0]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UD0)],
-            flux * ek_parameters_gpu->time_step);
-
-  if (ek_parameters_gpu->fluidcoupling_ideal_contribution) {
-    force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv;
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][0], force * 0.5f);
-    atomicAdd(&node_f.force_density[index][1], -force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UD0]][0],
-              force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UD0]][1],
-              -force * 0.5f);
-  }
-
-  // edge in y
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0U]]) *
-         sqrt2agrid_inv;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_U0U]]) *
-           sqrt2agrid_inv +
-       (ek_parameters_gpu->ext_force_density[0][species_index] +
-        ek_parameters_gpu->ext_force_density[2][species_index]) *
-           sqrt2_inv);
-
-  flux += force *
-          (ek_parameters_gpu->rho[species_index][index] +
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0U]]) *
-          twoT_inv;
-
-  flux *= ek_parameters_gpu->d[species_index] * agrid_inv;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_U0U]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0U)],
-            flux * ek_parameters_gpu->time_step);
-
-  if (ek_parameters_gpu->fluidcoupling_ideal_contribution) {
-    force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv;
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][0], force * 0.5f);
-    atomicAdd(&node_f.force_density[index][2], force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0U]][0],
-              force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0U]][2],
-              force * 0.5f);
-  }
-
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0D]]) *
-         sqrt2agrid_inv;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_U0D]]) *
-           sqrt2agrid_inv +
-       (ek_parameters_gpu->ext_force_density[0][species_index] -
-        ek_parameters_gpu->ext_force_density[2][species_index]) *
-           sqrt2_inv);
-
-  flux += force *
-          (ek_parameters_gpu->rho[species_index][index] +
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0D]]) *
-          twoT_inv;
-
-  flux *= ek_parameters_gpu->d[species_index] * agrid_inv;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_U0D]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0D)],
-            flux * ek_parameters_gpu->time_step);
-
-  if (ek_parameters_gpu->fluidcoupling_ideal_contribution) {
-    force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv;
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][0], force * 0.5f);
-    atomicAdd(&node_f.force_density[index][2], -force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0D]][0],
-              force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0D]][2],
-              -force * 0.5f);
-  }
-
-  // edge in x
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UU]]) *
-         sqrt2agrid_inv;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_0UU]]) *
-           sqrt2agrid_inv +
-       (ek_parameters_gpu->ext_force_density[1][species_index] +
-        ek_parameters_gpu->ext_force_density[2][species_index]) *
-           sqrt2_inv);
-
-  flux += force *
-          (ek_parameters_gpu->rho[species_index][index] +
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UU]]) *
-          twoT_inv;
-
-  flux *= ek_parameters_gpu->d[species_index] * agrid_inv;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_0UU]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UU)],
-            flux * ek_parameters_gpu->time_step);
-
-  if (ek_parameters_gpu->fluidcoupling_ideal_contribution) {
-    force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv;
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][1], force * 0.5f);
-    atomicAdd(&node_f.force_density[index][2], force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UU]][1],
-              force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UU]][2],
-              force * 0.5f);
-  }
-
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UD]]) *
-         sqrt2agrid_inv;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_0UD]]) *
-           sqrt2agrid_inv +
-       (ek_parameters_gpu->ext_force_density[1][species_index] -
-        ek_parameters_gpu->ext_force_density[2][species_index]) *
-           sqrt2_inv);
-
-  flux += force *
-          (ek_parameters_gpu->rho[species_index][index] +
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UD]]) *
-          twoT_inv;
-
-  flux *= ek_parameters_gpu->d[species_index] * agrid_inv;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_0UD]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UD)],
-            flux * ek_parameters_gpu->time_step);
-
-  if (ek_parameters_gpu->fluidcoupling_ideal_contribution) {
-    force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv;
-    force *= force_conv;
-
-    atomicAdd(&node_f.force_density[index][1], force * 0.5f);
-    atomicAdd(&node_f.force_density[index][2], -force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UD]][1],
-              force * 0.5f);
-    atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UD]][2],
-              -force * 0.5f);
-  }
-}
-
-__device__ void ek_diffusion_migration_lbforce_nodecentered_stencil(
-    unsigned int index, unsigned int index_padded,
-    unsigned int const *neighborindex, unsigned int const *neighborindex_padded,
-    unsigned int species_index, LB_node_force_density_gpu node_f,
-    LB_nodes_gpu lb_node) {
-  float flux, force;
-
-  // face in x
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U00]]) /
-         ek_parameters_gpu->agrid;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_U00]]) /
-           ek_parameters_gpu->agrid +
-       ek_parameters_gpu->ext_force_density[0][species_index]);
-
-  flux +=
-      force *
-      (static_cast<float>(force >= 0.0f) *
-           ek_parameters_gpu->rho[species_index][index] +
-       static_cast<float>(force < 0.0f) *
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U00]]) /
-      ek_parameters_gpu->T;
-
-  flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_U00]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U00)],
-            flux * ek_parameters_gpu->time_step);
-
-  force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid /
-          ek_parameters_gpu->D[species_index];
-
-  force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step *
-           ek_parameters_gpu->time_step;
-
-  atomicAdd(&node_f.force_density[index][0], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U00]][0], force / 2.0f);
-
-  // face in y
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0U0]]) /
-         ek_parameters_gpu->agrid;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_0U0]]) /
-           ek_parameters_gpu->agrid +
-       ek_parameters_gpu->ext_force_density[1][species_index]);
-
-  flux +=
-      force *
-      (static_cast<float>(force >= 0.0f) *
-           ek_parameters_gpu->rho[species_index][index] +
-       static_cast<float>(force < 0.0f) *
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0U0]]) /
-      ek_parameters_gpu->T;
-
-  flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_0U0]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0U0)],
-            flux * ek_parameters_gpu->time_step);
-
-  force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid /
-          ek_parameters_gpu->D[species_index];
-
-  force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step *
-           ek_parameters_gpu->time_step;
-
-  atomicAdd(&node_f.force_density[index][1], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0U0]][1], force / 2.0f);
-
-  // face in z
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_00U]]) /
-         ek_parameters_gpu->agrid;
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_00U]]) /
-           ek_parameters_gpu->agrid +
-       ek_parameters_gpu->ext_force_density[2][species_index]);
-
-  flux +=
-      force *
-      (static_cast<float>(force >= 0.0f) *
-           ek_parameters_gpu->rho[species_index][index] +
-       static_cast<float>(force < 0.0f) *
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_00U]]) /
-      ek_parameters_gpu->T;
-
-  flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_00U]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_00U)],
-            flux * ek_parameters_gpu->time_step);
-
-  force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid /
-          ek_parameters_gpu->D[species_index];
-
-  force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step *
-           ek_parameters_gpu->time_step;
-
-  atomicAdd(&node_f.force_density[index][2], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_00U]][2], force / 2.0f);
-
-  // edge in z
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UU0]]) /
-         (sqrtf(2.0f) * ek_parameters_gpu->agrid);
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_UU0]]) /
-           (sqrtf(2.0f) * ek_parameters_gpu->agrid) +
-       (ek_parameters_gpu->ext_force_density[0][species_index] +
-        ek_parameters_gpu->ext_force_density[1][species_index]) /
-           sqrtf(2.0f));
-
-  flux +=
-      force *
-      (static_cast<float>(force >= 0.0f) *
-           ek_parameters_gpu->rho[species_index][index] +
-       static_cast<float>(force < 0.0f) *
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UU0]]) /
-      ek_parameters_gpu->T;
-
-  flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_UU0]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UU0)],
-            flux * ek_parameters_gpu->time_step);
-
-  force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid /
-          ek_parameters_gpu->D[species_index];
-
-  force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step *
-           ek_parameters_gpu->time_step;
-
-  atomicAdd(&node_f.force_density[index][0], force / 2.0f);
-  atomicAdd(&node_f.force_density[index][1], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UU0]][0], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UU0]][1], force / 2.0f);
-
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UD0]]) /
-         (sqrtf(2.0f) * ek_parameters_gpu->agrid);
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_UD0]]) /
-           (sqrtf(2.0f) * ek_parameters_gpu->agrid) +
-       (ek_parameters_gpu->ext_force_density[0][species_index] -
-        ek_parameters_gpu->ext_force_density[1][species_index]) /
-           sqrtf(2.0f));
-
-  flux +=
-      force *
-      (static_cast<float>(force >= 0.0f) *
-           ek_parameters_gpu->rho[species_index][index] +
-       static_cast<float>(force < 0.0f) *
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UD0]]) /
-      ek_parameters_gpu->T;
-
-  flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_UD0]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UD0)],
-            flux * ek_parameters_gpu->time_step);
-
-  force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid /
-          ek_parameters_gpu->D[species_index];
-
-  force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step *
-           ek_parameters_gpu->time_step;
-
-  atomicAdd(&node_f.force_density[index][0], force / 2.0f);
-  atomicAdd(&node_f.force_density[index][1], -force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UD0]][0], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UD0]][1],
-            -force / 2.0f);
-
-  // edge in y
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0U]]) /
-         (sqrtf(2.0f) * ek_parameters_gpu->agrid);
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_U0U]]) /
-           (sqrtf(2.0f) * ek_parameters_gpu->agrid) +
-       (ek_parameters_gpu->ext_force_density[0][species_index] +
-        ek_parameters_gpu->ext_force_density[2][species_index]) /
-           sqrtf(2.0f));
-
-  flux +=
-      force *
-      (static_cast<float>(force >= 0.0f) *
-           ek_parameters_gpu->rho[species_index][index] +
-       static_cast<float>(force < 0.0f) *
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0U]]) /
-      ek_parameters_gpu->T;
-
-  flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_U0U]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0U)],
-            flux * ek_parameters_gpu->time_step);
-
-  force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid /
-          ek_parameters_gpu->D[species_index];
-
-  force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step *
-           ek_parameters_gpu->time_step;
-
-  atomicAdd(&node_f.force_density[index][0], force / 2.0f);
-  atomicAdd(&node_f.force_density[index][2], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0U]][0], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0U]][2], force / 2.0f);
-
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0D]]) /
-         (sqrtf(2.0f) * ek_parameters_gpu->agrid);
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_U0D]]) /
-           (sqrtf(2.0f) * ek_parameters_gpu->agrid) +
-       (ek_parameters_gpu->ext_force_density[0][species_index] -
-        ek_parameters_gpu->ext_force_density[2][species_index]) /
-           sqrtf(2.0f));
-
-  flux +=
-      force *
-      (static_cast<float>(force >= 0.0f) *
-           ek_parameters_gpu->rho[species_index][index] +
-       static_cast<float>(force < 0.0f) *
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0D]]) /
-      ek_parameters_gpu->T;
-
-  flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_U0D]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0D)],
-            flux * ek_parameters_gpu->time_step);
-
-  force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid /
-          ek_parameters_gpu->D[species_index];
-
-  force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step *
-           ek_parameters_gpu->time_step;
-
-  atomicAdd(&node_f.force_density[index][0], force / 2.0f);
-  atomicAdd(&node_f.force_density[index][2], -force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0D]][0], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0D]][2],
-            -force / 2.0f);
-
-  // edge in x
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UU]]) /
-         (sqrtf(2.0f) * ek_parameters_gpu->agrid);
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_0UU]]) /
-           (sqrtf(2.0f) * ek_parameters_gpu->agrid) +
-       (ek_parameters_gpu->ext_force_density[1][species_index] +
-        ek_parameters_gpu->ext_force_density[2][species_index]) /
-           sqrtf(2.0f));
-
-  flux +=
-      force *
-      (static_cast<float>(force >= 0.0f) *
-           ek_parameters_gpu->rho[species_index][index] +
-       static_cast<float>(force < 0.0f) *
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UU]]) /
-      ek_parameters_gpu->T;
-
-  flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_0UU]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UU)],
-            flux * ek_parameters_gpu->time_step);
-
-  force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid /
-          ek_parameters_gpu->D[species_index];
-
-  force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step *
-           ek_parameters_gpu->time_step;
-
-  atomicAdd(&node_f.force_density[index][1], force / 2.0f);
-  atomicAdd(&node_f.force_density[index][2], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UU]][1], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UU]][2], force / 2.0f);
-
-  flux = (ek_parameters_gpu->rho[species_index][index] -
-          ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UD]]) /
-         (sqrtf(2.0f) * ek_parameters_gpu->agrid);
-
-  force =
-      (ek_parameters_gpu->valency[species_index] *
-           (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] -
-            ((cufftReal *)ek_parameters_gpu
-                 ->charge_potential)[neighborindex_padded[EK_LINK_0UD]]) /
-           (sqrtf(2.0f) * ek_parameters_gpu->agrid) +
-       (ek_parameters_gpu->ext_force_density[1][species_index] -
-        ek_parameters_gpu->ext_force_density[2][species_index]) /
-           sqrtf(2.0f));
-
-  flux +=
-      force *
-      (static_cast<float>(force >= 0.0f) *
-           ek_parameters_gpu->rho[species_index][index] +
-       static_cast<float>(force < 0.0f) *
-           ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UD]]) /
-      ek_parameters_gpu->T;
-
-  flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid;
-
-  flux *= static_cast<float>(!(lb_node.boundary[index] ||
-                               lb_node.boundary[neighborindex[EK_LINK_0UD]]));
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UD)],
-            flux * ek_parameters_gpu->time_step);
-
-  force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid /
-          ek_parameters_gpu->D[species_index];
-
-  force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step *
-           ek_parameters_gpu->time_step;
-
-  atomicAdd(&node_f.force_density[index][1], force / 2.0f);
-  atomicAdd(&node_f.force_density[index][2], -force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UD]][1], force / 2.0f);
-  atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UD]][2],
-            -force / 2.0f);
-}
-
-__device__ void
-ek_add_advection_to_flux(unsigned int index, unsigned int *coord,
-                         unsigned int species_index, LB_nodes_gpu lb_node,
-                         LB_parameters_gpu *ek_lbparameters_gpu) {
-  float dx[3];
-  unsigned int di[3];
-  unsigned int node;
-
-  ek_displacement(dx, lb_node, index, ek_lbparameters_gpu);
-
-  di[0] = 1 - static_cast<unsigned>(signbit(dx[0]));
-  di[1] = 1 - static_cast<unsigned>(signbit(dx[1]));
-  di[2] = 1 - static_cast<unsigned>(signbit(dx[2]));
-
-  dx[0] = fabs(dx[0]);
-  dx[1] = fabs(dx[1]);
-  dx[2] = fabs(dx[2]);
-
-  unsigned int target_node[3];
-  unsigned int target_node_index;
-  int not_boundary;
-
-  // face in x
-  node = rhoindex_cartesian2linear(
-      (coord[0] + di[0] - 1 + ek_parameters_gpu->dim_x) %
-          ek_parameters_gpu->dim_x,
-      coord[1], coord[2]);
-
-  target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) %
-                   ek_parameters_gpu->dim_x;
-  target_node[1] = coord[1];
-  target_node[2] = coord[2];
-  target_node_index =
-      rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]);
-  not_boundary =
-      (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0;
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(node, EK_LINK_U00)],
-            (2 * static_cast<float>(di[0]) - 1) *
-                ek_parameters_gpu->rho[species_index][index] * dx[0] *
-                (1.0f - dx[1]) * (1.0f - dx[2]) *
-                static_cast<float>(not_boundary));
-
-  // face in y
-  node = rhoindex_cartesian2linear(
-      coord[0],
-      (coord[1] + di[1] - 1 + ek_parameters_gpu->dim_y) %
-          ek_parameters_gpu->dim_y,
-      coord[2]);
-
-  target_node[0] = coord[0];
-  target_node[1] = (coord[1] + 2 * di[1] - 1 + ek_parameters_gpu->dim_y) %
-                   ek_parameters_gpu->dim_y;
-  target_node[2] = coord[2];
-  target_node_index =
-      rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]);
-  not_boundary =
-      (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0;
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(node, EK_LINK_0U0)],
-            (2 * static_cast<float>(di[1]) - 1) *
-                ek_parameters_gpu->rho[species_index][index] * (1.0f - dx[0]) *
-                dx[1] * (1.0f - dx[2]) * static_cast<float>(not_boundary));
-
-  // face in z
-  node = rhoindex_cartesian2linear(
-      coord[0], coord[1],
-      (coord[2] + di[2] - 1 + ek_parameters_gpu->dim_z) %
-          ek_parameters_gpu->dim_z);
-
-  target_node[0] = coord[0];
-  target_node[1] = coord[1];
-  target_node[2] = (coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) %
-                   ek_parameters_gpu->dim_z;
-  target_node_index =
-      rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]);
-  not_boundary =
-      (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0;
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(node, EK_LINK_00U)],
-            (2 * static_cast<float>(di[2]) - 1) *
-                ek_parameters_gpu->rho[species_index][index] * (1.0f - dx[0]) *
-                (1.0f - dx[1]) * dx[2] * static_cast<float>(not_boundary));
-
-  // edge in x
-  node = rhoindex_cartesian2linear(
-      coord[0],
-      (coord[1] + di[1] - 1 + ek_parameters_gpu->dim_y) %
-          ek_parameters_gpu->dim_y,
-      (coord[2] + (1 - di[1]) * (2 * di[2] - 1) + ek_parameters_gpu->dim_z) %
-          ek_parameters_gpu->dim_z);
-
-  target_node[0] = coord[0];
-  target_node[1] = (coord[1] + 2 * di[1] - 1 + ek_parameters_gpu->dim_y) %
-                   ek_parameters_gpu->dim_y;
-  target_node[2] = (coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) %
-                   ek_parameters_gpu->dim_z;
-  target_node_index =
-      rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]);
-  not_boundary =
-      (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0;
-
-  atomicAdd(
-      &ek_parameters_gpu
-           ->j[jindex_getByRhoLinear(node, EK_LINK_0UU + (di[1] + di[2] == 1))],
-      (2 * static_cast<float>(di[1]) - 1) *
-          ek_parameters_gpu->rho[species_index][index] * (1.0f - dx[0]) *
-          dx[1] * dx[2] * static_cast<float>(not_boundary));
-
-  // edge in y
-  node = rhoindex_cartesian2linear(
-      (coord[0] + di[0] - 1 + ek_parameters_gpu->dim_x) %
-          ek_parameters_gpu->dim_x,
-      coord[1],
-      (coord[2] + (1 - di[0]) * (2 * di[2] - 1) + ek_parameters_gpu->dim_z) %
-          ek_parameters_gpu->dim_z);
-
-  target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) %
-                   ek_parameters_gpu->dim_x;
-  target_node[1] = coord[1];
-  target_node[2] = (coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) %
-                   ek_parameters_gpu->dim_z;
-  target_node_index =
-      rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]);
-  not_boundary =
-      (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0;
-
-  atomicAdd(
-      &ek_parameters_gpu
-           ->j[jindex_getByRhoLinear(node, EK_LINK_U0U + (di[0] + di[2] == 1))],
-      (2 * static_cast<float>(di[0]) - 1) *
-          ek_parameters_gpu->rho[species_index][index] * dx[0] *
-          (1.0f - dx[1]) * dx[2] * static_cast<float>(not_boundary));
-
-  // edge in z
-  node = rhoindex_cartesian2linear(
-      (coord[0] + di[0] - 1 + ek_parameters_gpu->dim_x) %
-          ek_parameters_gpu->dim_x,
-      (coord[1] + (1 - di[0]) * (2 * di[1] - 1) + ek_parameters_gpu->dim_y) %
-          ek_parameters_gpu->dim_y,
-      coord[2]);
-
-  target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) %
-                   ek_parameters_gpu->dim_x;
-  target_node[1] = (coord[1] + 2 * di[1] - 1 + ek_parameters_gpu->dim_y) %
-                   ek_parameters_gpu->dim_y;
-  target_node[2] = coord[2];
-  target_node_index =
-      rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]);
-  not_boundary =
-      (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0;
-
-  atomicAdd(
-      &ek_parameters_gpu
-           ->j[jindex_getByRhoLinear(node, EK_LINK_UU0 + (di[0] + di[1] == 1))],
-      (2 * static_cast<float>(di[0]) - 1) *
-          ek_parameters_gpu->rho[species_index][index] * dx[0] * dx[1] *
-          (1.0f - dx[2]) * static_cast<float>(not_boundary));
-
-  // corner
-  node = rhoindex_cartesian2linear(
-      (coord[0] + di[0] - 1 + ek_parameters_gpu->dim_x) %
-          ek_parameters_gpu->dim_x,
-      (coord[1] + (1 - di[0]) * (2 * di[1] - 1) + ek_parameters_gpu->dim_y) %
-          ek_parameters_gpu->dim_y,
-      (coord[2] + (1 - di[0]) * (2 * di[2] - 1) + ek_parameters_gpu->dim_z) %
-          ek_parameters_gpu->dim_z);
-
-  target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) %
-                   ek_parameters_gpu->dim_x;
-  target_node[1] = (coord[1] + 2 * di[1] - 1 + ek_parameters_gpu->dim_y) %
-                   ek_parameters_gpu->dim_y;
-  target_node[2] = (coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) %
-                   ek_parameters_gpu->dim_z;
-  target_node_index =
-      rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]);
-  not_boundary =
-      (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0;
-
-  atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(
-                node, (1 - di[0]) * (EK_LINK_UUU + 2 * di[1] + di[2]) +
-                          di[0] * (EK_LINK_UDD - 2 * di[1] - di[2]))],
-            (2 * static_cast<float>(di[0]) - 1) *
-                ek_parameters_gpu->rho[species_index][index] * dx[0] * dx[1] *
-                dx[2] * static_cast<float>(not_boundary));
-}
-
-__device__ float4 ek_random_wrapper_philox(unsigned int index,
-                                           unsigned int mode,
-                                           uint64_t philox_counter) {
-  // Split the 64 bit counter into two 32 bit ints.
-  auto const philox_counter_hi = static_cast<uint32_t>(philox_counter >> 32);
-  auto const philox_counter_low = static_cast<uint32_t>(philox_counter);
-  uint4 rnd_ints =
-      curand_Philox4x32_10(make_uint4(index, philox_counter_hi, 0, mode),
-                           make_uint2(philox_counter_low, 0));
-  float4 rnd_floats;
-  rnd_floats.w = static_cast<float>(rnd_ints.w) * CURAND_2POW32_INV +
-                 (CURAND_2POW32_INV / 2.0f);
-  rnd_floats.x = static_cast<float>(rnd_ints.x) * CURAND_2POW32_INV +
-                 (CURAND_2POW32_INV / 2.0f);
-  rnd_floats.y = static_cast<float>(rnd_ints.y) * CURAND_2POW32_INV +
-                 (CURAND_2POW32_INV / 2.0f);
-  rnd_floats.z = static_cast<float>(rnd_ints.z) * CURAND_2POW32_INV +
-                 (CURAND_2POW32_INV / 2.0f);
-  return rnd_floats;
-}
-
-__device__ void ek_add_fluctuations_to_flux(unsigned int index,
-                                            unsigned int species_index,
-                                            unsigned int const *neighborindex,
-                                            LB_nodes_gpu lb_node,
-                                            uint64_t philox_counter) {
-  if (index < ek_parameters_gpu->number_of_nodes) {
-    float density = ek_parameters_gpu->rho[species_index][index];
-    float *flux = ek_parameters_gpu->j;
-    float diffusion = ek_parameters_gpu->D[species_index];
-    float time_step = ek_parameters_gpu->time_step;
-    float agrid = ek_parameters_gpu->agrid;
-    float4 random_floats;
-    float random;
-
-#ifdef EK_DEBUG
-    float *flux_fluc = ek_parameters_gpu->j_fluc;
-#endif
-    float fluc = 0.0f;
-
-    for (unsigned i = 0; i < 9; i++) {
-
-      if (i % 4 == 0) {
-        random_floats = ek_random_wrapper_philox(index, i + 40, philox_counter);
-        random = (random_floats.w - 0.5f) * 2.0f;
-      } else if (i % 4 == 1) {
-        random = (random_floats.x - 0.5f) * 2.0f;
-      } else if (i % 4 == 2) {
-        random = (random_floats.y - 0.5f) * 2.0f;
-      } else if (i % 4 == 3) {
-        random = (random_floats.z - 0.5f) * 2.0f;
-      }
-      float H = 0.0f;
-      float HN = 0.0f;
-      float neighbor_density =
-          ek_parameters_gpu->rho[species_index][neighborindex[i]];
-
-      H = static_cast<float>(density >= 0.0f) * min(density, 1.0f);
-      HN = static_cast<float>(neighbor_density >= 0.0f) *
-           min(neighbor_density, 1.0f);
-
-      float average_density = H * HN * (density + neighbor_density) / 2.0f;
-
-      if (i > 2) {
-        fluc = 1.0f *
-               powf(2.0f * average_density * diffusion * time_step /
-                        (agrid * agrid),
-                    0.5f) *
-               random * ek_parameters_gpu->fluctuation_amplitude / sqrtf(2.0f);
-        fluc *= static_cast<float>(
-            !(lb_node.boundary[index] || lb_node.boundary[neighborindex[i]]));
-#ifdef EK_DEBUG
-        flux_fluc[jindex_getByRhoLinear(index, i)] = fluc;
-#endif
-        flux[jindex_getByRhoLinear(index, i)] += fluc;
-      } else {
-        fluc = 1.0f *
-               powf(2.0f * average_density * diffusion * time_step /
-                        (agrid * agrid),
-                    0.5f) *
-               random * ek_parameters_gpu->fluctuation_amplitude;
-        fluc *= static_cast<float>(
-            !(lb_node.boundary[index] || lb_node.boundary[neighborindex[i]]));
-#ifdef EK_DEBUG
-        flux_fluc[jindex_getByRhoLinear(index, i)] = fluc;
-#endif
-        flux[jindex_getByRhoLinear(index, i)] += fluc;
-      }
-    }
-  }
-}
-
-__global__ void ek_calculate_quantities(unsigned int species_index,
-                                        LB_nodes_gpu lb_node,
-                                        LB_node_force_density_gpu node_f,
-                                        LB_parameters_gpu *ek_lbparameters_gpu,
-                                        uint64_t philox_counter) {
-
-  unsigned int index = ek_getThreadIndex();
-
-  if (index < ek_parameters_gpu->number_of_nodes) {
-
-    unsigned int coord[3];
-    unsigned int neighborindex[9];
-    unsigned int neighborindex_padded[9];
-    unsigned int index_padded;
-
-    rhoindex_linear2cartesian(index, coord);
-
-    /* Calculate the diffusive fluxes between this node and its neighbors. Only
-       the 9 fluxes along the directions of the LB velocities c_i with i odd are
-       stored with a node to avoid redundancies. */
-
-    neighborindex[EK_LINK_U00] = rhoindex_cartesian2linear(
-        (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1], coord[2]);
-
-    neighborindex[EK_LINK_0U0] = rhoindex_cartesian2linear(
-        coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]);
-
-    neighborindex[EK_LINK_00U] = rhoindex_cartesian2linear(
-        coord[0], coord[1], (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_UU0] = rhoindex_cartesian2linear(
-        (coord[0] + 1) % ek_parameters_gpu->dim_x,
-        (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]);
-
-    neighborindex[EK_LINK_UD0] = rhoindex_cartesian2linear(
-        (coord[0] + 1) % ek_parameters_gpu->dim_x,
-        (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-        coord[2]);
-
-    neighborindex[EK_LINK_U0U] = rhoindex_cartesian2linear(
-        (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1],
-        (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_U0D] = rhoindex_cartesian2linear(
-        (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1],
-        (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_0UU] = rhoindex_cartesian2linear(
-        coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y,
-        (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_0UD] = rhoindex_cartesian2linear(
-        coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y,
-        (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-    /* calculate the same indices respecting the FFT padding */
-
-    index_padded =
-        rhoindex_cartesian2linear_padded(coord[0], coord[1], coord[2]);
-
-    neighborindex_padded[EK_LINK_U00] = rhoindex_cartesian2linear_padded(
-        (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1], coord[2]);
-
-    neighborindex_padded[EK_LINK_0U0] = rhoindex_cartesian2linear_padded(
-        coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]);
-
-    neighborindex_padded[EK_LINK_00U] = rhoindex_cartesian2linear_padded(
-        coord[0], coord[1], (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-    neighborindex_padded[EK_LINK_UU0] = rhoindex_cartesian2linear_padded(
-        (coord[0] + 1) % ek_parameters_gpu->dim_x,
-        (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]);
-
-    neighborindex_padded[EK_LINK_UD0] = rhoindex_cartesian2linear_padded(
-        (coord[0] + 1) % ek_parameters_gpu->dim_x,
-        (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-        coord[2]);
-
-    neighborindex_padded[EK_LINK_U0U] = rhoindex_cartesian2linear_padded(
-        (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1],
-        (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-    neighborindex_padded[EK_LINK_U0D] = rhoindex_cartesian2linear_padded(
-        (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1],
-        (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-    neighborindex_padded[EK_LINK_0UU] = rhoindex_cartesian2linear_padded(
-        coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y,
-        (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-    neighborindex_padded[EK_LINK_0UD] = rhoindex_cartesian2linear_padded(
-        coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y,
-        (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-    /* diffusive contribution to flux and LB force_density*/
-    if (ek_parameters_gpu->stencil == 0) // link centered
-      ek_diffusion_migration_lbforce_linkcentered_stencil(
-          index, index_padded, neighborindex, neighborindex_padded,
-          species_index, node_f, lb_node);
-    else if (ek_parameters_gpu->stencil == 1) // node centered
-      ek_diffusion_migration_lbforce_nodecentered_stencil(
-          index, index_padded, neighborindex, neighborindex_padded,
-          species_index, node_f, lb_node);
-
-    /* advective contribution to flux */
-    if (ek_parameters_gpu->advection)
-      ek_add_advection_to_flux(index, coord, species_index, lb_node,
-                               ek_lbparameters_gpu);
-
-    /* fluctuation contribution to flux */
-    if (ek_parameters_gpu->fluctuations)
-      ek_add_fluctuations_to_flux(index, species_index, neighborindex, lb_node,
-                                  philox_counter);
-  }
-}
-
-__global__ void ek_propagate_densities(unsigned int species_index) {
-
-  unsigned int index = ek_getThreadIndex();
-
-  if (index < ek_parameters_gpu->number_of_nodes) {
-    unsigned int neighborindex[13];
-    unsigned int coord[3];
-
-    rhoindex_linear2cartesian(index, coord);
-
-    /* Indices of the neighbors storing the other half
-       of the fluxes associated with this link */
-    neighborindex[EK_LINK_D00 - 13] = rhoindex_cartesian2linear(
-        (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-        coord[1], coord[2]);
-
-    neighborindex[EK_LINK_0D0 - 13] = rhoindex_cartesian2linear(
-        coord[0],
-        (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-        coord[2]);
-
-    neighborindex[EK_LINK_00D - 13] = rhoindex_cartesian2linear(
-        coord[0], coord[1],
-        (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_DD0 - 13] = rhoindex_cartesian2linear(
-        (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-        (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-        coord[2]);
-
-    neighborindex[EK_LINK_DU0 - 13] = rhoindex_cartesian2linear(
-        (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-        (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]);
-
-    neighborindex[EK_LINK_D0D - 13] = rhoindex_cartesian2linear(
-        (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-        coord[1],
-        (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_D0U - 13] = rhoindex_cartesian2linear(
-        (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-        coord[1], (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_0DD - 13] = rhoindex_cartesian2linear(
-        coord[0],
-        (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-        (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_0DU - 13] = rhoindex_cartesian2linear(
-        coord[0],
-        (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-        (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_DDD - 13] = rhoindex_cartesian2linear(
-        (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-        (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-        (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_DDU - 13] = rhoindex_cartesian2linear(
-        (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-        (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-        (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_DUD - 13] = rhoindex_cartesian2linear(
-        (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-        (coord[1] + 1) % ek_parameters_gpu->dim_y,
-        (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-    neighborindex[EK_LINK_DUU - 13] = rhoindex_cartesian2linear(
-        (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-        (coord[1] + 1) % ek_parameters_gpu->dim_y,
-        (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-    /* Calculate change of densities due to diffusive fluxes */
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U00)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_D00 - 13], EK_LINK_U00)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0U0)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_0D0 - 13], EK_LINK_0U0)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_00U)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_00D - 13], EK_LINK_00U)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UU0)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_DD0 - 13], EK_LINK_UU0)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UD0)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_DU0 - 13], EK_LINK_UD0)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0U)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_D0D - 13], EK_LINK_U0U)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0D)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_D0U - 13], EK_LINK_U0D)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UU)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_0DD - 13], EK_LINK_0UU)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UD)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_0DU - 13], EK_LINK_0UD)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UUU)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_DDD - 13], EK_LINK_UUU)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UUD)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_DDU - 13], EK_LINK_UUD)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UDU)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_DUD - 13], EK_LINK_UDU)];
-
-    ek_parameters_gpu->rho[species_index][index] -=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UDD)];
-    ek_parameters_gpu->rho[species_index][index] +=
-        ek_parameters_gpu->j[jindex_getByRhoLinear(
-            neighborindex[EK_LINK_DUU - 13], EK_LINK_UDD)];
-  }
-}
-
-__global__ void ek_apply_boundaries(LB_nodes_gpu lbnode) {
-
-  unsigned int index = ek_getThreadIndex();
-  unsigned int neighborindex[22];
-  unsigned int coord[3];
-
-  if (index < ek_parameters_gpu->number_of_nodes) {
-    if (lbnode.boundary[index]) {
-
-      rhoindex_linear2cartesian(index, coord);
-
-      /* Indices of the neighbors */
-      neighborindex[EK_LINK_D00 - 13] = rhoindex_cartesian2linear(
-          (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-          coord[1], coord[2]);
-
-      neighborindex[EK_LINK_0D0 - 13] = rhoindex_cartesian2linear(
-          coord[0],
-          (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-          coord[2]);
-
-      neighborindex[EK_LINK_00D - 13] = rhoindex_cartesian2linear(
-          coord[0], coord[1],
-          (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-      neighborindex[EK_LINK_DD0 - 13] = rhoindex_cartesian2linear(
-          (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-          (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-          coord[2]);
-
-      neighborindex[EK_LINK_DU0 - 13] = rhoindex_cartesian2linear(
-          (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-          (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]);
-
-      neighborindex[EK_LINK_D0D - 13] = rhoindex_cartesian2linear(
-          (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-          coord[1],
-          (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-      neighborindex[EK_LINK_D0U - 13] = rhoindex_cartesian2linear(
-          (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-          coord[1], (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-      neighborindex[EK_LINK_0DD - 13] = rhoindex_cartesian2linear(
-          coord[0],
-          (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-          (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-      neighborindex[EK_LINK_0DU - 13] = rhoindex_cartesian2linear(
-          coord[0],
-          (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-          (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-      neighborindex[EK_LINK_DDD - 13] = rhoindex_cartesian2linear(
-          (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-          (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-          (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-      neighborindex[EK_LINK_DDU - 13] = rhoindex_cartesian2linear(
-          (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-          (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y,
-          (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-      neighborindex[EK_LINK_DUD - 13] = rhoindex_cartesian2linear(
-          (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-          (coord[1] + 1) % ek_parameters_gpu->dim_y,
-          (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z);
-
-      neighborindex[EK_LINK_DUU - 13] = rhoindex_cartesian2linear(
-          (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x,
-          (coord[1] + 1) % ek_parameters_gpu->dim_y,
-          (coord[2] + 1) % ek_parameters_gpu->dim_z);
-
-      /* Clear fluxes on links connecting a boundary node */
-      for (unsigned i = 0; i < 13; i++)
-        ek_parameters_gpu->j[jindex_getByRhoLinear(index, i)] = 0.0f;
-
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_D00 - 13], EK_LINK_U00)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_0D0 - 13], EK_LINK_0U0)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_00D - 13], EK_LINK_00U)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_DD0 - 13], EK_LINK_UU0)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_DU0 - 13], EK_LINK_UD0)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_D0D - 13], EK_LINK_U0U)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_D0U - 13], EK_LINK_U0D)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_0DD - 13], EK_LINK_0UU)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_0DU - 13], EK_LINK_0UD)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_DDD - 13], EK_LINK_UUU)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_DDU - 13], EK_LINK_UUD)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_DUD - 13], EK_LINK_UDU)] = 0.0f;
-      ek_parameters_gpu->j[jindex_getByRhoLinear(
-          neighborindex[EK_LINK_DUU - 13], EK_LINK_UDD)] = 0.0f;
-    }
-  }
-}
-
-__global__ void ek_clear_fluxes() {
-  unsigned int index = ek_getThreadIndex();
-
-  if (index < ek_parameters_gpu->number_of_nodes) {
-    for (unsigned i = 0; i < 13; i++) {
-      ek_parameters_gpu->j[jindex_getByRhoLinear(index, i)] = 0.0f;
-#ifdef EK_DEBUG
-      ek_parameters_gpu->j_fluc[jindex_getByRhoLinear(index, i)] = 0.0f;
-#endif
-    }
-  }
-}
-
-__global__ void ek_init_species_density_homogeneous() {
-  unsigned int index = ek_getThreadIndex();
-  unsigned int coord[3];
-
-  rhoindex_linear2cartesian(index, coord);
-
-  if (index < ek_parameters_gpu->number_of_nodes) {
-    for (int i = 0; i < ek_parameters_gpu->number_of_species; i++) {
-      ek_parameters_gpu->rho[i][index] =
-          ek_parameters_gpu->density[i] * ek_parameters_gpu->agrid *
-          ek_parameters_gpu->agrid * ek_parameters_gpu->agrid;
-    }
-  }
-}
-
-__global__ void ek_gather_species_charge_density() {
-  auto const index = ek_getThreadIndex();
-
-  if (index < ek_parameters_gpu->number_of_nodes) {
-    ek_setNode(index, 0.0f);
-    cufftReal tmp = 0.0f;
-    for (int i = 0; i < ek_parameters_gpu->number_of_species; i++) {
-      tmp += ek_parameters_gpu->valency[i] * ek_parameters_gpu->rho[i][index];
-    }
-    ek_setNode(index, tmp / powf(ek_parameters_gpu->agrid, 3));
-  }
-}
-
-__global__ void
-ek_gather_particle_charge_density(CUDA_particle_data *particle_data,
-                                  std::size_t number_of_particles,
-                                  LB_parameters_gpu *ek_lbparameters_gpu) {
-  unsigned int index = ek_getThreadIndex();
-  unsigned int lowernode[3];
-  float cellpos[3];
-  float gridpos;
-
-  if (index < number_of_particles) {
-    gridpos = particle_data[index].p[0] / ek_parameters_gpu->agrid - 0.5f;
-    lowernode[0] = static_cast<unsigned>(floorf(gridpos));
-    cellpos[0] = gridpos - static_cast<float>(lowernode[0]);
-
-    gridpos = particle_data[index].p[1] / ek_parameters_gpu->agrid - 0.5f;
-    lowernode[1] = static_cast<unsigned>(floorf(gridpos));
-    cellpos[1] = gridpos - static_cast<float>(lowernode[1]);
-
-    gridpos = particle_data[index].p[2] / ek_parameters_gpu->agrid - 0.5f;
-    lowernode[2] = static_cast<unsigned>(floorf(gridpos));
-    cellpos[2] = gridpos - static_cast<float>(lowernode[2]);
-
-    lowernode[0] = (lowernode[0] + ek_lbparameters_gpu->dim[0]) %
-                   ek_lbparameters_gpu->dim[0];
-    lowernode[1] = (lowernode[1] + ek_lbparameters_gpu->dim[1]) %
-                   ek_lbparameters_gpu->dim[1];
-    lowernode[2] = (lowernode[2] + ek_lbparameters_gpu->dim[2]) %
-                   ek_lbparameters_gpu->dim[2];
-
-    atomicAdd(&((cufftReal *)ek_parameters_gpu
-                    ->charge_potential)[rhoindex_cartesian2linear_padded(
-                  lowernode[0], lowernode[1], lowernode[2])],
-              particle_data[index].q * (1 - cellpos[0]) * (1 - cellpos[1]) *
-                  (1 - cellpos[2]));
-
-    atomicAdd(&((cufftReal *)ek_parameters_gpu
-                    ->charge_potential)[rhoindex_cartesian2linear_padded(
-                  (lowernode[0] + 1) % ek_parameters_gpu->dim_x, lowernode[1],
-                  lowernode[2])],
-              particle_data[index].q * cellpos[0] * (1 - cellpos[1]) *
-                  (1 - cellpos[2]));
-
-    atomicAdd(&((cufftReal *)ek_parameters_gpu
-                    ->charge_potential)[rhoindex_cartesian2linear_padded(
-                  lowernode[0], (lowernode[1] + 1) % ek_parameters_gpu->dim_y,
-                  lowernode[2])],
-              particle_data[index].q * (1 - cellpos[0]) * cellpos[1] *
-                  (1 - cellpos[2]));
-
-    atomicAdd(&((cufftReal *)ek_parameters_gpu
-                    ->charge_potential)[rhoindex_cartesian2linear_padded(
-                  lowernode[0], lowernode[1],
-                  (lowernode[2] + 1) % ek_parameters_gpu->dim_z)],
-              particle_data[index].q * (1 - cellpos[0]) * (1 - cellpos[1]) *
-                  cellpos[2]);
-
-    atomicAdd(&((cufftReal *)ek_parameters_gpu
-                    ->charge_potential)[rhoindex_cartesian2linear_padded(
-                  (lowernode[0] + 1) % ek_parameters_gpu->dim_x,
-                  (lowernode[1] + 1) % ek_parameters_gpu->dim_y, lowernode[2])],
-              particle_data[index].q * cellpos[0] * cellpos[1] *
-                  (1 - cellpos[2]));
-
-    atomicAdd(&((cufftReal *)ek_parameters_gpu
-                    ->charge_potential)[rhoindex_cartesian2linear_padded(
-                  (lowernode[0] + 1) % ek_parameters_gpu->dim_x, lowernode[1],
-                  (lowernode[2] + 1) % ek_parameters_gpu->dim_z)],
-              particle_data[index].q * cellpos[0] * (1 - cellpos[1]) *
-                  cellpos[2]);
-
-    atomicAdd(&((cufftReal *)ek_parameters_gpu
-                    ->charge_potential)[rhoindex_cartesian2linear_padded(
-                  lowernode[0], (lowernode[1] + 1) % ek_parameters_gpu->dim_y,
-                  (lowernode[2] + 1) % ek_parameters_gpu->dim_z)],
-              particle_data[index].q * (1 - cellpos[0]) * cellpos[1] *
-                  cellpos[2]);
-
-    atomicAdd(&((cufftReal *)ek_parameters_gpu
-                    ->charge_potential)[rhoindex_cartesian2linear_padded(
-                  (lowernode[0] + 1) % ek_parameters_gpu->dim_x,
-                  (lowernode[1] + 1) % ek_parameters_gpu->dim_y,
-                  (lowernode[2] + 1) % ek_parameters_gpu->dim_z)],
-              particle_data[index].q * cellpos[0] * cellpos[1] * cellpos[2]);
-  }
-}
-
-__global__ void ek_spread_particle_force(
-    CUDA_particle_data *particle_data, std::size_t number_of_particles,
-    float *particle_forces, LB_parameters_gpu *ek_lbparameters_gpu) {
-
-  unsigned int index = ek_getThreadIndex();
-  unsigned int lowernode[3];
-  float cellpos[3];
-  float gridpos;
-
-  if (index < number_of_particles) {
-    gridpos = particle_data[index].p[0] / ek_parameters_gpu->agrid - 0.5f;
-    lowernode[0] = static_cast<unsigned>(floorf(gridpos));
-    cellpos[0] = gridpos - static_cast<float>(lowernode[0]);
-
-    gridpos = particle_data[index].p[1] / ek_parameters_gpu->agrid - 0.5f;
-    lowernode[1] = static_cast<unsigned>(floorf(gridpos));
-    cellpos[1] = gridpos - static_cast<float>(lowernode[1]);
-
-    gridpos = particle_data[index].p[2] / ek_parameters_gpu->agrid - 0.5f;
-    lowernode[2] = static_cast<unsigned>(floorf(gridpos));
-    cellpos[2] = gridpos - static_cast<float>(lowernode[2]);
-
-    lowernode[0] = (lowernode[0] + ek_lbparameters_gpu->dim[0]) %
-                   ek_lbparameters_gpu->dim[0];
-    lowernode[1] = (lowernode[1] + ek_lbparameters_gpu->dim[1]) %
-                   ek_lbparameters_gpu->dim[1];
-    lowernode[2] = (lowernode[2] + ek_lbparameters_gpu->dim[2]) %
-                   ek_lbparameters_gpu->dim[2];
-
-    float efield[3] = {0., 0., 0.};
-    for (unsigned int dim = 0; dim < 3; ++dim) {
-      // 0 0 0
-      efield[dim] +=
-          ek_parameters_gpu->electric_field[3 * rhoindex_cartesian2linear(
-                                                    lowernode[0], lowernode[1],
-                                                    lowernode[2]) +
-                                            dim] *
-          (1 - cellpos[0]) * (1 - cellpos[1]) * (1 - cellpos[2]);
-
-      // 0 0 1
-      efield[dim] +=
-          ek_parameters_gpu
-              ->electric_field[3 * rhoindex_cartesian2linear(
-                                       lowernode[0], lowernode[1],
-                                       (lowernode[2] + 1) %
-                                           ek_lbparameters_gpu->dim[2]) +
-                               dim] *
-          (1 - cellpos[0]) * (1 - cellpos[1]) * cellpos[2];
-
-      // 0 1 0
-      efield[dim] +=
-          ek_parameters_gpu
-              ->electric_field[3 * rhoindex_cartesian2linear(
-                                       lowernode[0],
-                                       (lowernode[1] + 1) %
-                                           ek_lbparameters_gpu->dim[1],
-                                       lowernode[2]) +
-                               dim] *
-          (1 - cellpos[0]) * cellpos[1] * (1 - cellpos[2]);
-
-      // 0 1 1
-      efield[dim] +=
-          ek_parameters_gpu->electric_field
-              [3 * rhoindex_cartesian2linear(
-                       lowernode[0],
-                       (lowernode[1] + 1) % ek_lbparameters_gpu->dim[1],
-                       (lowernode[2] + 1) % ek_lbparameters_gpu->dim[2]) +
-               dim] *
-          (1 - cellpos[0]) * cellpos[1] * cellpos[2];
-
-      // 1 0 0
-      efield[dim] +=
-          ek_parameters_gpu
-              ->electric_field[3 * rhoindex_cartesian2linear(
-                                       (lowernode[0] + 1) %
-                                           ek_lbparameters_gpu->dim[0],
-                                       lowernode[1], lowernode[2]) +
-                               dim] *
-          cellpos[0] * (1 - cellpos[1]) * (1 - cellpos[2]);
-
-      // 1 0 1
-      efield[dim] +=
-          ek_parameters_gpu->electric_field
-              [3 * rhoindex_cartesian2linear(
-                       (lowernode[0] + 1) % ek_lbparameters_gpu->dim[0],
-                       lowernode[1],
-                       (lowernode[2] + 1) % ek_lbparameters_gpu->dim[2]) +
-               dim] *
-          cellpos[0] * (1 - cellpos[1]) * cellpos[2];
-
-      // 1 1 0
-      efield[dim] +=
-          ek_parameters_gpu->electric_field
-              [3 * rhoindex_cartesian2linear(
-                       (lowernode[0] + 1) % ek_lbparameters_gpu->dim[0],
-                       (lowernode[1] + 1) % ek_lbparameters_gpu->dim[1],
-                       lowernode[2]) +
-               dim] *
-          cellpos[0] * cellpos[1] * (1 - cellpos[2]);
-
-      // 1 1 1
-      efield[dim] +=
-          ek_parameters_gpu->electric_field
-              [3 * rhoindex_cartesian2linear(
-                       (lowernode[0] + 1) % ek_lbparameters_gpu->dim[0],
-                       (lowernode[1] + 1) % ek_lbparameters_gpu->dim[1],
-                       (lowernode[2] + 1) % ek_lbparameters_gpu->dim[2]) +
-               dim] *
-          cellpos[0] * cellpos[1] * cellpos[2];
-    }
-    particle_forces[3 * index + 0] += particle_data[index].q * efield[0];
-    particle_forces[3 * index + 1] += particle_data[index].q * efield[1];
-    particle_forces[3 * index + 2] += particle_data[index].q * efield[2];
-  }
-}
-
-__global__ void ek_calc_electric_field(const float *potential) {
-  unsigned int coord[3];
-  const unsigned int index = ek_getThreadIndex();
-
-  if (index < ek_parameters_gpu->number_of_nodes) {
-    rhoindex_linear2cartesian(index, coord);
-    const float agrid_inv = 1.0f / ek_parameters_gpu->agrid;
-
-    ek_parameters_gpu->electric_field[3 * index + 0] =
-        -0.5f * agrid_inv *
-        (potential[rhoindex_cartesian2linear_padded(
-             (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1], coord[2])] -
-         potential[rhoindex_cartesian2linear_padded(
-             (coord[0] - 1 + ek_parameters_gpu->dim_x) %
-                 ek_parameters_gpu->dim_x,
-             coord[1], coord[2])]);
-    ek_parameters_gpu->electric_field[3 * index + 1] =
-        -0.5f * agrid_inv *
-        (potential[rhoindex_cartesian2linear_padded(
-             coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2])] -
-         potential[rhoindex_cartesian2linear_padded(
-             coord[0],
-             (coord[1] - 1 + ek_parameters_gpu->dim_y) %
-                 ek_parameters_gpu->dim_y,
-             coord[2])]);
-    ek_parameters_gpu->electric_field[3 * index + 2] =
-        -0.5f * agrid_inv *
-        (potential[rhoindex_cartesian2linear_padded(
-             coord[0], coord[1], (coord[2] + 1) % ek_parameters_gpu->dim_z)] -
-         potential[rhoindex_cartesian2linear_padded(
-             coord[0], coord[1],
-             (coord[2] - 1 + ek_parameters_gpu->dim_z) %
-                 ek_parameters_gpu->dim_z)]);
-  }
-}
-
-__global__ void ek_clear_boundary_densities(LB_nodes_gpu lbnode) {
-
-  unsigned int index = ek_getThreadIndex();
-
-  if (index < ek_parameters_gpu->number_of_nodes) {
-    if (lbnode.boundary[index]) {
-      for (int i = 0; i < ek_parameters_gpu->number_of_species; i++) {
-        ek_parameters_gpu->rho[i][index] = 0.0f;
-      }
-    }
-  }
-}
-
-__global__ void ek_calculate_system_charge(float *charge_gpu) {
-
-  unsigned int index = ek_getThreadIndex();
-
-  if (index < ek_parameters_gpu->number_of_nodes) {
-    for (int i = 0; i < ek_parameters_gpu->number_of_species; i++) {
-      atomicAdd(charge_gpu, ek_parameters_gpu->rho[i][index] *
-                                ek_parameters_gpu->valency[i]);
-    }
-  }
-}
-
-// TODO delete ?? (it has the previous step setting now)
-// This is not compatible with external LB force_densities!
-__global__ void ek_clear_node_force(LB_node_force_density_gpu node_f) {
-
-  unsigned int index = ek_getThreadIndex();
-
-  if (index < ek_parameters_gpu->number_of_nodes) {
-    ek_parameters_gpu->lb_force_density_previous[index] =
-        node_f.force_density[index][0];
-    ek_parameters_gpu
-        ->lb_force_density_previous[ek_parameters_gpu->number_of_nodes +
-                                    index] = node_f.force_density[index][1];
-    ek_parameters_gpu
-        ->lb_force_density_previous[2 * ek_parameters_gpu->number_of_nodes +
-                                    index] = node_f.force_density[index][2];
-
-    node_f.force_density[index] = {};
-  }
-}
-
-void ek_calculate_electrostatic_coupling() {
-
-  if ((!ek_parameters.es_coupling) || (!ek_initialized))
-    return;
-
-  auto device_particles = gpu_get_particle_pointer();
-  dim3 dim_grid = calculate_dim_grid(
-      static_cast<unsigned>(device_particles.size()), 4, threads_per_block);
-
-  KERNELCALL(ek_spread_particle_force, dim_grid, threads_per_block,
-             device_particles.data(), device_particles.size(),
-             gpu_get_particle_force_pointer(), ek_lbparameters_gpu);
-}
-
-void ek_integrate_electrostatics() {
-
-  dim3 dim_grid =
-      calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(ek_gather_species_charge_density, dim_grid, threads_per_block);
-
-  if (ek_parameters.es_coupling) {
-    cuda_safe_mem(cudaMemcpy(
-        ek_parameters.charge_potential_buffer, ek_parameters.charge_potential,
-        sizeof(cufftComplex) * ek_parameters.dim_z * ek_parameters.dim_y *
-            (ek_parameters.dim_x / 2 + 1),
-        cudaMemcpyDeviceToDevice));
-    electrostatics->calculatePotential(
-        (cufftComplex *)ek_parameters.charge_potential_buffer);
-    KERNELCALL(ek_calc_electric_field, dim_grid, threads_per_block,
-               ek_parameters.charge_potential_buffer);
-  }
-
-  auto device_particles = gpu_get_particle_pointer();
-  // TODO make it an if number_of_charged_particles != 0
-  if (not device_particles.empty()) {
-    dim_grid = calculate_dim_grid(
-        static_cast<unsigned>(device_particles.size()), 4, threads_per_block);
-
-    particle_data_gpu = device_particles.data();
-
-    KERNELCALL(ek_gather_particle_charge_density, dim_grid, threads_per_block,
-               particle_data_gpu, device_particles.size(), ek_lbparameters_gpu);
-  }
-
-  electrostatics->calculatePotential();
-}
-
-void ek_integrate() {
-  dim3 dim_grid =
-      calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block);
-
-  /* Clears the force on the nodes and must be called before fluxes are
-     calculated, since in the reaction set up the previous-step LB force is
-     added to the flux
-     (in ek_calculate_quantities / ek_displacement), which is copied in this
-     routine */
-
-  // KERNELCALL( ek_clear_node_force, dim_grid, threads_per_block, node_f );
-
-  /* Integrate diffusion-advection */
-  for (unsigned i = 0; i < ek_parameters.number_of_species; i++) {
-    KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block);
-    KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, i,
-               *current_nodes, node_f, ek_lbparameters_gpu,
-               philox_counter.value());
-
-    KERNELCALL(ek_propagate_densities, dim_grid, threads_per_block, i);
-  }
-
-  /* Integrate electrostatics */
-  ek_integrate_electrostatics();
-
-  /* Integrate Navier-Stokes */
-  lb_integrate_GPU();
-
-  philox_counter.increment();
-}
-
-#ifdef EK_BOUNDARIES
-void ek_gather_wallcharge_species_density(float *wallcharge_species_density,
-                                          int wallcharge_species) {
-  if (wallcharge_species != -1) {
-    cuda_safe_mem(cudaMemcpy(
-        wallcharge_species_density, ek_parameters.rho[wallcharge_species],
-        ek_parameters.number_of_nodes * sizeof(float), cudaMemcpyDeviceToHost));
-  }
-}
-void ek_init_species_density_wallcharge(float *wallcharge_species_density,
-                                        int wallcharge_species) {
-  dim3 dim_grid =
-      calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(ek_clear_boundary_densities, dim_grid, threads_per_block,
-             *current_nodes);
-
-  if (wallcharge_species != -1) {
-    cuda_safe_mem(cudaMemcpy(
-        ek_parameters.rho[wallcharge_species], wallcharge_species_density,
-        ek_parameters.number_of_nodes * sizeof(float), cudaMemcpyHostToDevice));
-  }
-}
-#endif
-
-void ek_init_species(int species) {
-  if (!ek_initialized) {
-    ek_init();
-  }
-
-  if (ek_parameters.species_index[species] == -1) {
-    ek_parameters.species_index[species] =
-        static_cast<int>(ek_parameters.number_of_species);
-    ek_parameters.number_of_species++;
-
-    cuda_safe_mem(cudaMalloc(
-        (void **)&ek_parameters.rho[ek_parameters.species_index[species]],
-        ek_parameters.number_of_nodes * sizeof(float)));
-
-    ek_parameters.density[ek_parameters.species_index[species]] = 0.0;
-    ek_parameters.D[ek_parameters.species_index[species]] = 0.0;
-    ek_parameters.valency[ek_parameters.species_index[species]] = 0.0;
-    ek_parameters.ext_force_density[0][ek_parameters.species_index[species]] =
-        0.0;
-    ek_parameters.ext_force_density[1][ek_parameters.species_index[species]] =
-        0.0;
-    ek_parameters.ext_force_density[2][ek_parameters.species_index[species]] =
-        0.0;
-    ek_parameters.d[ek_parameters.species_index[species]] =
-        ek_parameters.D[ek_parameters.species_index[species]] /
-        (1.0f + 2.0f * sqrt(2.0f));
-  }
-}
-
-int ek_init() {
-  if (ek_parameters.agrid < 0.0 || ek_parameters.viscosity < 0.0 ||
-      ek_parameters.T < 0.0 || ek_parameters.prefactor < 0.0) {
-
-    fprintf(stderr, "ERROR: invalid agrid, viscosity, T or prefactor\n");
-
-    return 1;
-  }
-
-  if (!ek_initialized) {
-    for (auto &val : ek_parameters.species_index) {
-      val = -1;
-    }
-
-    if (lattice_switch != ActiveLB::NONE) {
-      fprintf(stderr,
-              "ERROR: Electrokinetics automatically initializes the LB on the "
-              "GPU and can therefore not be used in conjunction with LB.\n");
-      fprintf(stderr, "ERROR: Please run either electrokinetics or LB.\n");
-
-      return 1;
-    }
-
-    lattice_switch = ActiveLB::GPU;
-    ek_initialized = true;
-
-    lbpar_gpu.agrid = ek_parameters.agrid;
-    lbpar_gpu.viscosity = 1.0;      // dummy values (real initialization later)
-    lbpar_gpu.bulk_viscosity = 1.0; // dummy values (real initialization later)
-    lb_lbcoupling_set_gamma(ek_parameters.friction);
-
-    // Convert the density (given in MD units) to LB units
-    lbpar_gpu.rho =
-        (ek_parameters.lb_density < 0.0)
-            ? 1.0f
-            : ek_parameters.lb_density * Utils::int_pow<3>(lbpar_gpu.agrid);
-
-    lbpar_gpu.is_TRT = true;
-
-    lb_reinit_parameters_gpu();
-    auto const time_step = static_cast<float>(get_time_step());
-    lbpar_gpu.viscosity =
-        ek_parameters.viscosity * time_step / Utils::sqr(lbpar_gpu.agrid);
-    lbpar_gpu.bulk_viscosity =
-        ek_parameters.bulk_viscosity * time_step / Utils::sqr(lbpar_gpu.agrid);
-
-    lbpar_gpu.external_force_density =
-        ek_parameters.lb_ext_force_density[0] != 0.f ||
-        ek_parameters.lb_ext_force_density[1] != 0.f ||
-        ek_parameters.lb_ext_force_density[2] != 0.f;
-    lbpar_gpu.ext_force_density =
-        Utils::Vector3f(ek_parameters.lb_ext_force_density) *
-        Utils::sqr(lbpar_gpu.agrid * time_step);
-
-    lb_reinit_parameters_gpu();
-    lb_init_gpu();
-
-    ek_parameters.time_step = time_step;
-    ek_parameters.dim_x = lbpar_gpu.dim[0];
-    ek_parameters.dim_x_padded = (ek_parameters.dim_x / 2 + 1) * 2;
-    ek_parameters.dim_y = lbpar_gpu.dim[1];
-    ek_parameters.dim_z = lbpar_gpu.dim[2];
-    ek_parameters.number_of_nodes =
-        ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z;
-
-    cuda_safe_mem(
-        cudaMalloc((void **)&ek_parameters.j,
-                   ek_parameters.number_of_nodes * 13 * sizeof(float)));
-#ifdef EK_DEBUG
-    cuda_safe_mem(
-        cudaMalloc((void **)&ek_parameters.j_fluc,
-                   ek_parameters.number_of_nodes * 13 * sizeof(float)));
-#endif
-
-    cuda_safe_mem(cudaMemcpyToSymbol(ek_parameters_gpu, &ek_parameters,
-                                     sizeof(EKParameters)));
-
-    lb_get_para_pointer(&ek_lbparameters_gpu);
-
-    cuda_safe_mem(
-        cudaMalloc((void **)&ek_parameters.lb_force_density_previous,
-                   ek_parameters.number_of_nodes * 3 * sizeof(float)));
-
-    if (ek_parameters.es_coupling) {
-      cuda_safe_mem(cudaMalloc((void **)&ek_parameters.charge_potential_buffer,
-                               sizeof(cufftComplex) * ek_parameters.dim_z *
-                                   ek_parameters.dim_y *
-                                   (ek_parameters.dim_x / 2 + 1)));
-      cuda_safe_mem(
-          cudaMalloc((void **)&ek_parameters.electric_field,
-                     ek_parameters.number_of_nodes * 3 * sizeof(float)));
-    }
-
-    cuda_safe_mem(cudaMalloc((void **)&charge_gpu, sizeof(float)));
-
-    if (cudaGetLastError() != cudaSuccess) {
-      fprintf(stderr, "ERROR: Failed to allocate\n");
-      return 1;
-    }
-
-    cudaMallocHost((void **)&ek_parameters.node_is_catalyst,
-                   sizeof(char) * ek_parameters.dim_z * ek_parameters.dim_y *
-                       ek_parameters.dim_x);
-
-    if (cudaGetLastError() != cudaSuccess) {
-      fprintf(stderr, "ERROR: Failed to allocate\n");
-      return 1;
-    }
-
-    // initialize electrostatics
-    delete electrostatics;
-
-    FdElectrostatics::InputParameters es_parameters = {
-        ek_parameters.prefactor, int(ek_parameters.dim_x),
-        int(ek_parameters.dim_y), int(ek_parameters.dim_z),
-        ek_parameters.agrid};
-    try {
-      electrostatics = new FdElectrostatics(es_parameters, stream[0]);
-    } catch (std::string e) {
-      std::cerr << "Error in initialization of electrokinetics electrostatics "
-                   "solver: "
-                << e << std::endl;
-      return 1;
-    }
-
-    ek_parameters.charge_potential = electrostatics->getGrid().grid;
-    cuda_safe_mem(cudaMemcpyToSymbol(ek_parameters_gpu, &ek_parameters,
-                                     sizeof(EKParameters)));
-
-    // clear initial LB force and finish up
-    dim3 dim_grid = calculate_dim_grid(
-        ek_parameters.dim_z * ek_parameters.dim_y * ek_parameters.dim_x, 4,
-        threads_per_block);
-    KERNELCALL(ek_clear_node_force, dim_grid, threads_per_block, node_f);
-
-    ek_initialized = true;
-  } else {
-    auto const not_close = [](float a, float b) {
-      return std::abs(a - b) > std::numeric_limits<float>::epsilon();
-    };
-    if (not_close(lbpar_gpu.agrid, ek_parameters.agrid) ||
-        not_close(lbpar_gpu.viscosity, ek_parameters.viscosity *
-                                           ek_parameters.time_step /
-                                           Utils::sqr(ek_parameters.agrid)) ||
-        not_close(lbpar_gpu.bulk_viscosity,
-                  ek_parameters.bulk_viscosity * ek_parameters.time_step /
-                      Utils::sqr(ek_parameters.agrid)) ||
-        not_close(static_cast<float>(lb_lbcoupling_get_gamma()),
-                  ek_parameters.friction) ||
-        not_close(lbpar_gpu.rho, ek_parameters.lb_density *
-                                     Utils::int_pow<3>(ek_parameters.agrid))) {
-      fprintf(stderr,
-              "ERROR: The LB parameters on the GPU cannot be reinitialized.\n");
-
-      return 1;
-    }
-    cuda_safe_mem(cudaMemcpyToSymbol(ek_parameters_gpu, &ek_parameters,
-                                     sizeof(EKParameters)));
-
-    dim3 dim_grid =
-        calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block);
-
-    KERNELCALL(ek_init_species_density_homogeneous, dim_grid,
-               threads_per_block);
-
-#ifdef EK_BOUNDARIES
-    LBBoundaries::lb_init_boundaries();
-    lb_get_boundary_force_pointer(&ek_lb_boundary_force);
-
-    cuda_safe_mem(cudaMemcpyToSymbol(ek_parameters_gpu, &ek_parameters,
-                                     sizeof(EKParameters)));
-#endif
-
-    ek_integrate_electrostatics();
-  }
-  return 0;
-}
-
-unsigned int ek_calculate_boundary_mass() {
-  std::vector<unsigned int> bound_array(lbpar_gpu.number_of_nodes);
-
-  lb_get_boundary_flags_GPU(bound_array.data());
-
-  unsigned int boundary_node_number = 0;
-
-  for (unsigned j = 0; j < ek_parameters.number_of_nodes; j++)
-    if (bound_array[j] != 0)
-      boundary_node_number++;
-
-  return boundary_node_number;
-}
-
-void rhoindex_linear2cartesian_host(unsigned int index, unsigned int *coord) {
-
-  coord[0] = index % ek_parameters.dim_x;
-  index /= ek_parameters.dim_x;
-  coord[1] = index % ek_parameters.dim_y;
-  coord[2] = index / ek_parameters.dim_y;
-}
-
-unsigned int jindex_cartesian2linear_host(unsigned int x, unsigned int y,
-                                          unsigned int z, unsigned int c) {
-  x = (x + ek_parameters.dim_x) % ek_parameters.dim_x;
-  y = (y + ek_parameters.dim_y) % ek_parameters.dim_y;
-  z = (z + ek_parameters.dim_z) % ek_parameters.dim_z;
-
-  return c * ek_parameters.number_of_nodes +
-         z * ek_parameters.dim_y * ek_parameters.dim_x +
-         y * ek_parameters.dim_x + x;
-}
-
-unsigned int jindex_getByRhoLinear_host(unsigned int rho_index,
-                                        unsigned int c) {
-
-  return c * ek_parameters.number_of_nodes + rho_index;
-}
-
-unsigned int rhoindex_cartesian2linear_host(unsigned int x, unsigned int y,
-                                            unsigned int z) {
-
-  return z * ek_parameters.dim_y * ek_parameters.dim_x +
-         y * ek_parameters.dim_x + x;
-}
-
-int ek_lb_print_vtk_velocity(char *filename) {
-
-  FILE *fp = fopen(filename, "w");
-
-  if (fp == nullptr) {
-    return 1;
-  }
-
-  std::vector<LB_rho_v_pi_gpu> host_values(lbpar_gpu.number_of_nodes);
-  lb_get_values_GPU(host_values.data());
-  auto const lattice_speed = lbpar_gpu.agrid / lbpar_gpu.tau;
-  fprintf(fp, "\
-# vtk DataFile Version 2.0\n\
-velocity\n\
-ASCII\n\
-\n\
-DATASET STRUCTURED_POINTS\n\
-DIMENSIONS %u %u %u\n\
-ORIGIN %f %f %f\n\
-SPACING %f %f %f\n\
-\nPOINT_DATA %u\n\
-SCALARS velocity float 3\n\
-LOOKUP_TABLE default\n",
-          lbpar_gpu.dim[0], lbpar_gpu.dim[1], lbpar_gpu.dim[2],
-          lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid * 0.5f,
-          lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid, lbpar_gpu.agrid,
-          lbpar_gpu.agrid, lbpar_gpu.number_of_nodes);
-
-  for (unsigned i = 0; i < lbpar_gpu.number_of_nodes; i++) {
-    fprintf(fp, "%e %e %e\n", host_values[i].v[0] * lattice_speed,
-            host_values[i].v[1] * lattice_speed,
-            host_values[i].v[2] * lattice_speed);
-  }
-
-  fclose(fp);
-
-  return 0;
-}
-
-int ek_lb_print_vtk_density(char *filename) {
-
-  FILE *fp = fopen(filename, "w");
-
-  if (fp == nullptr) {
-    return 1;
-  }
-
-  std::vector<LB_rho_v_pi_gpu> host_values(lbpar_gpu.number_of_nodes);
-  lb_get_values_GPU(host_values.data());
-
-  fprintf(fp, "\
-# vtk DataFile Version 2.0\n\
-density_lb\n\
-ASCII\n\
-\n\
-DATASET STRUCTURED_POINTS\n\
-DIMENSIONS %u %u %u\n\
-ORIGIN %f %f %f\n\
-SPACING %f %f %f\n\
-\n\
-POINT_DATA %u\n\
-SCALARS density_lb float 1\n\
-LOOKUP_TABLE default\n",
-          lbpar_gpu.dim[0], lbpar_gpu.dim[1], lbpar_gpu.dim[2],
-          lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid * 0.5f,
-          lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid, lbpar_gpu.agrid,
-          lbpar_gpu.agrid, lbpar_gpu.number_of_nodes);
-  auto const agrid = lb_lbfluid_get_agrid();
-  for (unsigned i = 0; i < lbpar_gpu.number_of_nodes; i++) {
-    fprintf(fp, "%e\n", host_values[i].rho / agrid / agrid / agrid);
-  }
-
-  fclose(fp);
-
-  return 0;
-}
-
-int ek_print_vtk_density(int species, char *filename) {
-
-  if (ek_parameters.species_index[species] == -1) {
-    return 1;
-  }
-
-  FILE *fp = fopen(filename, "w");
-
-  if (fp == nullptr) {
-    return 1;
-  }
-
-  std::vector<float> densities(ek_parameters.number_of_nodes);
-
-  cuda_safe_mem(cudaMemcpy(
-      densities.data(), ek_parameters.rho[ek_parameters.species_index[species]],
-      densities.size() * sizeof(float), cudaMemcpyDeviceToHost));
-
-  fprintf(fp, "\
-# vtk DataFile Version 2.0\n\
-density_%d\n\
-ASCII\n\
-\n\
-DATASET STRUCTURED_POINTS\n\
-DIMENSIONS %u %u %u\n\
-ORIGIN %f %f %f\n\
-SPACING %f %f %f\n\
-\n\
-POINT_DATA %u\n\
-SCALARS density_%d float 1\n\
-LOOKUP_TABLE default\n",
-          species, ek_parameters.dim_x, ek_parameters.dim_y,
-          ek_parameters.dim_z, ek_parameters.agrid * 0.5f,
-          ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f,
-          ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid,
-          ek_parameters.number_of_nodes, species);
-
-  for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) {
-    fprintf(fp, "%e\n", densities[i] / Utils::int_pow<3>(ek_parameters.agrid));
-  }
-
-  fclose(fp);
-
-  return 0;
-}
-
-int ek_node_get_density(int species, int x, int y, int z, double *density) {
-
-  if (ek_parameters.species_index[species] == -1) {
-    return 1;
-  }
-
-  std::vector<float> densities(ek_parameters.number_of_nodes);
-
-  cuda_safe_mem(cudaMemcpy(
-      densities.data(), ek_parameters.rho[ek_parameters.species_index[species]],
-      densities.size() * sizeof(float), cudaMemcpyDeviceToHost));
-
-  auto const index =
-      static_cast<unsigned>(z) * ek_parameters.dim_y * ek_parameters.dim_x +
-      static_cast<unsigned>(y) * ek_parameters.dim_x + static_cast<unsigned>(x);
-  *density = densities[index] / Utils::int_pow<3>(ek_parameters.agrid);
-
-  return 0;
-}
-
-int ek_node_get_flux(int species, int x, int y, int z, double *flux) {
-
-  if (ek_parameters.species_index[species] == -1) {
-    return 1;
-  }
-
-  float flux_local_cartesian[3]; // temporary variable for converting fluxes
-                                 // into Cartesian coordinates for output
-  unsigned int coord[3];
-
-  coord[0] = static_cast<unsigned>(x);
-  coord[1] = static_cast<unsigned>(y);
-  coord[2] = static_cast<unsigned>(z);
-
-  std::vector<float> fluxes(ek_parameters.number_of_nodes * 13);
-
-  dim3 dim_grid =
-      calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block);
-  KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block,
-             static_cast<unsigned>(ek_parameters.species_index[species]),
-             *current_nodes, node_f, ek_lbparameters_gpu,
-             philox_counter.value());
-  reset_LB_force_densities_GPU(false);
-
-#ifdef EK_BOUNDARIES
-  KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, *current_nodes);
-#endif
-
-  cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j,
-                           fluxes.size() * sizeof(float),
-                           cudaMemcpyDeviceToHost));
-
-  auto const i = rhoindex_cartesian2linear_host(coord[0], coord[1], coord[2]);
-
-  flux_local_cartesian[0] =
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U00)];
-
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UD0)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)];
-
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)];
-
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1],
-                                                 coord[2], EK_LINK_D00 - 13)];
-
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1,
-                                                 coord[2], EK_LINK_DD0 - 13)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1,
-                                                 coord[2], EK_LINK_DU0 - 13)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)];
-
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)];
-  flux_local_cartesian[0] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)];
-
-  flux_local_cartesian[1] =
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0U0)];
-
-  flux_local_cartesian[1] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)];
-  flux_local_cartesian[1] -=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UD0)];
-  flux_local_cartesian[1] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)];
-  flux_local_cartesian[1] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)];
-
-  flux_local_cartesian[1] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)];
-  flux_local_cartesian[1] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)];
-  flux_local_cartesian[1] -=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)];
-  flux_local_cartesian[1] -=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)];
-
-  flux_local_cartesian[1] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(coord[0], coord[1] - 1,
-                                                 coord[2], EK_LINK_0D0 - 13)];
-
-  flux_local_cartesian[1] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1,
-                                                 coord[2], EK_LINK_DD0 - 13)];
-  flux_local_cartesian[1] -=
-      0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1,
-                                                 coord[2], EK_LINK_DU0 - 13)];
-  flux_local_cartesian[1] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)];
-  flux_local_cartesian[1] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)];
-
-  flux_local_cartesian[1] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)];
-  flux_local_cartesian[1] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)];
-  flux_local_cartesian[1] -=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)];
-  flux_local_cartesian[1] -=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)];
-
-  flux_local_cartesian[2] =
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_00U)];
-
-  flux_local_cartesian[2] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)];
-  flux_local_cartesian[2] -=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)];
-  flux_local_cartesian[2] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)];
-  flux_local_cartesian[2] -=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)];
-
-  flux_local_cartesian[2] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)];
-  flux_local_cartesian[2] -=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)];
-  flux_local_cartesian[2] +=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)];
-  flux_local_cartesian[2] -=
-      0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)];
-
-  flux_local_cartesian[2] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0], coord[1], coord[2] - 1, EK_LINK_00D - 13)];
-
-  flux_local_cartesian[2] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)];
-  flux_local_cartesian[2] -=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)];
-  flux_local_cartesian[2] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)];
-  flux_local_cartesian[2] -=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)];
-
-  flux_local_cartesian[2] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)];
-  flux_local_cartesian[2] -=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)];
-  flux_local_cartesian[2] +=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)];
-  flux_local_cartesian[2] -=
-      0.5f * fluxes[jindex_cartesian2linear_host(
-                 coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)];
-
-  flux[0] = flux_local_cartesian[0] /
-            (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid));
-  flux[1] = flux_local_cartesian[1] /
-            (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid));
-  flux[2] = flux_local_cartesian[2] /
-            (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid));
-
-  return 0;
-}
-
-int ek_node_set_density(int species, int x, int y, int z, double density) {
-
-  if (ek_parameters.species_index[species] == -1) {
-    return 1;
-  }
-
-  auto const index =
-      static_cast<unsigned>(z) * ek_parameters.dim_y * ek_parameters.dim_x +
-      static_cast<unsigned>(y) * ek_parameters.dim_x + static_cast<unsigned>(x);
-  float num_particles =
-      static_cast<float>(density) * Utils::int_pow<3>(ek_parameters.agrid);
-
-  cuda_safe_mem(cudaMemcpy(
-      &ek_parameters.rho[ek_parameters.species_index[species]][index],
-      &num_particles, sizeof(float), cudaMemcpyHostToDevice));
-
-  return 0;
-}
-
-int ek_print_vtk_flux(int species, char *filename) {
-
-  if (ek_parameters.species_index[species] == -1) {
-    return 1;
-  }
-
-  FILE *fp = fopen(filename, "w");
-
-  if (fp == nullptr) {
-    return 1;
-  }
-
-  float flux_local_cartesian[3]; // temporary variable for converting fluxes
-                                 // into Cartesian coordinates for output
-
-  unsigned int coord[3];
-
-  std::vector<float> fluxes(ek_parameters.number_of_nodes * 13);
-
-  dim3 dim_grid =
-      calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block);
-  KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block,
-             static_cast<unsigned>(ek_parameters.species_index[species]),
-             *current_nodes, node_f, ek_lbparameters_gpu,
-             philox_counter.value());
-  reset_LB_force_densities_GPU(false);
-
-#ifdef EK_BOUNDARIES
-  KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, *current_nodes);
-#endif
-
-  cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j,
-                           fluxes.size() * sizeof(float),
-                           cudaMemcpyDeviceToHost));
-
-  fprintf(fp, "\
-# vtk DataFile Version 2.0\n\
-flux_%d\n\
-ASCII\n\
-\n\
-DATASET STRUCTURED_POINTS\n\
-DIMENSIONS %u %u %u\n\
-ORIGIN %f %f %f\n\
-SPACING %f %f %f\n\
-\n\
-POINT_DATA %u\n\
-SCALARS flux_%d float 3\n\
-LOOKUP_TABLE default\n",
-          species, ek_parameters.dim_x, ek_parameters.dim_y,
-          ek_parameters.dim_z, ek_parameters.agrid * 0.5f,
-          ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f,
-          ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid,
-          ek_parameters.number_of_nodes, species);
-
-  for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) {
-    rhoindex_linear2cartesian_host(i, coord);
-
-    flux_local_cartesian[0] =
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U00)];
-
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UD0)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)];
-
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)];
-
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1],
-                                                   coord[2], EK_LINK_D00 - 13)];
-
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1,
-                                                   coord[2], EK_LINK_DD0 - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1,
-                                                   coord[2], EK_LINK_DU0 - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)];
-
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)];
-
-    flux_local_cartesian[1] =
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0U0)];
-
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UD0)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)];
-
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)];
-
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0], coord[1] - 1,
-                                                   coord[2], EK_LINK_0D0 - 13)];
-
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1,
-                                                   coord[2], EK_LINK_DD0 - 13)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1,
-                                                   coord[2], EK_LINK_DU0 - 13)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)];
-
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)];
-
-    flux_local_cartesian[2] =
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_00U)];
-
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)];
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)];
-
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)];
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)];
-
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0], coord[1], coord[2] - 1, EK_LINK_00D - 13)];
-
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)];
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)];
-
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)];
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)];
-
-    fprintf(fp, "%e %e %e\n",
-            flux_local_cartesian[0] /
-                (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid)),
-            flux_local_cartesian[1] /
-                (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid)),
-            flux_local_cartesian[2] /
-                (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid)));
-  }
-
-  fclose(fp);
-
-  return 0;
-}
-
-int ek_print_vtk_flux_fluc(int species, char *filename) {
-#ifndef EK_DEBUG
-  return 1;
-#else
-  if (ek_parameters.species_index[species] == -1) {
-    return 1;
-  }
-
-  FILE *fp = fopen(filename, "w");
-  float flux_local_cartesian[3]; // temporary variable for converting fluxes
-                                 // into cartesian coordinates for output
-
-  unsigned int coord[3];
-
-  if (fp == nullptr) {
-    return 1;
-  }
-
-  std::vector<float> fluxes(ek_parameters.number_of_nodes * 13);
-
-  dim3 dim_grid =
-      calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block);
-  KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block,
-             static_cast<unsigned>(ek_parameters.species_index[species]),
-             *current_nodes, node_f, ek_lbparameters_gpu,
-             philox_counter.value());
-  reset_LB_force_densities_GPU(false);
-
-#ifdef EK_BOUNDARIES
-  KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, *current_nodes);
-#endif
-
-  cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j_fluc,
-                           fluxes.size() * sizeof(float),
-                           cudaMemcpyDeviceToHost));
-
-  fprintf(fp, "\
-# vtk DataFile Version 2.0\n\
-flux_fluc_%d\n\
-ASCII\n\
-\n\
-DATASET STRUCTURED_POINTS\n\
-DIMENSIONS %u %u %u\n\
-ORIGIN %f %f %f\n\
-SPACING %f %f %f\n\
-\n\
-POINT_DATA %u\n\
-SCALARS flux_fluc_%d float 4\n\
-LOOKUP_TABLE default\n",
-          species, ek_parameters.dim_x, ek_parameters.dim_y,
-          ek_parameters.dim_z, ek_parameters.agrid * 0.5f,
-          ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f,
-          ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid,
-          ek_parameters.number_of_nodes, species);
-
-  for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) {
-
-    float flux_local_linksum = 0;
-    rhoindex_linear2cartesian_host(i, coord);
-
-    flux_local_cartesian[0] =
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U00)];
-
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UD0)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)];
-
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)];
-
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1],
-                                                   coord[2], EK_LINK_D00 - 13)];
-
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1,
-                                                   coord[2], EK_LINK_DD0 - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1,
-                                                   coord[2], EK_LINK_DU0 - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)];
-
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)];
-    flux_local_cartesian[0] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)];
-
-    flux_local_cartesian[1] =
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0U0)];
-
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UD0)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)];
-
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)];
-
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0], coord[1] - 1,
-                                                   coord[2], EK_LINK_0D0 - 13)];
-
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1,
-                                                   coord[2], EK_LINK_DD0 - 13)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1,
-                                                   coord[2], EK_LINK_DU0 - 13)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)];
-
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)];
-    flux_local_cartesian[1] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)];
-    flux_local_cartesian[1] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)];
-
-    flux_local_cartesian[2] =
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_00U)];
-
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)];
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)];
-
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)];
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)];
-
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0], coord[1], coord[2] - 1, EK_LINK_00D - 13)];
-
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)];
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)];
-
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)];
-    flux_local_cartesian[2] +=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)];
-    flux_local_cartesian[2] -=
-        0.5f * fluxes[jindex_cartesian2linear_host(
-                   coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)];
-
-    for (int j = 0; j < 13; j++) {
-      flux_local_linksum += fluxes[jindex_getByRhoLinear_host(i, j)];
-    }
-
-    fprintf(
-        fp, "%e %e %e %e\n",
-        flux_local_cartesian[0] / (ek_parameters.agrid * ek_parameters.agrid),
-        flux_local_cartesian[1] / (ek_parameters.agrid * ek_parameters.agrid),
-        flux_local_cartesian[2] / (ek_parameters.agrid * ek_parameters.agrid),
-        flux_local_linksum / (ek_parameters.agrid * ek_parameters.agrid));
-  }
-
-  fclose(fp);
-
-  return 0;
-#endif // EK_DEBUG
-}
-
-int ek_print_vtk_flux_link(int species, char *filename) {
-
-  if (ek_parameters.species_index[species] == -1) {
-    return 1;
-  }
-
-  FILE *fp = fopen(filename, "w");
-
-  if (fp == nullptr) {
-    return 1;
-  }
-
-  unsigned int coord[3];
-
-  std::vector<float> fluxes(ek_parameters.number_of_nodes * 13);
-
-  dim3 dim_grid =
-      calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block);
-  KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block,
-             static_cast<unsigned>(ek_parameters.species_index[species]),
-             *current_nodes, node_f, ek_lbparameters_gpu,
-             philox_counter.value());
-  reset_LB_force_densities_GPU(false);
-
-#ifdef EK_BOUNDARIES
-  KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, *current_nodes);
-#endif
-
-  cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j,
-                           fluxes.size() * sizeof(float),
-                           cudaMemcpyDeviceToHost));
-
-  fprintf(fp, "\
-# vtk DataFile Version 2.0\n\
-flux_link_%d\n\
-ASCII\n\
-\n\
-DATASET STRUCTURED_POINTS\n\
-DIMENSIONS %u %u %u\n\
-ORIGIN %f %f %f\n\
-SPACING %f %f %f\n\
-\n\
-POINT_DATA %u\n\
-SCALARS flux_link_%d float 13\n\
-LOOKUP_TABLE default\n",
-          species, ek_parameters.dim_x, ek_parameters.dim_y,
-          ek_parameters.dim_z, ek_parameters.agrid * 0.5f,
-          ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f,
-          ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid,
-          ek_parameters.number_of_nodes, species);
-
-  for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) {
-    rhoindex_linear2cartesian_host(i, coord);
-
-    fprintf(fp, "%e %e %e %e %e %e %e %e %e %e %e %e %e\n",
-            fluxes[jindex_getByRhoLinear_host(i, 0)],
-            fluxes[jindex_getByRhoLinear_host(i, 1)],
-            fluxes[jindex_getByRhoLinear_host(i, 2)],
-            fluxes[jindex_getByRhoLinear_host(i, 3)],
-            fluxes[jindex_getByRhoLinear_host(i, 4)],
-            fluxes[jindex_getByRhoLinear_host(i, 5)],
-            fluxes[jindex_getByRhoLinear_host(i, 6)],
-            fluxes[jindex_getByRhoLinear_host(i, 7)],
-            fluxes[jindex_getByRhoLinear_host(i, 8)],
-            fluxes[jindex_getByRhoLinear_host(i, 9)],
-            fluxes[jindex_getByRhoLinear_host(i, 10)],
-            fluxes[jindex_getByRhoLinear_host(i, 11)],
-            fluxes[jindex_getByRhoLinear_host(i, 12)]);
-  }
-
-  fclose(fp);
-
-  return 0;
-}
-
-int ek_node_get_potential(int x, int y, int z, double *potential) {
-  auto const index = static_cast<unsigned>(z) * ek_parameters.dim_y *
-                         ek_parameters.dim_x_padded +
-                     static_cast<unsigned>(y) * ek_parameters.dim_x_padded +
-                     static_cast<unsigned>(x);
-  float pot;
-
-  cuda_safe_mem(cudaMemcpy(&pot, &ek_parameters.charge_potential[index],
-                           1 * sizeof(cufftReal), cudaMemcpyDeviceToHost));
-
-  *potential = pot;
-  return 0;
-}
-
-int ek_print_vtk_potential(char *filename) {
-
-  FILE *fp = fopen(filename, "w");
-
-  if (fp == nullptr) {
-    return 1;
-  }
-
-  std::vector<cufftReal> potential(ek_parameters.number_of_nodes);
-
-  cuda_safe_mem(cudaMemcpy2D(
-      potential.data(), ek_parameters.dim_x * sizeof(cufftReal),
-      ek_parameters.charge_potential,
-      ek_parameters.dim_x_padded * sizeof(cufftReal),
-      ek_parameters.dim_x * sizeof(cufftReal),
-      ek_parameters.dim_z * ek_parameters.dim_y, cudaMemcpyDeviceToHost));
-
-  fprintf(fp, "\
-# vtk DataFile Version 2.0\n\
-potential\n\
-ASCII\n\
-\n\
-DATASET STRUCTURED_POINTS\n\
-DIMENSIONS %u %u %u\n\
-ORIGIN %f %f %f\n\
-SPACING %f %f %f\n\
-\n\
-POINT_DATA %u\n\
-SCALARS potential float 1\n\
-LOOKUP_TABLE default\n",
-          ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z,
-          ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f,
-          ek_parameters.agrid * 0.5f, ek_parameters.agrid, ek_parameters.agrid,
-          ek_parameters.agrid, ek_parameters.number_of_nodes);
-
-  for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) {
-    fprintf(fp, "%e\n", potential[i]);
-  }
-
-  fclose(fp);
-
-  return 0;
-}
-
-int ek_print_vtk_particle_potential(char *filename) {
-
-  FILE *fp = fopen(filename, "w");
-
-  if (fp == nullptr) {
-    return 1;
-  }
-
-  std::vector<cufftReal> potential(ek_parameters.number_of_nodes);
-
-  cuda_safe_mem(cudaMemcpy2D(
-      potential.data(), ek_parameters.dim_x * sizeof(cufftReal),
-      ek_parameters.charge_potential_buffer,
-      ek_parameters.dim_x_padded * sizeof(cufftReal),
-      ek_parameters.dim_x * sizeof(cufftReal),
-      ek_parameters.dim_z * ek_parameters.dim_y, cudaMemcpyDeviceToHost));
-
-  fprintf(fp, "\
-# vtk DataFile Version 2.0\n\
-potential\n\
-ASCII\n\
-\n\
-DATASET STRUCTURED_POINTS\n\
-DIMENSIONS %u %u %u\n\
-ORIGIN %f %f %f\n\
-SPACING %f %f %f\n\
-\n\
-POINT_DATA %u\n\
-SCALARS potential float 1\n\
-LOOKUP_TABLE default\n",
-          ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z,
-          ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f,
-          ek_parameters.agrid * 0.5f, ek_parameters.agrid, ek_parameters.agrid,
-          ek_parameters.agrid, ek_parameters.number_of_nodes);
-
-  for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) {
-    fprintf(fp, "%e\n", potential[i]);
-  }
-
-  fclose(fp);
-
-  return 0;
-}
-
-int ek_print_vtk_lbforce_density(char *filename) {
-#if !defined(VIRTUAL_SITES_INERTIALESS_TRACERS) && !defined(EK_DEBUG)
-  throw std::runtime_error("Please rebuild ESPResSo with EK_DEBUG");
-#else
-
-  FILE *fp = fopen(filename, "w");
-
-  if (fp == nullptr) {
-    return 1;
-  }
-
-  std::vector<float> lbforce_density(ek_parameters.number_of_nodes * 3);
-
-  cuda_safe_mem(cudaMemcpy(lbforce_density.data(), node_f.force_density_buf,
-                           ek_parameters.number_of_nodes * 3 * sizeof(float),
-                           cudaMemcpyDeviceToHost));
-
-  fprintf(fp, "\
-# vtk DataFile Version 2.0\n\
-lbforce\n\
-ASCII\n\
-\n\
-DATASET STRUCTURED_POINTS\n\
-DIMENSIONS %u %u %u\n\
-ORIGIN %f %f %f\n\
-SPACING %f %f %f\n\
-\n\
-POINT_DATA %u\n\
-SCALARS lbforce float 3\n\
-LOOKUP_TABLE default\n",
-          ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z,
-          ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f,
-          ek_parameters.agrid * 0.5f, ek_parameters.agrid, ek_parameters.agrid,
-          ek_parameters.agrid, ek_parameters.number_of_nodes);
-
-  auto const norm = (Utils::int_pow<2>(ek_parameters.time_step) *
-                     Utils::int_pow<4>(ek_parameters.agrid));
-  for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) {
-    fprintf(fp, "%e %e %e\n", lbforce_density[i] / norm,
-            lbforce_density[i + ek_parameters.number_of_nodes] / norm,
-            lbforce_density[i + 2 * ek_parameters.number_of_nodes] / norm);
-  }
-
-  fclose(fp);
-
-  return 0;
-#endif
-}
-
-void ek_print_parameters() {
-
-  printf("ek_parameters {\n");
-
-  printf("  float agrid = %f;\n", ek_parameters.agrid);
-  printf("  float time_step = %f;\n", ek_parameters.time_step);
-  printf("  float lb_density = %f;\n", ek_parameters.lb_density);
-  printf("  unsigned int dim_x = %d;\n", ek_parameters.dim_x);
-  printf("  unsigned int dim_y = %d;\n", ek_parameters.dim_y);
-  printf("  unsigned int dim_z = %d;\n", ek_parameters.dim_z);
-  printf("  unsigned int number_of_nodes = %d;\n",
-         ek_parameters.number_of_nodes);
-  printf("  float viscosity = %f;\n", ek_parameters.viscosity);
-  printf("  float bulk_viscosity = %f;\n", ek_parameters.bulk_viscosity);
-  printf("  float gamma_odd = %f;\n", ek_parameters.gamma_odd);
-  printf("  float gamma_even = %f;\n", ek_parameters.gamma_even);
-  printf("  float friction = %f;\n", ek_parameters.friction);
-  printf("  float T = %f;\n", ek_parameters.T);
-  printf("  float prefactor = %f;\n", ek_parameters.prefactor);
-  printf("  float lb_ext_force_density[] = {%f, %f, %f};\n",
-         ek_parameters.lb_ext_force_density[0],
-         ek_parameters.lb_ext_force_density[1],
-         ek_parameters.lb_ext_force_density[2]);
-  printf("  unsigned int number_of_species = %d;\n",
-         ek_parameters.number_of_species);
-  printf("  int reaction_species[] = {%d, %d, %d};\n",
-         ek_parameters.reaction_species[0], ek_parameters.reaction_species[1],
-         ek_parameters.reaction_species[2]);
-  printf("  float rho_reactant_reservoir = %f;\n",
-         ek_parameters.rho_reactant_reservoir);
-  printf("  float rho_product0_reservoir = %f;\n",
-         ek_parameters.rho_product0_reservoir);
-  printf("  float rho_product1_reservoir = %f;\n",
-         ek_parameters.rho_product1_reservoir);
-  printf("  float reaction_ct_rate = %f;\n", ek_parameters.reaction_ct_rate);
-  printf("  float reaction_fraction_0 = %f;\n",
-         ek_parameters.reaction_fraction_0);
-  printf("  float reaction_fraction_1 = %f;\n",
-         ek_parameters.reaction_fraction_0);
-  printf("  float* j = %p;\n", (void *)ek_parameters.j);
-
-  printf("  float* rho[] = {%p, %p, %p, %p, %p, %p, %p, %p, %p, %p};\n",
-         (void *)ek_parameters.rho[0], (void *)ek_parameters.rho[1],
-         (void *)ek_parameters.rho[2], (void *)ek_parameters.rho[3],
-         (void *)ek_parameters.rho[4], (void *)ek_parameters.rho[5],
-         (void *)ek_parameters.rho[6], (void *)ek_parameters.rho[7],
-         (void *)ek_parameters.rho[8], (void *)ek_parameters.rho[9]);
-
-  printf("  int species_index[] = {%d, %d, %d, %d, %d, %d, %d, %d, %d, %d};\n",
-         ek_parameters.species_index[0], ek_parameters.species_index[1],
-         ek_parameters.species_index[2], ek_parameters.species_index[3],
-         ek_parameters.species_index[4], ek_parameters.species_index[5],
-         ek_parameters.species_index[6], ek_parameters.species_index[7],
-         ek_parameters.species_index[8], ek_parameters.species_index[9]);
-
-  printf("  float density = {%f, %f, %f, %f, %f, %f, %f, %f, %f, %f};\n",
-         ek_parameters.density[0], ek_parameters.density[1],
-         ek_parameters.density[2], ek_parameters.density[3],
-         ek_parameters.density[4], ek_parameters.density[5],
-         ek_parameters.density[6], ek_parameters.density[7],
-         ek_parameters.density[8], ek_parameters.density[9]);
-
-  printf("  float D[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f, %f};\n",
-         ek_parameters.D[0], ek_parameters.D[1], ek_parameters.D[2],
-         ek_parameters.D[3], ek_parameters.D[4], ek_parameters.D[5],
-         ek_parameters.D[6], ek_parameters.D[7], ek_parameters.D[8],
-         ek_parameters.D[9]);
-
-  printf("  float d[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f, %f};\n",
-         ek_parameters.d[0], ek_parameters.d[1], ek_parameters.d[2],
-         ek_parameters.d[3], ek_parameters.d[4], ek_parameters.d[5],
-         ek_parameters.d[6], ek_parameters.d[7], ek_parameters.d[8],
-         ek_parameters.d[9]);
-
-  printf("  float valency[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f, %f};\n",
-         ek_parameters.valency[0], ek_parameters.valency[1],
-         ek_parameters.valency[2], ek_parameters.valency[3],
-         ek_parameters.valency[4], ek_parameters.valency[5],
-         ek_parameters.valency[6], ek_parameters.valency[7],
-         ek_parameters.valency[8], ek_parameters.valency[9]);
-
-  printf("  float ext_force_density[0][] = {%f, %f, %f, %f, %f, %f, %f, %f, "
-         "%f, %f};\n",
-         ek_parameters.ext_force_density[0][0],
-         ek_parameters.ext_force_density[0][1],
-         ek_parameters.ext_force_density[0][2],
-         ek_parameters.ext_force_density[0][3],
-         ek_parameters.ext_force_density[0][4],
-         ek_parameters.ext_force_density[0][5],
-         ek_parameters.ext_force_density[0][6],
-         ek_parameters.ext_force_density[0][7],
-         ek_parameters.ext_force_density[0][8],
-         ek_parameters.ext_force_density[0][9]);
-
-  printf("  float ext_force_density[1][] = {%f, %f, %f, %f, %f, %f, %f, %f, "
-         "%f, %f};\n",
-         ek_parameters.ext_force_density[1][0],
-         ek_parameters.ext_force_density[1][1],
-         ek_parameters.ext_force_density[1][2],
-         ek_parameters.ext_force_density[1][3],
-         ek_parameters.ext_force_density[1][4],
-         ek_parameters.ext_force_density[1][5],
-         ek_parameters.ext_force_density[1][6],
-         ek_parameters.ext_force_density[1][7],
-         ek_parameters.ext_force_density[1][8],
-         ek_parameters.ext_force_density[1][9]);
-
-  printf("  float ext_force_density[2][] = {%f, %f, %f, %f, %f, %f, %f, %f, "
-         "%f, %f};\n",
-         ek_parameters.ext_force_density[2][0],
-         ek_parameters.ext_force_density[2][1],
-         ek_parameters.ext_force_density[2][2],
-         ek_parameters.ext_force_density[2][3],
-         ek_parameters.ext_force_density[2][4],
-         ek_parameters.ext_force_density[2][5],
-         ek_parameters.ext_force_density[2][6],
-         ek_parameters.ext_force_density[2][7],
-         ek_parameters.ext_force_density[2][8],
-         ek_parameters.ext_force_density[2][9]);
-
-  printf("}\n");
-}
-
-void ek_print_lbpar() {
-
-  printf("lbpar_gpu {\n");
-
-  printf("    float rho = %f;\n", lbpar_gpu.rho);
-  printf("    float mu = %f;\n", lbpar_gpu.mu);
-  printf("    float viscosity = %f;\n", lbpar_gpu.viscosity);
-  printf("    float gamma_shear = %f;\n", lbpar_gpu.gamma_shear);
-  printf("    float gamma_bulk = %f;\n", lbpar_gpu.gamma_bulk);
-  printf("    float gamma_odd = %f;\n", lbpar_gpu.gamma_odd);
-  printf("    float gamma_even = %f;\n", lbpar_gpu.gamma_even);
-  printf("    float agrid = %f;\n", lbpar_gpu.agrid);
-  printf("    float tau = %f;\n", lbpar_gpu.tau);
-  printf("    float bulk_viscosity = %f;\n", lbpar_gpu.bulk_viscosity);
-  printf("    unsigned int dim_x = %u;\n", lbpar_gpu.dim[0]);
-  printf("    unsigned int dim_y = %u;\n", lbpar_gpu.dim[1]);
-  printf("    unsigned int dim_z = %u;\n", lbpar_gpu.dim[2]);
-  printf("    unsigned int number_of_nodes = %u;\n", lbpar_gpu.number_of_nodes);
-  printf("    bool external_force_density = %d;\n",
-         static_cast<int>(lbpar_gpu.external_force_density));
-  printf("    float ext_force_density[3] = {%f, %f, %f};\n",
-         lbpar_gpu.ext_force_density[0], lbpar_gpu.ext_force_density[1],
-         lbpar_gpu.ext_force_density[2]);
-
-  printf("}\n");
-}
-
-inline void ek_setter_throw_if_initialized() {
-  if (ek_initialized)
-    throw std::runtime_error(
-        "Electrokinetics parameters cannot be set after initialisation");
-};
-
-void ek_set_agrid(float agrid) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.agrid = agrid;
-}
-
-void ek_set_lb_density(float lb_density) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.lb_density = lb_density;
-}
-
-void ek_set_prefactor(float prefactor) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.prefactor = prefactor;
-}
-
-void ek_set_electrostatics_coupling(bool electrostatics_coupling) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.es_coupling = electrostatics_coupling;
-}
-
-void ek_set_viscosity(float viscosity) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.viscosity = viscosity;
-}
-
-void ek_set_lb_ext_force_density(float lb_ext_force_dens_x,
-                                 float lb_ext_force_dens_y,
-                                 float lb_ext_force_dens_z) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.lb_ext_force_density[0] = lb_ext_force_dens_x;
-  ek_parameters.lb_ext_force_density[1] = lb_ext_force_dens_y;
-  ek_parameters.lb_ext_force_density[2] = lb_ext_force_dens_z;
-}
-
-void ek_set_friction(float friction) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.friction = friction;
-}
-
-void ek_set_bulk_viscosity(float bulk_viscosity) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.bulk_viscosity = bulk_viscosity;
-}
-
-void ek_set_gamma_odd(float gamma_odd) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.gamma_odd = gamma_odd;
-}
-
-void ek_set_gamma_even(float gamma_even) {
-
-  ek_setter_throw_if_initialized();
-  ek_parameters.gamma_even = gamma_even;
-}
-
-void ek_set_stencil(int stencil) {
-  ek_setter_throw_if_initialized();
-  if (!ek_parameters.fluidcoupling_ideal_contribution)
-    throw std::runtime_error(
-        "Combination of stencil and fluid coupling not implmented.");
-  ek_parameters.stencil = stencil;
-}
-
-void ek_set_advection(bool advection) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.advection = advection;
-}
-
-void ek_set_fluctuations(bool fluctuations) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.fluctuations = fluctuations;
-}
-
-void ek_set_fluctuation_amplitude(float fluctuation_amplitude) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.fluctuation_amplitude = fluctuation_amplitude;
-}
-
-void ek_set_fluidcoupling(bool ideal_contribution) {
-  ek_setter_throw_if_initialized();
-  if (ek_parameters.stencil != 0)
-    throw std::runtime_error(
-        "Combination of stencil and fluid coupling not implemented.");
-  ek_parameters.fluidcoupling_ideal_contribution = ideal_contribution;
-}
-
-void ek_set_T(float T) {
-  ek_setter_throw_if_initialized();
-  ek_parameters.T = T;
-}
-
-void ek_set_density(int species, float density) {
-  ek_init_species(species);
-  ek_parameters.density[ek_parameters.species_index[species]] = density;
-}
-
-void ek_set_D(int species, float D) {
-  ek_init_species(species);
-  ek_parameters.D[ek_parameters.species_index[species]] = D;
-  ek_parameters.d[ek_parameters.species_index[species]] =
-      D / (1.0f + 2.0f * sqrt(2.0f));
-}
-
-void ek_set_valency(int species, float valency) {
-  ek_init_species(species);
-  ek_parameters.valency[ek_parameters.species_index[species]] = valency;
-}
-
-void ek_set_ext_force_density(int species, float ext_force_density_x,
-                              float ext_force_density_y,
-                              float ext_force_density_z) {
-  ek_init_species(species);
-  ek_parameters.ext_force_density[0][ek_parameters.species_index[species]] =
-      ext_force_density_x;
-  ek_parameters.ext_force_density[1][ek_parameters.species_index[species]] =
-      ext_force_density_y;
-  ek_parameters.ext_force_density[2][ek_parameters.species_index[species]] =
-      ext_force_density_z;
-}
-
-struct ek_charge_of_particle {
-  __host__ __device__ float operator()(CUDA_particle_data particle) {
-    return particle.q;
-  };
-};
-
-float ek_get_particle_charge() {
-  auto device_particles = gpu_get_particle_pointer();
-  float particle_charge = thrust::transform_reduce(
-      thrust::device_ptr<CUDA_particle_data>(device_particles.begin()),
-      thrust::device_ptr<CUDA_particle_data>(device_particles.end()),
-      ek_charge_of_particle(), 0.0f, thrust::plus<float>());
-  return particle_charge;
-}
-
-float ek_calculate_net_charge() {
-  cuda_safe_mem(cudaMemset(charge_gpu, 0, sizeof(float)));
-
-  dim3 dim_grid =
-      calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(ek_calculate_system_charge, dim_grid, threads_per_block,
-             charge_gpu);
-
-  float charge;
-  cuda_safe_mem(
-      cudaMemcpy(&charge, charge_gpu, sizeof(float), cudaMemcpyDeviceToHost));
-
-  if (ek_parameters.es_coupling)
-    charge += ek_get_particle_charge();
-
-  return charge;
-}
-
-int ek_neutralize_system(int species) {
-  int species_index = ek_parameters.species_index[species];
-
-  if (species_index == -1)
-    return 1;
-
-  if (ek_parameters.valency[species_index] == 0.0f)
-    return 2;
-
-  float compensating_species_density = 0.0f;
-
-#ifndef EK_BOUNDARIES
-  for (unsigned i = 0; i < ek_parameters.number_of_species; i++)
-    compensating_species_density +=
-        ek_parameters.density[i] * ek_parameters.valency[i];
-
-  compensating_species_density =
-      ek_parameters.density[species_index] -
-      compensating_species_density / ek_parameters.valency[species_index];
-
-  if (ek_parameters.es_coupling) {
-    float particle_charge = ek_get_particle_charge();
-    compensating_species_density -= particle_charge /
-                                    ek_parameters.valency[species_index] /
-                                    Utils::int_pow<3>(ek_parameters.agrid) /
-                                    float(ek_parameters.number_of_nodes);
-  }
-
-#else
-  float charge = ek_calculate_net_charge();
-
-  compensating_species_density =
-      ek_parameters.density[species_index] -
-      (charge / ek_parameters.valency[species_index]) /
-          (Utils::int_pow<3>(ek_parameters.agrid) *
-           float(static_cast<int>(ek_parameters.number_of_nodes) -
-                 ek_parameters.number_of_boundary_nodes));
-#endif // EK_BOUNDARIES
-
-  if (compensating_species_density < 0.0f)
-    return 3;
-
-  ek_parameters.density[species_index] = compensating_species_density;
-
-  return 0;
-}
-
-void ek_set_rng_state(uint64_t counter) {
-  if (ek_initialized)
-    philox_counter = Utils::Counter<uint64_t>(counter);
-}
-
-#endif /* ELECTROKINETICS */
-
-#endif /* CUDA */
diff --git a/src/core/grid_based_algorithms/fd-electrostatics.cuh b/src/core/grid_based_algorithms/fd-electrostatics.cuh
deleted file mode 100644
index 0bc81ec49b6..00000000000
--- a/src/core/grid_based_algorithms/fd-electrostatics.cuh
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef CORE_GRID_BASED_ALGORITHMS_FD_ELECTROSTATICS_HPP
-#define CORE_GRID_BASED_ALGORITHMS_FD_ELECTROSTATICS_HPP
-
-#include <cufft.h>
-
-class FdElectrostatics {
-public:
-  struct InputParameters {
-    float prefactor;
-    int dim_x, dim_y, dim_z;
-    float agrid;
-  };
-
-  struct Parameters : public InputParameters {
-    Parameters() = default;
-    Parameters(InputParameters &inputParameters)
-        : InputParameters(inputParameters) {
-      charge_potential = nullptr;
-      greensfcn = nullptr;
-      dim_x_padded = (inputParameters.dim_x / 2 + 1) * 2;
-    }
-
-    cufftComplex *charge_potential;
-    cufftReal *greensfcn;
-    int dim_x_padded;
-  };
-
-  struct Grid {
-    float *grid;
-    int dim_x;
-    int dim_y;
-    int dim_z;
-    float agrid;
-  };
-
-  ~FdElectrostatics();
-  FdElectrostatics(InputParameters inputParameters, cudaStream_t stream);
-  void calculatePotential();
-  void calculatePotential(cufftComplex *charge_potential);
-  Grid getGrid();
-
-private:
-  Parameters parameters;
-  cudaStream_t cuda_stream;
-  cufftHandle plan_fft;
-  cufftHandle plan_ifft;
-  bool initialized;
-};
-
-#endif
diff --git a/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu b/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu
deleted file mode 100644
index ce543341304..00000000000
--- a/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-// TODO: throw exceptions upon errors initialization
-
-#include "grid_based_algorithms/fd-electrostatics.cuh"
-
-#include "cuda_utils.cuh"
-
-#include <utils/constants.hpp>
-
-#include <cuda.h>
-#include <cufft.h>
-
-#include <cstdio>
-#include <stdexcept>
-#include <string>
-
-#if defined(OMPI_MPI_H) || defined(_MPI_H)
-#error CU-file includes mpi.h! This should not happen!
-#endif
-
-static constexpr unsigned int threads_per_block = 64;
-
-__device__ cufftReal fde_getNode(int x, int y, int z);
-__device__ cufftReal fde_getNode(int i);
-__device__ void fde_setNode(int x, int y, int z, cufftReal value);
-__device__ void fde_setNode(int i, cufftReal value);
-
-__global__ void createGreensfcn();
-__global__ void multiplyGreensfcn(cufftComplex *charge_potential);
-
-__device__ __constant__ FdElectrostatics::Parameters fde_parameters_gpu[1];
-
-__device__ unsigned int fde_getThreadIndex() {
-
-  return blockIdx.y * gridDim.x * blockDim.x + blockDim.x * blockIdx.x +
-         threadIdx.x;
-}
-
-__device__ cufftReal fde_getNode(int x, int y, int z) {
-  auto *field =
-      reinterpret_cast<cufftReal *>(fde_parameters_gpu->charge_potential);
-  return field[fde_parameters_gpu->dim_y * fde_parameters_gpu->dim_x_padded *
-                   z +
-               fde_parameters_gpu->dim_x_padded * y + x];
-}
-
-__device__ void fde_setNode(int x, int y, int z, cufftReal value) {
-  auto *field =
-      reinterpret_cast<cufftReal *>(fde_parameters_gpu->charge_potential);
-  field[fde_parameters_gpu->dim_y * fde_parameters_gpu->dim_x_padded * z +
-        fde_parameters_gpu->dim_x_padded * y + x] = value;
-}
-
-__device__ cufftReal fde_getNode(int i) {
-  int x = i % fde_parameters_gpu->dim_x_padded;
-  i /= fde_parameters_gpu->dim_x_padded;
-  int y = i % fde_parameters_gpu->dim_y;
-  int z = i / fde_parameters_gpu->dim_y;
-  return fde_getNode(x, y, z);
-}
-
-__device__ void fde_setNode(int i, cufftReal value) {
-  int x = i % fde_parameters_gpu->dim_x_padded;
-  i /= fde_parameters_gpu->dim_x_padded;
-  int y = i % fde_parameters_gpu->dim_y;
-  int z = i / fde_parameters_gpu->dim_y;
-  fde_setNode(x, y, z, value);
-}
-
-FdElectrostatics::~FdElectrostatics() {
-  cufftDestroy(plan_ifft);
-  cufftDestroy(plan_fft);
-
-  cuda_safe_mem(cudaFree(parameters.greensfcn));
-  cuda_safe_mem(cudaFree(parameters.charge_potential));
-}
-
-FdElectrostatics::FdElectrostatics(InputParameters inputParameters,
-                                   cudaStream_t stream)
-    : parameters(inputParameters), cuda_stream(stream) {
-  cuda_safe_mem(cudaMalloc((void **)&parameters.charge_potential,
-                           sizeof(cufftComplex) * parameters.dim_z *
-                               parameters.dim_y * (parameters.dim_x / 2 + 1)));
-
-  cuda_safe_mem(cudaMalloc((void **)&parameters.greensfcn,
-                           sizeof(cufftReal) * parameters.dim_z *
-                               parameters.dim_y * (parameters.dim_x / 2 + 1)));
-
-  if (cudaGetLastError() != cudaSuccess) {
-    throw std::runtime_error("Failed to allocate");
-  }
-
-  cuda_safe_mem(
-      cudaMemcpyToSymbol(fde_parameters_gpu, &parameters, sizeof(Parameters)));
-
-  dim3 dim_grid = calculate_dim_grid(
-      static_cast<unsigned>(parameters.dim_z * parameters.dim_y *
-                            (parameters.dim_x / 2 + 1)),
-      4, threads_per_block);
-  KERNELCALL_stream(createGreensfcn, dim_grid, threads_per_block, stream);
-
-  /* create 3D FFT plans */
-
-  if (cufftPlan3d(&plan_fft, parameters.dim_z, parameters.dim_y,
-                  parameters.dim_x, CUFFT_R2C) != CUFFT_SUCCESS) {
-    throw std::runtime_error("Unable to create fft plan");
-  }
-
-  if (cufftSetStream(plan_fft, cuda_stream) != CUFFT_SUCCESS) {
-    throw std::runtime_error("Unable to assign FFT to cuda stream");
-  }
-
-  if (cufftPlan3d(&plan_ifft, parameters.dim_z, parameters.dim_y,
-                  parameters.dim_x, CUFFT_C2R) != CUFFT_SUCCESS) {
-    throw std::runtime_error("Unable to create ifft plan");
-  }
-
-  if (cufftSetStream(plan_ifft, cuda_stream) != CUFFT_SUCCESS) {
-    throw std::runtime_error("Unable to assign FFT to cuda stream");
-  }
-
-  initialized = true;
-}
-
-__global__ void createGreensfcn() {
-  unsigned int index = fde_getThreadIndex();
-  unsigned int tmp;
-  unsigned int coord[3];
-
-  coord[0] = index % (fde_parameters_gpu->dim_x / 2 + 1);
-  tmp = index / (fde_parameters_gpu->dim_x / 2 + 1);
-  coord[1] = tmp % fde_parameters_gpu->dim_y;
-  coord[2] = tmp / fde_parameters_gpu->dim_y;
-
-  if (index < fde_parameters_gpu->dim_z * fde_parameters_gpu->dim_y *
-                  (fde_parameters_gpu->dim_x / 2 + 1)) {
-
-    if (index == 0) {
-      // setting 0th Fourier mode to 0 enforces charge neutrality
-      fde_parameters_gpu->greensfcn[index] = 0.0f;
-    } else {
-      constexpr cufftReal two_pi = 2.0f * Utils::pi<cufftReal>();
-      fde_parameters_gpu->greensfcn[index] =
-          -2.0f * two_pi * fde_parameters_gpu->prefactor *
-          fde_parameters_gpu->agrid * fde_parameters_gpu->agrid * 0.5f /
-          (cos(two_pi * static_cast<cufftReal>(coord[0]) /
-               static_cast<cufftReal>(fde_parameters_gpu->dim_x)) +
-           cos(two_pi * static_cast<cufftReal>(coord[1]) /
-               static_cast<cufftReal>(fde_parameters_gpu->dim_y)) +
-           cos(two_pi * static_cast<cufftReal>(coord[2]) /
-               static_cast<cufftReal>(fde_parameters_gpu->dim_z)) -
-           3.0f) /
-          static_cast<cufftReal>(fde_parameters_gpu->dim_x *
-                                 fde_parameters_gpu->dim_y *
-                                 fde_parameters_gpu->dim_z);
-    }
-
-    // fde_parameters_gpu->greensfcn[index] = 0.0f; //TODO delete
-  }
-}
-
-__global__ void multiplyGreensfcn(cufftComplex *charge_potential) {
-
-  unsigned int index = fde_getThreadIndex();
-
-  if (index < fde_parameters_gpu->dim_z * fde_parameters_gpu->dim_y *
-                  (fde_parameters_gpu->dim_x / 2 + 1)) {
-    charge_potential[index].x *= fde_parameters_gpu->greensfcn[index];
-    charge_potential[index].y *= fde_parameters_gpu->greensfcn[index];
-  }
-}
-
-void FdElectrostatics::calculatePotential() {
-  calculatePotential(parameters.charge_potential);
-}
-
-void FdElectrostatics::calculatePotential(cufftComplex *charge_potential) {
-
-  if (cufftExecR2C(plan_fft, (cufftReal *)charge_potential, charge_potential) !=
-      CUFFT_SUCCESS) {
-
-    fprintf(stderr, "ERROR: Unable to execute FFT plan\n");
-  }
-
-  dim3 dim_grid = calculate_dim_grid(
-      static_cast<unsigned>(parameters.dim_z * parameters.dim_y *
-                            (parameters.dim_x / 2 + 1)),
-      4, threads_per_block);
-
-  KERNELCALL(multiplyGreensfcn, dim_grid, threads_per_block, charge_potential);
-
-  if (cufftExecC2R(plan_ifft, charge_potential,
-                   (cufftReal *)charge_potential) != CUFFT_SUCCESS) {
-
-    fprintf(stderr, "ERROR: Unable to execute iFFT plan\n");
-  }
-}
-
-FdElectrostatics::Grid FdElectrostatics::getGrid() {
-  Grid g = {(float *)parameters.charge_potential, parameters.dim_x,
-            parameters.dim_y, parameters.dim_z, parameters.agrid};
-  return g;
-}
diff --git a/src/core/grid_based_algorithms/halo.cpp b/src/core/grid_based_algorithms/halo.cpp
deleted file mode 100644
index 03291c95b18..00000000000
--- a/src/core/grid_based_algorithms/halo.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
- *   Max-Planck-Institute for Polymer Research, Theory Group
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/** \file
- *
- * Halo scheme for parallelization of lattice algorithms.
- * Implementation of file \ref halo.hpp.
- *
- */
-
-#include "config/config.hpp"
-
-#include "communication.hpp"
-#include "grid.hpp"
-#include "grid_based_algorithms/lattice.hpp"
-#include "halo.hpp"
-
-#include <utils/Vector.hpp>
-
-#include <cstdlib>
-#include <cstring>
-#include <memory>
-
-/** Predefined fieldtype for double-precision LB */
-static std::shared_ptr<FieldType> fieldtype_double =
-    std::make_shared<FieldType>(static_cast<int>(sizeof(double)));
-
-/** Set halo region to a given value
- * @param[out] dest pointer to the halo buffer
- * @param value integer value to write into the halo buffer
- * @param type halo field layout description
- */
-void halo_dtset(char *dest, int value, std::shared_ptr<FieldType> type) {
-  auto const vblocks = type->vblocks;
-  auto const vstride = type->vstride;
-  auto const vskip = type->vskip;
-  auto const &lens = type->lengths;
-  auto const &disps = type->disps;
-  auto const extent = type->extent;
-  auto const block_size = static_cast<long>(vskip) * static_cast<long>(extent);
-
-  for (int i = 0; i < vblocks; i++) {
-    for (int j = 0; j < vstride; j++) {
-      for (std::size_t k = 0; k < disps.size(); k++)
-        memset(dest + disps[k], value, lens[k]);
-    }
-    dest += block_size;
-  }
-}
-
-void halo_dtcopy(char *r_buffer, char *s_buffer, int count,
-                 std::shared_ptr<FieldType> type);
-
-void halo_copy_vector(char *r_buffer, char *s_buffer, int count,
-                      std::shared_ptr<FieldType> type, bool vflag) {
-
-  auto const vblocks = type->vblocks;
-  auto const vstride = type->vstride;
-  auto const extent = type->extent;
-
-  auto block_size = static_cast<long>(type->vskip);
-  if (vflag) {
-    block_size *= static_cast<long>(type->subtype->extent);
-  }
-
-  for (int i = 0; i < count; i++, s_buffer += extent, r_buffer += extent) {
-    char *dest = r_buffer, *src = s_buffer;
-    for (int j = 0; j < vblocks; j++, dest += block_size, src += block_size) {
-      halo_dtcopy(dest, src, vstride, type->subtype);
-    }
-  }
-}
-
-/** Copy lattice data with layout described by @p type.
- * @param r_buffer data destination
- * @param s_buffer data source
- * @param count    amount of data to copy
- * @param type     field layout type
- */
-void halo_dtcopy(char *r_buffer, char *s_buffer, int count,
-                 std::shared_ptr<FieldType> type) {
-
-  if (type->subtype) {
-    halo_copy_vector(r_buffer, s_buffer, count, type, type->vflag);
-  } else {
-
-    for (int i = 0; i < count;
-         i++, s_buffer += type->extent, r_buffer += type->extent) {
-      if (!type->count) {
-        memmove(r_buffer, s_buffer, type->extent);
-      } else {
-        for (int j = 0; j < type->count; j++) {
-          memmove(r_buffer + type->disps[j], s_buffer + type->disps[j],
-                  type->lengths[j]);
-        }
-      }
-    }
-  }
-}
-
-void prepare_halo_communication(HaloCommunicator &hc, const Lattice &lattice,
-                                MPI_Datatype datatype,
-                                const Utils::Vector3i &local_node_grid) {
-
-  const auto &grid = lattice.grid;
-  const auto &period = lattice.halo_grid;
-
-  for (int n = 0; n < hc.num; n++) {
-    MPI_Type_free(&(hc.halo_info[n].datatype));
-  }
-
-  int const num = 2 * 3; /* two communications in each space direction */
-  hc.num = num;
-  hc.halo_info.resize(num);
-
-  auto const extent = static_cast<long>(fieldtype_double->extent);
-
-  auto const node_neighbors = calc_node_neighbors(comm_cart);
-
-  int cnt = 0;
-  for (int dir = 0; dir < 3; dir++) {
-    for (int lr = 0; lr < 2; lr++) {
-
-      HaloInfo &hinfo = hc.halo_info[cnt];
-
-      int nblocks = 1;
-      for (int k = dir + 1; k < 3; k++) {
-        nblocks *= period[k];
-      }
-      int stride = 1;
-      for (int k = 0; k < dir; k++) {
-        stride *= period[k];
-      }
-      int skip = 1;
-      for (int k = 0; k < dir + 1 && k < 2; k++) {
-        skip *= period[k];
-      }
-
-      if (lr == 0) {
-        /* send to left, recv from right */
-        hinfo.s_offset = extent * static_cast<long>(stride * 1);
-        hinfo.r_offset = extent * static_cast<long>(stride * (grid[dir] + 1));
-      } else {
-        /* send to right, recv from left */
-        hinfo.s_offset = extent * static_cast<long>(stride * grid[dir]);
-        hinfo.r_offset = extent * static_cast<long>(stride * 0);
-      }
-
-      hinfo.source_node = node_neighbors[2 * dir + 1 - lr];
-      hinfo.dest_node = node_neighbors[2 * dir + lr];
-
-      hinfo.fieldtype = std::make_shared<FieldType>(nblocks, stride, skip, true,
-                                                    fieldtype_double);
-
-      MPI_Type_vector(nblocks, stride, skip, datatype, &hinfo.datatype);
-      MPI_Type_commit(&hinfo.datatype);
-
-      if (!box_geo.periodic(dir) &&
-          (local_geo.boundary()[2 * dir + lr] != 0 ||
-           local_geo.boundary()[2 * dir + 1 - lr] != 0)) {
-        if (local_node_grid[dir] == 1) {
-          hinfo.type = HALO_OPEN;
-        } else if (lr == 0) {
-          if (local_geo.boundary()[2 * dir + lr] == 1) {
-            hinfo.type = HALO_RECV;
-          } else {
-            hinfo.type = HALO_SEND;
-          }
-        } else {
-          if (local_geo.boundary()[2 * dir + lr] == -1) {
-            hinfo.type = HALO_RECV;
-          } else {
-            hinfo.type = HALO_SEND;
-          }
-        }
-      } else {
-        if (local_node_grid[dir] == 1) {
-          hc.halo_info[cnt].type = HALO_LOCL;
-        } else {
-          hc.halo_info[cnt].type = HALO_SENDRECV;
-        }
-      }
-      cnt++;
-    }
-  }
-}
-
-void release_halo_communication(HaloCommunicator &hc) {
-  for (int n = 0; n < hc.num; n++) {
-    MPI_Type_free(&(hc.halo_info[n].datatype));
-  }
-}
-
-void halo_communication(const HaloCommunicator &hc, char *const base) {
-
-  std::shared_ptr<FieldType> fieldtype;
-  MPI_Datatype datatype;
-  MPI_Request request;
-  MPI_Status status;
-
-  for (int n = 0; n < hc.num; n++) {
-    int s_node, r_node;
-    int comm_type = hc.halo_info[n].type;
-    char *s_buffer = static_cast<char *>(base) + hc.halo_info[n].s_offset;
-    char *r_buffer = static_cast<char *>(base) + hc.halo_info[n].r_offset;
-
-    switch (comm_type) {
-
-    case HALO_LOCL:
-      fieldtype = hc.halo_info[n].fieldtype;
-      halo_dtcopy(r_buffer, s_buffer, 1, fieldtype);
-      break;
-
-    case HALO_SENDRECV:
-      datatype = hc.halo_info[n].datatype;
-      s_node = hc.halo_info[n].source_node;
-      r_node = hc.halo_info[n].dest_node;
-      MPI_Sendrecv(s_buffer, 1, datatype, r_node, REQ_HALO_SPREAD, r_buffer, 1,
-                   datatype, s_node, REQ_HALO_SPREAD, comm_cart, &status);
-      break;
-
-    case HALO_SEND:
-      datatype = hc.halo_info[n].datatype;
-      fieldtype = hc.halo_info[n].fieldtype;
-      r_node = hc.halo_info[n].dest_node;
-      MPI_Isend(s_buffer, 1, datatype, r_node, REQ_HALO_SPREAD, comm_cart,
-                &request);
-      halo_dtset(r_buffer, 0, fieldtype);
-      MPI_Wait(&request, &status);
-      break;
-
-    case HALO_RECV:
-      datatype = hc.halo_info[n].datatype;
-      s_node = hc.halo_info[n].source_node;
-      MPI_Irecv(r_buffer, 1, datatype, s_node, REQ_HALO_SPREAD, comm_cart,
-                &request);
-      MPI_Wait(&request, &status);
-      break;
-
-    case HALO_OPEN:
-      fieldtype = hc.halo_info[n].fieldtype;
-      /** \todo this does not work for the n_i - \<n_i\> */
-      halo_dtset(r_buffer, 0, fieldtype);
-      break;
-    }
-  }
-}
diff --git a/src/core/grid_based_algorithms/halo.hpp b/src/core/grid_based_algorithms/halo.hpp
deleted file mode 100644
index 989442605e3..00000000000
--- a/src/core/grid_based_algorithms/halo.hpp
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
- *   Max-Planck-Institute for Polymer Research, Theory Group
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef CORE_GRID_BASED_ALGORITHMS_HALO_HPP
-#define CORE_GRID_BASED_ALGORITHMS_HALO_HPP
-/** \file
- *
- * Halo scheme for parallelization of lattice algorithms.
- * Header file for \ref halo.cpp.
- *
- */
-
-#include "grid_based_algorithms/lattice.hpp"
-
-#include <utils/Vector.hpp>
-
-#include <mpi.h>
-
-#include <memory>
-#include <vector>
-
-/** \name Types of halo communications */
-/**@{*/
-#define HALO_LOCL                                                              \
-  0 /**< Tag for local exchange of halo regions on the same processor */
-#define HALO_SENDRECV                                                          \
-  1                 /**< Tag for halo exchange between different processors */
-#define HALO_SEND 2 /**< Tag for halo send only */
-#define HALO_RECV 3 /**< Tag for halo receive only */
-#define HALO_OPEN 4 /**< Tag for halo open boundary */
-/**@}*/
-
-/** \name Tags for halo communications */
-/**@{*/
-#define REQ_HALO_SPREAD 501 /**< Tag for halo update */
-#define REQ_HALO_CHECK 599  /**< Tag for consistency check of halo regions */
-/**@}*/
-
-/** Layout of the lattice data.
- *  The description is similar to MPI datatypes but a bit more compact.
- */
-struct FieldType {
-  FieldType(int new_extent)
-      : count(0), disps({}), lengths({}), extent(new_extent), vblocks(0),
-        vstride(0), vskip(0), vflag(false), subtype(nullptr) {}
-  FieldType(int new_vblocks, int new_vstride, int new_vskip, bool new_vflag,
-            std::shared_ptr<FieldType> oldtype)
-      : count(oldtype->count), disps(oldtype->disps), lengths(oldtype->lengths),
-        extent(0), vblocks(new_vblocks), vstride(new_vstride), vskip(new_vskip),
-        vflag(new_vflag), subtype(oldtype) {
-    if (vflag) {
-      extent = oldtype->extent * ((vblocks - 1) * vskip + vstride);
-    } else {
-      extent = oldtype->extent * vstride + (vblocks - 1) * vskip;
-    }
-  }
-  int count;                /**< number of subtypes in fieldtype */
-  std::vector<int> disps;   /**< displacements of the subtypes */
-  std::vector<int> lengths; /**< lengths of the subtypes */
-  int extent;  /**< extent of the complete fieldtype including gaps */
-  int vblocks; /**< number of blocks in field vectors */
-  int vstride; /**< size of strides in field vectors */
-  int vskip;   /**< displacement between strides in field vectors */
-  bool vflag;
-  std::shared_ptr<FieldType> subtype;
-};
-
-/** Structure describing a Halo region */
-struct HaloInfo {
-
-  int type; /**< type of halo communication */
-
-  int source_node; /**< index of processor which sends halo data */
-  int dest_node;   /**< index of processor receiving halo data */
-
-  unsigned long s_offset; /**< offset for send buffer */
-  unsigned long r_offset; /**< offset for receive buffer */
-
-  std::shared_ptr<FieldType>
-      fieldtype;         /**< type layout of the data being exchanged */
-  MPI_Datatype datatype; /**< MPI datatype of data being communicated */
-};
-
-/** Structure holding a set of \ref HaloInfo which comprise a certain
- *  parallelization scheme */
-class HaloCommunicator {
-public:
-  HaloCommunicator(int num) : num(num) {}
-
-  int num; /**< number of halo communications in the scheme */
-
-  std::vector<HaloInfo> halo_info; /**< set of halo communications */
-};
-
-/** Preparation of the halo parallelization scheme. Sets up the
- *  necessary data structures for \ref halo_communication
- *  @param[in,out] hc       halo communicator being created
- *  @param[in]     lattice  lattice the communication is created for
- *  @param datatype         MPI datatype for the lattice data
- *  @param local_node_grid  Number of nodes in each spatial dimension
- */
-void prepare_halo_communication(HaloCommunicator &hc, const Lattice &lattice,
-                                MPI_Datatype datatype,
-                                const Utils::Vector3i &local_node_grid);
-
-/** Frees data structures associated with a halo communicator
- *  @param[in,out] hc  halo communicator to be released
- */
-void release_halo_communication(HaloCommunicator &hc);
-
-/** Perform communication according to the parallelization scheme
- *  described by the halo communicator
- *  @param[in]  hc    halo communicator describing the parallelization scheme
- *  @param[in]  base  base plane of local node
- */
-void halo_communication(const HaloCommunicator &hc, char *base);
-
-#endif /* CORE_GRID_BASED_ALGORITHMS_HALO_HPP */
diff --git a/src/core/grid_based_algorithms/lattice.cpp b/src/core/grid_based_algorithms/lattice.cpp
deleted file mode 100644
index e2d41b0ddc2..00000000000
--- a/src/core/grid_based_algorithms/lattice.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
- *   Max-Planck-Institute for Polymer Research, Theory Group
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "grid_based_algorithms/lattice.hpp"
-
-#include <boost/range/numeric.hpp>
-
-#include <utils/Vector.hpp>
-#include <utils/index.hpp>
-
-#include <cmath>
-#include <cstddef>
-#include <functional>
-#include <limits>
-#include <stdexcept>
-#include <string>
-
-Lattice::Lattice(double agrid, double offset, int halo_size,
-                 Utils::Vector3d const &local_box,
-                 Utils::Vector3d const &my_right,
-                 Utils::Vector3d const &box_length,
-                 Utils::Vector3i const &node_pos,
-                 Utils::Vector3i const &node_grid)
-    : agrid(agrid), halo_size(halo_size), offset(offset), node_grid(node_grid),
-      local_box(local_box), my_right(my_right) {
-  /* determine the number of local lattice nodes */
-  auto const epsilon = std::numeric_limits<double>::epsilon();
-  for (int d = 0; d < 3; d++) {
-    grid[d] = static_cast<int>(round(local_box[d] / agrid));
-    global_grid[d] = node_grid[d] * grid[d];
-    local_index_offset[d] = node_pos[d] * grid[d];
-  }
-
-  // sanity checks
-  for (int dir = 0; dir < 3; dir++) {
-    // check if local_box_l is compatible with lattice spacing
-    auto diff = fabs(local_box[dir] - grid[dir] * agrid);
-    if (diff > epsilon * box_length[dir]) {
-      throw std::runtime_error(
-          "Lattice spacing agrid[" + std::to_string(dir) +
-          "]=" + std::to_string(agrid) + " is incompatible with local_box_l[" +
-          std::to_string(dir) + "]=" + std::to_string(local_box[dir]) +
-          " ( box_l[" + std::to_string(dir) +
-          "]=" + std::to_string(box_length[dir]) +
-          " ). Mismatch: " + std::to_string(diff));
-    }
-  }
-
-  /* determine the number of total nodes including halo */
-  halo_grid = grid + Utils::Vector3i::broadcast(2 * halo_size);
-  halo_grid_volume = Utils::product(halo_grid);
-  halo_offset =
-      Utils::get_linear_index(halo_size, halo_size, halo_size, halo_grid);
-}
-
-bool Lattice::is_local(Utils::Vector3i const &index) const noexcept {
-  auto const x = index * agrid;
-  return x >= my_right - local_box and x < my_right;
-}
-
-void Lattice::map_position_to_lattice(const Utils::Vector3d &pos,
-                                      Utils::Vector<std::size_t, 8> &node_index,
-                                      Utils::Vector6d &delta) const {
-  Utils::Vector3i ind{};
-  auto const epsilon = std::numeric_limits<double>::epsilon();
-
-  /* determine the elementary lattice cell containing the particle
-     and the relative position of the particle in this cell */
-  for (int dir = 0; dir < 3; dir++) {
-    auto const lpos = pos[dir] - (my_right[dir] - local_box[dir]);
-    auto const rel = lpos / agrid + offset;
-    ind[dir] = static_cast<int>(floor(rel));
-
-    /* surrounding elementary cell is not completely inside this box,
-       adjust if this is due to round off errors */
-    if (ind[dir] < 0) {
-      if (fabs(rel) < epsilon) {
-        ind[dir] = 0;
-      } else {
-        throw std::runtime_error("position outside local LB domain");
-      }
-    } else if (ind[dir] > grid[dir]) {
-      if (lpos - local_box[dir] < epsilon * local_box[dir])
-        ind[dir] = grid[dir];
-      else
-        throw std::runtime_error("position outside local LB domain");
-    }
-
-    delta[3 + dir] = rel - ind[dir]; // delta_x/a
-    delta[dir] = 1.0 - delta[3 + dir];
-  }
-  auto const slice_x = static_cast<std::size_t>(halo_grid[0]);
-  auto const slice_xy = static_cast<std::size_t>(halo_grid[1]) * slice_x;
-  node_index[0] = Utils::get_linear_index(ind, halo_grid);
-  node_index[1] = node_index[0] + 1u;
-  node_index[2] = node_index[0] + slice_x;
-  node_index[3] = node_index[0] + slice_x + 1u;
-  node_index[4] = node_index[0] + slice_xy;
-  node_index[5] = node_index[0] + slice_xy + 1u;
-  node_index[6] = node_index[0] + slice_xy + slice_x;
-  node_index[7] = node_index[0] + slice_xy + slice_x + 1u;
-}
-
-Utils::Vector3i
-Lattice::local_index(Utils::Vector3i const &global_index) const noexcept {
-  return global_index - local_index_offset +
-         Utils::Vector3i::broadcast(halo_size);
-}
diff --git a/src/core/grid_based_algorithms/lattice.hpp b/src/core/grid_based_algorithms/lattice.hpp
deleted file mode 100644
index 32c6f32051d..00000000000
--- a/src/core/grid_based_algorithms/lattice.hpp
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
- *   Max-Planck-Institute for Polymer Research, Theory Group
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/** \file
- *
- * Lattice class definition.
- * Contains the lattice layout.
- * For parallelization purposes, it is assumed that a halo region
- * surrounds the local lattice sites.
- */
-
-#ifndef CORE_LB_LATTICE_HPP
-#define CORE_LB_LATTICE_HPP
-
-#include <utils/Vector.hpp>
-
-#include <cstddef>
-
-class Lattice {
-public:
-  using index_t = int;
-
-  /** number of local lattice sites in each direction (excluding halo) */
-  Utils::Vector3i grid;
-  Utils::Vector3i global_grid;
-  double agrid; /**< lattice constant */
-
-  /** number of lattice sites in each direction (including halo) */
-  Utils::Vector3i halo_grid;
-  index_t halo_size; /**< halo size in all directions */
-
-  double offset; /**< global offset */
-  /** global index of the local domain origin */
-  Utils::Vector3i local_index_offset;
-  /** global domain partition */
-  Utils::Vector3i node_grid;
-  /** dimensions of the local domain */
-  Utils::Vector3d local_box;
-  /** global position of the top right corner of the local domain */
-  Utils::Vector3d my_right;
-
-  /** total number of lattice sites (including halo) */
-  index_t halo_grid_volume;
-  /** offset for number of halo sites stored in front of the local
-   *  lattice sites
-   */
-  index_t halo_offset;
-
-  Lattice() = default;
-  /** @brief %Lattice constructor.
-   *
-   *  This function initializes the variables describing the lattice
-   *  layout. Important: The lattice data is <em>not</em> allocated here!
-   *
-   *  @param agrid       lattice spacing
-   *  @param offset      lattice offset
-   *  @param halo_size   halo size
-   *  @param local_box   dimensions of the local box
-   *  @param myright     right (top, back) corner of the local box
-   *  @param box_length  lengths of the local box
-   *  @param node_pos    position of this node in the domain decomposition
-   *  @param node_grid   node_grid of domain decomposition
-   */
-  Lattice(double agrid, double offset, int halo_size,
-          const Utils::Vector3d &local_box, const Utils::Vector3d &myright,
-          const Utils::Vector3d &box_length, Utils::Vector3i const &node_pos,
-          Utils::Vector3i const &node_grid);
-
-  /** Map a spatial position to the surrounding lattice sites.
-   *
-   * This function takes a global spatial position and determines the
-   * surrounding elementary cell of the lattice for this position.
-   * The distance fraction in each direction is also calculated.
-   *
-   * Remarks:
-   * - The spatial position has to be in the local domain
-   * - The lattice sites of the elementary cell are returned as local indices
-   *
-   * @param[in]  pos        spatial position
-   * @param[out] node_index local indices of the surrounding lattice sites
-   * @param[out] delta      distance fraction of %p pos from the surrounding
-   *                        elementary cell, 6 directions
-   */
-  void map_position_to_lattice(Utils::Vector3d const &pos,
-                               Utils::Vector<std::size_t, 8> &node_index,
-                               Utils::Vector6d &delta) const;
-
-  /**
-   * @brief Determine if given global index is node-local.
-   * @param index Global lattice index.
-   */
-  bool is_local(Utils::Vector3i const &index) const noexcept;
-  /**
-   * @brief Calculate the node-local index.
-   * @param global_index Index into global lattice.
-   */
-  Utils::Vector3i
-  local_index(Utils::Vector3i const &global_index) const noexcept;
-};
-
-#endif /* CORE_LB_LATTICE_HPP */
diff --git a/src/core/grid_based_algorithms/lb-d3q19.hpp b/src/core/grid_based_algorithms/lb-d3q19.hpp
deleted file mode 100644
index 39c25971670..00000000000
--- a/src/core/grid_based_algorithms/lb-d3q19.hpp
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
- *   Max-Planck-Institute for Polymer Research, Theory Group
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/** \file
- * %Lattice Boltzmann D3Q19 model.
- */
-
-#ifndef D3Q19_H
-#define D3Q19_H
-
-#include <utils/Vector.hpp>
-
-#include <array>
-#include <cstddef>
-
-namespace D3Q19 {
-
-static constexpr std::size_t n_vel = 19;
-
-/** Velocity sub-lattice of the D3Q19 model */
-static constexpr const std::array<Utils::Vector3i, 19> c = {{{{0, 0, 0}},
-                                                             {{1, 0, 0}},
-                                                             {{-1, 0, 0}},
-                                                             {{0, 1, 0}},
-                                                             {{0, -1, 0}},
-                                                             {{0, 0, 1}},
-                                                             {{0, 0, -1}},
-                                                             {{1, 1, 0}},
-                                                             {{-1, -1, 0}},
-                                                             {{1, -1, 0}},
-                                                             {{-1, 1, 0}},
-                                                             {{1, 0, 1}},
-                                                             {{-1, 0, -1}},
-                                                             {{1, 0, -1}},
-                                                             {{-1, 0, 1}},
-                                                             {{0, 1, 1}},
-                                                             {{0, -1, -1}},
-                                                             {{0, 1, -1}},
-                                                             {{0, -1, 1}}}};
-
-/** Coefficients for pseudo-equilibrium distribution of the D3Q19 model */
-static constexpr const std::array<std::array<double, 4>, 19> coefficients = {
-    {{{1. / 3., 1., 3. / 2., -1. / 2.}},
-     {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}},
-     {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}},
-     {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}},
-     {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}},
-     {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}},
-     {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}},
-     {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}}};
-
-/** Coefficients in the functional for the equilibrium distribution */
-static constexpr const std::array<double, 19> w = {
-    {1. / 3., 1. / 18., 1. / 18., 1. / 18., 1. / 18., 1. / 18., 1. / 18.,
-     1. / 36., 1. / 36., 1. / 36., 1. / 36., 1. / 36., 1. / 36., 1. / 36.,
-     1. / 36., 1. / 36., 1. / 36., 1. / 36., 1. / 36.}};
-
-/* the following values are the (weighted) lengths of the vectors */
-static constexpr const std::array<double, 19> w_k = {
-    {1.0, 1. / 3., 1. / 3., 1. / 3., 2. / 3., 4. / 9., 4. / 3., 1. / 9.,
-     1. / 9., 1. / 9., 2. / 3., 2. / 3., 2. / 3., 2. / 9., 2. / 9., 2. / 9.,
-     2.0, 4. / 9., 4. / 3.}};
-
-template <typename T>
-static constexpr const T c_sound_sq = static_cast<T>(1. / 3.);
-
-} // namespace D3Q19
-
-#undef GCC_EXTERN_STATEMENT
-
-#endif /* D3Q19_H */
diff --git a/src/core/grid_based_algorithms/lb.cpp b/src/core/grid_based_algorithms/lb.cpp
deleted file mode 100644
index fe3bce0959d..00000000000
--- a/src/core/grid_based_algorithms/lb.cpp
+++ /dev/null
@@ -1,1353 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
- *   Max-Planck-Institute for Polymer Research, Theory Group
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/** \file
- *  %Lattice Boltzmann algorithm for hydrodynamic degrees of freedom.
- *
- *  Includes fluctuating LB and coupling to MD particles via frictional
- *  momentum transfer.
- *
- *  The corresponding header file is lb.hpp.
- */
-
-#include "grid_based_algorithms/lb.hpp"
-
-#include "cell_system/CellStructureType.hpp"
-#include "communication.hpp"
-#include "errorhandling.hpp"
-#include "event.hpp"
-#include "grid.hpp"
-#include "grid_based_algorithms/lb_boundaries.hpp"
-#include "halo.hpp"
-#include "lb-d3q19.hpp"
-#include "random.hpp"
-
-#include <utils/Counter.hpp>
-#include <utils/Span.hpp>
-#include <utils/Vector.hpp>
-#include <utils/index.hpp>
-#include <utils/math/matrix_vector_product.hpp>
-#include <utils/math/sqr.hpp>
-#include <utils/uniform.hpp>
-
-#include <Random123/philox.h>
-#include <boost/mpi/collectives/reduce.hpp>
-#include <boost/multi_array.hpp>
-#include <boost/optional.hpp>
-#include <boost/range/algorithm.hpp>
-#include <boost/range/numeric.hpp>
-#include <profiler/profiler.hpp>
-
-#include <mpi.h>
-
-#include <algorithm>
-#include <array>
-#include <cassert>
-#include <cinttypes>
-#include <cmath>
-#include <cstddef>
-#include <cstdlib>
-#include <cstring>
-#include <functional>
-#include <iostream>
-#include <memory>
-#include <stdexcept>
-#include <vector>
-
-using Utils::get_linear_index;
-
-namespace {
-/** Basis of the mode space as described in @cite dunweg07a */
-extern constexpr const std::array<std::array<int, 19>, 19> e_ki = {
-    {{{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}},
-     {{0, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0}},
-     {{0, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1}},
-     {{0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, -1, -1, 1, 1, -1, -1, 1}},
-     {{-1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}},
-     {{0, 1, 1, -1, -1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1}},
-     {{-0, 1, 1, 1, 1, -2, -2, 2, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1}},
-     {{0, 0, 0, 0, 0, 0, 0, 1, 1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0}},
-     {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, -1, -1, 0, 0, 0, 0}},
-     {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, -1, -1}},
-     {{0, -2, 2, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0}},
-     {{0, 0, 0, -2, 2, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1}},
-     {{0, 0, 0, 0, 0, -2, 2, 0, 0, 0, 0, 1, -1, -1, 1, 1, -1, -1, 1}},
-     {{0, -0, 0, 0, 0, 0, 0, 1, -1, 1, -1, -1, 1, -1, 1, 0, 0, 0, 0}},
-     {{0, 0, 0, -0, 0, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, -1, 1, -1, 1}},
-     {{0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 1, -1, -1, 1, -1, 1, 1, -1}},
-     {{1, -2, -2, -2, -2, -2, -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}},
-     {{0, -1, -1, 1, 1, -0, -0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1}},
-     {{0, -1, -1, -1, -1, 2, 2, 2, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1}}}};
-
-/** Transposed version of @ref e_ki */
-extern constexpr const std::array<std::array<int, 19>, 19> e_ki_transposed = {
-    {{{1, 0, 0, 0, -1, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0}},
-     {{1, 1, 0, 0, 0, 1, 1, 0, 0, 0, -2, 0, 0, -0, 0, 0, -2, -1, -1}},
-     {{1, -1, 0, 0, 0, 1, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, -2, -1, -1}},
-     {{1, 0, 1, 0, 0, -1, 1, 0, 0, 0, 0, -2, 0, 0, -0, 0, -2, 1, -1}},
-     {{1, 0, -1, 0, 0, -1, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, -2, 1, -1}},
-     {{1, 0, 0, 1, 0, 0, -2, 0, 0, 0, 0, 0, -2, 0, 0, -0, -2, -0, 2}},
-     {{1, 0, 0, -1, 0, 0, -2, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, -0, 2}},
-     {{1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 2}},
-     {{1, -1, -1, 0, 1, 0, 2, 1, 0, 0, -1, -1, 0, -1, -1, 0, 1, 0, 2}},
-     {{1, 1, -1, 0, 1, 0, 2, -1, 0, 0, 1, -1, 0, 1, -1, 0, 1, 0, 2}},
-     {{1, -1, 1, 0, 1, 0, 2, -1, 0, 0, -1, 1, 0, -1, 1, 0, 1, 0, 2}},
-     {{1, 1, 0, 1, 1, 1, -1, 0, 1, 0, 1, 0, 1, -1, 0, 1, 1, 1, -1}},
-     {{1, -1, 0, -1, 1, 1, -1, 0, 1, 0, -1, 0, -1, 1, 0, -1, 1, 1, -1}},
-     {{1, 1, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, -1, 0, -1, 1, 1, -1}},
-     {{1, -1, 0, 1, 1, 1, -1, 0, -1, 0, -1, 0, 1, 1, 0, 1, 1, 1, -1}},
-     {{1, 0, 1, 1, 1, -1, -1, 0, 0, 1, 0, 1, 1, 0, -1, -1, 1, -1, -1}},
-     {{1, 0, -1, -1, 1, -1, -1, 0, 0, 1, 0, -1, -1, 0, 1, 1, 1, -1, -1}},
-     {{1, 0, 1, -1, 1, -1, -1, 0, 0, -1, 0, 1, -1, 0, -1, 1, 1, -1, -1}},
-     {{1, 0, -1, 1, 1, -1, -1, 0, 0, -1, 0, -1, 1, 0, 1, -1, 1, -1, -1}}}};
-} // namespace
-
-void lb_on_param_change(LBParam param) {
-  switch (param) {
-  case LBParam::AGRID:
-    lb_init(lbpar);
-    break;
-  case LBParam::DENSITY:
-    lb_reinit_fluid(lbfields, lblattice, lbpar);
-    break;
-  case LBParam::VISCOSITY:
-  case LBParam::EXT_FORCE_DENSITY:
-    lb_initialize_fields(lbfields, lbpar, lblattice);
-  case LBParam::BULKVISC:
-  case LBParam::KT:
-  case LBParam::GAMMA_ODD:
-  case LBParam::GAMMA_EVEN:
-  case LBParam::TAU:
-    break;
-  }
-  lb_reinit_parameters(lbpar);
-}
-
-#ifdef ADDITIONAL_CHECKS
-static void lb_check_halo_regions(const LB_Fluid &lb_fluid,
-                                  const Lattice &lb_lattice);
-#endif // ADDITIONAL_CHECKS
-
-boost::optional<Utils::Counter<uint64_t>> rng_counter_fluid;
-
-LB_Parameters lbpar = {
-    // density
-    0.0,
-    // viscosity
-    0.0,
-    // bulk_viscosity
-    -1.0,
-    // agrid
-    -1.0,
-    // tau
-    -1.0,
-    // ext_force_density
-    {0.0, 0.0, 0.0},
-    // gamma_odd
-    0.,
-    // gamma_even
-    0.,
-    // gamma_shear
-    0.,
-    // gamma_bulk
-    0.,
-    // is_TRT
-    false,
-    // phi
-    {},
-    // Thermal energy
-    0.0};
-
-Lattice lblattice;
-
-using LB_FluidData = boost::multi_array<double, 2>;
-static LB_FluidData lbfluid_a;
-static LB_FluidData lbfluid_b;
-
-/** Span of the velocity populations of the fluid (pre-collision populations).
- */
-LB_Fluid lbfluid;
-/** Span of the velocity populations of the fluid (post-collision populations).
- */
-static LB_Fluid lbfluid_post;
-
-std::vector<LB_FluidNode> lbfields;
-
-HaloCommunicator update_halo_comm = HaloCommunicator(0);
-
-/**
- * @brief Initialize fluid nodes.
- * @param[out] lb_fields      Vector containing the fluid nodes
- * @param[in]  lb_parameters  Parameters for the LB
- * @param[in]  lb_lattice     Lattice instance
- */
-void lb_initialize_fields(std::vector<LB_FluidNode> &lb_fields,
-                          LB_Parameters const &lb_parameters,
-                          Lattice const &lb_lattice) {
-  lb_fields.resize(lb_lattice.halo_grid_volume);
-  for (auto &field : lb_fields) {
-    field.force_density = lb_parameters.ext_force_density;
-#ifdef LB_BOUNDARIES
-    field.boundary = false;
-#endif // LB_BOUNDARIES
-  }
-  on_lbboundary_change();
-}
-
-/** (Re-)allocate memory for the fluid and initialize pointers. */
-void lb_realloc_fluid(LB_FluidData &lb_fluid_a, LB_FluidData &lb_fluid_b,
-                      const Lattice::index_t halo_grid_volume,
-                      LB_Fluid &lb_fluid, LB_Fluid &lb_fluid_post) {
-  const std::array<int, 2> size = {{D3Q19::n_vel, halo_grid_volume}};
-
-  lb_fluid_a.resize(size);
-  lb_fluid_b.resize(size);
-
-  using Utils::Span;
-  for (int i = 0; i < size[0]; i++) {
-    lb_fluid[i] = Span<double>(lb_fluid_a[i].origin(), size[1]);
-    lb_fluid_post[i] = Span<double>(lb_fluid_b[i].origin(), size[1]);
-  }
-}
-
-void lb_set_equilibrium_populations(const Lattice &lb_lattice,
-                                    const LB_Parameters &lb_parameters) {
-  for (Lattice::index_t index = 0; index < lb_lattice.halo_grid_volume;
-       ++index) {
-    lb_set_population_from_density_momentum_density_stress(
-        index, lb_parameters.density, Utils::Vector3d{} /*momentum density*/,
-        Utils::Vector6d{} /*stress*/);
-  }
-}
-
-void lb_init(const LB_Parameters &lb_parameters) {
-  if (lb_parameters.agrid <= 0.0) {
-    runtimeErrorMsg()
-        << "Lattice Boltzmann agrid not set when initializing fluid";
-  }
-  if (check_runtime_errors(comm_cart))
-    return;
-
-  /* initialize the local lattice domain */
-  try {
-    lblattice = Lattice(lb_parameters.agrid, 0.5 /*offset*/, 1 /*halo size*/,
-                        local_geo.length(), local_geo.my_right(),
-                        box_geo.length(), calc_node_pos(comm_cart), node_grid);
-  } catch (const std::runtime_error &e) {
-    runtimeErrorMsg() << e.what();
-    return;
-  }
-
-  /* allocate memory for data structures */
-  lb_realloc_fluid(lbfluid_a, lbfluid_b, lblattice.halo_grid_volume, lbfluid,
-                   lbfluid_post);
-
-  lb_initialize_fields(lbfields, lbpar, lblattice);
-
-  /* prepare the halo communication */
-  lb_prepare_communication(update_halo_comm, lblattice);
-
-  /* initialize derived parameters */
-  lb_reinit_parameters(lbpar);
-
-  lb_set_equilibrium_populations(lblattice, lbpar);
-
-#ifdef LB_BOUNDARIES
-  LBBoundaries::lb_init_boundaries();
-#endif
-}
-
-void lb_reinit_fluid(std::vector<LB_FluidNode> &lb_fields,
-                     Lattice const &lb_lattice,
-                     LB_Parameters const &lb_parameters) {
-  lb_set_equilibrium_populations(lb_lattice, lb_parameters);
-  lb_initialize_fields(lb_fields, lb_parameters, lb_lattice);
-}
-
-void lb_reinit_parameters(LB_Parameters &lb_parameters) {
-  if (lb_parameters.viscosity > 0.0) {
-    /* Eq. (80) @cite dunweg07a. */
-    lb_parameters.gamma_shear = 1. - 2. / (6. * lb_parameters.viscosity + 1.);
-  }
-
-  if (lb_parameters.bulk_viscosity > 0.0) {
-    /* Eq. (81) @cite dunweg07a. */
-    lb_parameters.gamma_bulk =
-        1. - 2. / (9. * lb_parameters.bulk_viscosity + 1.);
-  }
-
-  if (lb_parameters.is_TRT) {
-    lb_parameters.gamma_bulk = lb_parameters.gamma_shear;
-    lb_parameters.gamma_even = lb_parameters.gamma_shear;
-    lb_parameters.gamma_odd = -(7.0 * lb_parameters.gamma_even + 1.0) /
-                              (lb_parameters.gamma_even + 7.0);
-    // gamma_odd = lb_parameters.gamma_shear; //uncomment for BGK
-  }
-
-  // lb_parameters.gamma_shear = 0.0; //uncomment for special case of BGK
-  // lb_parameters.gamma_bulk = 0.0;
-  // gamma_odd = 0.0;
-  // gamma_even = 0.0;
-
-  if (lb_parameters.kT > 0.0) {
-    /* Eq. (51) @cite dunweg07a.
-     * Note that the modes are not normalized as in the paper here! */
-    double mu = lb_parameters.kT / D3Q19::c_sound_sq<double> *
-                lb_parameters.tau * lb_parameters.tau /
-                (lb_parameters.agrid * lb_parameters.agrid);
-
-    for (int i = 0; i < 4; i++)
-      lb_parameters.phi[i] = 0.0;
-    lb_parameters.phi[4] =
-        sqrt(mu * D3Q19::w_k[4] * (1. - Utils::sqr(lb_parameters.gamma_bulk)));
-    for (int i = 5; i < 10; i++)
-      lb_parameters.phi[i] = sqrt(mu * D3Q19::w_k[i] *
-                                  (1. - Utils::sqr(lb_parameters.gamma_shear)));
-    for (int i = 10; i < 16; i++)
-      lb_parameters.phi[i] =
-          sqrt(mu * D3Q19::w_k[i] * (1 - Utils::sqr(lb_parameters.gamma_odd)));
-    for (int i = 16; i < 19; i++)
-      lb_parameters.phi[i] =
-          sqrt(mu * D3Q19::w_k[i] * (1 - Utils::sqr(lb_parameters.gamma_even)));
-  } else {
-    for (int i = 0; i < D3Q19::n_vel; i++)
-      lb_parameters.phi[i] = 0.0;
-  }
-}
-
-/** Halo communication for push scheme */
-static void halo_push_communication(LB_Fluid &lb_fluid,
-                                    const Lattice &lb_lattice) {
-  Lattice::index_t index;
-  int x, y, z, count;
-  int rnode, snode;
-  double *buffer;
-  MPI_Status status;
-
-  auto const yperiod = lb_lattice.halo_grid[0];
-  auto const zperiod = lb_lattice.halo_grid[0] * lb_lattice.halo_grid[1];
-
-  auto const node_neighbors = calc_node_neighbors(comm_cart);
-
-  /***************
-   * X direction *
-   ***************/
-  count = 5 * lb_lattice.halo_grid[1] * lb_lattice.halo_grid[2];
-  std::vector<double> sbuf(count);
-  std::vector<double> rbuf(count);
-
-  /* send to right, recv from left i = 1, 7, 9, 11, 13 */
-  snode = node_neighbors[1];
-  rnode = node_neighbors[0];
-
-  buffer = sbuf.data();
-  index = get_linear_index(lb_lattice.grid[0] + 1, 0, 0, lb_lattice.halo_grid);
-  for (z = 0; z < lb_lattice.halo_grid[2]; z++) {
-    for (y = 0; y < lb_lattice.halo_grid[1]; y++) {
-      buffer[0] = lb_fluid[1][index];
-      buffer[1] = lb_fluid[7][index];
-      buffer[2] = lb_fluid[9][index];
-      buffer[3] = lb_fluid[11][index];
-      buffer[4] = lb_fluid[13][index];
-      buffer += 5;
-
-      index += yperiod;
-    }
-  }
-
-  MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, REQ_HALO_SPREAD,
-               rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD,
-               comm_cart, &status);
-
-  buffer = rbuf.data();
-  index = get_linear_index(1, 0, 0, lb_lattice.halo_grid);
-  for (z = 0; z < lb_lattice.halo_grid[2]; z++) {
-    for (y = 0; y < lb_lattice.halo_grid[1]; y++) {
-      lb_fluid[1][index] = buffer[0];
-      lb_fluid[7][index] = buffer[1];
-      lb_fluid[9][index] = buffer[2];
-      lb_fluid[11][index] = buffer[3];
-      lb_fluid[13][index] = buffer[4];
-      buffer += 5;
-
-      index += yperiod;
-    }
-  }
-
-  /* send to left, recv from right i = 2, 8, 10, 12, 14 */
-  snode = node_neighbors[0];
-  rnode = node_neighbors[1];
-
-  buffer = sbuf.data();
-  index = get_linear_index(0, 0, 0, lb_lattice.halo_grid);
-  for (z = 0; z < lb_lattice.halo_grid[2]; z++) {
-    for (y = 0; y < lb_lattice.halo_grid[1]; y++) {
-      buffer[0] = lb_fluid[2][index];
-      buffer[1] = lb_fluid[8][index];
-      buffer[2] = lb_fluid[10][index];
-      buffer[3] = lb_fluid[12][index];
-      buffer[4] = lb_fluid[14][index];
-      buffer += 5;
-
-      index += yperiod;
-    }
-  }
-
-  MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, REQ_HALO_SPREAD,
-               rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD,
-               comm_cart, &status);
-
-  buffer = rbuf.data();
-  index = get_linear_index(lb_lattice.grid[0], 0, 0, lb_lattice.halo_grid);
-  for (z = 0; z < lb_lattice.halo_grid[2]; z++) {
-    for (y = 0; y < lb_lattice.halo_grid[1]; y++) {
-      lb_fluid[2][index] = buffer[0];
-      lb_fluid[8][index] = buffer[1];
-      lb_fluid[10][index] = buffer[2];
-      lb_fluid[12][index] = buffer[3];
-      lb_fluid[14][index] = buffer[4];
-      buffer += 5;
-
-      index += yperiod;
-    }
-  }
-
-  /***************
-   * Y direction *
-   ***************/
-  count = 5 * lb_lattice.halo_grid[0] * lb_lattice.halo_grid[2];
-  sbuf.resize(count);
-  rbuf.resize(count);
-
-  /* send to right, recv from left i = 3, 7, 10, 15, 17 */
-  snode = node_neighbors[3];
-  rnode = node_neighbors[2];
-
-  buffer = sbuf.data();
-  index = get_linear_index(0, lb_lattice.grid[1] + 1, 0, lb_lattice.halo_grid);
-  for (z = 0; z < lb_lattice.halo_grid[2]; z++) {
-    for (x = 0; x < lb_lattice.halo_grid[0]; x++) {
-      buffer[0] = lb_fluid[3][index];
-      buffer[1] = lb_fluid[7][index];
-      buffer[2] = lb_fluid[10][index];
-      buffer[3] = lb_fluid[15][index];
-      buffer[4] = lb_fluid[17][index];
-      buffer += 5;
-
-      ++index;
-    }
-    index += zperiod - lb_lattice.halo_grid[0];
-  }
-
-  MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, REQ_HALO_SPREAD,
-               rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD,
-               comm_cart, &status);
-
-  buffer = rbuf.data();
-  index = get_linear_index(0, 1, 0, lb_lattice.halo_grid);
-  for (z = 0; z < lb_lattice.halo_grid[2]; z++) {
-    for (x = 0; x < lb_lattice.halo_grid[0]; x++) {
-      lb_fluid[3][index] = buffer[0];
-      lb_fluid[7][index] = buffer[1];
-      lb_fluid[10][index] = buffer[2];
-      lb_fluid[15][index] = buffer[3];
-      lb_fluid[17][index] = buffer[4];
-      buffer += 5;
-
-      ++index;
-    }
-    index += zperiod - lb_lattice.halo_grid[0];
-  }
-
-  /* send to left, recv from right i = 4, 8, 9, 16, 18 */
-  snode = node_neighbors[2];
-  rnode = node_neighbors[3];
-
-  buffer = sbuf.data();
-  index = get_linear_index(0, 0, 0, lb_lattice.halo_grid);
-  for (z = 0; z < lb_lattice.halo_grid[2]; z++) {
-    for (x = 0; x < lb_lattice.halo_grid[0]; x++) {
-      buffer[0] = lb_fluid[4][index];
-      buffer[1] = lb_fluid[8][index];
-      buffer[2] = lb_fluid[9][index];
-      buffer[3] = lb_fluid[16][index];
-      buffer[4] = lb_fluid[18][index];
-      buffer += 5;
-
-      ++index;
-    }
-    index += zperiod - lb_lattice.halo_grid[0];
-  }
-
-  MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, REQ_HALO_SPREAD,
-               rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD,
-               comm_cart, &status);
-
-  buffer = rbuf.data();
-  index = get_linear_index(0, lb_lattice.grid[1], 0, lb_lattice.halo_grid);
-  for (z = 0; z < lb_lattice.halo_grid[2]; z++) {
-    for (x = 0; x < lb_lattice.halo_grid[0]; x++) {
-      lb_fluid[4][index] = buffer[0];
-      lb_fluid[8][index] = buffer[1];
-      lb_fluid[9][index] = buffer[2];
-      lb_fluid[16][index] = buffer[3];
-      lb_fluid[18][index] = buffer[4];
-      buffer += 5;
-
-      ++index;
-    }
-    index += zperiod - lb_lattice.halo_grid[0];
-  }
-
-  /***************
-   * Z direction *
-   ***************/
-  count = 5 * lb_lattice.halo_grid[0] * lb_lattice.halo_grid[1];
-  sbuf.resize(count);
-  rbuf.resize(count);
-
-  /* send to right, recv from left i = 5, 11, 14, 15, 18 */
-  snode = node_neighbors[5];
-  rnode = node_neighbors[4];
-
-  buffer = sbuf.data();
-  index = get_linear_index(0, 0, lb_lattice.grid[2] + 1, lb_lattice.halo_grid);
-  for (y = 0; y < lb_lattice.halo_grid[1]; y++) {
-    for (x = 0; x < lb_lattice.halo_grid[0]; x++) {
-      buffer[0] = lb_fluid[5][index];
-      buffer[1] = lb_fluid[11][index];
-      buffer[2] = lb_fluid[14][index];
-      buffer[3] = lb_fluid[15][index];
-      buffer[4] = lb_fluid[18][index];
-      buffer += 5;
-
-      ++index;
-    }
-  }
-
-  MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, REQ_HALO_SPREAD,
-               rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD,
-               comm_cart, &status);
-
-  buffer = rbuf.data();
-  index = get_linear_index(0, 0, 1, lb_lattice.halo_grid);
-  for (y = 0; y < lb_lattice.halo_grid[1]; y++) {
-    for (x = 0; x < lb_lattice.halo_grid[0]; x++) {
-      lb_fluid[5][index] = buffer[0];
-      lb_fluid[11][index] = buffer[1];
-      lb_fluid[14][index] = buffer[2];
-      lb_fluid[15][index] = buffer[3];
-      lb_fluid[18][index] = buffer[4];
-      buffer += 5;
-
-      ++index;
-    }
-  }
-
-  /* send to left, recv from right i = 6, 12, 13, 16, 17 */
-  snode = node_neighbors[4];
-  rnode = node_neighbors[5];
-
-  buffer = sbuf.data();
-  index = get_linear_index(0, 0, 0, lb_lattice.halo_grid);
-  for (y = 0; y < lb_lattice.halo_grid[1]; y++) {
-    for (x = 0; x < lb_lattice.halo_grid[0]; x++) {
-      buffer[0] = lb_fluid[6][index];
-      buffer[1] = lb_fluid[12][index];
-      buffer[2] = lb_fluid[13][index];
-      buffer[3] = lb_fluid[16][index];
-      buffer[4] = lb_fluid[17][index];
-      buffer += 5;
-
-      ++index;
-    }
-  }
-
-  MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, REQ_HALO_SPREAD,
-               rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD,
-               comm_cart, &status);
-
-  buffer = rbuf.data();
-  index = get_linear_index(0, 0, lb_lattice.grid[2], lb_lattice.halo_grid);
-  for (y = 0; y < lb_lattice.halo_grid[1]; y++) {
-    for (x = 0; x < lb_lattice.halo_grid[0]; x++) {
-      lb_fluid[6][index] = buffer[0];
-      lb_fluid[12][index] = buffer[1];
-      lb_fluid[13][index] = buffer[2];
-      lb_fluid[16][index] = buffer[3];
-      lb_fluid[17][index] = buffer[4];
-      buffer += 5;
-
-      ++index;
-    }
-  }
-}
-
-/***********************************************************************/
-
-/** Performs basic sanity checks. */
-void lb_sanity_checks(const LB_Parameters &lb_parameters) {
-  if (lb_parameters.agrid <= 0.0) {
-    runtimeErrorMsg() << "Lattice Boltzmann agrid not set";
-  }
-  if (lb_parameters.tau <= 0.0) {
-    runtimeErrorMsg() << "Lattice Boltzmann time step not set";
-  }
-  if (lb_parameters.density <= 0.0) {
-    runtimeErrorMsg() << "Lattice Boltzmann fluid density not set";
-  }
-  if (lb_parameters.viscosity <= 0.0) {
-    runtimeErrorMsg() << "Lattice Boltzmann fluid viscosity not set";
-  }
-}
-
-uint64_t lb_fluid_get_rng_state() {
-  assert(rng_counter_fluid);
-  return rng_counter_fluid->value();
-}
-
-void mpi_set_lb_fluid_counter(uint64_t counter) {
-  rng_counter_fluid = Utils::Counter<uint64_t>(counter);
-}
-
-REGISTER_CALLBACK(mpi_set_lb_fluid_counter)
-
-void lb_fluid_set_rng_state(uint64_t counter) {
-  mpi_call(mpi_set_lb_fluid_counter, counter);
-  mpi_set_lb_fluid_counter(counter);
-}
-
-/***********************************************************************/
-
-/** Set up the structures for exchange of the halo regions.
- *  See also \ref halo.cpp
- */
-void lb_prepare_communication(HaloCommunicator &halo_comm,
-                              const Lattice &lb_lattice) {
-  HaloCommunicator comm = HaloCommunicator(0);
-
-  /* since the data layout is a structure of arrays, we have to
-   * generate a communication for this structure: first we generate
-   * the communication for one of the arrays (the 0-th velocity
-   * population), then we replicate this communication for the other
-   * velocity indices by constructing appropriate vector
-   * datatypes */
-
-  /* prepare the communication for a single velocity */
-  prepare_halo_communication(comm, lb_lattice, MPI_DOUBLE, node_grid);
-
-  halo_comm.num = comm.num;
-  halo_comm.halo_info.resize(comm.num);
-
-  /* replicate the halo structure */
-  for (int i = 0; i < comm.num; i++) {
-    HaloInfo &hinfo = halo_comm.halo_info[i];
-
-    hinfo.source_node = comm.halo_info[i].source_node;
-    hinfo.dest_node = comm.halo_info[i].dest_node;
-    hinfo.s_offset = comm.halo_info[i].s_offset;
-    hinfo.r_offset = comm.halo_info[i].r_offset;
-    hinfo.type = comm.halo_info[i].type;
-
-    /* generate the vector datatype for the structure of lattices we
-     * have to use hvector here because the extent of the subtypes
-     * does not span the full lattice and hence we cannot get the
-     * correct vskip out of them */
-
-    MPI_Aint lower;
-    MPI_Aint extent;
-    MPI_Type_get_extent(MPI_DOUBLE, &lower, &extent);
-    MPI_Type_create_hvector(D3Q19::n_vel, 1,
-                            lb_lattice.halo_grid_volume * extent,
-                            comm.halo_info[i].datatype, &hinfo.datatype);
-    MPI_Type_commit(&hinfo.datatype);
-
-    hinfo.fieldtype = std::make_shared<FieldType>(
-        D3Q19::n_vel, 1,
-        static_cast<int>(lb_lattice.halo_grid_volume * sizeof(double)), false,
-        comm.halo_info[i].fieldtype);
-  }
-
-  release_halo_communication(comm);
-}
-
-/***********************************************************************/
-/** \name Mapping between hydrodynamic fields and particle populations */
-/***********************************************************************/
-/**@{*/
-template <typename T>
-std::array<T, 19> normalize_modes(const std::array<T, 19> &modes) {
-  auto normalized_modes = modes;
-  for (int i = 0; i < modes.size(); i++) {
-    normalized_modes[i] /= D3Q19::w_k[i];
-  }
-  return normalized_modes;
-}
-
-/**
- * @brief Transform modes to populations.
- */
-template <typename T>
-std::array<T, 19> lb_calc_n_from_m(const std::array<T, 19> &modes) {
-  auto ret = Utils::matrix_vector_product<T, 19, e_ki_transposed>(
-      normalize_modes(modes));
-  std::transform(ret.begin(), ret.end(), ::D3Q19::w.begin(), ret.begin(),
-                 std::multiplies<T>());
-  return ret;
-}
-
-Utils::Vector19d lb_get_population_from_density_momentum_density_stress(
-    double density, Utils::Vector3d const &momentum_density,
-    Utils::Vector6d const &stress) {
-  std::array<double, 19> modes{
-      {density, momentum_density[0], momentum_density[1], momentum_density[2],
-       stress[0], stress[1], stress[2], stress[3], stress[4], stress[5]}};
-
-  return Utils::Vector19d{lb_calc_n_from_m(modes)};
-}
-
-void lb_set_population_from_density_momentum_density_stress(
-    Lattice::index_t const index, double density,
-    Utils::Vector3d const &momentum_density, Utils::Vector6d const &stress) {
-  auto const population =
-      lb_get_population_from_density_momentum_density_stress(
-          density, momentum_density, stress);
-  lb_set_population(index, population);
-}
-/**@}*/
-
-std::array<double, 19> lb_calc_modes(Lattice::index_t index,
-                                     const LB_Fluid &lb_fluid) {
-  return Utils::matrix_vector_product<double, 19, e_ki>(
-      LB_Fluid_Ref(index, lb_fluid));
-}
-
-template <typename T>
-std::array<T, 19> lb_relax_modes(const std::array<T, 19> &modes,
-                                 const Utils::Vector<T, 3> &force_density,
-                                 const LB_Parameters &parameters) {
-  using Utils::sqr;
-  using Utils::Vector;
-
-  /* re-construct the real density
-   * remember that the populations are stored as differences to their
-   * equilibrium value */
-  auto const density = modes[0] + parameters.density;
-  auto const momentum_density =
-      Vector<T, 3>{modes[1], modes[2], modes[3]} + T{0.5} * force_density;
-  auto const momentum_density2 = momentum_density.norm2();
-
-  /* equilibrium part of the stress modes */
-  auto const stress_eq =
-      Vector<T, 6>{momentum_density2,
-                   (sqr(momentum_density[0]) - sqr(momentum_density[1])),
-                   (momentum_density2 - 3.0 * sqr(momentum_density[2])),
-                   momentum_density[0] * momentum_density[1],
-                   momentum_density[0] * momentum_density[2],
-                   momentum_density[1] * momentum_density[2]} /
-      density;
-
-  return {{modes[0], modes[1], modes[2], modes[3],
-           /* relax the stress modes */
-           stress_eq[0] + parameters.gamma_bulk * (modes[4] - stress_eq[0]),
-           stress_eq[1] + parameters.gamma_shear * (modes[5] - stress_eq[1]),
-           stress_eq[2] + parameters.gamma_shear * (modes[6] - stress_eq[2]),
-           stress_eq[3] + parameters.gamma_shear * (modes[7] - stress_eq[3]),
-           stress_eq[4] + parameters.gamma_shear * (modes[8] - stress_eq[4]),
-           stress_eq[5] + parameters.gamma_shear * (modes[9] - stress_eq[5]),
-           /* relax the ghost modes (project them out) */
-           /* ghost modes have no equilibrium part due to orthogonality */
-           parameters.gamma_odd * modes[10], parameters.gamma_odd * modes[11],
-           parameters.gamma_odd * modes[12], parameters.gamma_odd * modes[13],
-           parameters.gamma_odd * modes[14], parameters.gamma_odd * modes[15],
-           parameters.gamma_even * modes[16], parameters.gamma_even * modes[17],
-           parameters.gamma_even * modes[18]}};
-}
-
-template <typename T>
-std::array<T, 19> lb_thermalize_modes(
-    Lattice::index_t index, const std::array<T, 19> &modes,
-    const LB_Parameters &lb_parameters,
-    boost::optional<Utils::Counter<uint64_t>> const &rng_counter) {
-  if (lb_parameters.kT > 0.0) {
-    using Utils::uniform;
-    using rng_type = r123::Philox4x64;
-    using ctr_type = rng_type::ctr_type;
-
-    const ctr_type c{
-        {rng_counter->value(), static_cast<uint64_t>(RNGSalt::FLUID)}};
-    const T rootdensity =
-        std::sqrt(std::fabs(modes[0] + lb_parameters.density));
-    auto const pref = std::sqrt(12.) * rootdensity;
-
-    const ctr_type noise[4] = {
-        rng_type{}(c, {{static_cast<uint64_t>(index), 0ul}}),
-        rng_type{}(c, {{static_cast<uint64_t>(index), 1ul}}),
-        rng_type{}(c, {{static_cast<uint64_t>(index), 2ul}}),
-        rng_type{}(c, {{static_cast<uint64_t>(index), 3ul}})};
-
-    auto rng = [&](int i) { return uniform(noise[i / 4][i % 4]) - 0.5; };
-
-    return {/* conserved modes */
-            {modes[0], modes[1], modes[2], modes[3],
-             /* stress modes */
-             modes[4] + pref * lb_parameters.phi[4] * rng(0),
-             modes[5] + pref * lb_parameters.phi[5] * rng(1),
-             modes[6] + pref * lb_parameters.phi[6] * rng(2),
-             modes[7] + pref * lb_parameters.phi[7] * rng(3),
-             modes[8] + pref * lb_parameters.phi[8] * rng(4),
-             modes[9] + pref * lb_parameters.phi[9] * rng(5),
-
-             /* ghost modes */
-             modes[10] + pref * lb_parameters.phi[10] * rng(6),
-             modes[11] + pref * lb_parameters.phi[11] * rng(7),
-             modes[12] + pref * lb_parameters.phi[12] * rng(8),
-             modes[13] + pref * lb_parameters.phi[13] * rng(9),
-             modes[14] + pref * lb_parameters.phi[14] * rng(10),
-             modes[15] + pref * lb_parameters.phi[15] * rng(11),
-             modes[16] + pref * lb_parameters.phi[16] * rng(12),
-             modes[17] + pref * lb_parameters.phi[17] * rng(13),
-             modes[18] + pref * lb_parameters.phi[18] * rng(14)}};
-  }
-  return modes;
-}
-
-template <typename T>
-std::array<T, 19> lb_apply_forces(const std::array<T, 19> &modes,
-                                  const LB_Parameters &lb_parameters,
-                                  Utils::Vector<T, 3> const &f) {
-  auto const density = modes[0] + lb_parameters.density;
-
-  /* hydrodynamic momentum density is redefined when external forces present */
-  auto const u =
-      Utils::Vector3d{modes[1], modes[2], modes[3]} + T{0.5} * f / density;
-
-  auto const C = std::array<T, 6>{
-      {(1. + lb_parameters.gamma_shear) * u[0] * f[0] +
-           1. / 3. * (lb_parameters.gamma_bulk - lb_parameters.gamma_shear) *
-               (u * f),
-       1. / 2. * (1. + lb_parameters.gamma_shear) * (u[0] * f[1] + u[1] * f[0]),
-       (1. + lb_parameters.gamma_shear) * u[1] * f[1] +
-           1. / 3. * (lb_parameters.gamma_bulk - lb_parameters.gamma_shear) *
-               (u * f),
-       1. / 2. * (1. + lb_parameters.gamma_shear) * (u[0] * f[2] + u[2] * f[0]),
-       1. / 2. * (1. + lb_parameters.gamma_shear) * (u[1] * f[2] + u[2] * f[1]),
-       (1. + lb_parameters.gamma_shear) * u[2] * f[2] +
-           1. / 3. * (lb_parameters.gamma_bulk - lb_parameters.gamma_shear) *
-               (u * f)}};
-
-  return {{modes[0],
-           /* update momentum modes */
-           modes[1] + f[0], modes[2] + f[1], modes[3] + f[2],
-           /* update stress modes */
-           modes[4] + C[0] + C[2] + C[5], modes[5] + C[0] - C[2],
-           modes[6] + C[0] + C[2] - 2. * C[5], modes[7] + C[1], modes[8] + C[3],
-           modes[9] + C[4], modes[10], modes[11], modes[12], modes[13],
-           modes[14], modes[15], modes[16], modes[17], modes[18]}};
-}
-
-/**
- * @brief Relative index for the next node for each lattice velocity.
- *
- * @param lb_lattice The lattice parameters.
- * @param c Lattice velocities.
- */
-auto lb_next_offsets(const Lattice &lb_lattice,
-                     std::array<Utils::Vector3i, 19> const &c) {
-  const Utils::Vector3<std::ptrdiff_t> strides = {
-      {1, lb_lattice.halo_grid[0],
-       static_cast<std::ptrdiff_t>(lb_lattice.halo_grid[0]) *
-           static_cast<std::ptrdiff_t>(lb_lattice.halo_grid[1])}};
-
-  std::array<std::ptrdiff_t, 19> offsets;
-  boost::transform(c, offsets.begin(),
-                   [&strides](auto const &ci) { return strides * ci; });
-
-  return offsets;
-}
-
-template <typename T>
-void lb_stream(LB_Fluid &lb_fluid, const std::array<T, 19> &populations,
-               std::size_t index,
-               std::array<std::ptrdiff_t, 19> const &offsets) {
-  for (int i = 0; i < populations.size(); i++) {
-    lb_fluid[i][index + offsets[i]] = populations[i];
-  }
-}
-
-/* Collisions and streaming (push scheme) */
-void lb_integrate() {
-  ESPRESSO_PROFILER_CXX_MARK_FUNCTION;
-  /* loop over all lattice cells (halo excluded) */
-#ifdef LB_BOUNDARIES
-  for (auto &lbboundary : LBBoundaries::lbboundaries) {
-    (*lbboundary).reset_force();
-  }
-#endif // LB_BOUNDARIES
-
-  auto const next_offsets = lb_next_offsets(lblattice, D3Q19::c);
-
-  Lattice::index_t index = lblattice.halo_offset;
-  for (int z = 1; z <= lblattice.grid[2]; z++) {
-    for (int y = 1; y <= lblattice.grid[1]; y++) {
-      for (int x = 1; x <= lblattice.grid[0]; x++) {
-        // as we only want to apply this to non-boundary nodes we can throw out
-        // the if-clause if we have a non-bounded domain
-#ifdef LB_BOUNDARIES
-        if (!lbfields[index].boundary)
-#endif // LB_BOUNDARIES
-        {
-          /* calculate modes locally */
-          auto const modes = lb_calc_modes(index, lbfluid);
-
-          /* deterministic collisions */
-          auto const relaxed_modes =
-              lb_relax_modes(modes, lbfields[index].force_density, lbpar);
-
-          /* fluctuating hydrodynamics */
-          auto const thermalized_modes = lb_thermalize_modes(
-              index, relaxed_modes, lbpar, rng_counter_fluid);
-
-          /* apply forces */
-          auto const modes_with_forces = lb_apply_forces(
-              thermalized_modes, lbpar, lbfields[index].force_density);
-
-#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS
-          // Safeguard the node forces so that we can later use them for the IBM
-          // particle update
-          lbfields[index].force_density_buf = lbfields[index].force_density;
-#endif
-
-          /* reset the force density */
-          lbfields[index].force_density = lbpar.ext_force_density;
-
-          /* transform back to populations and streaming */
-          auto const populations = lb_calc_n_from_m(modes_with_forces);
-          lb_stream(lbfluid_post, populations, index, next_offsets);
-        }
-
-        ++index; /* next node */
-      }
-      index += 2; /* skip halo region */
-    }
-    index += 2 * lblattice.halo_grid[0]; /* skip halo region */
-  }
-
-  /* exchange halo regions */
-  halo_push_communication(lbfluid_post, lblattice);
-
-#ifdef LB_BOUNDARIES
-  /* boundary conditions for links */
-  lb_bounce_back(lbfluid_post, lbpar, lbfields);
-#endif // LB_BOUNDARIES
-
-  /* swap the pointers for old and new population fields */
-  std::swap(lbfluid, lbfluid_post);
-
-  halo_communication(update_halo_comm,
-                     reinterpret_cast<char *>(lbfluid[0].data()));
-
-#ifdef ADDITIONAL_CHECKS
-  lb_check_halo_regions(lbfluid, lblattice);
-#endif
-}
-
-#ifdef ADDITIONAL_CHECKS
-int compare_buffers(std::array<double, D3Q19::n_vel> const &buff_a,
-                    std::array<double, D3Q19::n_vel> const &buff_b) {
-  if (buff_a != buff_b) {
-    runtimeErrorMsg() << "Halo buffers are not identical";
-    return ES_ERROR;
-  }
-  return ES_OK;
-}
-
-void log_buffer_diff(std::ostream &out, int dir, Lattice::index_t index, int x,
-                     int y, int z) {
-  out << "buffers differ in dir=" << dir << " at node index=" << index;
-  if (x != -1)
-    out << " x=" << x;
-  if (y != -1)
-    out << " y=" << y;
-  if (z != -1)
-    out << " z=" << z;
-  out << "\n";
-}
-
-/** Check consistency of the halo regions.
- *  Test whether the halo regions have been exchanged correctly.
- */
-void lb_check_halo_regions(const LB_Fluid &lb_fluid,
-                           const Lattice &lb_lattice) {
-  Lattice::index_t index;
-  std::size_t i;
-  int x, y, z, s_node, r_node;
-  std::array<double, D3Q19::n_vel> s_buffer;
-  std::array<double, D3Q19::n_vel> r_buffer;
-
-  auto const node_neighbors = calc_node_neighbors(comm_cart);
-
-  if (box_geo.periodic(0)) {
-    for (z = 0; z < lb_lattice.halo_grid[2]; ++z) {
-      for (y = 0; y < lb_lattice.halo_grid[1]; ++y) {
-        index = get_linear_index(0, y, z, lb_lattice.halo_grid);
-        for (i = 0; i < D3Q19::n_vel; i++)
-          s_buffer[i] = lb_fluid[i][index];
-
-        s_node = node_neighbors[1];
-        r_node = node_neighbors[0];
-        if (n_nodes > 1) {
-          comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node,
-                             REQ_HALO_CHECK, r_buffer);
-          index =
-              get_linear_index(lb_lattice.grid[0], y, z, lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            s_buffer[i] = lb_fluid[i][index];
-          compare_buffers(s_buffer, r_buffer);
-        } else {
-          index =
-              get_linear_index(lb_lattice.grid[0], y, z, lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            r_buffer[i] = lb_fluid[i][index];
-          if (compare_buffers(s_buffer, r_buffer)) {
-            log_buffer_diff(std::cerr, 0, index, -1, y, z);
-          }
-        }
-
-        index = get_linear_index(lb_lattice.grid[0] + 1, y, z,
-                                 lb_lattice.halo_grid);
-        for (i = 0; i < D3Q19::n_vel; i++)
-          s_buffer[i] = lb_fluid[i][index];
-
-        s_node = node_neighbors[0];
-        r_node = node_neighbors[1];
-        if (n_nodes > 1) {
-          comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node,
-                             REQ_HALO_CHECK, r_buffer);
-          index = get_linear_index(1, y, z, lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            s_buffer[i] = lb_fluid[i][index];
-          compare_buffers(s_buffer, r_buffer);
-        } else {
-          index = get_linear_index(1, y, z, lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            r_buffer[i] = lb_fluid[i][index];
-          if (compare_buffers(s_buffer, r_buffer)) {
-            log_buffer_diff(std::cerr, 0, index, -1, y, z);
-          }
-        }
-      }
-    }
-  }
-
-  if (box_geo.periodic(1)) {
-    for (z = 0; z < lb_lattice.halo_grid[2]; ++z) {
-      for (x = 0; x < lb_lattice.halo_grid[0]; ++x) {
-        index = get_linear_index(x, 0, z, lb_lattice.halo_grid);
-        for (i = 0; i < D3Q19::n_vel; i++)
-          s_buffer[i] = lb_fluid[i][index];
-
-        s_node = node_neighbors[3];
-        r_node = node_neighbors[2];
-        if (n_nodes > 1) {
-          comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node,
-                             REQ_HALO_CHECK, r_buffer);
-          index =
-              get_linear_index(x, lb_lattice.grid[1], z, lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            s_buffer[i] = lb_fluid[i][index];
-          compare_buffers(s_buffer, r_buffer);
-        } else {
-          index =
-              get_linear_index(x, lb_lattice.grid[1], z, lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            r_buffer[i] = lb_fluid[i][index];
-          if (compare_buffers(s_buffer, r_buffer)) {
-            log_buffer_diff(std::cerr, 1, index, x, -1, z);
-          }
-        }
-      }
-      for (x = 0; x < lb_lattice.halo_grid[0]; ++x) {
-        index = get_linear_index(x, lb_lattice.grid[1] + 1, z,
-                                 lb_lattice.halo_grid);
-        for (i = 0; i < D3Q19::n_vel; i++)
-          s_buffer[i] = lb_fluid[i][index];
-
-        s_node = node_neighbors[2];
-        r_node = node_neighbors[3];
-        if (n_nodes > 1) {
-          comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node,
-                             REQ_HALO_CHECK, r_buffer);
-          index = get_linear_index(x, 1, z, lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            s_buffer[i] = lb_fluid[i][index];
-          compare_buffers(s_buffer, r_buffer);
-        } else {
-          index = get_linear_index(x, 1, z, lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            r_buffer[i] = lb_fluid[i][index];
-          if (compare_buffers(s_buffer, r_buffer)) {
-            log_buffer_diff(std::cerr, 1, index, x, -1, z);
-          }
-        }
-      }
-    }
-  }
-
-  if (box_geo.periodic(2)) {
-    for (y = 0; y < lb_lattice.halo_grid[1]; ++y) {
-      for (x = 0; x < lb_lattice.halo_grid[0]; ++x) {
-        index = get_linear_index(x, y, 0, lb_lattice.halo_grid);
-        for (i = 0; i < D3Q19::n_vel; i++)
-          s_buffer[i] = lb_fluid[i][index];
-
-        s_node = node_neighbors[5];
-        r_node = node_neighbors[4];
-        if (n_nodes > 1) {
-          comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node,
-                             REQ_HALO_CHECK, r_buffer);
-          index =
-              get_linear_index(x, y, lb_lattice.grid[2], lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            s_buffer[i] = lb_fluid[i][index];
-          compare_buffers(s_buffer, r_buffer);
-        } else {
-          index =
-              get_linear_index(x, y, lb_lattice.grid[2], lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            r_buffer[i] = lb_fluid[i][index];
-          if (compare_buffers(s_buffer, r_buffer)) {
-            log_buffer_diff(std::cerr, 2, index, x, y, lb_lattice.grid[2]);
-          }
-        }
-      }
-    }
-    for (y = 0; y < lb_lattice.halo_grid[1]; ++y) {
-      for (x = 0; x < lb_lattice.halo_grid[0]; ++x) {
-        index = get_linear_index(x, y, lb_lattice.grid[2] + 1,
-                                 lb_lattice.halo_grid);
-        for (i = 0; i < D3Q19::n_vel; i++)
-          s_buffer[i] = lb_fluid[i][index];
-
-        s_node = node_neighbors[4];
-        r_node = node_neighbors[5];
-        if (n_nodes > 1) {
-          comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node,
-                             REQ_HALO_CHECK, r_buffer);
-          index = get_linear_index(x, y, 1, lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            s_buffer[i] = lb_fluid[i][index];
-          compare_buffers(s_buffer, r_buffer);
-        } else {
-          index = get_linear_index(x, y, 1, lb_lattice.halo_grid);
-          for (i = 0; i < D3Q19::n_vel; i++)
-            r_buffer[i] = lb_fluid[i][index];
-          if (compare_buffers(s_buffer, r_buffer)) {
-            log_buffer_diff(std::cerr, 2, index, x, y, -1);
-          }
-        }
-      }
-    }
-  }
-}
-#endif // ADDITIONAL_CHECKS
-
-double lb_calc_density(std::array<double, 19> const &modes,
-                       const LB_Parameters &lb_parameters) {
-  return modes[0] + lb_parameters.density;
-}
-
-Utils::Vector3d lb_calc_momentum_density(std::array<double, 19> const &modes,
-                                         Utils::Vector3d const &force_density) {
-  return Utils::Vector3d{{modes[1] + 0.5 * force_density[0],
-                          modes[2] + 0.5 * force_density[1],
-                          modes[3] + 0.5 * force_density[2]}};
-}
-
-Utils::Vector6d lb_calc_pressure_tensor(std::array<double, 19> const &modes,
-                                        Utils::Vector3d const &force_density,
-                                        const LB_Parameters &lb_parameters) {
-  auto const momentum_density = lb_calc_momentum_density(modes, force_density);
-  auto const density = lb_calc_density(modes, lb_parameters);
-  using Utils::sqr;
-  auto const momentum_density2 = sqr(momentum_density[0]) +
-                                 sqr(momentum_density[1]) +
-                                 sqr(momentum_density[2]);
-  /* equilibrium part of the stress modes */
-  Utils::Vector6d modes_from_stress_eq{};
-  modes_from_stress_eq[0] = momentum_density2 / density;
-  modes_from_stress_eq[1] =
-      (sqr(momentum_density[0]) - sqr(momentum_density[1])) / density;
-  modes_from_stress_eq[2] =
-      (momentum_density2 - 3.0 * sqr(momentum_density[2])) / density;
-  modes_from_stress_eq[3] = momentum_density[0] * momentum_density[1] / density;
-  modes_from_stress_eq[4] = momentum_density[0] * momentum_density[2] / density;
-  modes_from_stress_eq[5] = momentum_density[1] * momentum_density[2] / density;
-
-  /* Now we must predict the outcome of the next collision */
-  /* We immediately average pre- and post-collision. */
-
-  Utils::Vector6d avg_modes;
-  avg_modes[0] =
-      modes_from_stress_eq[0] + (0.5 + 0.5 * lb_parameters.gamma_bulk) *
-                                    (modes[4] - modes_from_stress_eq[0]);
-  avg_modes[1] =
-      modes_from_stress_eq[1] + (0.5 + 0.5 * lb_parameters.gamma_shear) *
-                                    (modes[5] - modes_from_stress_eq[1]);
-  avg_modes[2] =
-      modes_from_stress_eq[2] + (0.5 + 0.5 * lb_parameters.gamma_shear) *
-                                    (modes[6] - modes_from_stress_eq[2]);
-  avg_modes[3] =
-      modes_from_stress_eq[3] + (0.5 + 0.5 * lb_parameters.gamma_shear) *
-                                    (modes[7] - modes_from_stress_eq[3]);
-  avg_modes[4] =
-      modes_from_stress_eq[4] + (0.5 + 0.5 * lb_parameters.gamma_shear) *
-                                    (modes[8] - modes_from_stress_eq[4]);
-  avg_modes[5] =
-      modes_from_stress_eq[5] + (0.5 + 0.5 * lb_parameters.gamma_shear) *
-                                    (modes[9] - modes_from_stress_eq[5]);
-
-  // Transform the stress tensor components according to the modes that
-  // correspond to those used by U. Schiller. In terms of populations this
-  // expression then corresponds exactly to those in eq. (116)-(121) in
-  // @cite dunweg07a, when these are written out in populations.
-  // But to ensure this, the expression in Schiller's modes has to be different!
-
-  Utils::Vector6d stress;
-  stress[0] =
-      (2.0 * (modes[0] + avg_modes[0]) + avg_modes[2] + 3.0 * avg_modes[1]) /
-      6.0;                  // xx
-  stress[1] = avg_modes[3]; // xy
-  stress[2] =
-      (2.0 * (modes[0] + avg_modes[0]) + avg_modes[2] - 3.0 * avg_modes[1]) /
-      6.0;                                                    // yy
-  stress[3] = avg_modes[4];                                   // xz
-  stress[4] = avg_modes[5];                                   // yz
-  stress[5] = (modes[0] + avg_modes[0] - avg_modes[2]) / 3.0; // zz
-  return stress;
-}
-
-#ifdef LB_BOUNDARIES
-void lb_bounce_back(LB_Fluid &lb_fluid, const LB_Parameters &lb_parameters,
-                    const std::vector<LB_FluidNode> &lb_fields) {
-  auto const next = lb_next_offsets(lblattice, D3Q19::c);
-  static constexpr int reverse[] = {0, 2,  1,  4,  3,  6,  5,  8,  7, 10,
-                                    9, 12, 11, 14, 13, 16, 15, 18, 17};
-
-  /* bottom-up sweep */
-  for (int z = 0; z < lblattice.grid[2] + 2; z++) {
-    for (int y = 0; y < lblattice.grid[1] + 2; y++) {
-      for (int x = 0; x < lblattice.grid[0] + 2; x++) {
-        auto const k = get_linear_index(x, y, z, lblattice.halo_grid);
-
-        if (lb_fields[k].boundary) {
-          Utils::Vector3d boundary_force = {};
-          for (int i = 0; i < 19; i++) {
-            auto const ci = D3Q19::c[i];
-
-            if (x - ci[0] > 0 && x - ci[0] < lblattice.grid[0] + 1 &&
-                y - ci[1] > 0 && y - ci[1] < lblattice.grid[1] + 1 &&
-                z - ci[2] > 0 && z - ci[2] < lblattice.grid[2] + 1) {
-              if (!lb_fields[k - next[i]].boundary) {
-                auto const population_shift =
-                    -lb_parameters.density * 2 * D3Q19::w[i] *
-                    (ci * lb_fields[k].slip_velocity) /
-                    D3Q19::c_sound_sq<double>;
-
-                boundary_force += (2 * lb_fluid[i][k] + population_shift) * ci;
-                lb_fluid[reverse[i]][k - next[i]] =
-                    lb_fluid[i][k] + population_shift;
-              } else {
-                lb_fluid[reverse[i]][k - next[i]] = lb_fluid[i][k] = 0.0;
-              }
-            }
-          }
-          LBBoundaries::lbboundaries[lb_fields[k].boundary - 1]->force() +=
-              boundary_force;
-        }
-      }
-    }
-  }
-}
-#endif // LB_BOUNDARIES
-
-/** Calculate the local fluid momentum.
- *  The calculation is implemented explicitly for the special case of D3Q19.
- *  @param[in]  index     Local lattice site
- *  @param[in]  lb_fluid  Populations of the fluid
- *  @retval The local fluid momentum.
- */
-Utils::Vector3d lb_calc_local_momentum_density(Lattice::index_t index,
-                                               const LB_Fluid &lb_fluid) {
-  return {{lb_fluid[1][index] - lb_fluid[2][index] + lb_fluid[7][index] -
-               lb_fluid[8][index] + lb_fluid[9][index] - lb_fluid[10][index] +
-               lb_fluid[11][index] - lb_fluid[12][index] + lb_fluid[13][index] -
-               lb_fluid[14][index],
-           lb_fluid[3][index] - lb_fluid[4][index] + lb_fluid[7][index] -
-               lb_fluid[8][index] - lb_fluid[9][index] + lb_fluid[10][index] +
-               lb_fluid[15][index] - lb_fluid[16][index] + lb_fluid[17][index] -
-               lb_fluid[18][index],
-           lb_fluid[5][index] - lb_fluid[6][index] + lb_fluid[11][index] -
-               lb_fluid[12][index] - lb_fluid[13][index] + lb_fluid[14][index] +
-               lb_fluid[15][index] - lb_fluid[16][index] - lb_fluid[17][index] +
-               lb_fluid[18][index]}};
-}
-
-/** Calculate momentum of the LB fluid.
- *  @param[in]  lb_parameters  LB parameters
- *  @param[in]  lb_fields      Hydrodynamic fields of the fluid
- *  @param[in]  lb_lattice     The underlying lattice
- */
-Utils::Vector3d
-mpi_lb_calc_fluid_momentum_local(LB_Parameters const &lb_parameters,
-                                 std::vector<LB_FluidNode> const &lb_fields,
-                                 Lattice const &lb_lattice) {
-  Utils::Vector3d momentum_density{}, momentum{}, result{};
-
-  for (int x = 1; x <= lb_lattice.grid[0]; x++) {
-    for (int y = 1; y <= lb_lattice.grid[1]; y++) {
-      for (int z = 1; z <= lb_lattice.grid[2]; z++) {
-        auto const index = get_linear_index(x, y, z, lb_lattice.halo_grid);
-
-        momentum_density = lb_calc_local_momentum_density(index, lbfluid);
-        momentum += momentum_density + .5 * lb_fields[index].force_density;
-      }
-    }
-  }
-
-  momentum *= lb_parameters.agrid / lb_parameters.tau;
-  boost::mpi::reduce(::comm_cart, momentum, result, std::plus<>(), 0);
-  return result;
-}
-
-void lb_collect_boundary_forces(double *result) {
-#ifdef LB_BOUNDARIES
-  auto const lbb_data_len = 3 * LBBoundaries::lbboundaries.size();
-  std::vector<double> boundary_forces(lbb_data_len);
-  std::size_t i = 0;
-  for (auto it = LBBoundaries::lbboundaries.begin();
-       it != LBBoundaries::lbboundaries.end(); ++it, i++)
-    for (std::size_t j = 0; j < 3; j++)
-      boundary_forces[3 * i + j] = (**it).force()[j];
-
-  boost::mpi::reduce(comm_cart, boundary_forces.data(),
-                     static_cast<int>(lbb_data_len), result, std::plus<>(), 0);
-#endif
-}
diff --git a/src/core/grid_based_algorithms/lb.hpp b/src/core/grid_based_algorithms/lb.hpp
deleted file mode 100644
index f7dc8eae44d..00000000000
--- a/src/core/grid_based_algorithms/lb.hpp
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
- *   Max-Planck-Institute for Polymer Research, Theory Group
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef SRC_CORE_GRID_BASED_ALGORITHMS_LB_HPP
-#define SRC_CORE_GRID_BASED_ALGORITHMS_LB_HPP
-/** \file
- *
- *  %Lattice Boltzmann algorithm for hydrodynamic degrees of freedom.
- *
- *  For performance reasons it is clever to do streaming and collision at the
- *  same time because every fluid node has to be read and written only once.
- *  This increases mainly cache efficiency.
- *
- *  The hydrodynamic fields, corresponding to density, velocity and pressure,
- *  are stored in @ref LB_FluidNode in the array @ref lbfields, the populations
- *  in @ref LB_Fluid in the array @ref lbfluid which is constructed as
- *  2 x (Nx x Ny x Nz) x 19 array.
- *
- *  Implementation in lb.cpp.
- */
-
-#include "config/config.hpp"
-#include "grid_based_algorithms/lattice.hpp"
-#include "grid_based_algorithms/lb-d3q19.hpp"
-#include "grid_based_algorithms/lb_constants.hpp"
-
-#include "halo.hpp"
-
-#include <utils/Counter.hpp>
-#include <utils/Span.hpp>
-#include <utils/Vector.hpp>
-
-#include <boost/optional.hpp>
-
-#include <array>
-#include <cstddef>
-#include <cstdint>
-#include <ostream>
-#include <vector>
-
-/** Counter for the RNG */
-extern boost::optional<Utils::Counter<uint64_t>> rng_counter_fluid;
-
-/** Data structure for fluid on a local lattice site */
-struct LB_FluidNode {
-#ifdef LB_BOUNDARIES
-  /** flag indicating whether this site belongs to a boundary */
-  int boundary;
-  Utils::Vector3d slip_velocity = {};
-#endif // LB_BOUNDARIES
-
-  /** local force density */
-  Utils::Vector3d force_density;
-#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS
-  // For particle update, we need the force on the nodes in LBM
-  // Yet, ESPResSo resets the force immediately after the LBM update
-  // Therefore we save it here
-  Utils::Vector3d force_density_buf;
-#endif
-};
-
-/** Data structure holding the parameters for the Lattice Boltzmann system. */
-struct LB_Parameters {
-  /** number density (LB units) */
-  double density;
-
-  /** kinematic viscosity (LB units) */
-  double viscosity;
-
-  /** bulk viscosity (LB units) */
-  double bulk_viscosity;
-
-  /** lattice spacing */
-  double agrid;
-
-  /** time step for fluid propagation (MD units)
-   *  Note: Has to be larger than MD time step! */
-  double tau;
-
-  /** external force density applied to the fluid at each lattice site (LB
-   * Units) */
-  Utils::Vector3d ext_force_density;
-
-  /** relaxation of the odd kinetic modes */
-  double gamma_odd;
-  /** relaxation of the even kinetic modes */
-  double gamma_even;
-  /** relaxation rate of shear modes */
-  double gamma_shear;
-  /** relaxation rate of bulk modes */
-  double gamma_bulk;
-
-  /** Flag determining whether lbpar.gamma_shear, gamma_odd, and gamma_even are
-   *  calculated from lbpar.gamma_shear in such a way to yield a TRT LB with
-   *  minimized slip at bounce-back boundaries
-   */
-  bool is_TRT;
-
-  /** \name Derived parameters */
-  /**@{*/
-  /** amplitudes of the fluctuations of the modes */
-  Utils::Vector19d phi;
-  /**@}*/
-  /** Thermal energy */
-  double kT;
-
-  template <class Archive> void serialize(Archive &ar, long int) {
-    ar &density &viscosity &bulk_viscosity &agrid &tau &ext_force_density
-        &gamma_odd &gamma_even &gamma_shear &gamma_bulk &is_TRT &phi &kT;
-  }
-};
-
-/** %Lattice Boltzmann parameters. */
-extern LB_Parameters lbpar;
-
-/** The underlying lattice */
-extern Lattice lblattice;
-
-/** Communicator for halo exchange between processors */
-extern HaloCommunicator update_halo_comm;
-
-void lb_init(const LB_Parameters &lb_parameters);
-
-void lb_reinit_fluid(std::vector<LB_FluidNode> &lb_fields,
-                     const Lattice &lb_lattice,
-                     const LB_Parameters &lb_parameters);
-
-void lb_reinit_parameters(LB_Parameters &lb_parameters);
-
-using LB_Fluid = std::array<Utils::Span<double>, 19>;
-extern LB_Fluid lbfluid;
-
-class LB_Fluid_Ref {
-public:
-  LB_Fluid_Ref(std::size_t index, const LB_Fluid &lb_fluid)
-      : m_index(index), m_lb_fluid(lb_fluid) {}
-  template <std::size_t I> const auto &get() const {
-    return m_lb_fluid[I][m_index];
-  }
-
-private:
-  const std::size_t m_index;
-  const LB_Fluid &m_lb_fluid;
-};
-
-namespace Utils {
-
-template <std::size_t I> auto get(const LB_Fluid_Ref &lb_fluid) {
-  return lb_fluid.get<I>();
-}
-
-} // namespace Utils
-
-/** Hydrodynamic fields of the fluid */
-extern std::vector<LB_FluidNode> lbfields;
-
-/** Integrate the lattice-Boltzmann system for one time step.
- *  This function performs the collision step and the streaming step.
- *  If external force densities are present, they are applied prior to the
- *  collisions. If boundaries are present, it also applies the boundary
- *  conditions.
- */
-void lb_integrate();
-
-void lb_sanity_checks(const LB_Parameters &lb_parameters);
-
-/** Sets the equilibrium distributions.
- *  @param index Index of the local site
- *  @param density local fluid density
- *  @param momentum_density local fluid flux density
- *  @param stress local fluid stress
- */
-void lb_set_population_from_density_momentum_density_stress(
-    Lattice::index_t index, double density,
-    Utils::Vector3d const &momentum_density, Utils::Vector6d const &stress);
-
-double lb_calc_density(std::array<double, 19> const &modes,
-                       const LB_Parameters &lb_parameters);
-Utils::Vector3d lb_calc_momentum_density(std::array<double, 19> const &modes,
-                                         Utils::Vector3d const &force_density);
-Utils::Vector6d lb_calc_pressure_tensor(std::array<double, 19> const &modes,
-                                        Utils::Vector3d const &force_density,
-                                        const LB_Parameters &lb_parameters);
-
-/** Calculation of hydrodynamic modes.
- *
- *  @param[in]  index     Number of the node to calculate the modes for
- *  @param[in]  lb_fluid  Populations of the fluid
- *  @retval Array containing the modes.
- */
-std::array<double, 19> lb_calc_modes(Lattice::index_t index,
-                                     const LB_Fluid &lb_fluid);
-
-/**
- * @brief Get the populations as a function of density, flux density and stress.
- * @param density fluid density
- * @param momentum_density       fluid flux density
- * @param stress      fluid stress
- * @return 19 populations (including equilibrium density contribution).
- */
-Utils::Vector19d lb_get_population_from_density_momentum_density_stress(
-    double density, Utils::Vector3d const &momentum_density,
-    Utils::Vector6d const &stress);
-
-inline Utils::Vector19d lb_get_population(Lattice::index_t index) {
-  Utils::Vector19d pop{};
-  for (int i = 0; i < D3Q19::n_vel; ++i) {
-    pop[i] = lbfluid[i][index] + D3Q19::coefficients[i][0] * lbpar.density;
-  }
-  return pop;
-}
-
-inline void lb_set_population(Lattice::index_t index,
-                              const Utils::Vector19d &pop) {
-  for (int i = 0; i < D3Q19::n_vel; ++i) {
-    lbfluid[i][index] = pop[i] - D3Q19::coefficients[i][0] * lbpar.density;
-  }
-}
-
-uint64_t lb_fluid_get_rng_state();
-void lb_fluid_set_rng_state(uint64_t counter);
-void lb_prepare_communication(HaloCommunicator &halo_comm,
-                              const Lattice &lb_lattice);
-
-#ifdef LB_BOUNDARIES
-/** Bounce back boundary conditions.
- * The populations that have propagated into a boundary node
- * are bounced back to the node they came from. This results
- * in no slip boundary conditions, cf. @cite ladd01a.
- */
-void lb_bounce_back(LB_Fluid &lbfluid, const LB_Parameters &lb_parameters,
-                    const std::vector<LB_FluidNode> &lb_fields);
-
-#endif /* LB_BOUNDARIES */
-
-Utils::Vector3d
-mpi_lb_calc_fluid_momentum_local(LB_Parameters const &lb_parameters,
-                                 std::vector<LB_FluidNode> const &lb_fields,
-                                 Lattice const &lb_lattice);
-void lb_collect_boundary_forces(double *result);
-void lb_initialize_fields(std::vector<LB_FluidNode> &fields,
-                          LB_Parameters const &lb_parameters,
-                          Lattice const &lb_lattice);
-void lb_on_param_change(LBParam param);
-
-#ifdef ADDITIONAL_CHECKS
-void log_buffer_diff(std::ostream &out, int dir, Lattice::index_t index, int x,
-                     int y, int z);
-#endif // ADDITIONAL_CHECKS
-
-#endif // SRC_CORE_GRID_BASED_ALGORITHMS_LB_HPP
diff --git a/src/core/grid_based_algorithms/lb_boundaries.cpp b/src/core/grid_based_algorithms/lb_boundaries.cpp
deleted file mode 100644
index 12a19f9d423..00000000000
--- a/src/core/grid_based_algorithms/lb_boundaries.cpp
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
- *   Max-Planck-Institute for Polymer Research, Theory Group,
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/** \file
- *
- * Boundary conditions for lattice Boltzmann fluid dynamics.
- * Source file for \ref lb_boundaries.hpp.
- */
-
-#include "grid_based_algorithms/lb_boundaries.hpp"
-
-#include "communication.hpp"
-#include "errorhandling.hpp"
-#include "event.hpp"
-#include "grid.hpp"
-#include "grid_based_algorithms/electrokinetics.hpp"
-#include "grid_based_algorithms/lattice.hpp"
-#include "grid_based_algorithms/lb.hpp"
-#include "grid_based_algorithms/lb_interface.hpp"
-#include "grid_based_algorithms/lbgpu.hpp"
-#include "lbboundaries/LBBoundary.hpp"
-
-#include <utils/Vector.hpp>
-#include <utils/index.hpp>
-#include <utils/math/int_pow.hpp>
-
-#include <boost/range/adaptor/reversed.hpp>
-#include <boost/range/algorithm.hpp>
-
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <iterator>
-#include <memory>
-#include <stdexcept>
-#include <vector>
-
-namespace LBBoundaries {
-
-std::vector<std::shared_ptr<LBBoundary>> lbboundaries;
-#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU)
-
-void add(const std::shared_ptr<LBBoundary> &b) {
-  assert(std::find(lbboundaries.begin(), lbboundaries.end(), b) ==
-         lbboundaries.end());
-  lbboundaries.emplace_back(b);
-
-  on_lbboundary_change();
-}
-
-void remove(const std::shared_ptr<LBBoundary> &b) {
-  assert(std::find(lbboundaries.begin(), lbboundaries.end(), b) !=
-         lbboundaries.end());
-  lbboundaries.erase(std::remove(lbboundaries.begin(), lbboundaries.end(), b),
-                     lbboundaries.end());
-
-  on_lbboundary_change();
-}
-
-bool sanity_check_mach_limit() {
-  // Boundary velocities are stored in MD units, therefore we need to scale them
-  // in order to get lattice units.
-  auto const conv_fac = 1. / lb_lbfluid_get_lattice_speed();
-  auto constexpr mach_limit = 0.2;
-  return std::any_of(lbboundaries.begin(), lbboundaries.end(),
-                     [conv_fac, mach_limit](auto const &b) {
-                       return (b->velocity() * conv_fac).norm() >= mach_limit;
-                     });
-}
-
-#if defined(EK_BOUNDARIES)
-static void ek_init_boundaries() {
-  int number_of_boundnodes = 0;
-
-  std::vector<float> host_wallcharge_species_density;
-  float node_wallcharge = 0.0f;
-  int wallcharge_species = -1, charged_boundaries = 0;
-  bool node_charged = false;
-
-  for (auto &lbboundary : lbboundaries) {
-    lbboundary->set_net_charge(0.0);
-  }
-
-  if (ek_initialized) {
-    host_wallcharge_species_density.resize(ek_parameters.number_of_nodes);
-    for (auto &lbboundary : lbboundaries) {
-      if (lbboundary->charge_density() != 0.0f) {
-        charged_boundaries = 1;
-        break;
-      }
-    }
-
-    for (int n = 0; n < int(ek_parameters.number_of_species); n++)
-      if (ek_parameters.valency[n] != 0.0f) {
-        wallcharge_species = n;
-        break;
-      }
-
-    ek_gather_wallcharge_species_density(host_wallcharge_species_density.data(),
-                                         wallcharge_species);
-
-    if (wallcharge_species == -1 && charged_boundaries) {
-      runtimeErrorMsg()
-          << "no charged species available to create wall charge\n";
-    }
-
-    auto const node_volume = Utils::int_pow<3>(ek_parameters.agrid);
-    for (int z = 0; z < int(lbpar_gpu.dim[2]); z++) {
-      for (int y = 0; y < int(lbpar_gpu.dim[1]); y++) {
-        for (int x = 0; x < int(lbpar_gpu.dim[0]); x++) {
-          auto const pos = static_cast<double>(lbpar_gpu.agrid) *
-                           (Utils::Vector3d{1. * x, 1. * y, 1. * z} +
-                            Utils::Vector3d::broadcast(0.5));
-          node_charged = false;
-          node_wallcharge = 0.0f;
-
-          std::vector<std::shared_ptr<LBBoundary>> boundaries;
-          std::copy_if(lbboundaries.begin(), lbboundaries.end(),
-                       std::back_inserter(boundaries), [&pos](auto const lbb) {
-                         return lbb->shape().is_inside(pos);
-                       });
-          for (auto lbb : boundaries) {
-            if ((*lbb).charge_density() != 0.0f) {
-              node_charged = true;
-              auto const node_charge = (*lbb).charge_density() * node_volume;
-              node_wallcharge += node_charge;
-              (*lbb).set_net_charge((*lbb).net_charge() + node_charge);
-            }
-          }
-          if (not boundaries.empty()) {
-            number_of_boundnodes++;
-          }
-          ek_parameters.number_of_boundary_nodes = number_of_boundnodes;
-
-          if (wallcharge_species != -1) {
-            if (node_charged)
-              host_wallcharge_species_density[ek_parameters.dim_y *
-                                                  ek_parameters.dim_x * z +
-                                              ek_parameters.dim_x * y + x] =
-                  node_wallcharge / ek_parameters.valency[wallcharge_species];
-          }
-        }
-      }
-    }
-    ek_init_species_density_wallcharge(host_wallcharge_species_density.data(),
-                                       wallcharge_species);
-  }
-}
-#endif // defined(EK_BOUNDARIES)
-
-/** Initialize boundary conditions for all constraints in the system. */
-void lb_init_boundaries() {
-  if (lattice_switch == ActiveLB::GPU) {
-    if (this_node != 0) {
-      return;
-    }
-#if defined(CUDA)
-#if defined(LB_BOUNDARIES_GPU)
-#if defined(EK_BOUNDARIES)
-    ek_init_boundaries();
-#endif
-    unsigned number_of_boundnodes = 0;
-    std::vector<int> host_boundary_node_list;
-    std::vector<int> host_boundary_index_list;
-    std::size_t size_of_index;
-
-    for (unsigned z = 0; z < lbpar_gpu.dim[2]; z++) {
-      for (unsigned y = 0; y < lbpar_gpu.dim[1]; y++) {
-        for (unsigned x = 0; x < lbpar_gpu.dim[0]; x++) {
-          auto const pos = static_cast<double>(lbpar_gpu.agrid) *
-                           (Utils::Vector3d{1. * x, 1. * y, 1. * z} +
-                            Utils::Vector3d::broadcast(0.5));
-
-          // take last boundary containing the node
-          auto const boundary = boost::find_if(
-              lbboundaries | boost::adaptors::reversed,
-              [&pos](auto const lbb) { return lbb->shape().is_inside(pos); });
-
-          if (boundary != boost::rend(lbboundaries)) {
-            size_of_index = (number_of_boundnodes + 1) * sizeof(int);
-            host_boundary_node_list.resize(size_of_index);
-            host_boundary_index_list.resize(size_of_index);
-            host_boundary_node_list[number_of_boundnodes] =
-                static_cast<int>(x + lbpar_gpu.dim[0] * y +
-                                 lbpar_gpu.dim[0] * lbpar_gpu.dim[1] * z);
-            host_boundary_index_list[number_of_boundnodes] = static_cast<int>(
-                std::distance(lbboundaries.begin(), boundary.base()));
-            number_of_boundnodes++;
-          }
-        }
-      }
-    }
-    lbpar_gpu.number_of_boundnodes = number_of_boundnodes;
-    /* call of cuda fkt */
-    std::vector<float> boundary_velocity(3 * (lbboundaries.size() + 1));
-    int n = 0;
-    for (auto lbb = lbboundaries.begin(); lbb != lbboundaries.end();
-         ++lbb, n++) {
-      boundary_velocity[3 * n + 0] = static_cast<float>((**lbb).velocity()[0]);
-      boundary_velocity[3 * n + 1] = static_cast<float>((**lbb).velocity()[1]);
-      boundary_velocity[3 * n + 2] = static_cast<float>((**lbb).velocity()[2]);
-    }
-
-    boundary_velocity[3 * lbboundaries.size() + 0] = 0.0f;
-    boundary_velocity[3 * lbboundaries.size() + 1] = 0.0f;
-    boundary_velocity[3 * lbboundaries.size() + 2] = 0.0f;
-
-    lb_init_boundaries_GPU(lbboundaries.size(), number_of_boundnodes,
-                           host_boundary_node_list.data(),
-                           host_boundary_index_list.data(),
-                           boundary_velocity.data());
-
-#else  // defined (LB_BOUNDARIES_GPU)
-    if (not lbboundaries.empty()) {
-      runtimeErrorMsg()
-          << "LB boundaries not empty for GPU LB but LB_BOUNDARIES_GPU not "
-             "compiled in. Activate in myconfig.hpp.";
-    }
-#endif // defined (LB_BOUNDARIES_GPU)
-#endif // defined (CUDA)
-  } else if (lattice_switch == ActiveLB::CPU) {
-#if defined(LB_BOUNDARIES)
-    using Utils::get_linear_index;
-    boost::for_each(lbfields, [](auto &f) { f.boundary = 0; });
-
-    auto const node_pos = calc_node_pos(comm_cart);
-    auto const offset = Utils::hadamard_product(node_pos, lblattice.grid);
-    auto const vel_conv = 1. / lb_lbfluid_get_lattice_speed();
-
-    for (int z = 0; z < lblattice.grid[2] + 2; z++) {
-      for (int y = 0; y < lblattice.grid[1] + 2; y++) {
-        for (int x = 0; x < lblattice.grid[0] + 2; x++) {
-          auto const pos =
-              (offset + Utils::Vector3d{x - 0.5, y - 0.5, z - 0.5}) *
-              lblattice.agrid;
-
-          auto const boundary = boost::find_if(
-              lbboundaries | boost::adaptors::reversed,
-              [&pos](auto const lbb) { return lbb->shape().is_inside(pos); });
-          auto const index = get_linear_index(x, y, z, lblattice.halo_grid);
-          if (boundary != boost::rend(lbboundaries)) {
-            auto &node = lbfields[index];
-            node.boundary = static_cast<int>(
-                std::distance(lbboundaries.begin(), boundary.base()));
-            node.slip_velocity = (*boundary)->velocity() * vel_conv;
-          } else {
-            lbfields[index].boundary = 0;
-          }
-        }
-      }
-    }
-#else  // defined(LB_BOUNDARIES)
-    if (not lbboundaries.empty()) {
-      runtimeErrorMsg()
-          << "LB boundaries not empty for CPU LB but LB_BOUNDARIES not "
-             "compiled in. Activate in myconfig.hpp.";
-    }
-#endif // defined(LB_BOUNDARIES)
-  }
-}
-
-#if defined(LB_BOUNDARIES)
-static void lb_collect_boundary_forces_local() {
-  lb_collect_boundary_forces(nullptr);
-}
-
-REGISTER_CALLBACK(lb_collect_boundary_forces_local)
-#endif
-
-Utils::Vector3d lbboundary_get_force(LBBoundary const *lbb) {
-  Utils::Vector3d force{};
-  auto const it =
-      boost::find_if(lbboundaries, [lbb](std::shared_ptr<LBBoundary> const &i) {
-        return i.get() == lbb;
-      });
-  if (it == lbboundaries.end())
-    throw std::runtime_error("You probably tried to get the force of an "
-                             "lbboundary that was not added to "
-                             "system.lbboundaries.");
-  std::vector<double> forces(3 * lbboundaries.size());
-  if (lattice_switch == ActiveLB::GPU) {
-#if defined(LB_BOUNDARIES_GPU)
-    lb_gpu_get_boundary_forces(forces);
-#endif
-  } else if (lattice_switch == ActiveLB::CPU) {
-#if defined(LB_BOUNDARIES)
-    mpi_call(lb_collect_boundary_forces_local);
-    lb_collect_boundary_forces(forces.data());
-#endif
-  }
-  auto const container_index = std::distance(lbboundaries.begin(), it);
-  force[0] = forces[3 * container_index + 0];
-  force[1] = forces[3 * container_index + 1];
-  force[2] = forces[3 * container_index + 2];
-  return force;
-}
-
-#endif // defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU)
-
-} // namespace LBBoundaries
diff --git a/src/core/grid_based_algorithms/lb_boundaries.hpp b/src/core/grid_based_algorithms/lb_boundaries.hpp
deleted file mode 100644
index 3dcf7e35f60..00000000000
--- a/src/core/grid_based_algorithms/lb_boundaries.hpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
- *   Max-Planck-Institute for Polymer Research, Theory Group
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/** \file
- *
- * Boundary conditions for lattice Boltzmann fluid dynamics.
- * Header file for \ref lb_boundaries.cpp.
- *
- * In the current version only simple bounce back walls are implemented. Thus
- * after the streaming step, in all wall nodes all populations are bounced
- * back from where they came from.
- *
- */
-
-#ifndef LBBOUNDARIES_H
-#define LBBOUNDARIES_H
-
-#include "lbboundaries/LBBoundary.hpp"
-
-#include "config/config.hpp"
-
-#include <utils/Span.hpp>
-
-#include <array>
-#include <memory>
-#include <vector>
-
-namespace LBBoundaries {
-using LB_Fluid = std::array<Utils::Span<double>, 19>;
-
-extern std::vector<std::shared_ptr<LBBoundary>> lbboundaries;
-#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU)
-
-/** Initializes the constraints in the system.
- *  This function determines the lattice sites which belong to boundaries
- *  and marks them with a corresponding flag.
- */
-void lb_init_boundaries();
-
-void add(const std::shared_ptr<LBBoundary> &);
-void remove(const std::shared_ptr<LBBoundary> &);
-
-/**
- * @brief Check the boundary velocities.
- * Sanity check if the velocity defined at LB boundaries is within the Mach
- * number limit of the scheme, i.e. u < 0.2.
- */
-bool sanity_check_mach_limit();
-
-#endif // (LB_BOUNDARIES) || (LB_BOUNDARIES_GPU)
-} // namespace LBBoundaries
-#endif /* LB_BOUNDARIES_H */
diff --git a/src/core/grid_based_algorithms/lb_collective_interface.cpp b/src/core/grid_based_algorithms/lb_collective_interface.cpp
deleted file mode 100644
index f1a9765c5c1..00000000000
--- a/src/core/grid_based_algorithms/lb_collective_interface.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "MpiCallbacks.hpp"
-#include "communication.hpp"
-#include "config/config.hpp"
-#include "grid.hpp"
-#include "lb.hpp"
-#include "lb_constants.hpp"
-#include "lb_interpolation.hpp"
-
-#include <utils/Vector.hpp>
-#include <utils/index.hpp>
-
-#include <boost/optional.hpp>
-
-using Utils::get_linear_index;
-
-/* LB CPU callback interface */
-namespace detail {
-
-template <typename Kernel>
-void lb_set(Utils::Vector3i const &index, Kernel kernel) {
-  if (lblattice.is_local(index)) {
-    kernel(index);
-  }
-}
-
-template <typename Kernel>
-auto lb_calc(Utils::Vector3i const &index, Kernel kernel) {
-  using R = decltype(kernel(index));
-  if (lblattice.is_local(index)) {
-    return boost::optional<R>(kernel(index));
-  }
-  return boost::optional<R>();
-}
-
-template <typename Kernel>
-auto lb_calc_for_pos(Utils::Vector3d const &pos, Kernel kernel) {
-  using R = decltype(kernel(pos));
-  if (map_position_node_array(pos) == this_node) {
-    return boost::optional<R>(kernel(pos));
-  }
-  return boost::optional<R>();
-}
-
-template <class Kernel>
-auto lb_calc_fluid_kernel(Utils::Vector3i const &index, Kernel kernel) {
-  return lb_calc(index, [&](auto index) {
-    auto const linear_index =
-        get_linear_index(lblattice.local_index(index), lblattice.halo_grid);
-    auto const force_density = lbfields[linear_index].force_density;
-    auto const modes = lb_calc_modes(linear_index, lbfluid);
-    return kernel(modes, force_density);
-  });
-}
-} // namespace detail
-
-boost::optional<Utils::Vector3d>
-mpi_lb_get_interpolated_velocity(Utils::Vector3d const &pos) {
-  return detail::lb_calc_for_pos(pos, [&](auto pos) {
-    return lb_lbinterpolation_get_interpolated_velocity(pos);
-  });
-}
-
-REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_interpolated_velocity)
-
-boost::optional<double>
-mpi_lb_get_interpolated_density(Utils::Vector3d const &pos) {
-  return detail::lb_calc_for_pos(pos, [&](auto pos) {
-    return lb_lbinterpolation_get_interpolated_density(pos);
-  });
-}
-
-REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_interpolated_density)
-
-auto mpi_lb_get_density(Utils::Vector3i const &index) {
-  return detail::lb_calc_fluid_kernel(index,
-                                      [&](auto const &modes, auto const &) {
-                                        return lb_calc_density(modes, lbpar);
-                                      });
-}
-
-REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_density)
-
-auto mpi_lb_get_populations(Utils::Vector3i const &index) {
-  return detail::lb_calc(index, [&](auto index) {
-    auto const linear_index =
-        get_linear_index(lblattice.local_index(index), lblattice.halo_grid);
-    return lb_get_population(linear_index);
-  });
-}
-
-REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_populations)
-
-boost::optional<int> mpi_lb_get_boundary_flag(Utils::Vector3i const &index) {
-  return detail::lb_calc(index, [&](auto index) {
-#ifdef LB_BOUNDARIES
-    auto const linear_index =
-        get_linear_index(lblattice.local_index(index), lblattice.halo_grid);
-    return lbfields[linear_index].boundary;
-#else
-    return 0;
-#endif
-  });
-}
-
-REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_boundary_flag)
-
-void mpi_lb_set_population(Utils::Vector3i const &index,
-                           Utils::Vector19d const &population) {
-  detail::lb_set(index, [&](auto index) {
-    auto const linear_index =
-        get_linear_index(lblattice.local_index(index), lblattice.halo_grid);
-    lb_set_population(linear_index, population);
-  });
-}
-
-REGISTER_CALLBACK(mpi_lb_set_population)
-
-void mpi_lb_set_force_density(Utils::Vector3i const &index,
-                              Utils::Vector3d const &force_density) {
-  detail::lb_set(index, [&](auto index) {
-    auto const linear_index =
-        get_linear_index(lblattice.local_index(index), lblattice.halo_grid);
-    lbfields[linear_index].force_density = force_density;
-  });
-}
-
-REGISTER_CALLBACK(mpi_lb_set_force_density)
-
-auto mpi_lb_get_momentum_density(Utils::Vector3i const &index) {
-  return detail::lb_calc_fluid_kernel(
-      index, [&](auto const &modes, auto const &force_density) {
-        return lb_calc_momentum_density(modes, force_density);
-      });
-}
-
-REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_momentum_density)
-
-auto mpi_lb_get_pressure_tensor(Utils::Vector3i const &index) {
-  return detail::lb_calc_fluid_kernel(
-      index, [&](auto const &modes, auto const &force_density) {
-        return lb_calc_pressure_tensor(modes, force_density, lbpar);
-      });
-}
-
-REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_pressure_tensor)
-
-void mpi_bcast_lb_params_local(LBParam field, LB_Parameters const &params) {
-  lbpar = params;
-  lb_on_param_change(field);
-}
-
-REGISTER_CALLBACK(mpi_bcast_lb_params_local)
-
-/** @brief Broadcast a parameter for lattice Boltzmann.
- *  @param[in] field  References the parameter field to be broadcasted.
- *                    The references are defined in lb.hpp
- */
-void mpi_bcast_lb_params(LBParam field) {
-  mpi_call(mpi_bcast_lb_params_local, field, lbpar);
-  lb_on_param_change(field);
-}
diff --git a/src/core/grid_based_algorithms/lb_collective_interface.hpp b/src/core/grid_based_algorithms/lb_collective_interface.hpp
deleted file mode 100644
index 1375278ce5a..00000000000
--- a/src/core/grid_based_algorithms/lb_collective_interface.hpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef LB_COLLECTIVE_INTERFACE_HPP
-#define LB_COLLECTIVE_INTERFACE_HPP
-
-#include "grid_based_algorithms/lb_constants.hpp"
-
-#include <boost/optional.hpp>
-#include <utils/Vector.hpp>
-
-/* collective getter functions */
-boost::optional<Utils::Vector3d>
-mpi_lb_get_interpolated_velocity(Utils::Vector3d const &pos);
-boost::optional<double>
-mpi_lb_get_interpolated_density(Utils::Vector3d const &pos);
-boost::optional<double> mpi_lb_get_density(Utils::Vector3i const &index);
-boost::optional<Utils::Vector19d>
-mpi_lb_get_populations(Utils::Vector3i const &index);
-boost::optional<int> mpi_lb_get_boundary_flag(Utils::Vector3i const &index);
-boost::optional<Utils::Vector3d>
-mpi_lb_get_momentum_density(Utils::Vector3i const &index);
-boost::optional<Utils::Vector6d>
-mpi_lb_get_pressure_tensor(Utils::Vector3i const &index);
-
-/* collective setter functions */
-void mpi_lb_set_population(Utils::Vector3i const &index,
-                           Utils::Vector19d const &population);
-void mpi_lb_set_force_density(Utils::Vector3i const &index,
-                              Utils::Vector3d const &force_density);
-
-/* collective sync functions */
-void mpi_bcast_lb_params(LBParam field);
-
-#endif
diff --git a/src/core/grid_based_algorithms/lb_constants.hpp b/src/core/grid_based_algorithms/lb_constants.hpp
deleted file mode 100644
index e7864bda377..00000000000
--- a/src/core/grid_based_algorithms/lb_constants.hpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2019-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/** \file
- *  Constants and enumerators for LB.
- */
-
-#ifndef LB_CONSTANTS_HPP
-#define LB_CONSTANTS_HPP
-
-/** @brief Parameter fields for lattice Boltzmann
- *
- *  Determine what actions have to take place upon change of the respective
- *  parameter.
- */
-enum class LBParam {
-  DENSITY,           /**< fluid density */
-  VISCOSITY,         /**< fluid kinematic viscosity */
-  AGRID,             /**< grid constant for fluid lattice */
-  EXT_FORCE_DENSITY, /**< external force density acting on the fluid */
-  BULKVISC,          /**< fluid bulk viscosity */
-  KT,                /**< thermal energy */
-  GAMMA_ODD,         /**< Relaxation constant for odd modes */
-  GAMMA_EVEN,        /**< Relaxation constant for even modes */
-  TAU                /**< LB time step */
-};
-
-#endif /* LB_CONSTANTS_HPP */
diff --git a/src/core/grid_based_algorithms/lb_interface.cpp b/src/core/grid_based_algorithms/lb_interface.cpp
index dd9b32d9430..c573b88ee1e 100644
--- a/src/core/grid_based_algorithms/lb_interface.cpp
+++ b/src/core/grid_based_algorithms/lb_interface.cpp
@@ -16,29 +16,26 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include "lb_interface.hpp"
+
+#include "grid_based_algorithms/lb_interface.hpp"
+#include "grid_based_algorithms/lb_walberla_instance.hpp"
+
 #include "BoxGeometry.hpp"
 #include "MpiCallbacks.hpp"
 #include "communication.hpp"
 #include "config/config.hpp"
-#include "electrokinetics.hpp"
 #include "errorhandling.hpp"
 #include "grid.hpp"
-#include "halo.hpp"
-#include "lb-d3q19.hpp"
-#include "lb.hpp"
-#include "lb_boundaries.hpp"
-#include "lb_collective_interface.hpp"
-#include "lb_constants.hpp"
-#include "lb_interpolation.hpp"
-#include "lbgpu.hpp"
 
 #include <utils/Vector.hpp>
 
+#include <boost/optional.hpp>
+#include <boost/serialization/access.hpp>
+#include <boost/serialization/vector.hpp>
+
 #include <cmath>
-#include <fstream>
+#include <functional>
 #include <limits>
-#include <sstream>
 #include <stdexcept>
 #include <string>
 #include <tuple>
@@ -46,361 +43,55 @@
 
 ActiveLB lattice_switch = ActiveLB::NONE;
 
-ActiveLB lb_lbfluid_get_lattice_switch() { return lattice_switch; }
+namespace LB {
 
-struct NoLBActive : public std::exception {
-  const char *what() const noexcept override { return "LB not activated"; }
-};
+ActiveLB get_lattice_switch() { return lattice_switch; }
 
-void lb_lbfluid_integrate() {
-  if (lattice_switch == ActiveLB::CPU) {
-    lb_integrate();
-  } else if (lattice_switch == ActiveLB::GPU and this_node == 0) {
-#ifdef CUDA
-#ifdef ELECTROKINETICS
-    if (ek_initialized) {
-      ek_integrate();
-    } else {
-#endif
-      lb_integrate_GPU();
-#ifdef ELECTROKINETICS
-    }
-#endif
-#endif
-  }
+int get_steps_per_md_step(double md_timestep) {
+  return static_cast<int>(std::round(get_tau() / md_timestep));
 }
 
-void lb_lbfluid_propagate() {
-  if (lattice_switch != ActiveLB::NONE) {
-    lb_lbfluid_integrate();
-    if (lb_lbfluid_get_kT() > 0.0) {
-      if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-        rng_counter_fluid_gpu->increment();
-#endif
-      } else if (lattice_switch == ActiveLB::CPU) {
-        rng_counter_fluid->increment();
-      }
-    }
-  }
-}
+void init() {}
 
-/**
- * @brief Check the boundary velocities.
- */
-inline void lb_boundary_mach_check() {
-#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU)
-  if (LBBoundaries::sanity_check_mach_limit()) {
-    runtimeErrorMsg() << "Lattice velocity exceeds the Mach number limit";
-  }
+void propagate() {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+#ifdef WALBERLA
+    lb_walberla()->integrate();
 #endif
-}
-
-void lb_lbfluid_sanity_checks(double time_step) {
-  if (lattice_switch == ActiveLB::GPU && this_node == 0) {
-#ifdef CUDA
-    lb_GPU_sanity_checks();
-    lb_boundary_mach_check();
-    if (time_step > 0.)
-      check_tau_time_step_consistency(lb_lbfluid_get_tau(), time_step);
-#endif
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    lb_sanity_checks(lbpar);
-    lb_boundary_mach_check();
-    if (time_step > 0.)
-      check_tau_time_step_consistency(lb_lbfluid_get_tau(), time_step);
   }
 }
 
-void lb_lbfluid_on_integration_start() {
-  if (lattice_switch == ActiveLB::CPU) {
-    halo_communication(update_halo_comm,
-                       reinterpret_cast<char *>(lbfluid[0].data()));
-  }
-}
+void sanity_checks(double time_step) {
+  if (lattice_switch == ActiveLB::NONE)
+    return;
 
-/** (Re-)initialize the fluid. */
-void lb_lbfluid_reinit_parameters() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    if (this_node == 0)
-      lb_reinit_parameters_gpu();
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+#ifdef WALBERLA
+    lb_sanity_checks(*lb_walberla(), *lb_walberla_params(), time_step);
 #endif
-  } else if (lattice_switch == ActiveLB::CPU) {
-    lb_reinit_parameters(lbpar);
   }
 }
 
-/** Perform a full initialization of the lattice Boltzmann system.
- *  All derived parameters and the fluid are reset to their default values.
- */
-void lb_lbfluid_init() {
-  if (lattice_switch == ActiveLB::GPU && this_node == 0) {
-#ifdef CUDA
-    lb_init_gpu();
+void lebc_sanity_checks(unsigned int shear_direction,
+                        unsigned int shear_plane_normal) {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+#ifdef WALBERLA
+    lb_walberla()->check_lebc(shear_direction, shear_plane_normal);
 #endif
-  } else if (lattice_switch == ActiveLB::CPU) {
-    lb_init(lbpar);
-  }
-}
-
-uint64_t lb_lbfluid_get_rng_state() {
-  if (lattice_switch == ActiveLB::CPU) {
-    return lb_fluid_get_rng_state();
-  }
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return lb_fluid_get_rng_state_gpu();
-#endif
-  }
-  throw NoLBActive();
-}
-
-void lb_lbfluid_set_rng_state(uint64_t counter) {
-  if (lattice_switch == ActiveLB::CPU) {
-    lb_fluid_set_rng_state(counter);
-  } else if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    lb_fluid_set_rng_state_gpu(counter);
-#endif
-  } else {
-    throw NoLBActive();
-  }
-}
-
-void lb_lbfluid_set_density(double density) {
-  if (density <= 0)
-    throw std::invalid_argument("Density has to be > 0. but got " +
-                                std::to_string(density));
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    lbpar_gpu.rho = static_cast<float>(density);
-    lb_reinit_fluid_gpu();
-    lb_lbfluid_reinit_parameters();
-#endif //  CUDA
-  } else if (lattice_switch == ActiveLB::CPU) {
-    lbpar.density = density;
-    mpi_bcast_lb_params(LBParam::DENSITY);
-  } else {
-    throw NoLBActive();
-  }
-}
-
-double lb_lbfluid_get_density() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return static_cast<double>(lbpar_gpu.rho);
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    return lbpar.density;
-  }
-  throw NoLBActive();
-}
-
-void lb_lbfluid_set_viscosity(double viscosity) {
-  if (viscosity <= 0)
-    throw std::invalid_argument("Viscosity has to be >0.");
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    lbpar_gpu.viscosity = static_cast<float>(viscosity);
-    lb_lbfluid_reinit_parameters();
-#endif //  CUDA
-  } else if (lattice_switch == ActiveLB::CPU) {
-    lbpar.viscosity = viscosity;
-    mpi_bcast_lb_params(LBParam::VISCOSITY);
-  } else {
-    throw NoLBActive();
-  }
-}
-
-double lb_lbfluid_get_viscosity() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return static_cast<double>(lbpar_gpu.viscosity);
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    return lbpar.viscosity;
-  }
-  throw NoLBActive();
-}
-
-void lb_lbfluid_set_bulk_viscosity(double bulk_viscosity) {
-  if (bulk_viscosity <= 0)
-    throw std::invalid_argument("Bulk viscosity has to be >0. but got " +
-                                std::to_string(bulk_viscosity));
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    lbpar_gpu.bulk_viscosity = static_cast<float>(bulk_viscosity);
-    lbpar_gpu.is_TRT = false;
-    lb_lbfluid_reinit_parameters();
-#endif //  CUDA
-  } else if (lattice_switch == ActiveLB::CPU) {
-    lbpar.bulk_viscosity = bulk_viscosity;
-    lbpar.is_TRT = false;
-    mpi_bcast_lb_params(LBParam::BULKVISC);
-  } else {
-    throw NoLBActive();
-  }
-}
-
-double lb_lbfluid_get_bulk_viscosity() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return lbpar_gpu.bulk_viscosity;
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    return lbpar.bulk_viscosity;
-  }
-  throw NoLBActive();
-}
-
-void lb_lbfluid_set_gamma_odd(double gamma_odd) {
-  if (fabs(gamma_odd) > 1)
-    throw std::invalid_argument("Gamma odd has to be <= 1.");
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    lbpar_gpu.gamma_odd = static_cast<float>(gamma_odd);
-    lbpar_gpu.is_TRT = false;
-    lb_lbfluid_reinit_parameters();
-#endif //  CUDA
-  } else if (lattice_switch == ActiveLB::CPU) {
-    lbpar.gamma_odd = gamma_odd;
-    lbpar.is_TRT = false;
-    mpi_bcast_lb_params(LBParam::GAMMA_ODD);
-  } else {
-    throw NoLBActive();
-  }
-}
-
-double lb_lbfluid_get_gamma_odd() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return lbpar_gpu.gamma_odd;
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    return lbpar.gamma_odd;
-  }
-  throw NoLBActive();
-}
-
-void lb_lbfluid_set_gamma_even(double gamma_even) {
-  if (fabs(gamma_even) > 1)
-    throw std::invalid_argument("gamma_even has to be <= 1.");
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    lbpar_gpu.gamma_even = static_cast<float>(gamma_even);
-    lbpar_gpu.is_TRT = false;
-    lb_lbfluid_reinit_parameters();
-#endif //  CUDA
-  } else if (lattice_switch == ActiveLB::CPU) {
-    lbpar.gamma_even = gamma_even;
-    lbpar.is_TRT = false;
-    mpi_bcast_lb_params(LBParam::DENSITY);
-  } else {
-    throw NoLBActive();
   }
 }
 
-double lb_lbfluid_get_gamma_even() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return lbpar_gpu.gamma_even;
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    return lbpar.gamma_even;
-  }
-  throw NoLBActive();
-}
-
-void lb_lbfluid_set_agrid(double agrid) {
-  if (agrid <= 0)
-    throw std::invalid_argument("agrid has to be > 0.");
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    lb_set_agrid_gpu(agrid);
-    lb_init_gpu();
-#if defined(LB_BOUNDARIES_GPU)
-    LBBoundaries::lb_init_boundaries();
+double get_agrid() {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+#ifdef WALBERLA
+    return lb_walberla_params()->get_agrid();
 #endif
-#endif //  CUDA
-  } else if (lattice_switch == ActiveLB::CPU) {
-    lbpar.agrid = agrid;
-    mpi_bcast_lb_params(LBParam::AGRID);
-  } else {
-    throw NoLBActive();
-  }
-}
-
-double lb_lbfluid_get_agrid() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return lbpar_gpu.agrid;
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    return lbpar.agrid;
   }
   throw NoLBActive();
 }
 
-void lb_lbfluid_set_ext_force_density(const Utils::Vector3d &force_density) {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    lbpar_gpu.ext_force_density[0] = static_cast<float>(force_density[0]);
-    lbpar_gpu.ext_force_density[1] = static_cast<float>(force_density[1]);
-    lbpar_gpu.ext_force_density[2] = static_cast<float>(force_density[2]);
-    lbpar_gpu.external_force_density = force_density[0] != 0. ||
-                                       force_density[1] != 0. ||
-                                       force_density[2] != 0.;
-    lb_reinit_extern_nodeforce_GPU(&lbpar_gpu);
-
-#endif //  CUDA
-  } else if (lattice_switch == ActiveLB::CPU) {
-    lbpar.ext_force_density = force_density;
-    mpi_bcast_lb_params(LBParam::EXT_FORCE_DENSITY);
-  } else {
-    throw NoLBActive();
-  }
-}
-
-const Utils::Vector3d lb_lbfluid_get_ext_force_density() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return {static_cast<double>(lbpar_gpu.ext_force_density[0]),
-            static_cast<double>(lbpar_gpu.ext_force_density[1]),
-            static_cast<double>(lbpar_gpu.ext_force_density[2])};
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    return lbpar.ext_force_density;
-  }
-  throw NoLBActive();
-}
-
-void lb_lbfluid_set_tau(double tau) {
-  if (tau <= 0.)
-    throw std::invalid_argument("LB tau has to be positive.");
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    lbpar_gpu.tau = static_cast<float>(tau);
-    lb_lbfluid_reinit_parameters();
-#endif //  CUDA
-  } else if (lattice_switch == ActiveLB::CPU) {
-    lbpar.tau = tau;
-    mpi_bcast_lb_params(LBParam::TAU);
-  } else {
-    throw NoLBActive();
-  }
-}
-
 void check_tau_time_step_consistency(double tau, double time_step) {
-  // use float epsilon since tau may be a float (GPU LB)
+  // use float epsilon since tau may be a float
   auto const eps = static_cast<double>(std::numeric_limits<float>::epsilon());
   if ((tau - time_step) / (tau + time_step) < -eps)
     throw std::invalid_argument("LB tau (" + std::to_string(tau) +
@@ -415,766 +106,95 @@ void check_tau_time_step_consistency(double tau, double time_step) {
                                 std::to_string(factor));
 }
 
-double lb_lbfluid_get_tau() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return lbpar_gpu.tau;
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    return lbpar.tau;
+double get_tau() {
+#ifdef WALBERLA
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+    return lb_walberla_params()->get_tau();
   }
-  throw NoLBActive();
-}
-
-void lb_lbfluid_set_lattice_switch(ActiveLB local_lattice_switch) {
-  switch (local_lattice_switch) {
-  case ActiveLB::NONE:
-  case ActiveLB::CPU:
-  case ActiveLB::GPU:
-    break;
-  default:
-    throw std::invalid_argument("Invalid lattice switch.");
-  }
-  mpi_set_lattice_switch(local_lattice_switch);
-}
-
-void lb_lbfluid_set_kT(double kT) {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    lbpar_gpu.kT = static_cast<float>(kT);
 #endif
-  } else if (lattice_switch == ActiveLB::CPU) {
-    lbpar.kT = kT;
-    mpi_bcast_lb_params(LBParam::KT);
-  } else {
-    throw NoLBActive();
-  }
+  throw NoLBActive();
 }
 
-double lb_lbfluid_get_kT() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return static_cast<double>(lbpar_gpu.kT);
+double get_kT() {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+#ifdef WALBERLA
+    return lb_walberla()->get_kT();
 #endif
   }
-  if (lattice_switch == ActiveLB::CPU) {
-    return lbpar.kT;
-  }
   throw NoLBActive();
 }
 
-double lb_lbfluid_get_lattice_speed() {
-  return lb_lbfluid_get_agrid() / lb_lbfluid_get_tau();
-}
+double get_lattice_speed() { return get_agrid() / get_tau(); }
 
-void lb_lbfluid_print_vtk_boundary(const std::string &filename) {
-  std::fstream cpfile;
-  cpfile.open(filename, std::ios::out);
+#ifdef WALBERLA
+namespace Walberla {
 
-  if (!cpfile) {
-    throw std::runtime_error("Could not open '" + filename + "' for writing.");
-  }
+static Utils::Vector3d get_momentum() { return lb_walberla()->get_momentum(); }
 
-  auto const vtk_writer = [&](std::string const &label,
-                              auto const &write_boundaries) {
-    using Utils::Vector3d;
-    cpfile.precision(6);
-    cpfile << std::fixed;
-    auto constexpr vtk_format = Vector3d::formatter(" ");
-    auto const agrid = lb_lbfluid_get_agrid();
-    auto const grid_size = lb_lbfluid_get_shape();
-    auto const origin = Vector3d::broadcast(0.5) * agrid;
-    cpfile << "# vtk DataFile Version 2.0\n"
-           << label << "\n"
-           << "ASCII\n"
-           << "DATASET STRUCTURED_POINTS\n"
-           << "DIMENSIONS " << vtk_format << grid_size << "\n"
-           << "ORIGIN " << vtk_format << origin << "\n"
-           << "SPACING " << vtk_format << Vector3d::broadcast(agrid) << "\n"
-           << "POINT_DATA " << Utils::product(grid_size) << "\n"
-           << "SCALARS boundary float 1\n"
-           << "LOOKUP_TABLE default\n";
-    write_boundaries();
-  };
-
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    std::vector<unsigned int> bound_array(lbpar_gpu.number_of_nodes);
-    lb_get_boundary_flags_GPU(bound_array.data());
-    vtk_writer("lbboundaries", [&]() {
-      for (unsigned int j = 0; j < lbpar_gpu.number_of_nodes; ++j) {
-        cpfile << bound_array[j] << "\n";
-      }
-    });
-#endif //  CUDA
-  } else {
-    vtk_writer("lbboundaries", [&]() {
-      auto const grid_size = lb_lbfluid_get_shape();
-      Utils::Vector3i pos;
-      for (pos[2] = 0; pos[2] < grid_size[2]; pos[2]++)
-        for (pos[1] = 0; pos[1] < grid_size[1]; pos[1]++)
-          for (pos[0] = 0; pos[0] < grid_size[0]; pos[0]++)
-            cpfile << lb_lbnode_get_boundary(pos) << "\n";
-    });
-  }
-  cpfile.close();
+static boost::optional<Utils::Vector3d>
+get_velocity_at_pos(Utils::Vector3d pos) {
+  return lb_walberla()->get_velocity_at_pos(pos);
 }
 
-void lb_lbfluid_print_vtk_velocity(const std::string &filename,
-                                   std::vector<int> bb1, std::vector<int> bb2) {
-  std::fstream cpfile;
-  cpfile.open(filename, std::ios::out);
-
-  if (!cpfile) {
-    throw std::runtime_error("Could not open '" + filename + "' for writing.");
-  }
-
-  auto bb_low = Utils::Vector3i{};
-  auto bb_high = lb_lbfluid_get_shape();
-
-  auto const vtk_writer = [&](std::string const &label, auto const &get_vel) {
-    using Utils::Vector3d;
-    cpfile.precision(6);
-    cpfile << std::fixed;
-    auto constexpr vtk_format = Vector3d::formatter(" ");
-    auto const agrid = lb_lbfluid_get_agrid();
-    auto const bb_dim = bb_high - bb_low;
-    auto const origin = (bb_low + Vector3d::broadcast(0.5)) * agrid;
-    auto const lattice_speed = lb_lbfluid_get_lattice_speed();
-    cpfile << "# vtk DataFile Version 2.0\n"
-           << label << "\n"
-           << "ASCII\n"
-           << "DATASET STRUCTURED_POINTS\n"
-           << "DIMENSIONS " << vtk_format << bb_dim << "\n"
-           << "ORIGIN " << vtk_format << origin << "\n"
-           << "SPACING " << vtk_format << Vector3d::broadcast(agrid) << "\n"
-           << "POINT_DATA " << Utils::product(bb_dim) << "\n"
-           << "SCALARS velocity float 3\n"
-           << "LOOKUP_TABLE default\n";
-
-    Utils::Vector3i pos;
-    for (pos[2] = bb_low[2]; pos[2] < bb_high[2]; pos[2]++)
-      for (pos[1] = bb_low[1]; pos[1] < bb_high[1]; pos[1]++)
-        for (pos[0] = bb_low[0]; pos[0] < bb_high[0]; pos[0]++)
-          cpfile << vtk_format << get_vel(pos) * lattice_speed << "\n";
-  };
-
-  int it = 0;
-  for (auto val1 = bb1.begin(), val2 = bb2.begin();
-       val1 != bb1.end() && val2 != bb2.end(); ++val1, ++val2) {
-    if (*val1 == -1 || *val2 == -1) {
-      break;
-    }
-    auto const lower = std::min(*val1, *val2);
-    auto const upper = std::max(*val1, *val2);
-    if (lower < 0 or upper >= bb_high[it]) {
-      throw std::runtime_error(
-          "Tried to access index " + std::to_string(lower) + " and index " +
-          std::to_string(upper) + " on dimension " + std::to_string(it) +
-          " that has size " + std::to_string(bb_high[it]));
-    }
-    bb_low[it] = lower;
-    bb_high[it] = upper;
-    it++;
-  }
+REGISTER_CALLBACK_ONE_RANK(get_velocity_at_pos)
 
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    host_values.resize(lbpar_gpu.number_of_nodes);
-    lb_get_values_GPU(host_values.data());
-    auto const box_l = lb_lbfluid_get_shape();
-    vtk_writer("lbfluid_gpu", [&box_l](Utils::Vector3i const &pos) {
-      auto const j = box_l[0] * box_l[1] * pos[2] + box_l[0] * pos[1] + pos[0];
-      return Utils::Vector3d{host_values[j].v};
-    });
-#endif //  CUDA
-  } else {
-    vtk_writer("lbfluid_cpu", lb_lbnode_get_velocity);
-  }
-  cpfile.close();
-}
-
-void lb_lbfluid_print_boundary(const std::string &filename) {
-  std::fstream cpfile;
-  cpfile.open(filename, std::ios::out);
-
-  if (!cpfile) {
-    throw std::runtime_error("Could not open '" + filename + "' for writing.");
-  }
-
-  using Utils::Vector3d;
-  auto constexpr vtk_format = Vector3d::formatter(" ");
-  cpfile.precision(6);
-  cpfile << std::fixed;
-
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    std::vector<unsigned int> bound_array(lbpar_gpu.number_of_nodes);
-    lb_get_boundary_flags_GPU(bound_array.data());
-    auto const agrid = lb_lbfluid_get_agrid();
-    Utils::Vector3d pos;
-    for (unsigned int j = 0; j < lbpar_gpu.number_of_nodes; ++j) {
-      auto const k = j / lbpar_gpu.dim[0];
-      auto const l = k / lbpar_gpu.dim[1];
-      pos[0] = (static_cast<double>(j % lbpar_gpu.dim[0]) + 0.5) * agrid;
-      pos[1] = (static_cast<double>(k % lbpar_gpu.dim[1]) + 0.5) * agrid;
-      pos[2] = (static_cast<double>(l) + 0.5) * agrid;
-      cpfile << vtk_format << pos << " " << bound_array[j] << "\n";
-    }
-#endif //  CUDA
-  } else {
-    auto const shift = Vector3d{{0.5, 0.5, 0.5}};
-    auto const agrid = lb_lbfluid_get_agrid();
-    auto const grid_size = lb_lbfluid_get_shape();
-    Utils::Vector3i pos;
-    for (pos[2] = 0; pos[2] < grid_size[2]; pos[2]++)
-      for (pos[1] = 0; pos[1] < grid_size[1]; pos[1]++)
-        for (pos[0] = 0; pos[0] < grid_size[0]; pos[0]++) {
-          auto const flag = (lb_lbnode_get_boundary(pos) != 0) ? 1 : 0;
-          cpfile << vtk_format << (pos + shift) * agrid << " " << flag << "\n";
-        }
-  }
-  cpfile.close();
+static boost::optional<double>
+get_interpolated_density_at_pos(Utils::Vector3d pos) {
+  return lb_walberla()->get_interpolated_density_at_pos(pos);
 }
 
-void lb_lbfluid_print_velocity(const std::string &filename) {
-  std::fstream cpfile;
-  cpfile.open(filename, std::ios::out);
-
-  if (!cpfile) {
-    throw std::runtime_error("Could not open '" + filename + "' for writing.");
-  }
-
-  using Utils::Vector3d;
-  auto constexpr vtk_format = Vector3d::formatter(" ");
-  cpfile.precision(6);
-  cpfile << std::fixed;
+REGISTER_CALLBACK_ONE_RANK(get_interpolated_density_at_pos)
 
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    std::vector<LB_rho_v_pi_gpu> host_values(lbpar_gpu.number_of_nodes);
-    lb_get_values_GPU(host_values.data());
-    auto const agrid = lb_lbfluid_get_agrid();
-    auto const lattice_speed =
-        static_cast<float>(lb_lbfluid_get_lattice_speed());
-    Utils::Vector3d pos;
-    for (unsigned int j = 0; j < lbpar_gpu.number_of_nodes; ++j) {
-      auto const k = j / lbpar_gpu.dim[0];
-      auto const l = k / lbpar_gpu.dim[1];
-      pos[0] = (static_cast<double>(j % lbpar_gpu.dim[0]) + 0.5) * agrid;
-      pos[1] = (static_cast<double>(k % lbpar_gpu.dim[1]) + 0.5) * agrid;
-      pos[2] = (static_cast<double>(l) + 0.5) * agrid;
-      auto const velocity = Utils::Vector3f(host_values[j].v) * lattice_speed;
-      cpfile << vtk_format << pos << " " << vtk_format << velocity << "\n";
-    }
-#endif //  CUDA
-  } else {
-    auto const shift = Vector3d{{0.5, 0.5, 0.5}};
-    auto const agrid = lb_lbfluid_get_agrid();
-    auto const grid_size = lb_lbfluid_get_shape();
-    auto const lattice_speed = lb_lbfluid_get_lattice_speed();
-    Utils::Vector3i pos;
-    for (pos[2] = 0; pos[2] < grid_size[2]; pos[2]++)
-      for (pos[1] = 0; pos[1] < grid_size[1]; pos[1]++)
-        for (pos[0] = 0; pos[0] < grid_size[0]; pos[0]++)
-          cpfile << vtk_format << (pos + shift) * agrid << " " << vtk_format
-                 << lb_lbnode_get_velocity(pos) * lattice_speed << "\n";
-  }
-
-  cpfile.close();
+static Utils::VectorXd<9> get_pressure_tensor() {
+  return lb_walberla()->get_pressure_tensor();
 }
 
-/** Handle for a LB checkpoint file. */
-class LBCheckpointFile {
-private:
-  bool m_binary;
-
-public:
-  std::fstream stream;
+REGISTER_CALLBACK_REDUCTION(get_pressure_tensor, std::plus<>())
 
-  LBCheckpointFile(std::string const &filename, std::ios_base::openmode mode,
-                   bool binary) {
-    m_binary = binary;
-    auto flags = mode;
-    if (m_binary)
-      flags |= std::ios_base::binary;
-    stream.open(filename, flags);
-  }
-
-  ~LBCheckpointFile() = default;
-
-  template <typename T> void write(std::vector<T> const &vector) {
-    if (m_binary) {
-      stream.write(reinterpret_cast<const char *>(vector.data()),
-                   vector.size() * sizeof(T));
-    } else {
-      for (auto const &value : vector) {
-        stream << value << "\n";
-      }
-    }
-  }
-
-  template <typename T, std::size_t N>
-  void write(Utils::Vector<T, N> const &vector) {
-    if (m_binary) {
-      stream.write(reinterpret_cast<const char *>(vector.data()),
-                   N * sizeof(T));
-    } else {
-      stream << Utils::Vector<T, N>::formatter(" ") << vector << "\n";
-    }
-  }
-
-  template <typename T, std::size_t N> void read(Utils::Vector<T, N> &vector) {
-    if (m_binary) {
-      stream.read(reinterpret_cast<char *>(vector.data()), N * sizeof(T));
-    } else {
-      for (auto &value : vector) {
-        stream >> value;
-      }
-    }
-  }
-
-  template <typename T> void read(std::vector<T> &vector) {
-    if (m_binary) {
-      stream.read(reinterpret_cast<char *>(vector.data()),
-                  vector.size() * sizeof(T));
-    } else {
-      for (auto &value : vector) {
-        stream >> value;
-      }
-    }
-  }
-};
-
-void lb_lbfluid_save_checkpoint(const std::string &filename, bool binary) {
-  auto const err_msg = std::string("Error while writing LB checkpoint: ");
-
-  // open file and set exceptions
-  LBCheckpointFile cpfile(filename, std::ios_base::out, binary);
-  if (!cpfile.stream) {
-    throw std::runtime_error(err_msg + "could not open file " + filename);
-  }
-  cpfile.stream.exceptions(std::ios_base::failbit | std::ios_base::badbit);
+} // namespace Walberla
+#endif // WALBERLA
 
-  try {
-    if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-      if (!binary) {
-        cpfile.stream.precision(8);
-        cpfile.stream << std::fixed;
-      }
-
-      auto const grid_size = lb_lbfluid_get_shape();
-      auto const data_length = lbpar_gpu.number_of_nodes * D3Q19::n_vel;
-      cpfile.write(grid_size);
-
-      std::vector<float> host_checkpoint_vd(data_length);
-      lb_save_checkpoint_GPU(host_checkpoint_vd.data());
-      cpfile.write(host_checkpoint_vd);
-#endif //  CUDA
-    } else if (lattice_switch == ActiveLB::CPU) {
-      if (!binary) {
-        cpfile.stream.precision(16);
-        cpfile.stream << std::fixed;
-      }
-
-      auto const grid_size = lb_lbfluid_get_shape();
-      cpfile.write(grid_size);
-
-      for (int i = 0; i < grid_size[0]; i++) {
-        for (int j = 0; j < grid_size[1]; j++) {
-          for (int k = 0; k < grid_size[2]; k++) {
-            auto const ind = Utils::Vector3i{{i, j, k}};
-            auto const pop = mpi_call(::Communication::Result::one_rank,
-                                      mpi_lb_get_populations, ind);
-            cpfile.write(pop);
-          }
-        }
-      }
-    }
-  } catch (std::ios_base::failure const &) {
-    cpfile.stream.close();
-    throw std::runtime_error(err_msg + "could not write data to " + filename);
-  } catch (std::runtime_error const &) {
-    cpfile.stream.close();
-    throw;
-  }
-}
-
-void lb_lbfluid_load_checkpoint(const std::string &filename, bool binary) {
-  auto const err_msg = std::string("Error while reading LB checkpoint: ");
-
-  // open file and set exceptions
-  LBCheckpointFile cpfile(filename, std::ios_base::in, binary);
-  if (!cpfile.stream) {
-    throw std::runtime_error(err_msg + "could not open file " + filename);
-  }
-  cpfile.stream.exceptions(std::ios_base::failbit | std::ios_base::badbit);
-
-  // check the grid size in the checkpoint header matches the current grid size
-  auto const check_header = [&](Utils::Vector3i const &expected_grid_size) {
-    Utils::Vector3i grid_size;
-    cpfile.read(grid_size);
-    if (grid_size != expected_grid_size) {
-      std::stringstream message;
-      message << " grid dimensions mismatch,"
-              << " read [" << grid_size << "],"
-              << " expected [" << expected_grid_size << "].";
-      throw std::runtime_error(err_msg + message.str());
-    }
-  };
-
-  try {
-    if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-      auto const gridsize = lb_lbfluid_get_shape();
-      auto const data_length = lbpar_gpu.number_of_nodes * D3Q19::n_vel;
-      std::vector<float> host_checkpoint_vd(data_length);
-      check_header(gridsize);
-
-      cpfile.read(host_checkpoint_vd);
-      lb_load_checkpoint_GPU(host_checkpoint_vd.data());
-#endif //  CUDA
-    } else if (lattice_switch == ActiveLB::CPU) {
-      auto const gridsize = lb_lbfluid_get_shape();
-      mpi_bcast_lb_params(LBParam::DENSITY);
-      check_header(gridsize);
-
-      Utils::Vector19d pop;
-      for (int i = 0; i < gridsize[0]; i++) {
-        for (int j = 0; j < gridsize[1]; j++) {
-          for (int k = 0; k < gridsize[2]; k++) {
-            auto const ind = Utils::Vector3i{{i, j, k}};
-            cpfile.read(pop);
-            lb_lbnode_set_pop(ind, pop);
-          }
-        }
-      }
-    } else {
-      throw std::runtime_error(
-          "To load an LB checkpoint one needs to have already "
-          "initialized the LB fluid with the same grid size.");
-    }
-    // check EOF
-    if (!binary) {
-      if (cpfile.stream.peek() == '\n') {
-        std::ignore = cpfile.stream.get();
-      }
-    }
-    if (cpfile.stream.peek() != EOF) {
-      throw std::runtime_error(err_msg + "extra data found, expected EOF.");
-    }
-  } catch (std::ios_base::failure const &) {
-    auto const eof_error = cpfile.stream.eof();
-    cpfile.stream.close();
-    if (eof_error) {
-      throw std::runtime_error(err_msg + "EOF found.");
-    }
-    throw std::runtime_error(err_msg + "incorrectly formatted data.");
-  } catch (std::runtime_error const &) {
-    cpfile.stream.close();
-    throw;
-  }
-}
-
-Utils::Vector3i lb_lbfluid_get_shape() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return {static_cast<int>(lbpar_gpu.dim[0]),
-            static_cast<int>(lbpar_gpu.dim[1]),
-            static_cast<int>(lbpar_gpu.dim[2])};
-#endif
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    return lblattice.global_grid;
-  }
-  throw NoLBActive();
-}
-
-bool lb_lbnode_is_index_valid(Utils::Vector3i const &ind) {
-  auto const limit = lb_lbfluid_get_shape();
-  return ind < limit && ind >= Utils::Vector3i{};
-}
-
-double lb_lbnode_get_density(const Utils::Vector3i &ind) {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind);
-    static LB_rho_v_pi_gpu host_print_values;
-    lb_print_node_GPU(single_nodeindex, &host_print_values);
-    return host_print_values.rho;
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
+Utils::VectorXd<9> const get_pressure_tensor() {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+#ifdef WALBERLA
     return ::Communication::mpiCallbacks().call(
-        ::Communication::Result::one_rank, mpi_lb_get_density, ind);
+        ::Communication::Result::reduction, std::plus<>(),
+        Walberla::get_pressure_tensor);
+#endif
   }
   throw NoLBActive();
 }
 
-const Utils::Vector3d lb_lbnode_get_velocity(const Utils::Vector3i &ind) {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    static LB_rho_v_pi_gpu host_print_values;
-    auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind);
-    lb_print_node_GPU(single_nodeindex, &host_print_values);
-    return {static_cast<double>(host_print_values.v[0]),
-            static_cast<double>(host_print_values.v[1]),
-            static_cast<double>(host_print_values.v[2])};
+Utils::Vector3d calc_fluid_momentum() {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+#ifdef WALBERLA
+    return Walberla::get_momentum();
 #endif
   }
-  if (lattice_switch == ActiveLB::CPU) {
-    auto const density = ::Communication::mpiCallbacks().call(
-        ::Communication::Result::one_rank, mpi_lb_get_density, ind);
-    auto const momentum_density = ::Communication::mpiCallbacks().call(
-        ::Communication::Result::one_rank, mpi_lb_get_momentum_density, ind);
-    return momentum_density / density;
-  }
   throw NoLBActive();
 }
 
-const Utils::Vector6d
-lb_lbnode_get_pressure_tensor(const Utils::Vector3i &ind) {
-  // Add equilibrium pressure to the diagonal (in LB units)
-  auto const p0 = lb_lbfluid_get_density() * D3Q19::c_sound_sq<double>;
-
-  auto tensor = lb_lbnode_get_pressure_tensor_neq(ind);
-  tensor[0] += p0;
-  tensor[2] += p0;
-  tensor[5] += p0;
-
-  return tensor;
-}
-
-const Utils::Vector6d
-lb_lbnode_get_pressure_tensor_neq(const Utils::Vector3i &ind) {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    Utils::Vector6d tensor{};
-    static LB_rho_v_pi_gpu host_print_values;
-    auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind);
-    lb_print_node_GPU(single_nodeindex, &host_print_values);
-    for (int i = 0; i < 6; i++) {
-      tensor[i] = static_cast<double>(host_print_values.pi[i]);
-    }
-    return tensor;
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
+Utils::Vector3d const get_interpolated_velocity(Utils::Vector3d const &pos) {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+#ifdef WALBERLA
+    auto const folded_pos = folded_position(pos, box_geo);
     return mpi_call(::Communication::Result::one_rank,
-                    mpi_lb_get_pressure_tensor, ind);
-  }
-  throw NoLBActive();
-}
-
-const Utils::Vector6d lb_lbfluid_get_pressure_tensor() {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    auto const stress_tmp = stress_tensor_GPU();
-    Utils::Vector6d tensor(stress_tmp.begin(), stress_tmp.end());
-
-    // Normalize
-    tensor /= static_cast<double>(lbpar_gpu.number_of_nodes);
-
-    // Add equilibrium pressure to the diagonal (in LB units)
-    double const p0 = lb_lbfluid_get_density() * D3Q19::c_sound_sq<double>;
-
-    tensor[0] += p0;
-    tensor[2] += p0;
-    tensor[5] += p0;
-    return tensor;
+                    Walberla::get_velocity_at_pos, folded_pos / get_agrid());
 #endif
   }
-  if (lattice_switch == ActiveLB::CPU) {
-    auto const grid_size = lb_lbfluid_get_shape();
-    Utils::Vector6d tensor{};
-    for (int i = 0; i < grid_size[0]; i++) {
-      for (int j = 0; j < grid_size[1]; j++) {
-        for (int k = 0; k < grid_size[2]; k++) {
-          const Utils::Vector3i node{{i, j, k}};
-          tensor += lb_lbnode_get_pressure_tensor(node);
-        }
-      }
-    }
-
-    tensor /= static_cast<double>(Utils::product(grid_size));
-    return tensor;
-  }
-  throw NoLBActive();
-}
-
-int lb_lbnode_get_boundary(const Utils::Vector3i &ind) {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    unsigned int host_flag;
-    auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind);
-    lb_get_boundary_flag_GPU(single_nodeindex, &host_flag);
-    return static_cast<int>(host_flag);
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    return mpi_call(::Communication::Result::one_rank, mpi_lb_get_boundary_flag,
-                    ind);
-  }
-  throw NoLBActive();
-}
-
-const Utils::Vector19d lb_lbnode_get_pop(const Utils::Vector3i &ind) {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    float population[D3Q19::n_vel];
-
-    lb_lbfluid_get_population(ind, population);
-    Utils::Vector19d p_pop;
-    for (std::size_t i = 0; i < D3Q19::n_vel; ++i)
-      p_pop[i] = static_cast<double>(population[i]);
-    return p_pop;
-#endif //  CUDA
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    return mpi_call(::Communication::Result::one_rank, mpi_lb_get_populations,
-                    ind);
-  }
   throw NoLBActive();
 }
 
-void lb_lbnode_set_density(const Utils::Vector3i &ind, double p_density) {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind);
-    auto const host_density = static_cast<float>(p_density);
-    lb_set_node_rho_GPU(single_nodeindex, host_density);
-#endif //  CUDA
-  } else if (lattice_switch == ActiveLB::CPU) {
-    auto const tensor = lb_lbnode_get_pressure_tensor(ind);
-    auto const momentum_density =
-        lb_lbnode_get_velocity(ind) * lb_lbnode_get_density(ind);
-    auto const population =
-        lb_get_population_from_density_momentum_density_stress(
-            p_density, momentum_density, tensor);
-    mpi_call_all(mpi_lb_set_population, ind, population);
-  } else {
-    throw NoLBActive();
-  }
-}
-
-void lb_lbnode_set_velocity(const Utils::Vector3i &ind,
-                            const Utils::Vector3d &u) {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    float host_velocity[3];
-    host_velocity[0] = static_cast<float>(u[0]);
-    host_velocity[1] = static_cast<float>(u[1]);
-    host_velocity[2] = static_cast<float>(u[2]);
-    auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind);
-    lb_set_node_velocity_GPU(single_nodeindex, host_velocity);
-#endif //  CUDA
-  } else if (lattice_switch == ActiveLB::CPU) {
-    auto const density = lb_lbnode_get_density(ind);
-    auto const momentum_density = u * density;
-    auto const tensor = lb_lbnode_get_pressure_tensor(ind);
-    auto const population =
-        lb_get_population_from_density_momentum_density_stress(
-            density, momentum_density, tensor);
-    mpi_call_all(mpi_lb_set_population, ind, population);
-    mpi_call_all(mpi_lb_set_force_density, ind, Utils::Vector3d{});
-  } else {
-    throw NoLBActive();
-  }
-}
-
-void lb_lbnode_set_pop(const Utils::Vector3i &ind,
-                       const Utils::Vector19d &p_pop) {
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    float population[D3Q19::n_vel];
-
-    for (std::size_t i = 0; i < D3Q19::n_vel; ++i)
-      population[i] = static_cast<float>(p_pop[i]);
-
-    lb_lbfluid_set_population(ind, population);
-#endif //  CUDA
-  } else if (lattice_switch == ActiveLB::CPU) {
-    mpi_call_all(mpi_lb_set_population, ind, p_pop);
-  } else {
-    throw NoLBActive();
-  }
-}
-
-Utils::Vector3d lb_lbfluid_calc_fluid_momentum() {
-  Utils::Vector3d momentum{};
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    if (::comm_cart.rank() == 0) {
-      lb_calc_fluid_momentum_GPU(momentum.data());
-    }
-#endif
-  } else if (lattice_switch == ActiveLB::CPU) {
-    momentum = mpi_lb_calc_fluid_momentum_local(lbpar, lbfields, lblattice);
-  }
-  return momentum;
-}
-
-const Utils::Vector3d
-lb_lbfluid_get_interpolated_velocity(const Utils::Vector3d &pos) {
-  auto const folded_pos = folded_position(pos, box_geo);
-  auto const interpolation_order = lb_lbinterpolation_get_interpolation_order();
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    Utils::Vector3d interpolated_u{};
-    switch (interpolation_order) {
-    case (InterpolationOrder::linear):
-      lb_get_interpolated_velocity_gpu<8>(folded_pos.data(),
-                                          interpolated_u.data(), 1);
-      break;
-    case (InterpolationOrder::quadratic):
-      lb_get_interpolated_velocity_gpu<27>(folded_pos.data(),
-                                           interpolated_u.data(), 1);
-      break;
-    }
-    return interpolated_u;
+double get_interpolated_density(Utils::Vector3d const &pos) {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+#ifdef WALBERLA
+    auto const folded_pos = folded_position(pos, box_geo);
+    return mpi_call(::Communication::Result::one_rank,
+                    Walberla::get_interpolated_density_at_pos,
+                    folded_pos / get_agrid());
 #endif
   }
-  if (lattice_switch == ActiveLB::CPU) {
-    switch (interpolation_order) {
-    case (InterpolationOrder::quadratic):
-      throw std::runtime_error("The non-linear interpolation scheme is not "
-                               "implemented for the CPU LB.");
-    case (InterpolationOrder::linear):
-      return mpi_call(::Communication::Result::one_rank,
-                      mpi_lb_get_interpolated_velocity, folded_pos);
-    }
-  }
   throw NoLBActive();
 }
 
-double lb_lbfluid_get_interpolated_density(const Utils::Vector3d &pos) {
-  auto const folded_pos = folded_position(pos, box_geo);
-  auto const interpolation_order = lb_lbinterpolation_get_interpolation_order();
-  if (lattice_switch == ActiveLB::GPU) {
-    throw std::runtime_error(
-        "Density interpolation is not implemented for the GPU LB.");
-  }
-  if (lattice_switch == ActiveLB::CPU) {
-    switch (interpolation_order) {
-    case (InterpolationOrder::quadratic):
-      throw std::runtime_error("The non-linear interpolation scheme is not "
-                               "implemented for the CPU LB.");
-    case (InterpolationOrder::linear):
-      return mpi_call(::Communication::Result::one_rank,
-                      mpi_lb_get_interpolated_density, folded_pos);
-    }
-  }
-  throw NoLBActive();
-}
-
-void mpi_set_lattice_switch_local(ActiveLB lattice_switch) {
-  ::lattice_switch = lattice_switch;
-}
-
-REGISTER_CALLBACK(mpi_set_lattice_switch_local)
-
-void mpi_set_lattice_switch(ActiveLB lattice_switch) {
-  mpi_call_all(mpi_set_lattice_switch_local, lattice_switch);
-}
+} // namespace LB
diff --git a/src/core/grid_based_algorithms/lb_interface.hpp b/src/core/grid_based_algorithms/lb_interface.hpp
index 3dc14ecc537..ad8a811f322 100644
--- a/src/core/grid_based_algorithms/lb_interface.hpp
+++ b/src/core/grid_based_algorithms/lb_interface.hpp
@@ -20,251 +20,106 @@
 #define CORE_LB_INTERFACE
 
 #include "config/config.hpp"
-#include "grid_based_algorithms/lattice.hpp"
 
 #include <utils/Vector.hpp>
 
 #include <cstdint>
-#include <string>
+#include <stdexcept>
 #include <vector>
 
 /** @brief LB implementation currently active. */
-enum class ActiveLB : int { NONE, CPU, GPU };
+enum class ActiveLB : int { NONE, WALBERLA_LB };
 
 /** @brief Switch determining the type of lattice dynamics. */
 extern ActiveLB lattice_switch;
 
-/**
- * @brief Propagate the LB fluid.
- */
-void lb_lbfluid_propagate();
-
-/**
- * @brief Event handler for integration start.
- */
-void lb_lbfluid_on_integration_start();
+struct NoLBActive : public std::exception {
+  const char *what() const noexcept override { return "LB not activated"; }
+};
 
-/**
- * @brief Perform a full initialization of the lattice-Boltzmann system.
- * All derived parameters and the fluid are reset to their default values.
- */
-void lb_lbfluid_init();
-
-/**
- * @brief Reinitialize the derived parameters for the lattice-Boltzmann system.
- * The current state of the fluid is unchanged.
- */
-void lb_lbfluid_reinit_parameters();
-
-/**
- * @brief Get the current counter of the Philox RNG.
- */
-uint64_t lb_lbfluid_get_rng_state();
-
-/**
- * @brief Set the current counter of the Philox RNG.
- */
-void lb_lbfluid_set_rng_state(uint64_t counter);
+namespace LB {
 
 /**
  * @brief Get the global variable @ref lattice_switch.
  */
-ActiveLB lb_lbfluid_get_lattice_switch();
+ActiveLB get_lattice_switch();
 
-/**
- * @brief Set the global variable @ref lattice_switch.
- */
-void lb_lbfluid_set_lattice_switch(ActiveLB local_lattice_switch);
+int get_steps_per_md_step(double md_timestep);
 
 /**
- * @brief Set the LB time step.
- */
-void lb_lbfluid_set_tau(double p_tau);
-
-/**
- * @brief Check if tau is an integer multiple of time_step, throws if not
- */
-void check_tau_time_step_consistency(double tau, double time_s);
-
-/**
- * @brief Set the global LB density.
- */
-void lb_lbfluid_set_density(double p_dens);
-
-/**
- * @brief Set the global LB viscosity.
- */
-void lb_lbfluid_set_viscosity(double p_visc);
-
-/**
- * @brief Set the global LB bulk viscosity.
- */
-void lb_lbfluid_set_bulk_viscosity(double p_bulk_visc);
-
-/**
- * @brief Set the global LB relaxation parameter for odd modes.
- */
-void lb_lbfluid_set_gamma_odd(double p_gamma_odd);
-
-/**
- * @brief Set the global LB relaxation parameter for even modes.
- */
-void lb_lbfluid_set_gamma_even(double p_gamma_even);
-
-/**
- * @brief Set the global LB lattice spacing.
+ * @brief Propagate the LB fluid.
  */
-void lb_lbfluid_set_agrid(double p_agrid);
+void propagate();
 
 /**
- * @brief Set the external force density acting on the LB fluid.
+ * @brief Perform a full initialization of the lattice-Boltzmann system.
+ * All derived parameters and the fluid are reset to their default values.
  */
-void lb_lbfluid_set_ext_force_density(const Utils::Vector3d &force_density);
+void init();
 
 /**
- * @brief Set the LB fluid thermal energy.
+ * @brief Check if tau is an integer multiple of time_step, throws if not
  */
-void lb_lbfluid_set_kT(double kT);
+void check_tau_time_step_consistency(double tau, double time_step);
 
 /**
  * @brief Perform LB parameter and boundary velocity checks.
  */
-void lb_lbfluid_sanity_checks(double time_step);
+void sanity_checks(double time_step);
 
 /**
- * @brief Set the LB density for a single node.
+ * @brief Perform LB LEbc parameter checks.
  */
-void lb_lbnode_set_density(const Utils::Vector3i &ind, double density);
+void lebc_sanity_checks(unsigned int shear_direction,
+                        unsigned int shear_plane_normal);
 
 /**
  * @brief Set the LB fluid velocity for a single node.
  */
-void lb_lbnode_set_velocity(const Utils::Vector3i &ind,
-                            const Utils::Vector3d &u);
-
-/**
- * @brief Set the LB fluid populations for a single node.
- */
-void lb_lbnode_set_pop(const Utils::Vector3i &ind, const Utils::Vector19d &pop);
+void set_velocity(Utils::Vector3i const &ind, Utils::Vector3d const &u);
 
 /**
  * @brief Get the LB time step.
  */
-double lb_lbfluid_get_tau();
+double get_tau();
 
 /**
  * @brief Get the LB grid spacing.
  */
-double lb_lbfluid_get_agrid();
-
-/**
- * @brief Get the global LB relaxation parameter for odd modes.
- */
-double lb_lbfluid_get_gamma_odd();
-
-/**
- * @brief Get the global LB relaxation parameter for even modes.
- */
-double lb_lbfluid_get_gamma_even();
-
-/**
- * @brief Get the global LB bulk viscosity.
- */
-double lb_lbfluid_get_bulk_viscosity();
-
-/**
- * @brief Get the global LB viscosity.
- */
-double lb_lbfluid_get_viscosity();
-
-/**
- * @brief Get the global LB density.
- */
-double lb_lbfluid_get_density();
-
-/**
- * @brief Get the external force density acting on the LB fluid.
- */
-const Utils::Vector3d lb_lbfluid_get_ext_force_density();
+double get_agrid();
 
 /**
  * @brief Get the thermal energy parameter of the LB fluid.
  */
-double lb_lbfluid_get_kT();
+double get_kT();
 
 /**
  * @brief Get the lattice speed (agrid/tau).
  */
-double lb_lbfluid_get_lattice_speed();
-
-/**
- * @brief Get the LB fluid density for a single node.
- */
-double lb_lbnode_get_density(const Utils::Vector3i &ind);
-
-/**
- * @brief Get the LB fluid velocity for a single node.
- */
-const Utils::Vector3d lb_lbnode_get_velocity(const Utils::Vector3i &ind);
-const Utils::Vector6d lb_lbnode_get_pressure_tensor(const Utils::Vector3i &ind);
-const Utils::Vector6d
-lb_lbnode_get_pressure_tensor_neq(const Utils::Vector3i &ind);
+double get_lattice_speed();
 
 /** @brief Calculate the average pressure tensor of all nodes by accumulating
  *  over all nodes and dividing by the number of nodes.
  *  Returns the lower triangle of the LB pressure tensor.
  */
-const Utils::Vector6d lb_lbfluid_get_pressure_tensor();
-
-/**
- * @brief Get the LB fluid boundary bool for a single node.
- */
-int lb_lbnode_get_boundary(const Utils::Vector3i &ind);
-
-/**
- * @brief Get the LB fluid populations for a single node.
- */
-const Utils::Vector19d lb_lbnode_get_pop(const Utils::Vector3i &ind);
-
-/* IO routines */
-void lb_lbfluid_print_vtk_boundary(const std::string &filename);
-void lb_lbfluid_print_vtk_velocity(const std::string &filename,
-                                   std::vector<int> = {-1, -1, -1},
-                                   std::vector<int> = {-1, -1, -1});
-
-void lb_lbfluid_print_boundary(const std::string &filename);
-void lb_lbfluid_print_velocity(const std::string &filename);
-
-void lb_lbfluid_save_checkpoint(const std::string &filename, bool binary);
-void lb_lbfluid_load_checkpoint(const std::string &filename, bool binary);
-
-/**
- * @brief Checks whether the given node index is within the LB lattice.
- */
-bool lb_lbnode_is_index_valid(const Utils::Vector3i &ind);
-
-/**
- * @brief returns the shape of the LB fluid lattice
- */
-Utils::Vector3i lb_lbfluid_get_shape();
+Utils::VectorXd<9> const get_pressure_tensor();
 
-Utils::Vector3d lb_lbfluid_calc_fluid_momentum();
+Utils::Vector3d calc_fluid_momentum();
 
 /**
  * @brief Calculates the interpolated fluid velocity on the head node process.
  * @param pos Position at which the velocity is to be calculated.
  * @retval interpolated fluid velocity.
  */
-const Utils::Vector3d
-lb_lbfluid_get_interpolated_velocity(const Utils::Vector3d &pos);
+Utils::Vector3d const get_interpolated_velocity(Utils::Vector3d const &pos);
 
 /**
  * @brief Calculates the interpolated fluid density on the head node process.
  * @param pos Position at which the density is to be calculated.
  * @retval interpolated fluid density.
  */
-double lb_lbfluid_get_interpolated_density(const Utils::Vector3d &pos);
+double get_interpolated_density(Utils::Vector3d const &pos);
 
-void mpi_set_lattice_switch(ActiveLB lattice_switch);
+} // namespace LB
 
 #endif
diff --git a/src/core/grid_based_algorithms/lb_interpolation.cpp b/src/core/grid_based_algorithms/lb_interpolation.cpp
index baeaac1446d..05cacf3b2f4 100644
--- a/src/core/grid_based_algorithms/lb_interpolation.cpp
+++ b/src/core/grid_based_algorithms/lb_interpolation.cpp
@@ -17,122 +17,44 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "lb_interpolation.hpp"
+#include "grid_based_algorithms/lb_interpolation.hpp"
+#include "grid_based_algorithms/lb_interface.hpp"
+#include "grid_based_algorithms/lb_walberla_instance.hpp"
 
 #include "communication.hpp"
 #include "config/config.hpp"
-#include "grid_based_algorithms/lattice.hpp"
-#include "lb.hpp"
 
 #include <utils/Vector.hpp>
 
-#include <algorithm>
-#include <cstddef>
+#include <iostream>
 #include <stdexcept>
 
-namespace {
-InterpolationOrder interpolation_order = InterpolationOrder::linear;
-}
-
-void mpi_set_interpolation_order_local(InterpolationOrder const &order) {
-  interpolation_order = order;
-}
-
-REGISTER_CALLBACK(mpi_set_interpolation_order_local)
-
-void lb_lbinterpolation_set_interpolation_order(
-    InterpolationOrder const &order) {
-  mpi_call_all(mpi_set_interpolation_order_local, order);
-}
-
-InterpolationOrder lb_lbinterpolation_get_interpolation_order() {
-  return interpolation_order;
-}
-
-namespace {
-template <typename Op>
-void lattice_interpolation(Lattice const &lattice, Utils::Vector3d const &pos,
-                           Op &&op) {
-  Utils::Vector<std::size_t, 8> node_index{};
-  Utils::Vector6d delta{};
-
-  /* determine elementary lattice cell surrounding the particle
-     and the relative position of the particle in this cell */
-  lattice.map_position_to_lattice(pos, node_index, delta);
-  for (int z = 0; z < 2; z++) {
-    for (int y = 0; y < 2; y++) {
-      for (int x = 0; x < 2; x++) {
-        auto &index = node_index[(z * 2 + y) * 2 + x];
-        auto const w = delta[3 * x + 0] * delta[3 * y + 1] * delta[3 * z + 2];
-
-        op(index, w);
-      }
-    }
-  }
-}
-
-Utils::Vector3d node_u(Lattice::index_t index) {
-#ifdef LB_BOUNDARIES
-  if (lbfields[index].boundary) {
-    return lbfields[index].slip_velocity;
-  }
-#endif // LB_BOUNDARIES
-  auto const modes = lb_calc_modes(index, lbfluid);
-  auto const local_density = lbpar.density + modes[0];
-  return Utils::Vector3d{modes[1], modes[2], modes[3]} / local_density;
-}
-
-double node_dens(Lattice::index_t index) {
-#ifdef LB_BOUNDARIES
-  if (lbfields[index].boundary) {
-    return lbpar.density;
-  }
-#endif // LB_BOUNDARIES
-  auto const modes = lb_calc_modes(index, lbfluid);
-  return lbpar.density + modes[0];
-}
-
-} // namespace
-
 const Utils::Vector3d
 lb_lbinterpolation_get_interpolated_velocity(const Utils::Vector3d &pos) {
-  Utils::Vector3d interpolated_u{};
-
-  /* Calculate fluid velocity at particle's position.
-     This is done by linear interpolation (eq. (11) @cite ahlrichs99a) */
-  lattice_interpolation(lblattice, pos,
-                        [&interpolated_u](Lattice::index_t index, double w) {
-                          interpolated_u += w * node_u(index);
-                        });
-
-  return interpolated_u;
-}
-
-double lb_lbinterpolation_get_interpolated_density(const Utils::Vector3d &pos) {
-  double interpolated_dens = 0.;
-
-  /* Calculate fluid density at the position.
-     This is done by linear interpolation (eq. (11) @cite ahlrichs99a) */
-  lattice_interpolation(lblattice, pos,
-                        [&interpolated_dens](Lattice::index_t index, double w) {
-                          interpolated_dens += w * node_dens(index);
-                        });
-
-  return interpolated_dens;
+  /* calculate fluid velocity at particle's position
+     this is done by linear interpolation
+     (Eq. (11) Ahlrichs and Duenweg, JCP 111(17):8225 (1999)) */
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+#ifdef WALBERLA
+    auto res = lb_walberla()->get_velocity_at_pos(pos / LB::get_agrid(), true);
+    if (!res) {
+      std::cout << this_node << ": position: [" << pos << "]\n";
+      throw std::runtime_error(
+          "Interpolated velocity could not be obtained from Walberla");
+    }
+    return *res;
+#endif
+  }
+  throw std::runtime_error("No LB active.");
 }
 
 void lb_lbinterpolation_add_force_density(
     const Utils::Vector3d &pos, const Utils::Vector3d &force_density) {
-  switch (interpolation_order) {
-  case (InterpolationOrder::quadratic):
-    throw std::runtime_error("The non-linear interpolation scheme is not "
-                             "implemented for the CPU LB.");
-  case (InterpolationOrder::linear):
-    lattice_interpolation(lblattice, pos,
-                          [&force_density](Lattice::index_t index, double w) {
-                            auto &field = lbfields[index];
-                            field.force_density += w * force_density;
-                          });
-    break;
-  }
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
+#ifdef WALBERLA
+    if (!lb_walberla()->add_force_at_pos(pos / LB::get_agrid(), force_density))
+      throw std::runtime_error("Could not apply force to lb.");
+#endif
+  } else
+    throw std::runtime_error("No LB active.");
 }
diff --git a/src/core/grid_based_algorithms/lb_interpolation.hpp b/src/core/grid_based_algorithms/lb_interpolation.hpp
index 173a4ddbfd1..26de6289db6 100644
--- a/src/core/grid_based_algorithms/lb_interpolation.hpp
+++ b/src/core/grid_based_algorithms/lb_interpolation.hpp
@@ -21,26 +21,6 @@
 
 #include <utils/Vector.hpp>
 
-/**
- * @brief Interpolation order for the LB fluid interpolation.
- * @note For the CPU LB only linear interpolation is available.
- */
-enum class InterpolationOrder { linear, quadratic };
-
-/**
- * @brief Set the interpolation order for the LB.
- */
-void lb_lbinterpolation_set_interpolation_order(
-    InterpolationOrder const &interpolation_order);
-
-// MPI callback exposed for unit testing only
-void mpi_set_interpolation_order_local(InterpolationOrder const &order);
-
-/**
- * @brief Get the interpolation order for the LB.
- */
-InterpolationOrder lb_lbinterpolation_get_interpolation_order();
-
 /**
  * @brief Calculates the fluid velocity at a given position of the
  * lattice.
@@ -48,18 +28,11 @@ InterpolationOrder lb_lbinterpolation_get_interpolation_order();
  * position is not within the local lattice.
  */
 const Utils::Vector3d
-lb_lbinterpolation_get_interpolated_velocity(const Utils::Vector3d &p);
-
-/**
- * @brief Calculates the fluid density at a given position of the lattice.
- * @note It can lead to undefined behaviour if the
- * position is not within the local lattice.
- */
-double lb_lbinterpolation_get_interpolated_density(const Utils::Vector3d &p);
+lb_lbinterpolation_get_interpolated_velocity(const Utils::Vector3d &pos);
 
 /**
  * @brief Add a force density to the fluid at the given position.
  */
-void lb_lbinterpolation_add_force_density(const Utils::Vector3d &p,
+void lb_lbinterpolation_add_force_density(const Utils::Vector3d &pos,
                                           const Utils::Vector3d &force_density);
 #endif
diff --git a/src/core/grid_based_algorithms/lb_particle_coupling.cpp b/src/core/grid_based_algorithms/lb_particle_coupling.cpp
index 9923d0f54f2..a9de5d30392 100644
--- a/src/core/grid_based_algorithms/lb_particle_coupling.cpp
+++ b/src/core/grid_based_algorithms/lb_particle_coupling.cpp
@@ -16,7 +16,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include "lb_particle_coupling.hpp"
 #include "LocalBox.hpp"
 #include "Particle.hpp"
 #include "cells.hpp"
@@ -24,16 +23,16 @@
 #include "config/config.hpp"
 #include "errorhandling.hpp"
 #include "grid.hpp"
-#include "grid_based_algorithms/OptionalCounter.hpp"
-#include "integrate.hpp"
-#include "lb_interface.hpp"
-#include "lb_interpolation.hpp"
-#include "lbgpu.hpp"
 #include "random.hpp"
 
+#include "grid_based_algorithms/lb_interface.hpp"
+#include "grid_based_algorithms/lb_interpolation.hpp"
+#include "grid_based_algorithms/lb_particle_coupling.hpp"
+
 #include <profiler/profiler.hpp>
 #include <utils/Counter.hpp>
 #include <utils/Vector.hpp>
+#include <utils/math/sqr.hpp>
 
 #include <boost/mpi.hpp>
 
@@ -43,7 +42,7 @@
 #include <stdexcept>
 #include <utility>
 
-static LB_Particle_Coupling lb_particle_coupling;
+LB_Particle_Coupling lb_particle_coupling;
 
 void mpi_bcast_lb_particle_coupling_local() {
   boost::mpi::broadcast(comm_cart, lb_particle_coupling, 0);
@@ -71,20 +70,14 @@ void lb_lbcoupling_deactivate() {
 
 void lb_lbcoupling_set_gamma(double gamma) {
   lb_particle_coupling.gamma = gamma;
-  mpi_bcast_lb_particle_coupling();
 }
 
 double lb_lbcoupling_get_gamma() { return lb_particle_coupling.gamma; }
 
 bool lb_lbcoupling_is_seed_required() {
-  if (lattice_switch == ActiveLB::CPU) {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
     return not lb_particle_coupling.rng_counter_coupling.is_initialized();
   }
-#ifdef CUDA
-  if (lattice_switch == ActiveLB::GPU) {
-    return not rng_counter_coupling_gpu.is_initialized();
-  }
-#endif
   return false;
 }
 
@@ -93,79 +86,54 @@ uint64_t lb_coupling_get_rng_state_cpu() {
 }
 
 uint64_t lb_lbcoupling_get_rng_state() {
-  if (lattice_switch == ActiveLB::CPU) {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
     return lb_coupling_get_rng_state_cpu();
   }
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    return lb_coupling_get_rng_state_gpu();
-#endif
-  }
-  return {};
+  throw std::runtime_error("No LB active");
 }
 
 void lb_lbcoupling_set_rng_state(uint64_t counter) {
-  if (lattice_switch == ActiveLB::CPU) {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
     lb_particle_coupling.rng_counter_coupling =
         Utils::Counter<uint64_t>(counter);
-    mpi_bcast_lb_particle_coupling();
-  } else if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    lb_coupling_set_rng_state_gpu(counter);
-#endif
-  }
+  } else
+    throw std::runtime_error("No LB active");
 }
 
-namespace {
-/**
- * @brief Add a force to the lattice force density.
- * @param pos Position of the force
- * @param force Force in MD units.
- * @param time_step MD time step.
- */
 void add_md_force(Utils::Vector3d const &pos, Utils::Vector3d const &force,
                   double time_step) {
   /* transform momentum transfer to lattice units
      (eq. (12) @cite ahlrichs99a) */
-  auto const delta_j = -(time_step / lb_lbfluid_get_lattice_speed()) * force;
+  auto const delta_j = -(time_step / LB::get_lattice_speed()) * force;
   lb_lbinterpolation_add_force_density(pos, delta_j);
 }
-} // namespace
 
-/** Coupling of a single particle to viscous fluid with Stokesian friction.
- *
- *  Section II.C. @cite ahlrichs99a
- *
- *  @param[in] p             The coupled particle.
- *  @param[in] pos           Local position of particle or its ghost.
- *  @param[in] f_random      Additional force to be included.
- *
- *  @return The viscous coupling force plus @p f_random.
- */
-Utils::Vector3d lb_viscous_coupling(Particle const &p,
-                                    Utils::Vector3d const &pos,
-                                    Utils::Vector3d const &f_random) {
-  /* calculate fluid velocity at particle's position
-     this is done by linear interpolation (eq. (11) @cite ahlrichs99a) */
-  auto const interpolated_u =
-      lb_lbinterpolation_get_interpolated_velocity(pos) *
-      lb_lbfluid_get_lattice_speed();
-
-  Utils::Vector3d v_drift = interpolated_u;
+Utils::Vector3d lb_particle_coupling_drift_vel_offset(const Particle &p) {
+  Utils::Vector3d vel_offset{};
 #ifdef ENGINE
   if (p.swimming().swimming) {
-    v_drift += p.swimming().v_swim * p.calc_director();
+    vel_offset += p.swimming().v_swim * p.calc_director();
   }
 #endif
 
 #ifdef LB_ELECTROHYDRODYNAMICS
-  v_drift += p.mu_E();
+  vel_offset += p.mu_E();
 #endif
+  return vel_offset;
+}
 
-  /* calculate viscous force (eq. (9) @cite ahlrichs99a) */
-  auto const force = -lb_lbcoupling_get_gamma() * (p.v() - v_drift) + f_random;
+Utils::Vector3d lb_drag_force(Particle const &p,
+                              Utils::Vector3d const &shifted_pos,
+                              Utils::Vector3d const &vel_offset) {
+  /* calculate fluid velocity at particle's position
+     this is done by linear interpolation (eq. (11) @cite ahlrichs99a) */
+  auto const interpolated_u =
+      lb_lbinterpolation_get_interpolated_velocity(shifted_pos) *
+      LB::get_lattice_speed();
 
-  return force;
+  Utils::Vector3d v_drift = interpolated_u + vel_offset;
+  /* calculate viscous force (eq. (9) @cite ahlrichs99a) */
+  return -lb_lbcoupling_get_gamma() * (p.v() - v_drift);
 }
 
 /**
@@ -178,27 +146,22 @@ Utils::Vector3d lb_viscous_coupling(Particle const &p,
  */
 inline bool in_local_domain(Utils::Vector3d const &pos, double halo = 0.) {
   auto const halo_vec = Utils::Vector3d::broadcast(halo);
+  auto const lower_corner = local_geo.my_left() - halo_vec;
+  auto const upper_corner = local_geo.my_right() + halo_vec;
 
-  return in_box(
-      pos, {local_geo.my_left() - halo_vec, local_geo.my_right() + halo_vec});
+  return pos >= lower_corner and pos < upper_corner;
 }
 
-/**
- * @brief Check if a position is within the local LB domain
- *       plus halo.
- *
- * @param pos Position to check
- *
- * @return True iff the point is inside of the domain.
- */
 bool in_local_halo(Utils::Vector3d const &pos) {
-  auto const halo = 0.5 * lb_lbfluid_get_agrid();
+  auto const halo = 0.5 * LB::get_agrid();
 
   return in_local_domain(pos, halo);
 }
 
-/** @brief Return a vector of positions shifted by +,- box length in each
- ** coordinate */
+/**
+ * @brief Return a vector of positions shifted by +,- box length in each
+ * coordinate
+ */
 std::vector<Utils::Vector3d> positions_in_halo(Utils::Vector3d pos,
                                                const BoxGeometry &box) {
   std::vector<Utils::Vector3d> res;
@@ -208,6 +171,16 @@ std::vector<Utils::Vector3d> positions_in_halo(Utils::Vector3d pos,
         Utils::Vector3d shift{{double(i), double(j), double(k)}};
         Utils::Vector3d pos_shifted =
             pos + Utils::hadamard_product(box.length(), shift);
+
+        if (box_geo.type() == BoxType::LEES_EDWARDS) {
+          auto le = box_geo.lees_edwards_bc();
+          auto normal_shift = (pos_shifted - pos)[le.shear_plane_normal];
+          if (normal_shift > std::numeric_limits<double>::epsilon())
+            pos_shifted[le.shear_direction] += le.pos_offset;
+          if (normal_shift < -std::numeric_limits<double>::epsilon())
+            pos_shifted[le.shear_direction] -= le.pos_offset;
+        }
+
         if (in_local_halo(pos_shifted)) {
           res.push_back(pos_shifted);
         }
@@ -217,16 +190,20 @@ std::vector<Utils::Vector3d> positions_in_halo(Utils::Vector3d pos,
   return res;
 }
 
-/** @brief Return if locally there exists a physical particle
- ** for a given (ghost) particle */
+/**
+ * @brief Return if locally there exists a physical particle
+ * for a given (ghost) particle
+ */
 bool is_ghost_for_local_particle(const Particle &p) {
   return !cell_structure.get_local_particle(p.id())->is_ghost();
 }
 
-/** @brief Determine if a given particle should be coupled.
- ** In certain cases, there may be more than one ghost for the same particle.
- ** To make sure, that these are only coupled once, ghosts' ids are stored
- ** in an unordered_set. */
+/**
+ * @brief Determine if a given particle should be coupled.
+ * In certain cases, there may be more than one ghost for the same particle.
+ * To make sure, that these are only coupled once, ghosts' ids are stored
+ * in an unordered_set.
+ */
 bool should_be_coupled(const Particle &p,
                        std::unordered_set<int> &coupled_ghost_particles) {
   // always couple physical particles
@@ -247,10 +224,10 @@ bool should_be_coupled(const Particle &p,
 void add_swimmer_force(Particle const &p, double time_step) {
   if (p.swimming().swimming) {
     // calculate source position
-    const double direction =
-        double(p.swimming().push_pull) * p.swimming().dipole_length;
+    auto const magnitude = p.swimming().dipole_length;
+    auto const direction = static_cast<double>(p.swimming().push_pull);
     auto const director = p.calc_director();
-    auto const source_position = p.pos() + direction * director;
+    auto const source_position = p.pos() + direction * magnitude * director;
     auto const force = p.swimming().f_swim * director;
 
     // couple positions including shifts by one box length to add forces
@@ -262,99 +239,88 @@ void add_swimmer_force(Particle const &p, double time_step) {
 }
 #endif
 
+Utils::Vector3d lb_particle_coupling_noise(bool enabled, int part_id,
+                                           const OptionalCounter &rng_counter) {
+  if (enabled) {
+    if (rng_counter) {
+      return Random::noise_uniform<RNGSalt::PARTICLES>(rng_counter->value(), 0,
+                                                       part_id);
+    }
+    throw std::runtime_error(
+        "Access to uninitialized LB particle coupling RNG counter");
+  }
+  return {};
+}
+
+void couple_particle(Particle &p, bool couple_virtual, double noise_amplitude,
+                     const OptionalCounter &rng_counter, double time_step) {
+
+  if (p.is_virtual() and not couple_virtual)
+    return;
+
+  // Calculate coupling force
+  Utils::Vector3d coupling_force = {};
+  for (auto pos : positions_in_halo(p.pos(), box_geo)) {
+    if (in_local_halo(pos)) {
+      auto const drag_force =
+          lb_drag_force(p, pos, lb_particle_coupling_drift_vel_offset(p));
+      auto const random_force =
+          noise_amplitude * lb_particle_coupling_noise(noise_amplitude > 0.0,
+                                                       p.id(), rng_counter);
+      coupling_force = drag_force + random_force;
+      break;
+    }
+  }
+
+  // couple positions including shifts by one box length to add
+  // forces to ghost layers
+  for (auto pos : positions_in_halo(p.pos(), box_geo)) {
+    if (in_local_domain(pos)) {
+      /* Particle is in our LB volume, so this node
+       * is responsible to adding its force */
+      p.force() += coupling_force;
+    }
+    add_md_force(pos, coupling_force, time_step);
+  }
+
+#ifdef ENGINE
+  add_swimmer_force(p, time_step);
+#endif
+}
+
 void lb_lbcoupling_calc_particle_lattice_ia(bool couple_virtual,
                                             const ParticleRange &particles,
                                             const ParticleRange &more_particles,
                                             double time_step) {
   ESPRESSO_PROFILER_CXX_MARK_FUNCTION;
-  if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-    if (lb_particle_coupling.couple_to_md && this_node == 0) {
-      switch (lb_lbinterpolation_get_interpolation_order()) {
-      case (InterpolationOrder::linear):
-        lb_calc_particle_lattice_ia_gpu<8>(
-            couple_virtual, lb_lbcoupling_get_gamma(), time_step);
-        break;
-      case (InterpolationOrder::quadratic):
-        lb_calc_particle_lattice_ia_gpu<27>(
-            couple_virtual, lb_lbcoupling_get_gamma(), time_step);
-        break;
-      }
-    }
-#endif
-  } else if (lattice_switch == ActiveLB::CPU) {
+  if (lattice_switch == ActiveLB::WALBERLA_LB) {
     if (lb_particle_coupling.couple_to_md) {
-      switch (lb_lbinterpolation_get_interpolation_order()) {
-      case (InterpolationOrder::quadratic):
-        throw std::runtime_error("The non-linear interpolation scheme is not "
-                                 "implemented for the CPU LB.");
-      case (InterpolationOrder::linear): {
-        auto const kT = lb_lbfluid_get_kT();
-        /* Eq. (16) @cite ahlrichs99a.
-         * The factor 12 comes from the fact that we use random numbers
-         * from -0.5 to 0.5 (equally distributed) which have variance 1/12.
-         * time_step comes from the discretization.
-         */
-        auto const noise_amplitude =
-            (kT > 0.) ? std::sqrt(12. * 2. * lb_lbcoupling_get_gamma() * kT /
-                                  time_step)
-                      : 0.0;
-
-        auto f_random = [noise_amplitude](int id) -> Utils::Vector3d {
-          if (noise_amplitude > 0.0) {
-            return Random::noise_uniform<RNGSalt::PARTICLES>(
-                lb_particle_coupling.rng_counter_coupling->value(), 0, id);
-          }
-          return {};
-        };
-
-        auto couple_particle = [&](Particle &p) -> void {
-          if (p.is_virtual() and !couple_virtual)
-            return;
-
-          // Calculate coupling force
-          Utils::Vector3d force = {};
-          for (auto pos : positions_in_halo(p.pos(), box_geo)) {
-            if (in_local_halo(pos)) {
-              force = lb_viscous_coupling(p, pos,
-                                          noise_amplitude * f_random(p.id()));
-              break;
-            }
-          }
-
-          // couple positions including shifts by one box length to add
-          // forces to ghost layers
-          for (auto pos : positions_in_halo(p.pos(), box_geo)) {
-            if (in_local_domain(pos)) {
-              /* if the particle is in our LB volume, this node
-               * is responsible to adding its force */
-              p.force() += force;
-            }
-            add_md_force(pos, force, time_step);
-          }
-
-#ifdef ENGINE
-          add_swimmer_force(p, time_step);
-#endif
-        };
-
-        std::unordered_set<int> coupled_ghost_particles;
-
-        /* Couple particles ranges */
-        for (auto &p : particles) {
-          if (should_be_coupled(p, coupled_ghost_particles)) {
-            couple_particle(p);
-          }
+      auto const kT = LB::get_kT() * Utils::sqr(LB::get_lattice_speed());
+      /* Eq. (16) @cite ahlrichs99a.
+       * The factor 12 comes from the fact that we use random numbers
+       * from -0.5 to 0.5 (equally distributed) which have variance 1/12.
+       * time_step comes from the discretization.
+       */
+      auto const noise_amplitude =
+          (kT > 0.)
+              ? std::sqrt(12. * 2. * lb_lbcoupling_get_gamma() * kT / time_step)
+              : 0.0;
+
+      std::unordered_set<int> coupled_ghost_particles;
+
+      /* Couple particles ranges */
+      for (auto &p : particles) {
+        if (should_be_coupled(p, coupled_ghost_particles)) {
+          couple_particle(p, couple_virtual, noise_amplitude,
+                          lb_particle_coupling.rng_counter_coupling, time_step);
         }
+      }
 
-        for (auto &p : more_particles) {
-          if (should_be_coupled(p, coupled_ghost_particles)) {
-            couple_particle(p);
-          }
+      for (auto &p : more_particles) {
+        if (should_be_coupled(p, coupled_ghost_particles)) {
+          couple_particle(p, couple_virtual, noise_amplitude,
+                          lb_particle_coupling.rng_counter_coupling, time_step);
         }
-
-        break;
-      }
       }
     }
   }
@@ -362,13 +328,9 @@ void lb_lbcoupling_calc_particle_lattice_ia(bool couple_virtual,
 
 void lb_lbcoupling_propagate() {
   if (lattice_switch != ActiveLB::NONE) {
-    if (lb_lbfluid_get_kT() > 0.0) {
-      if (lattice_switch == ActiveLB::CPU) {
+    if (LB::get_kT() > 0.0) {
+      if (lattice_switch == ActiveLB::WALBERLA_LB) {
         lb_particle_coupling.rng_counter_coupling->increment();
-      } else if (lattice_switch == ActiveLB::GPU) {
-#ifdef CUDA
-        rng_counter_coupling_gpu->increment();
-#endif
       }
     }
   }
diff --git a/src/core/grid_based_algorithms/lb_particle_coupling.hpp b/src/core/grid_based_algorithms/lb_particle_coupling.hpp
index 362c330858b..c93a98ca008 100644
--- a/src/core/grid_based_algorithms/lb_particle_coupling.hpp
+++ b/src/core/grid_based_algorithms/lb_particle_coupling.hpp
@@ -19,14 +19,22 @@
 #ifndef LB_PARTICLE_COUPLING_HPP
 #define LB_PARTICLE_COUPLING_HPP
 
-#include "BoxGeometry.hpp"
-#include "OptionalCounter.hpp"
+#include "Particle.hpp"
 #include "ParticleRange.hpp"
+#include "grid.hpp"
 
+#include <utils/Counter.hpp>
+#include <utils/Vector.hpp>
+
+#include <boost/optional.hpp>
 #include <boost/serialization/access.hpp>
+#include <boost/serialization/optional.hpp>
 
 #include <cstdint>
 #include <unordered_set>
+#include <vector>
+
+using OptionalCounter = boost::optional<Utils::Counter<uint64_t>>;
 
 /** Calculate particle lattice interactions.
  *  So far, only viscous coupling with Stokesian friction is implemented.
@@ -58,6 +66,69 @@ void lb_lbcoupling_activate();
  */
 void lb_lbcoupling_deactivate();
 
+/**
+ * @brief Check if a position is within the local LB domain plus halo.
+ *
+ * @param pos Position to check
+ *
+ * @return True iff the point is inside of the domain.
+ */
+bool in_local_halo(Utils::Vector3d const &pos);
+
+/** @brief Determine if a given particle should be coupled.
+ *  In certain cases, there may be more than one ghost for the same particle.
+ *  To make sure, that these are only coupled once, ghosts' ids are stored
+ *  in an unordered_set.
+ */
+bool should_be_coupled(const Particle &p,
+                       std::unordered_set<int> &coupled_ghost_particles);
+
+/**
+ * @brief Add a force to the lattice force density.
+ * @param pos Position of the force
+ * @param force Force in MD units.
+ * @param time_step MD time step.
+ */
+void add_md_force(Utils::Vector3d const &pos, Utils::Vector3d const &force,
+                  double time_step);
+
+Utils::Vector3d lb_particle_coupling_noise(bool enabled, int part_id,
+                                           const OptionalCounter &rng_counter);
+
+// internal function exposed for unit testing
+std::vector<Utils::Vector3d> positions_in_halo(Utils::Vector3d pos,
+                                               const BoxGeometry &box);
+
+// internal function exposed for unit testing
+void couple_particle(Particle &p, bool couple_virtual, double noise_amplitude,
+                     const OptionalCounter &rng_counter, double time_step);
+
+// internal function exposed for unit testing
+void add_swimmer_force(Particle const &p, double time_step);
+
+/**
+ * @brief Calculate particle drift velocity offset due to ENGINE and
+ * ELECTROHYDRODYNAMICS.
+ */
+Utils::Vector3d lb_particle_coupling_drift_vel_offset(const Particle &p);
+
+void mpi_bcast_lb_particle_coupling();
+
+/** @brief Calculate drag force on a single particle.
+ *
+ *  See section II.C. @cite ahlrichs99a
+ *
+ *  @param[in] p           The coupled particle
+ *  @param[in] shifted_pos The particle position with optional shift
+ *  @param[in] vel_offset  Velocity offset to be added to interpolated LB
+ *                         velocity before calculating the force
+ *
+ *  @return The viscous coupling force
+ */
+Utils::Vector3d lb_drag_force(Particle const &p,
+                              Utils::Vector3d const &shifted_pos,
+                              Utils::Vector3d const &vel_offset);
+
 struct LB_Particle_Coupling {
   OptionalCounter rng_counter_coupling = {};
   /** @brief Friction coefficient for the particle coupling. */
@@ -74,32 +145,7 @@ struct LB_Particle_Coupling {
   }
 };
 
-// expose functions that are also used to couple lb_inertialess_tracers
-template <class T, std::size_t N>
-using Box = std::pair<Utils::Vector<T, N>, Utils::Vector<T, N>>;
-
-/**
- * @brief Check if a position is in a box.
- *
- * The left boundary belong to the box, the
- * right one does not. Periodic boundaries are
- * not considered.
- *
- * @param pos Position to check
- * @param box Box to check
- *
- * @return True iff the point is inside of the box.
- */
-template <class T, std::size_t N>
-bool in_box(Utils::Vector<T, N> const &pos, Box<T, N> const &box) {
-  return (pos >= box.first) and (pos < box.second);
-}
-
-bool in_local_halo(Utils::Vector3d const &pos);
-std::vector<Utils::Vector3d> positions_in_halo(Utils::Vector3d pos,
-                                               const BoxGeometry &box);
-bool is_ghost_for_local_particle(const Particle &p);
-bool should_be_coupled(const Particle &p,
-                       std::unordered_set<int> &coupled_ghost_particles);
+// internal global exposed for unit testing
+extern LB_Particle_Coupling lb_particle_coupling;
 
 #endif
diff --git a/src/core/grid_based_algorithms/lb_walberla_instance.cpp b/src/core/grid_based_algorithms/lb_walberla_instance.cpp
new file mode 100644
index 00000000000..c5f2ecab70c
--- /dev/null
+++ b/src/core/grid_based_algorithms/lb_walberla_instance.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+#include "lb_walberla_instance.hpp"
+
+#include "communication.hpp"
+#include "errorhandling.hpp"
+#include "grid.hpp"
+#include "integrate.hpp"
+#include "lb_interface.hpp"
+
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <boost/mpi/collectives/all_reduce.hpp>
+#include <boost/optional.hpp>
+
+#include <functional>
+#include <memory>
+#include <stdexcept>
+
+static std::weak_ptr<LBWalberlaBase> lb_walberla_instance;
+static std::shared_ptr<LBWalberlaParams> lb_walberla_params_instance;
+
+std::shared_ptr<LBWalberlaBase> lb_walberla() {
+  auto lb_walberla_instance_handle = ::lb_walberla_instance.lock();
+  if (not lb_walberla_instance_handle) {
+    throw std::runtime_error(
+        "Attempted access to uninitialized LBWalberla instance.");
+  }
+  return lb_walberla_instance_handle;
+}
+
+std::shared_ptr<LBWalberlaParams> lb_walberla_params() {
+  if (not ::lb_walberla_params_instance) {
+    throw std::runtime_error(
+        "Attempted access to uninitialized LBWalberlaParams instance.");
+  }
+  return ::lb_walberla_params_instance;
+}
+
+void lb_sanity_checks(LBWalberlaBase const &lb_fluid,
+                      LBWalberlaParams const &lb_params, double md_time_step) {
+  auto const agrid = lb_params.get_agrid();
+  auto const tau = lb_params.get_tau();
+  // waLBerla and ESPResSo must agree on domain decomposition
+  auto [lb_my_left, lb_my_right] = lb_fluid.get_lattice().get_local_domain();
+  lb_my_left *= agrid;
+  lb_my_right *= agrid;
+  auto const my_left = local_geo.my_left();
+  auto const my_right = local_geo.my_right();
+  auto const tol = agrid / 1E6;
+  if ((lb_my_left - my_left).norm2() > tol or
+      (lb_my_right - my_right).norm2() > tol) {
+    runtimeErrorMsg() << "\nMPI rank " << this_node << ": "
+                      << "left ESPResSo: [" << my_left << "], "
+                      << "left waLBerla: [" << lb_my_left << "]"
+                      << "\nMPI rank " << this_node << ": "
+                      << "right ESPResSo: [" << my_right << "], "
+                      << "right waLBerla: [" << lb_my_right << "]";
+    throw std::runtime_error(
+        "waLBerla and ESPResSo disagree about domain decomposition.");
+  }
+  // LB time step and MD time step must agree
+  if (md_time_step > 0.) {
+    LB::check_tau_time_step_consistency(tau, md_time_step);
+  }
+}
+
+void activate_lb_walberla(std::shared_ptr<LBWalberlaBase> lb_fluid,
+                          std::shared_ptr<LBWalberlaParams> lb_params) {
+  if (::lattice_switch != ActiveLB::NONE) {
+    throw std::runtime_error("Cannot add a second LB instance");
+  }
+  lb_sanity_checks(*lb_fluid, *lb_params, get_time_step());
+  auto const &lebc = ::box_geo.lees_edwards_bc();
+  lb_fluid->check_lebc(lebc.shear_direction, lebc.shear_plane_normal);
+  ::lb_walberla_instance = std::weak_ptr<LBWalberlaBase>{lb_fluid};
+  ::lb_walberla_params_instance = lb_params;
+  ::lattice_switch = ActiveLB::WALBERLA_LB;
+}
+
+void deactivate_lb_walberla() {
+  ::lb_walberla_instance.reset();
+  ::lb_walberla_params_instance.reset();
+  ::lattice_switch = ActiveLB::NONE;
+}
+
+#endif
diff --git a/src/core/grid_based_algorithms/lb_walberla_instance.hpp b/src/core/grid_based_algorithms/lb_walberla_instance.hpp
new file mode 100644
index 00000000000..90bf1c65a8e
--- /dev/null
+++ b/src/core/grid_based_algorithms/lb_walberla_instance.hpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef GRID_BASED_ALGORITHMS_LBWALBERLA_INSTANCE_HPP
+#define GRID_BASED_ALGORITHMS_LBWALBERLA_INSTANCE_HPP
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+
+#include <memory>
+
+struct LBWalberlaParams {
+  LBWalberlaParams(double agrid, double tau) : m_agrid(agrid), m_tau(tau) {}
+  double get_agrid() const { return m_agrid; };
+  double get_tau() const { return m_tau; };
+
+private:
+  double m_agrid;
+  double m_tau;
+};
+
+/** @brief Access the per-MPI-node waLBerla LB instance */
+std::shared_ptr<LBWalberlaBase> lb_walberla();
+
+/** @brief Access the waLBerla parameters */
+std::shared_ptr<LBWalberlaParams> lb_walberla_params();
+
+void lb_sanity_checks(LBWalberlaBase const &lb_fluid,
+                      LBWalberlaParams const &lb_params, double md_time_step);
+
+/** @brief Register a waLBerla LB instance and update lattice switch. */
+void activate_lb_walberla(std::shared_ptr<LBWalberlaBase> lb_fluid,
+                          std::shared_ptr<LBWalberlaParams> lb_params);
+
+/** @brief De-register a waLBerla LB instance and update lattice switch. */
+void deactivate_lb_walberla();
+
+#endif // WALBERLA
+
+#endif
diff --git a/src/core/grid_based_algorithms/lbboundaries/LBBoundary.hpp b/src/core/grid_based_algorithms/lbboundaries/LBBoundary.hpp
deleted file mode 100644
index ead5c0d7497..00000000000
--- a/src/core/grid_based_algorithms/lbboundaries/LBBoundary.hpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef LBBOUNDARIES_LBBOUNDARY_HPP
-#define LBBOUNDARIES_LBBOUNDARY_HPP
-
-#include "config/config.hpp"
-
-#include <shapes/NoWhere.hpp>
-#include <shapes/Shape.hpp>
-
-#include <utils/Vector.hpp>
-
-#include <memory>
-
-namespace LBBoundaries {
-#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU)
-class LBBoundary;
-Utils::Vector3d lbboundary_get_force(LBBoundary const *lbb);
-void lb_init_boundaries();
-#endif
-class LBBoundary {
-public:
-  LBBoundary()
-      : m_shape(std::make_shared<Shapes::NoWhere>()),
-        m_velocity(Utils::Vector3d{0, 0, 0}),
-        m_force(Utils::Vector3d{0, 0, 0}) {
-#ifdef EK_BOUNDARIES
-    m_charge_density = 0.0;
-    m_net_charge = 0.0;
-#endif
-  }
-
-  /* Calculate distance from the lbboundary */
-  void calc_dist(const Utils::Vector3d &pos, double &dist,
-                 Utils::Vector3d &vec) const {
-    m_shape->calculate_dist(pos, dist, vec);
-  }
-
-  void set_shape(std::shared_ptr<Shapes::Shape> const &shape) {
-    m_shape = shape;
-  }
-
-  void set_velocity(const Utils::Vector3d &velocity) {
-    m_velocity = velocity;
-#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU)
-    lb_init_boundaries();
-#endif
-  }
-  void reset_force() { m_force = Utils::Vector3d{0, 0, 0}; }
-
-  Shapes::Shape const &shape() const { return *m_shape; }
-  Utils::Vector3d &velocity() { return m_velocity; }
-  Utils::Vector3d &force() { return m_force; }
-  Utils::Vector3d get_force() const {
-#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU)
-    return lbboundary_get_force(this);
-#else
-    throw std::runtime_error("Needs LB_BOUNDARIES or LB_BOUNDARIES_GPU.");
-#endif
-  }
-
-#ifdef EK_BOUNDARIES // TODO: ugly. Better would be a class EKBoundaries,
-                     // deriving from LBBoundaries, but that requires completely
-                     // different initialization infrastructure.
-  void set_charge_density(double charge_density) {
-    m_charge_density = static_cast<float>(charge_density);
-  }
-  void set_net_charge(double net_charge) {
-    m_net_charge = static_cast<float>(net_charge);
-  }
-
-  float &charge_density() { return m_charge_density; }
-  float &net_charge() { return m_net_charge; }
-#endif
-
-private:
-  /** Private data members */
-  std::shared_ptr<Shapes::Shape> m_shape;
-  Utils::Vector3d m_velocity;
-  Utils::Vector3d m_force;
-
-#ifdef EK_BOUNDARIES // TODO: ugly. Better would be a class EKBoundaries,
-                     // deriving from LBBoundaries, but that requires completely
-                     // different initialization infrastructure.
-  float m_charge_density;
-  float m_net_charge;
-#endif
-};
-
-} /* namespace LBBoundaries */
-
-#endif
diff --git a/src/core/grid_based_algorithms/lbgpu.cpp b/src/core/grid_based_algorithms/lbgpu.cpp
deleted file mode 100644
index 083327d0ce1..00000000000
--- a/src/core/grid_based_algorithms/lbgpu.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/** \file
- *  %Lattice Boltzmann on GPUs.
- *
- *  The corresponding header file is lbgpu.hpp.
- */
-
-#include "config/config.hpp"
-
-#ifdef CUDA
-
-#include "lbgpu.hpp"
-
-#include "communication.hpp"
-#include "cuda_interface.hpp"
-#include "errorhandling.hpp"
-#include "grid.hpp"
-#include "integrate.hpp"
-#include "lb-d3q19.hpp"
-
-#include <utils/math/sqr.hpp>
-
-#include <cmath>
-#include <limits>
-#include <vector>
-
-LB_parameters_gpu lbpar_gpu = {
-    // rho
-    0.f,
-    // mu
-    0.f,
-    // viscosity
-    0.f,
-    // gamma_shear
-    0.f,
-    // gamma_bulk
-    0.f,
-    // gamma_odd
-    0.f,
-    // gamma_even
-    0.f,
-    // is_TRT
-    false,
-    // bulk_viscosity
-    -1.f,
-    // agrid
-    -1.f,
-    // tau
-    -1.f,
-    // dim
-    {{{0u, 0u, 0u}}},
-    // number_of_nodes
-    0u,
-#ifdef LB_BOUNDARIES_GPU
-    // number_of_boundnodes
-    0u,
-#endif
-    // external_force_density
-    false,
-    // ext_force_density
-    {{{0.f, 0.f, 0.f}}},
-    // Thermal energy
-    0.f};
-
-/** this is the array that stores the hydrodynamic fields for the output */
-std::vector<LB_rho_v_pi_gpu> host_values(0);
-
-#ifdef ELECTROKINETICS
-bool ek_initialized = false;
-#endif
-
-/** (Re-)initialize the fluid according to the given value of rho. */
-void lb_reinit_fluid_gpu() {
-  lb_reinit_parameters_gpu();
-  if (lbpar_gpu.number_of_nodes != 0u) {
-    lb_reinit_GPU(&lbpar_gpu);
-    lb_reinit_extern_nodeforce_GPU(&lbpar_gpu);
-  }
-}
-
-/** (Re-)initialize the fluid.
- *  See @cite dunweg07a and @cite dhumieres09a.
- */
-void lb_reinit_parameters_gpu() {
-  lbpar_gpu.mu = 0.f;
-
-  if (lbpar_gpu.viscosity > 0.f && lbpar_gpu.agrid > 0.f &&
-      lbpar_gpu.tau > 0.f) {
-    /* Eq. (80) @cite dunweg07a. */
-    lbpar_gpu.gamma_shear = 1.f - 2.f / (6.f * lbpar_gpu.viscosity + 1.f);
-  }
-
-  if (lbpar_gpu.bulk_viscosity > 0.f) {
-    /* Eq. (81) @cite dunweg07a. */
-    lbpar_gpu.gamma_bulk = 1.f - 2.f / (9.f * lbpar_gpu.bulk_viscosity + 1.f);
-  }
-
-  // By default, gamma_even and gamma_odd are chosen such that the MRT becomes
-  // a TRT with ghost mode relaxation factors that minimize unphysical wall
-  // slip at bounce-back boundaries. For the relation between the gammas
-  // achieving this, consult @cite dhumieres09a.
-  // Note that the relaxation operator in ESPResSo is defined as
-  //  m* = m_eq + gamma * (m - m_eq)
-  // as opposed to this reference, where
-  //  m* = m + lambda * (m - m_eq)
-
-  if (lbpar_gpu.is_TRT) {
-    lbpar_gpu.gamma_bulk = lbpar_gpu.gamma_shear;
-    lbpar_gpu.gamma_even = lbpar_gpu.gamma_shear;
-    lbpar_gpu.gamma_odd =
-        -(7.f * lbpar_gpu.gamma_even + 1.f) / (lbpar_gpu.gamma_even + 7.f);
-  }
-
-  if (lbpar_gpu.kT > 0.f) { /* fluctuating hydrodynamics ? */
-
-    /* Eq. (51) @cite dunweg07a.*/
-    /* Note that the modes are not normalized as in the paper here! */
-    lbpar_gpu.mu = lbpar_gpu.kT * Utils::sqr(lbpar_gpu.tau) /
-                   D3Q19::c_sound_sq<float> / Utils::sqr(lbpar_gpu.agrid);
-  }
-
-  lb_set_agrid_gpu(lbpar_gpu.agrid);
-
-#ifdef ELECTROKINETICS
-  if (ek_initialized) {
-    lbpar_gpu.tau = static_cast<float>(get_time_step());
-  }
-#endif
-
-  reinit_parameters_GPU(&lbpar_gpu);
-}
-
-/** Performs a full initialization of the lattice Boltzmann system.
- *  All derived parameters and the fluid are reset to their default values.
- */
-void lb_init_gpu_local() {
-  if (this_node == 0) {
-    /* set parameters for transfer to gpu */
-    lb_reinit_parameters_gpu();
-    lb_init_GPU(lbpar_gpu);
-  }
-  gpu_init_particle_comm(this_node);
-  cuda_bcast_global_part_params();
-}
-
-REGISTER_CALLBACK(lb_init_gpu_local)
-
-void lb_init_gpu() { mpi_call_all(lb_init_gpu_local); }
-
-void lb_GPU_sanity_checks() {
-  if (this_node == 0) {
-    if (lbpar_gpu.agrid < 0.f) {
-      runtimeErrorMsg() << "Lattice Boltzmann agrid not set";
-    }
-    if (lbpar_gpu.tau < 0.f) {
-      runtimeErrorMsg() << "Lattice Boltzmann time step not set";
-    }
-    if (lbpar_gpu.rho < 0.f) {
-      runtimeErrorMsg() << "Lattice Boltzmann fluid density not set";
-    }
-    if (lbpar_gpu.viscosity < 0.f) {
-      runtimeErrorMsg() << "Lattice Boltzmann fluid viscosity not set";
-    }
-  }
-}
-
-void lb_set_agrid_gpu(double agrid) {
-  lbpar_gpu.agrid = static_cast<float>(agrid);
-
-  lbpar_gpu.dim[0] =
-      static_cast<unsigned int>(round(box_geo.length()[0] / agrid));
-  lbpar_gpu.dim[1] =
-      static_cast<unsigned int>(round(box_geo.length()[1] / agrid));
-  lbpar_gpu.dim[2] =
-      static_cast<unsigned int>(round(box_geo.length()[2] / agrid));
-
-  Utils::Vector<float, 3> box_from_dim(
-      Utils::Vector<unsigned int, 3>(lbpar_gpu.dim) * agrid);
-  Utils::Vector<float, 3> box_lf(box_geo.length());
-
-  auto const rel_difference_vec =
-      Utils::hadamard_division(box_lf - box_from_dim, box_lf);
-  auto const commensurable = std::all_of(
-      rel_difference_vec.begin(), rel_difference_vec.end(), [](auto d) {
-        return std::abs(d) < std::numeric_limits<float>::epsilon();
-      });
-  if (not commensurable) {
-    runtimeErrorMsg() << "Lattice spacing agrid=" << agrid
-                      << " is incompatible with one of the box dimensions: "
-                      << "[" << box_geo.length() << "]";
-  }
-  lbpar_gpu.number_of_nodes =
-      std::accumulate(lbpar_gpu.dim.begin(), lbpar_gpu.dim.end(), 1u,
-                      std::multiplies<unsigned int>());
-}
-
-#endif // CUDA
diff --git a/src/core/grid_based_algorithms/lbgpu.cuh b/src/core/grid_based_algorithms/lbgpu.cuh
deleted file mode 100644
index 352bed60596..00000000000
--- a/src/core/grid_based_algorithms/lbgpu.cuh
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/** \file
- *  %Lattice Boltzmann on GPUs.
- *
- *  Implementation in lbgpu_cuda.cu.
- */
-
-#ifndef LBGPU_CUH
-#define LBGPU_CUH
-
-#include "config/config.hpp"
-
-#ifdef CUDA
-#include <curand_kernel.h>
-
-#include <utils/Array.hpp>
-
-/** Velocity densities for the lattice Boltzmann system. */
-struct LB_nodes_gpu {
-  /** velocity density of the node */
-  Utils::Array<float, 19> *populations = nullptr;
-  unsigned int *boundary = nullptr;
-  Utils::Array<float, 3> *boundary_velocity = nullptr;
-};
-
-__device__ __inline__ float
-calc_mode_x_from_n(Utils::Array<float, 19> const &populations, int x) {
-  switch (x) {
-  case 0:
-    return populations[0] + populations[1] + populations[2] + populations[3] +
-           populations[4] + populations[5] + populations[6] + populations[7] +
-           populations[8] + populations[9] + populations[10] + populations[11] +
-           populations[12] + populations[13] + populations[14] +
-           populations[15] + populations[16] + populations[17] +
-           populations[18];
-  case 1:
-    return (populations[1] - populations[2]) +
-           (populations[7] - populations[8]) +
-           (populations[9] - populations[10]) +
-           (populations[11] - populations[12]) +
-           (populations[13] - populations[14]);
-  case 2:
-    return (populations[3] - populations[4]) +
-           (populations[7] - populations[8]) -
-           (populations[9] - populations[10]) +
-           (populations[15] - populations[16]) +
-           (populations[17] - populations[18]);
-  case 3:
-    return (populations[5] - populations[6]) +
-           (populations[11] - populations[12]) -
-           (populations[13] - populations[14]) +
-           (populations[15] - populations[16]) -
-           (populations[17] - populations[18]);
-  case 4:
-    return -populations[0] + populations[7] + populations[8] + populations[9] +
-           populations[10] + populations[11] + populations[12] +
-           populations[13] + populations[14] + populations[15] +
-           populations[16] + populations[17] + populations[18];
-  case 5:
-    return (populations[1] + populations[2]) -
-           (populations[3] + populations[4]) +
-           (populations[11] + populations[12]) +
-           (populations[13] + populations[14]) -
-           (populations[15] + populations[16]) -
-           (populations[17] + populations[18]);
-  case 6:
-    return (populations[1] + populations[2]) +
-           (populations[3] + populations[4]) -
-           (populations[11] + populations[12]) -
-           (populations[13] + populations[14]) -
-           (populations[15] + populations[16]) -
-           (populations[17] + populations[18]) -
-           2.0f * ((populations[5] + populations[6]) -
-                   (populations[7] + populations[8]) -
-                   (populations[9] + populations[10]));
-  case 7:
-    return (populations[7] + populations[8]) -
-           (populations[9] + populations[10]);
-  case 8:
-    return (populations[11] + populations[12]) -
-           (populations[13] + populations[14]);
-  case 9:
-    return (populations[15] + populations[16]) -
-           (populations[17] + populations[18]);
-  case 10:
-    return -2.0f * (populations[1] - populations[2]) +
-           (populations[7] - populations[8]) +
-           (populations[9] - populations[10]) +
-           (populations[11] - populations[12]) +
-           (populations[13] - populations[14]);
-  case 11:
-    return -2.0f * (populations[3] - populations[4]) +
-           (populations[7] - populations[8]) -
-           (populations[9] - populations[10]) +
-           (populations[15] - populations[16]) +
-           (populations[17] - populations[18]);
-  case 12:
-    return -2.0f * (populations[5] - populations[6]) +
-           (populations[11] - populations[12]) -
-           (populations[13] - populations[14]) +
-           (populations[15] - populations[16]) -
-           (populations[17] - populations[18]);
-  case 13:
-    return (populations[7] - populations[8]) +
-           (populations[9] - populations[10]) -
-           (populations[11] - populations[12]) -
-           (populations[13] - populations[14]);
-  case 14:
-    return (populations[7] - populations[8]) -
-           (populations[9] - populations[10]) -
-           (populations[15] - populations[16]) -
-           (populations[17] - populations[18]);
-  case 15:
-    return (populations[11] - populations[12]) -
-           (populations[13] - populations[14]) -
-           (populations[15] - populations[16]) +
-           (populations[17] - populations[18]);
-  case 16:
-    return populations[0] + populations[7] + populations[8] + populations[9] +
-           populations[10] + populations[11] + populations[12] +
-           populations[13] + populations[14] + populations[15] +
-           populations[16] + populations[17] + populations[18] -
-           2.0f * ((populations[1] + populations[2]) +
-                   (populations[3] + populations[4]) +
-                   (populations[5] + populations[6]));
-  case 17:
-    return -(populations[1] + populations[2]) +
-           (populations[3] + populations[4]) +
-           (populations[11] + populations[12]) +
-           (populations[13] + populations[14]) -
-           (populations[15] + populations[16]) -
-           (populations[17] + populations[18]);
-  case 18:
-    return -(populations[1] + populations[2]) -
-           (populations[3] + populations[4]) -
-           (populations[11] + populations[12]) -
-           (populations[13] + populations[14]) -
-           (populations[15] + populations[16]) -
-           (populations[17] + populations[18]) +
-           2.0f * ((populations[5] + populations[6]) +
-                   (populations[7] + populations[8]) +
-                   (populations[9] + populations[10]));
-  }
-  return 0.0;
-}
-
-/**
- *  @param[in]  node_index        Node index around (8) particle
- *  @param[out] mode              Local register values mode
- *  @param[in]  n_a               Local node residing in array a
- */
-__device__ __inline__ void
-calc_mass_and_momentum_mode(Utils::Array<float, 4> &mode, LB_nodes_gpu n_a,
-                            unsigned int node_index) {
-  /* mass mode */
-  mode[0] = calc_mode_x_from_n(n_a.populations[node_index], 0);
-
-  /* momentum modes */
-  mode[1] = calc_mode_x_from_n(n_a.populations[node_index], 1);
-
-  mode[2] = calc_mode_x_from_n(n_a.populations[node_index], 2);
-
-  mode[3] = calc_mode_x_from_n(n_a.populations[node_index], 3);
-}
-
-struct LB_boundaries_gpu {
-  /** For each fluid node this array contains either
-   *  0 if the node is not a boundary, or the index of
-   *  the boundary in LBBoundaries::lbboundaries minus one.
-   */
-  unsigned int *index = nullptr;
-  /** If the node is a boundary node, this contains the
-   *  velocity of the boundary
-   */
-  Utils::Array<float, 3> *velocity = nullptr;
-};
-
-inline __device__ float4 random_wrapper_philox(unsigned int index,
-                                               unsigned int mode,
-                                               uint64_t philox_counter) {
-  // Split the 64 bit counter into two 32 bit ints.
-  auto const philox_counter_hi = static_cast<uint32_t>(philox_counter >> 32);
-  auto const philox_counter_low = static_cast<uint32_t>(philox_counter);
-  uint4 rnd_ints =
-      curand_Philox4x32_10(make_uint4(index, philox_counter_hi, 0, mode),
-                           make_uint2(philox_counter_low, 0));
-  float4 rnd_floats;
-  rnd_floats.w = static_cast<float>(rnd_ints.w) * CURAND_2POW32_INV +
-                 (CURAND_2POW32_INV / 2.0f);
-  rnd_floats.x = static_cast<float>(rnd_ints.x) * CURAND_2POW32_INV +
-                 (CURAND_2POW32_INV / 2.0f);
-  rnd_floats.y = static_cast<float>(rnd_ints.y) * CURAND_2POW32_INV +
-                 (CURAND_2POW32_INV / 2.0f);
-  rnd_floats.z = static_cast<float>(rnd_ints.z) * CURAND_2POW32_INV +
-                 (CURAND_2POW32_INV / 2.0f);
-  return rnd_floats;
-}
-
-#endif // CUDA
-#endif
diff --git a/src/core/grid_based_algorithms/lbgpu.hpp b/src/core/grid_based_algorithms/lbgpu.hpp
deleted file mode 100644
index 7f3d27b7c4c..00000000000
--- a/src/core/grid_based_algorithms/lbgpu.hpp
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/** \file
- *  %Lattice Boltzmann implementation on GPUs.
- *
- *  Implementation in lbgpu.cpp.
- */
-
-#ifndef LBGPU_HPP
-#define LBGPU_HPP
-
-#include "config/config.hpp"
-
-#ifdef CUDA
-#include "OptionalCounter.hpp"
-
-#include <utils/Vector.hpp>
-#include <utils/index.hpp>
-
-#include <cstddef>
-#include <cstdint>
-#include <vector>
-
-/* For the D3Q19 model most functions have a separate implementation
- * where the coefficients and the velocity vectors are hardcoded
- * explicitly. This saves a lot of multiplications with 1's and 0's
- * thus making the code more efficient. */
-#define LBQ 19
-
-/** Parameters for the lattice Boltzmann system for GPU. */
-struct LB_parameters_gpu {
-  /** number density (LB units) */
-  float rho;
-  /** mu (LJ units) */
-  float mu;
-  /** viscosity (LJ) units */
-  float viscosity;
-  /** relaxation rate of shear modes */
-  float gamma_shear;
-  /** relaxation rate of bulk modes */
-  float gamma_bulk;
-  /** relaxation rate of odd modes */
-  float gamma_odd;
-  /** relaxation rate of even modes */
-  float gamma_even;
-  /** flag determining whether gamma_shear, gamma_odd, and gamma_even are
-   *  calculated from gamma_shear in such a way to yield a TRT LB with minimized
-   *  slip at bounce-back boundaries
-   */
-  bool is_TRT;
-
-  float bulk_viscosity;
-
-  /** lattice spacing (LJ units) */
-  float agrid;
-
-  /** time step for fluid propagation (LJ units)
-   *  Note: Has to be larger than MD time step!
-   */
-  float tau;
-
-  Utils::Array<unsigned int, 3> dim;
-
-  unsigned int number_of_nodes;
-#ifdef LB_BOUNDARIES_GPU
-  unsigned int number_of_boundnodes;
-#endif
-
-  bool external_force_density;
-
-  Utils::Array<float, 3> ext_force_density;
-
-  // Thermal energy
-  float kT;
-};
-
-/* this structure is almost duplicated for memory efficiency. When the stress
-   tensor element are needed at every timestep, this features should be
-   explicitly switched on */
-struct LB_rho_v_pi_gpu {
-  /** density of the node */
-  float rho;
-  /** velocity of the node */
-  Utils::Array<float, 3> v;
-  /** pressure tensor */
-  Utils::Array<float, 6> pi;
-};
-
-struct LB_node_force_density_gpu {
-  Utils::Array<float, 3> *force_density;
-#if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) || defined(EK_DEBUG)
-
-  // We need the node forces for the velocity interpolation at the virtual
-  // particles' position. However, LBM wants to reset them immediately
-  // after the LBM update. This variable keeps a backup
-  Utils::Array<float, 3> *force_density_buf;
-#endif
-};
-
-/************************************************************/
-/** \name Exported Variables */
-/************************************************************/
-/**@{*/
-
-/** Switch indicating momentum exchange between particles and fluid */
-extern LB_parameters_gpu lbpar_gpu;
-extern std::vector<LB_rho_v_pi_gpu> host_values;
-#ifdef ELECTROKINETICS
-extern LB_node_force_density_gpu node_f;
-extern bool ek_initialized;
-#endif
-extern OptionalCounter rng_counter_fluid_gpu;
-extern OptionalCounter rng_counter_coupling_gpu;
-
-/**@}*/
-
-/************************************************************/
-/** \name Exported Functions */
-/************************************************************/
-/**@{*/
-/** Conserved quantities for the lattice Boltzmann system. */
-struct LB_rho_v_gpu {
-
-  /** density of the node */
-  float rho;
-  /** velocity of the node */
-
-  Utils::Array<float, 3> v;
-};
-void lb_GPU_sanity_checks();
-
-void lb_get_boundary_force_pointer(float **pointer_address);
-void lb_get_para_pointer(LB_parameters_gpu **pointer_address);
-
-/** Perform a full initialization of the lattice Boltzmann system.
- *  All derived parameters and the fluid are reset to their default values.
- */
-void lb_init_gpu();
-
-/** (Re-)initialize the derived parameters for the lattice Boltzmann system.
- *  The current state of the fluid is unchanged.
- */
-void lb_reinit_parameters_gpu();
-
-/** (Re-)initialize the fluid. */
-void lb_reinit_fluid_gpu();
-
-/** Reset the forces on the fluid nodes */
-void reset_LB_force_densities_GPU(bool buffer = true);
-
-void lb_init_GPU(const LB_parameters_gpu &lbpar_gpu);
-
-/** Integrate the lattice-Boltzmann system for one time step. */
-void lb_integrate_GPU();
-
-void lb_get_values_GPU(LB_rho_v_pi_gpu *host_values);
-void lb_print_node_GPU(unsigned single_nodeindex,
-                       LB_rho_v_pi_gpu *host_print_values);
-#ifdef LB_BOUNDARIES_GPU
-void lb_init_boundaries_GPU(std::size_t n_lb_boundaries,
-                            unsigned number_of_boundnodes,
-                            int *host_boundary_node_list,
-                            int *host_boundary_index_list,
-                            float *lb_bounday_velocity);
-#endif
-
-void lb_set_agrid_gpu(double agrid);
-
-template <std::size_t no_of_neighbours>
-void lb_calc_particle_lattice_ia_gpu(bool couple_virtual, double friction,
-                                     double time_step);
-
-void lb_calc_fluid_mass_GPU(double *mass);
-void lb_calc_fluid_momentum_GPU(double *host_mom);
-void lb_get_boundary_flag_GPU(unsigned int single_nodeindex,
-                              unsigned int *host_flag);
-void lb_get_boundary_flags_GPU(unsigned int *host_bound_array);
-
-void lb_set_node_velocity_GPU(unsigned single_nodeindex, float *host_velocity);
-void lb_set_node_rho_GPU(unsigned single_nodeindex, float host_rho);
-
-void reinit_parameters_GPU(LB_parameters_gpu *lbpar_gpu);
-void lb_reinit_extern_nodeforce_GPU(LB_parameters_gpu *lbpar_gpu);
-void lb_reinit_GPU(LB_parameters_gpu *lbpar_gpu);
-void lb_gpu_get_boundary_forces(std::vector<double> &forces);
-void lb_save_checkpoint_GPU(float *host_checkpoint_vd);
-void lb_load_checkpoint_GPU(float const *host_checkpoint_vd);
-
-void lb_lbfluid_set_population(const Utils::Vector3i &, float[LBQ]);
-void lb_lbfluid_get_population(const Utils::Vector3i &, float[LBQ]);
-
-template <std::size_t no_of_neighbours>
-void lb_get_interpolated_velocity_gpu(double const *positions,
-                                      double *velocities, int length);
-void linear_velocity_interpolation(double const *positions, double *velocities,
-                                   int length);
-void quadratic_velocity_interpolation(double const *positions,
-                                      double *velocities, int length);
-Utils::Array<float, 6> stress_tensor_GPU();
-uint64_t lb_fluid_get_rng_state_gpu();
-void lb_fluid_set_rng_state_gpu(uint64_t counter);
-uint64_t lb_coupling_get_rng_state_gpu();
-void lb_coupling_set_rng_state_gpu(uint64_t counter);
-
-/** Calculate the node index from its coordinates */
-inline unsigned int calculate_node_index(LB_parameters_gpu const &lbpar_gpu,
-                                         Utils::Vector3i const &coord) {
-  return static_cast<unsigned>(
-      Utils::get_linear_index(coord, Utils::Vector3i(lbpar_gpu.dim)));
-}
-/**@}*/
-
-#endif /*  CUDA */
-
-#endif /*  LBGPU_HPP */
diff --git a/src/core/grid_based_algorithms/lbgpu_cuda.cu b/src/core/grid_based_algorithms/lbgpu_cuda.cu
deleted file mode 100644
index bd291f646b7..00000000000
--- a/src/core/grid_based_algorithms/lbgpu_cuda.cu
+++ /dev/null
@@ -1,2703 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/** \file
- *  %Lattice Boltzmann on GPUs.
- *
- *  The corresponding header file is lbgpu.cuh.
- */
-
-#include "config/config.hpp"
-
-#ifdef CUDA
-
-#include "grid_based_algorithms/OptionalCounter.hpp"
-#include "grid_based_algorithms/lb-d3q19.hpp"
-#include "grid_based_algorithms/lb_boundaries.hpp"
-#include "grid_based_algorithms/lbgpu.cuh"
-#include "grid_based_algorithms/lbgpu.hpp"
-
-#include "cuda_interface.hpp"
-#include "cuda_utils.cuh"
-#include "errorhandling.hpp"
-#include "lbgpu.hpp"
-
-#include <utils/Array.hpp>
-#include <utils/Counter.hpp>
-
-#include <thrust/device_ptr.h>
-#include <thrust/device_vector.h>
-#include <thrust/functional.h>
-#include <thrust/host_vector.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/transform_reduce.h>
-#include <thrust/tuple.h>
-
-#include <cuda.h>
-#include <curand_kernel.h>
-
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
-#include <vector>
-
-/** struct for hydrodynamic fields: this is for internal use
- *  (i.e. stores values in LB units) and should not be used for
- *  printing values
- */
-static LB_rho_v_gpu *device_rho_v = nullptr;
-
-/** struct for hydrodynamic fields: this is the interface
- *  and stores values in MD units. It should not be used
- *  as an input for any LB calculations. TODO: in the future,
- *  one might want to have several structures for printing
- *  separately rho, v, pi without having to compute/store
- *  the complete set.
- */
-static LB_rho_v_pi_gpu *print_rho_v_pi = nullptr;
-
-/** @name structs for velocity densities */
-/**@{*/
-static LB_nodes_gpu nodes_a;
-static LB_nodes_gpu nodes_b;
-/**@}*/
-
-/** boundary information */
-static LB_boundaries_gpu boundaries;
-
-/** struct for node force density */
-LB_node_force_density_gpu node_f = {
-    // force_density
-    nullptr,
-#if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) || defined(EK_DEBUG)
-    // force_density_buf
-    nullptr
-#endif
-};
-
-#ifdef LB_BOUNDARIES_GPU
-/** @brief Force on the boundary nodes */
-static float *lb_boundary_force = nullptr;
-#endif
-
-/** @brief Whether LB GPU was initialized */
-static bool *device_gpu_lb_initialized = nullptr;
-
-/** @brief Direction of data transfer between @ref nodes_a and @ref nodes_b
- *  during integration in @ref lb_integrate_GPU
- */
-static bool intflag = true;
-LB_nodes_gpu *current_nodes = nullptr;
-
-/** Parameters residing in constant memory */
-__device__ __constant__ LB_parameters_gpu para[1];
-
-static constexpr float sqrt12 = 3.4641016151377544f;
-static constexpr unsigned int threads_per_block = 64;
-OptionalCounter rng_counter_coupling_gpu;
-OptionalCounter rng_counter_fluid_gpu;
-
-/** Transformation from 1d array-index to xyz
- *  @param[in]  index   Node index / thread index
- */
-template <typename T> __device__ uint3 index_to_xyz(T index) {
-  auto const x = index % para->dim[0];
-  index /= para->dim[0];
-  auto const y = index % para->dim[1];
-  index /= para->dim[1];
-  auto const z = index;
-  return {x, y, z};
-}
-
-/** Transformation from xyz to 1d array-index
- *  @param[in] x,y,z     The xyz array
- */
-template <typename T> __device__ T xyz_to_index(T x, T y, T z) {
-  return x +
-         static_cast<T>(para->dim[0]) * (y + static_cast<T>(para->dim[1]) * z);
-}
-
-/** Calculate modes from the populations (space-transform).
- *  @param[in]  populations    Populations of one node.
- *  @param[out] mode    Modes corresponding to given @p populations.
- */
-__device__ void calc_m_from_n(Utils::Array<float, 19> const &populations,
-                              Utils::Array<float, 19> &mode) {
-  /**
-   * The following convention and equations from @cite dunweg09a are used:
-   * The \f$\hat{c}_i\f$ are given by:
-   *
-   * \f{align*}{
-   *   c_{ 0} &= ( 0, 0, 0) \\
-   *   c_{ 1} &= ( 1, 0, 0) \\
-   *   c_{ 2} &= (-1, 0, 0) \\
-   *   c_{ 3} &= ( 0, 1, 0) \\
-   *   c_{ 4} &= ( 0,-1, 0) \\
-   *   c_{ 5} &= ( 0, 0, 1) \\
-   *   c_{ 6} &= ( 0, 0,-1) \\
-   *   c_{ 7} &= ( 1, 1, 0) \\
-   *   c_{ 8} &= (-1,-1, 0) \\
-   *   c_{ 9} &= ( 1,-1, 0) \\
-   *   c_{10} &= (-1, 1, 0) \\
-   *   c_{11} &= ( 1, 0, 1) \\
-   *   c_{12} &= (-1, 0,-1) \\
-   *   c_{13} &= ( 1, 0,-1) \\
-   *   c_{14} &= (-1, 0, 1) \\
-   *   c_{15} &= ( 0, 1, 1) \\
-   *   c_{16} &= ( 0,-1,-1) \\
-   *   c_{17} &= ( 0, 1,-1) \\
-   *   c_{18} &= ( 0,-1, 1)
-   *  \f}
-   *
-   *  The basis vectors (modes) are constructed as follows (eq. (111)):
-   *  \f[m_k = \sum_{i} e_{ki} n_{i}\f] where the \f$e_{ki}\f$ form a
-   *  linear transformation (matrix) that is given by (modified from table 1):
-   *
-   *  \f{align*}{
-   *    e_{ 0,i} &= 1 \\
-   *    e_{ 1,i} &= \hat{c}_{i,x} \\
-   *    e_{ 2,i} &= \hat{c}_{i,y} \\
-   *    e_{ 3,i} &= \hat{c}_{i,z} \\
-   *    e_{ 4,i} &= \hat{c}_{i}^2 - 1 \\
-   *    e_{ 5,i} &= \hat{c}_{i,x}^2 - \hat{c}_{i,y}^2 \\
-   *    e_{ 6,i} &= \hat{c}_{i}^2 - 3 \hat{c}_{i,z}^2 \\
-   *    e_{ 7,i} &= \hat{c}_{i,x} \hat{c}_{i,y} \\
-   *    e_{ 8,i} &= \hat{c}_{i,x} \hat{c}_{i,z} \\
-   *    e_{ 9,i} &= \hat{c}_{i,y} \hat{c}_{i,z} \\
-   *    e_{10,i} &= (3 \hat{c}_{i}^2 - 5) \hat{c}_{i,x} \\
-   *    e_{11,i} &= (3 \hat{c}_{i}^2 - 5) \hat{c}_{i,y} \\
-   *    e_{12,i} &= (3 \hat{c}_{i}^2 - 5) \hat{c}_{i,z} \\
-   *    e_{13,i} &= (\hat{c}_{i,y}^2 - \hat{c}_{i,z}^2) \hat{c}_{i,x} \\
-   *    e_{14,i} &= (\hat{c}_{i,x}^2 - \hat{c}_{i,z}^2) \hat{c}_{i,y} \\
-   *    e_{15,i} &= (\hat{c}_{i,x}^2 - \hat{c}_{i,y}^2) \hat{c}_{i,z} \\
-   *    e_{16,i} &= 3 \hat{c}_{i}^4 - 6 \hat{c}_{i}^2 + 1 \\
-   *    e_{17,i} &= (2 \hat{c}_{i}^2 - 3) (\hat{c}_{i,x}^2 - \hat{c}_{i,y}^2) \\
-   *    e_{18,i} &= (2 \hat{c}_{i}^2 - 3) (\hat{c}_{i}^2 - 3 \hat{c}_{i,z}^2)
-   *  \f}
-   *
-   *  Such that the transformation matrix is given by:
-   *
-   *  \code{.cpp}
-   *   {{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-   *    { 0, 1,-1, 0, 0, 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 0, 0, 0, 0},
-   *    { 0, 0, 0, 1,-1, 0, 0, 1,-1,-1, 1, 0, 0, 0, 0, 1,-1, 1,-1},
-   *    { 0, 0, 0, 0, 0, 1,-1, 0, 0, 0, 0, 1,-1,-1, 1, 1,-1,-1, 1},
-   *    {-1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-   *    { 0, 1, 1,-1,-1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,-1,-1,-1,-1},
-   *    { 0, 1, 1, 1, 1,-2,-2, 2, 2, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1},
-   *    { 0, 0, 0, 0, 0, 0, 0, 1, 1,-1,-1, 0, 0, 0, 0, 0, 0, 0, 0},
-   *    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,-1,-1, 0, 0, 0, 0},
-   *    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,-1,-1},
-   *    { 0,-2, 2, 0, 0, 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 0, 0, 0, 0},
-   *    { 0, 0, 0,-2, 2, 0, 0, 1,-1,-1, 1, 0, 0, 0, 0, 1,-1, 1,-1},
-   *    { 0, 0, 0, 0, 0,-2, 2, 0, 0, 0, 0, 1,-1,-1, 1, 1,-1,-1, 1},
-   *    { 0, 0, 0, 0, 0, 0, 0, 1,-1, 1,-1,-1, 1,-1, 1, 0, 0, 0, 0},
-   *    { 0, 0, 0, 0, 0, 0, 0, 1,-1,-1, 1, 0, 0, 0, 0,-1, 1,-1, 1},
-   *    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,-1,-1, 1,-1, 1, 1,-1},
-   *    { 1,-2,-2,-2,-2,-2,-2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-   *    { 0,-1,-1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,-1,-1,-1,-1},
-   *    { 0,-1,-1,-1,-1, 2, 2, 2, 2, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1}}
-   *  \endcode
-   *
-   *  with weights
-   *
-   *  \f[q^{c_{i}} = ( 1/3, 1/18, 1/18, 1/18,
-   *                  1/18, 1/18, 1/18, 1/36,
-   *                  1/36, 1/36, 1/36, 1/36,
-   *                  1/36, 1/36, 1/36, 1/36,
-   *                  1/36, 1/36, 1/36 )\f]
-   *
-   *  which makes the transformation satisfy the following
-   *  orthogonality condition (eq. (109)):
-   *  \f[\sum_{i} q^{c_{i}} e_{ki} e_{li} = w_{k} \delta_{kl}\f]
-   *  where the weights are:
-   *
-   *  \f[w_{i} = (  1, 1/3, 1/3, 1/3,
-   *              2/3, 4/9, 4/3, 1/9,
-   *              1/9, 1/9, 2/3, 2/3,
-   *              2/3, 2/9, 2/9, 2/9,
-   *                2, 4/9, 4/3 )\f]
-   */
-  for (int i = 0; i < 19; ++i) {
-    mode[i] = calc_mode_x_from_n(populations, i);
-  }
-}
-
-__device__ void reset_LB_force_densities(unsigned int index,
-                                         LB_node_force_density_gpu node_f,
-                                         bool buffer = true) {
-#if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) || defined(EK_DEBUG)
-  // Store backup of the node forces
-  if (buffer) {
-    node_f.force_density_buf[index] = node_f.force_density[index];
-  }
-#endif
-
-  if (para->external_force_density) {
-    node_f.force_density[index] = para->ext_force_density;
-  } else {
-    node_f.force_density[index] = {};
-  }
-}
-
-__global__ void
-reset_LB_force_densities_kernel(LB_node_force_density_gpu node_f,
-                                bool buffer = true) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index < para->number_of_nodes)
-    reset_LB_force_densities(index, node_f, buffer);
-}
-
-void reset_LB_force_densities_GPU(bool buffer) {
-  dim3 dim_grid =
-      calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(reset_LB_force_densities_kernel, dim_grid, threads_per_block,
-             node_f, buffer);
-}
-
-/**
- *  @param[in]  modes    Local register values modes
- *  @param[in]  index   Node index / thread index
- *  @param[in]  node_f  Local node force
- *  @param[out] d_v     Local device values
- */
-__device__ void update_rho_v(Utils::Array<float, 19> const &modes,
-                             unsigned int index,
-                             LB_node_force_density_gpu const &node_f,
-                             LB_rho_v_gpu *d_v) {
-  float Rho_tot = 0.0f;
-  Utils::Array<float, 3> u_tot = {};
-
-  /* re-construct the real density
-   * remember that the populations are stored as differences to their
-   * equilibrium value */
-
-  d_v[index].rho = modes[0] + para->rho;
-  Rho_tot += modes[0] + para->rho;
-  u_tot[0] += modes[1];
-  u_tot[1] += modes[2];
-  u_tot[2] += modes[3];
-
-  /** If forces are present, the momentum density is redefined to
-   *  include one half-step of the force action. See the
-   *  Chapman-Enskog expansion in @cite ladd01a.
-   */
-
-  u_tot[0] += 0.5f * node_f.force_density[index][0];
-  u_tot[1] += 0.5f * node_f.force_density[index][1];
-  u_tot[2] += 0.5f * node_f.force_density[index][2];
-
-  u_tot[0] /= Rho_tot;
-  u_tot[1] /= Rho_tot;
-  u_tot[2] /= Rho_tot;
-
-  d_v[index].v[0] = u_tot[0];
-  d_v[index].v[1] = u_tot[1];
-  d_v[index].v[2] = u_tot[2];
-}
-
-/** lb_relax_modes, means collision update of the modes
- *  @param[in] index     Node index / thread index
- *  @param[in,out] mode  Local register values mode
- *  @param[in] node_f    Local node force
- *  @param[in,out] d_v   Local device values
- */
-__device__ void relax_modes(Utils::Array<float, 19> &mode, unsigned int index,
-                            LB_node_force_density_gpu node_f,
-                            LB_rho_v_gpu *d_v) {
-  float u_tot[3] = {0.0f, 0.0f, 0.0f};
-
-  update_rho_v(mode, index, node_f, d_v);
-
-  u_tot[0] = d_v[index].v[0];
-  u_tot[1] = d_v[index].v[1];
-  u_tot[2] = d_v[index].v[2];
-
-  float Rho;
-  float j[3];
-  Utils::Array<float, 6> modes_from_pi_eq;
-
-  Rho = mode[0] + para->rho;
-  j[0] = Rho * u_tot[0];
-  j[1] = Rho * u_tot[1];
-  j[2] = Rho * u_tot[2];
-
-  /* equilibrium part of the stress modes (eq13 schiller) */
-
-  modes_from_pi_eq[0] = ((j[0] * j[0]) + (j[1] * j[1]) + (j[2] * j[2])) / Rho;
-  modes_from_pi_eq[1] = ((j[0] * j[0]) - (j[1] * j[1])) / Rho;
-  modes_from_pi_eq[2] =
-      (((j[0] * j[0]) + (j[1] * j[1]) + (j[2] * j[2])) - 3.0f * (j[2] * j[2])) /
-      Rho;
-  modes_from_pi_eq[3] = j[0] * j[1] / Rho;
-  modes_from_pi_eq[4] = j[0] * j[2] / Rho;
-  modes_from_pi_eq[5] = j[1] * j[2] / Rho;
-
-  /* relax the stress modes (eq14 schiller) */
-
-  mode[4] =
-      modes_from_pi_eq[0] + para->gamma_bulk * (mode[4] - modes_from_pi_eq[0]);
-  mode[5] =
-      modes_from_pi_eq[1] + para->gamma_shear * (mode[5] - modes_from_pi_eq[1]);
-  mode[6] =
-      modes_from_pi_eq[2] + para->gamma_shear * (mode[6] - modes_from_pi_eq[2]);
-  mode[7] =
-      modes_from_pi_eq[3] + para->gamma_shear * (mode[7] - modes_from_pi_eq[3]);
-  mode[8] =
-      modes_from_pi_eq[4] + para->gamma_shear * (mode[8] - modes_from_pi_eq[4]);
-  mode[9] =
-      modes_from_pi_eq[5] + para->gamma_shear * (mode[9] - modes_from_pi_eq[5]);
-
-  /* relax the ghost modes (project them out) */
-  /* ghost modes have no equilibrium part due to orthogonality */
-
-  mode[10] = para->gamma_odd * mode[10];
-  mode[11] = para->gamma_odd * mode[11];
-  mode[12] = para->gamma_odd * mode[12];
-  mode[13] = para->gamma_odd * mode[13];
-  mode[14] = para->gamma_odd * mode[14];
-  mode[15] = para->gamma_odd * mode[15];
-  mode[16] = para->gamma_even * mode[16];
-  mode[17] = para->gamma_even * mode[17];
-  mode[18] = para->gamma_even * mode[18];
-}
-
-/** Thermalization of the modes with Gaussian random numbers
- *  @param[in] index     Node index / thread index
- *  @param[in,out] mode  Local register values mode
- *  @param[in]  philox_counter   Philox counter
- */
-__device__ void thermalize_modes(Utils::Array<float, 19> &mode,
-                                 unsigned int index, uint64_t philox_counter) {
-  float Rho;
-  float4 random_floats;
-  /* mass mode */
-  Rho = mode[0] + para->rho;
-
-  /* stress modes */
-  random_floats = random_wrapper_philox(index, 4, philox_counter);
-  mode[4] += sqrtf(Rho * (para->mu * (2.0f / 3.0f) *
-                          (1.0f - (para->gamma_bulk * para->gamma_bulk)))) *
-             (random_floats.w - 0.5f) * sqrt12;
-  mode[5] += sqrtf(Rho * (para->mu * (4.0f / 9.0f) *
-                          (1.0f - (para->gamma_shear * para->gamma_shear)))) *
-             (random_floats.x - 0.5f) * sqrt12;
-
-  mode[6] += sqrtf(Rho * (para->mu * (4.0f / 3.0f) *
-                          (1.0f - (para->gamma_shear * para->gamma_shear)))) *
-             (random_floats.y - 0.5f) * sqrt12;
-  mode[7] += sqrtf(Rho * (para->mu * (1.0f / 9.0f) *
-                          (1.0f - (para->gamma_shear * para->gamma_shear)))) *
-             (random_floats.z - 0.5f) * sqrt12;
-
-  random_floats = random_wrapper_philox(index, 8, philox_counter);
-  mode[8] += sqrtf(Rho * (para->mu * (1.0f / 9.0f) *
-                          (1.0f - (para->gamma_shear * para->gamma_shear)))) *
-             (random_floats.w - 0.5f) * sqrt12;
-  mode[9] += sqrtf(Rho * (para->mu * (1.0f / 9.0f) *
-                          (1.0f - (para->gamma_shear * para->gamma_shear)))) *
-             (random_floats.x - 0.5f) * sqrt12;
-
-  /* ghost modes */
-  mode[10] += sqrtf(Rho * (para->mu * (2.0f / 3.0f) *
-                           (1.0f - (para->gamma_odd * para->gamma_odd)))) *
-              (random_floats.y - 0.5f) * sqrt12;
-  mode[11] += sqrtf(Rho * (para->mu * (2.0f / 3.0f) *
-                           (1.0f - (para->gamma_odd * para->gamma_odd)))) *
-              (random_floats.z - 0.5f) * sqrt12;
-
-  random_floats = random_wrapper_philox(index, 12, philox_counter);
-  mode[12] += sqrtf(Rho * (para->mu * (2.0f / 3.0f) *
-                           (1.0f - (para->gamma_odd * para->gamma_odd)))) *
-              (random_floats.w - 0.5f) * sqrt12;
-  mode[13] += sqrtf(Rho * (para->mu * (2.0f / 9.0f) *
-                           (1.0f - (para->gamma_odd * para->gamma_odd)))) *
-              (random_floats.x - 0.5f) * sqrt12;
-
-  mode[14] += sqrtf(Rho * (para->mu * (2.0f / 9.0f) *
-                           (1.0f - (para->gamma_odd * para->gamma_odd)))) *
-              (random_floats.y - 0.5f) * sqrt12;
-  mode[15] += sqrtf(Rho * (para->mu * (2.0f / 9.0f) *
-                           (1.0f - (para->gamma_odd * para->gamma_odd)))) *
-              (random_floats.z - 0.5f) * sqrt12;
-
-  random_floats = random_wrapper_philox(index, 16, philox_counter);
-  mode[16] += sqrtf(Rho * (para->mu * (2.0f) *
-                           (1.0f - (para->gamma_even * para->gamma_even)))) *
-              (random_floats.w - 0.5f) * sqrt12;
-  mode[17] += sqrtf(Rho * (para->mu * (4.0f / 9.0f) *
-                           (1.0f - (para->gamma_even * para->gamma_even)))) *
-              (random_floats.x - 0.5f) * sqrt12;
-
-  mode[18] += sqrtf(Rho * (para->mu * (4.0f / 3.0f) *
-                           (1.0f - (para->gamma_even * para->gamma_even)))) *
-              (random_floats.y - 0.5f) * sqrt12;
-}
-
-/** Normalization of the modes need before back-transformation into velocity
- *  space
- *  @param[in,out] mode  Local register values mode
- */
-__device__ void normalize_modes(Utils::Array<float, 19> &mode) {
-  /* normalization factors enter in the back transformation */
-  mode[0] *= 1.0f;
-  mode[1] *= 3.0f;
-  mode[2] *= 3.0f;
-  mode[3] *= 3.0f;
-  mode[4] *= 3.0f / 2.0f;
-  mode[5] *= 9.0f / 4.0f;
-  mode[6] *= 3.0f / 4.0f;
-  mode[7] *= 9.0f;
-  mode[8] *= 9.0f;
-  mode[9] *= 9.0f;
-  mode[10] *= 3.0f / 2.0f;
-  mode[11] *= 3.0f / 2.0f;
-  mode[12] *= 3.0f / 2.0f;
-  mode[13] *= 9.0f / 2.0f;
-  mode[14] *= 9.0f / 2.0f;
-  mode[15] *= 9.0f / 2.0f;
-  mode[16] *= 1.0f / 2.0f;
-  mode[17] *= 9.0f / 4.0f;
-  mode[18] *= 3.0f / 4.0f;
-}
-
-/** Back-transformation from modespace to densityspace and streaming with
- *  the push method using pbc
- *  @param[in]  index  Node index / thread index
- *  @param[in]  mode   Local register values mode
- *  @param[out] n_b    Local node residing in array b
- */
-__device__ void calc_n_from_modes_push(LB_nodes_gpu n_b,
-                                       Utils::Array<float, 19> const &mode,
-                                       unsigned int index) {
-  auto const xyz = index_to_xyz(index);
-  unsigned int x = xyz.x;
-  unsigned int y = xyz.y;
-  unsigned int z = xyz.z;
-
-  n_b.populations[x + para->dim[0] * y + para->dim[0] * para->dim[1] * z][0] =
-      1.0f / 3.0f * (mode[0] - mode[4] + mode[16]);
-
-  n_b.populations[(x + 1) % para->dim[0] + para->dim[0] * y +
-                  para->dim[0] * para->dim[1] * z][1] =
-      1.0f / 18.0f *
-      (mode[0] + mode[1] + mode[5] + mode[6] - mode[17] - mode[18] -
-       2.0f * (mode[10] + mode[16]));
-
-  n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + para->dim[0] * y +
-                  para->dim[0] * para->dim[1] * z][2] =
-      1.0f / 18.0f *
-      (mode[0] - mode[1] + mode[5] + mode[6] - mode[17] - mode[18] +
-       2.0f * (mode[10] - mode[16]));
-
-  n_b.populations[x + para->dim[0] * ((y + 1) % para->dim[1]) +
-                  para->dim[0] * para->dim[1] * z][3] =
-      1.0f / 18.0f *
-      (mode[0] + mode[2] - mode[5] + mode[6] + mode[17] - mode[18] -
-       2.0f * (mode[11] + mode[16]));
-
-  n_b.populations[x + para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) +
-                  para->dim[0] * para->dim[1] * z][4] =
-      1.0f / 18.0f *
-      (mode[0] - mode[2] - mode[5] + mode[6] + mode[17] - mode[18] +
-       2.0f * (mode[11] - mode[16]));
-
-  n_b.populations[x + para->dim[0] * y +
-                  para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][5] =
-      1.0f / 18.0f *
-      (mode[0] + mode[3] - 2.0f * (mode[6] + mode[12] + mode[16] - mode[18]));
-
-  n_b.populations[x + para->dim[0] * y +
-                  para->dim[0] * para->dim[1] *
-                      ((para->dim[2] + z - 1) % para->dim[2])][6] =
-      1.0f / 18.0f *
-      (mode[0] - mode[3] - 2.0f * (mode[6] - mode[12] + mode[16] - mode[18]));
-
-  n_b.populations[(x + 1) % para->dim[0] +
-                  para->dim[0] * ((y + 1) % para->dim[1]) +
-                  para->dim[0] * para->dim[1] * z][7] =
-      1.0f / 36.0f *
-      (mode[0] + mode[1] + mode[2] + mode[4] + 2.0f * mode[6] + mode[7] +
-       mode[10] + mode[11] + mode[13] + mode[14] + mode[16] + 2.0f * mode[18]);
-
-  n_b.populations[(para->dim[0] + x - 1) % para->dim[0] +
-                  para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) +
-                  para->dim[0] * para->dim[1] * z][8] =
-      1.0f / 36.0f *
-      (mode[0] - mode[1] - mode[2] + mode[4] + 2.0f * mode[6] + mode[7] -
-       mode[10] - mode[11] - mode[13] - mode[14] + mode[16] + 2.0f * mode[18]);
-
-  n_b.populations[(x + 1) % para->dim[0] +
-                  para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) +
-                  para->dim[0] * para->dim[1] * z][9] =
-      1.0f / 36.0f *
-      (mode[0] + mode[1] - mode[2] + mode[4] + 2.0f * mode[6] - mode[7] +
-       mode[10] - mode[11] + mode[13] - mode[14] + mode[16] + 2.0f * mode[18]);
-
-  n_b.populations[(para->dim[0] + x - 1) % para->dim[0] +
-                  para->dim[0] * ((y + 1) % para->dim[1]) +
-                  para->dim[0] * para->dim[1] * z][10] =
-      1.0f / 36.0f *
-      (mode[0] - mode[1] + mode[2] + mode[4] + 2.0f * mode[6] - mode[7] -
-       mode[10] + mode[11] - mode[13] + mode[14] + mode[16] + 2.0f * mode[18]);
-
-  n_b.populations[(x + 1) % para->dim[0] + para->dim[0] * y +
-                  para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][11] =
-      1.0f / 36.0f *
-      (mode[0] + mode[1] + mode[3] + mode[4] + mode[5] - mode[6] + mode[8] +
-       mode[10] + mode[12] - mode[13] + mode[15] + mode[16] + mode[17] -
-       mode[18]);
-
-  n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + para->dim[0] * y +
-                  para->dim[0] * para->dim[1] *
-                      ((para->dim[2] + z - 1) % para->dim[2])][12] =
-      1.0f / 36.0f *
-      (mode[0] - mode[1] - mode[3] + mode[4] + mode[5] - mode[6] + mode[8] -
-       mode[10] - mode[12] + mode[13] - mode[15] + mode[16] + mode[17] -
-       mode[18]);
-
-  n_b.populations[(x + 1) % para->dim[0] + para->dim[0] * y +
-                  para->dim[0] * para->dim[1] *
-                      ((para->dim[2] + z - 1) % para->dim[2])][13] =
-      1.0f / 36.0f *
-      (mode[0] + mode[1] - mode[3] + mode[4] + mode[5] - mode[6] - mode[8] +
-       mode[10] - mode[12] - mode[13] - mode[15] + mode[16] + mode[17] -
-       mode[18]);
-
-  n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + para->dim[0] * y +
-                  para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][14] =
-      1.0f / 36.0f *
-      (mode[0] - mode[1] + mode[3] + mode[4] + mode[5] - mode[6] - mode[8] -
-       mode[10] + mode[12] + mode[13] + mode[15] + mode[16] + mode[17] -
-       mode[18]);
-
-  n_b.populations[x + para->dim[0] * ((y + 1) % para->dim[1]) +
-                  para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][15] =
-      1.0f / 36.0f *
-      (mode[0] + mode[2] + mode[3] + mode[4] - mode[5] - mode[6] + mode[9] +
-       mode[11] + mode[12] - mode[14] - mode[15] + mode[16] - mode[17] -
-       mode[18]);
-
-  n_b.populations[x + para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) +
-                  para->dim[0] * para->dim[1] *
-                      ((para->dim[2] + z - 1) % para->dim[2])][16] =
-      1.0f / 36.0f *
-      (mode[0] - mode[2] - mode[3] + mode[4] - mode[5] - mode[6] + mode[9] -
-       mode[11] - mode[12] + mode[14] + mode[15] + mode[16] - mode[17] -
-       mode[18]);
-
-  n_b.populations[x + para->dim[0] * ((y + 1) % para->dim[1]) +
-                  para->dim[0] * para->dim[1] *
-                      ((para->dim[2] + z - 1) % para->dim[2])][17] =
-      1.0f / 36.0f *
-      (mode[0] + mode[2] - mode[3] + mode[4] - mode[5] - mode[6] - mode[9] +
-       mode[11] - mode[12] - mode[14] + mode[15] + mode[16] - mode[17] -
-       mode[18]);
-
-  n_b.populations[x + para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) +
-                  para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][18] =
-      1.0f / 36.0f *
-      (mode[0] - mode[2] + mode[3] + mode[4] - mode[5] - mode[6] - mode[9] -
-       mode[11] + mode[12] + mode[14] - mode[15] + mode[16] - mode[17] -
-       mode[18]);
-}
-
-/** Bounce back boundary conditions.
- *
- *  The populations that have propagated into a boundary node
- *  are bounced back to the node they came from. This results
- *  in no slip boundary conditions, cf. @cite ladd01a.
- *
- *  @param[in]  index   Node index / thread index
- *  @param[in]  n_curr  Local node receiving the current node field
- *  @param[in]  boundaries  Constant velocity at the boundary, set by the user
- *  @param[out] lb_boundary_force     Force on the boundary nodes
- */
-__device__ void bounce_back_boundaries(LB_nodes_gpu n_curr,
-                                       LB_boundaries_gpu boundaries,
-                                       unsigned int index,
-                                       float *lb_boundary_force) {
-  int c[3];
-  float shift, weight, pop_to_bounce_back;
-  float boundary_force[3] = {0.0f, 0.0f, 0.0f};
-  std::size_t to_index, to_index_x, to_index_y, to_index_z;
-  unsigned population, inverse;
-
-  if (boundaries.index[index] != 0) {
-    auto const v = boundaries.velocity[index];
-
-    auto const xyz = index_to_xyz(index);
-
-    unsigned int x = xyz.x;
-    unsigned int y = xyz.y;
-    unsigned int z = xyz.z;
-
-    /* store populations temporary in second lattice to avoid race conditions */
-
-    // TODO : PUT IN EQUILIBRIUM CONTRIBUTION TO THE BOUNCE-BACK DENSITY FOR THE
-    // BOUNDARY FORCE
-    // TODO : INITIALIZE BOUNDARY FORCE PROPERLY, HAS NONZERO ELEMENTS IN FIRST
-    // STEP
-    // TODO : SET INTERNAL BOUNDARY NODE VALUES TO ZERO
-
-#define BOUNCEBACK()                                                           \
-  shift = 2.0f / para->agrid * para->rho * 3.0f * weight * para->tau *         \
-          (v[0] * static_cast<float>(c[0]) + v[1] * static_cast<float>(c[1]) + \
-           v[2] * static_cast<float>(c[2]));                                   \
-  pop_to_bounce_back = n_curr.populations[index][population];                  \
-  to_index_x =                                                                 \
-      (x + static_cast<unsigned>(c[0]) + para->dim[0]) % para->dim[0];         \
-  to_index_y =                                                                 \
-      (y + static_cast<unsigned>(c[1]) + para->dim[1]) % para->dim[1];         \
-  to_index_z =                                                                 \
-      (z + static_cast<unsigned>(c[2]) + para->dim[2]) % para->dim[2];         \
-  to_index = to_index_x + para->dim[0] * to_index_y +                          \
-             para->dim[0] * para->dim[1] * to_index_z;                         \
-  if (n_curr.boundary[to_index] == 0) {                                        \
-    boundary_force[0] +=                                                       \
-        (2.0f * pop_to_bounce_back + shift) * static_cast<float>(c[0]);        \
-    boundary_force[1] +=                                                       \
-        (2.0f * pop_to_bounce_back + shift) * static_cast<float>(c[1]);        \
-    boundary_force[2] +=                                                       \
-        (2.0f * pop_to_bounce_back + shift) * static_cast<float>(c[2]);        \
-    n_curr.populations[to_index][inverse] = pop_to_bounce_back + shift;        \
-  }
-
-    // the resting population does nothing, i.e., population 0.
-    c[0] = 1;
-    c[1] = 0;
-    c[2] = 0;
-    weight = 1.f / 18.f;
-    population = 2;
-    inverse = 1;
-    BOUNCEBACK();
-
-    c[0] = -1;
-    c[1] = 0;
-    c[2] = 0;
-    weight = 1.f / 18.f;
-    population = 1;
-    inverse = 2;
-    BOUNCEBACK();
-
-    c[0] = 0;
-    c[1] = 1;
-    c[2] = 0;
-    weight = 1.f / 18.f;
-    population = 4;
-    inverse = 3;
-    BOUNCEBACK();
-
-    c[0] = 0;
-    c[1] = -1;
-    c[2] = 0;
-    weight = 1.f / 18.f;
-    population = 3;
-    inverse = 4;
-    BOUNCEBACK();
-
-    c[0] = 0;
-    c[1] = 0;
-    c[2] = 1;
-    weight = 1.f / 18.f;
-    population = 6;
-    inverse = 5;
-    BOUNCEBACK();
-
-    c[0] = 0;
-    c[1] = 0;
-    c[2] = -1;
-    weight = 1.f / 18.f;
-    population = 5;
-    inverse = 6;
-    BOUNCEBACK();
-
-    c[0] = 1;
-    c[1] = 1;
-    c[2] = 0;
-    weight = 1.f / 36.f;
-    population = 8;
-    inverse = 7;
-    BOUNCEBACK();
-
-    c[0] = -1;
-    c[1] = -1;
-    c[2] = 0;
-    weight = 1.f / 36.f;
-    population = 7;
-    inverse = 8;
-    BOUNCEBACK();
-
-    c[0] = 1;
-    c[1] = -1;
-    c[2] = 0;
-    weight = 1.f / 36.f;
-    population = 10;
-    inverse = 9;
-    BOUNCEBACK();
-
-    c[0] = -1;
-    c[1] = 1;
-    c[2] = 0;
-    weight = 1.f / 36.f;
-    population = 9;
-    inverse = 10;
-    BOUNCEBACK();
-
-    c[0] = 1;
-    c[1] = 0;
-    c[2] = 1;
-    weight = 1.f / 36.f;
-    population = 12;
-    inverse = 11;
-    BOUNCEBACK();
-
-    c[0] = -1;
-    c[1] = 0;
-    c[2] = -1;
-    weight = 1.f / 36.f;
-    population = 11;
-    inverse = 12;
-    BOUNCEBACK();
-
-    c[0] = 1;
-    c[1] = 0;
-    c[2] = -1;
-    weight = 1.f / 36.f;
-    population = 14;
-    inverse = 13;
-    BOUNCEBACK();
-
-    c[0] = -1;
-    c[1] = 0;
-    c[2] = 1;
-    weight = 1.f / 36.f;
-    population = 13;
-    inverse = 14;
-    BOUNCEBACK();
-
-    c[0] = 0;
-    c[1] = 1;
-    c[2] = 1;
-    weight = 1.f / 36.f;
-    population = 16;
-    inverse = 15;
-    BOUNCEBACK();
-
-    c[0] = 0;
-    c[1] = -1;
-    c[2] = -1;
-    weight = 1.f / 36.f;
-    population = 15;
-    inverse = 16;
-    BOUNCEBACK();
-
-    c[0] = 0;
-    c[1] = 1;
-    c[2] = -1;
-    weight = 1.f / 36.f;
-    population = 18;
-    inverse = 17;
-    BOUNCEBACK();
-
-    c[0] = 0;
-    c[1] = -1;
-    c[2] = 1;
-    weight = 1.f / 36.f;
-    population = 17;
-    inverse = 18;
-    BOUNCEBACK();
-
-    atomicAdd(&lb_boundary_force[3 * (n_curr.boundary[index] - 1) + 0],
-              boundary_force[0]);
-    atomicAdd(&lb_boundary_force[3 * (n_curr.boundary[index] - 1) + 1],
-              boundary_force[1]);
-    atomicAdd(&lb_boundary_force[3 * (n_curr.boundary[index] - 1) + 2],
-              boundary_force[2]);
-  }
-}
-
-/** Add external forces within the modespace, needed for particle-interaction
- *  @param[in]     index   Node index / thread index
- *  @param[in,out] mode    Local register values mode
- *  @param[in,out] node_f  Local node force
- *  @param[in]     d_v     Local device values
- */
-__device__ void apply_forces(unsigned int index, Utils::Array<float, 19> &mode,
-                             LB_node_force_density_gpu node_f,
-                             LB_rho_v_gpu *d_v) {
-  float u[3] = {0.0f, 0.0f, 0.0f}, C[6] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
-  /* Note: the values d_v were calculated in relax_modes() */
-
-  u[0] = d_v[index].v[0];
-  u[1] = d_v[index].v[1];
-  u[2] = d_v[index].v[2];
-
-  C[0] += (1.0f + para->gamma_shear) * u[0] * node_f.force_density[index][0] +
-          1.0f / 3.0f * (para->gamma_bulk - para->gamma_shear) *
-              (u[0] * node_f.force_density[index][0] +
-               u[1] * node_f.force_density[index][1] +
-               u[2] * node_f.force_density[index][2]);
-
-  C[2] += (1.0f + para->gamma_shear) * u[1] * node_f.force_density[index][1] +
-          1.0f / 3.0f * (para->gamma_bulk - para->gamma_shear) *
-              (u[0] * node_f.force_density[index][0] +
-               u[1] * node_f.force_density[index][1] +
-               u[2] * node_f.force_density[index][2]);
-
-  C[5] += (1.0f + para->gamma_shear) * u[2] * node_f.force_density[index][2] +
-          1.0f / 3.0f * (para->gamma_bulk - para->gamma_shear) *
-              (u[0] * node_f.force_density[index][0] +
-               u[1] * node_f.force_density[index][1] +
-               u[2] * node_f.force_density[index][2]);
-
-  C[1] += 1.0f / 2.0f * (1.0f + para->gamma_shear) *
-          (u[0] * node_f.force_density[index][1] +
-           u[1] * node_f.force_density[index][0]);
-
-  C[3] += 1.0f / 2.0f * (1.0f + para->gamma_shear) *
-          (u[0] * node_f.force_density[index][2] +
-           u[2] * node_f.force_density[index][0]);
-
-  C[4] += 1.0f / 2.0f * (1.0f + para->gamma_shear) *
-          (u[1] * node_f.force_density[index][2] +
-           u[2] * node_f.force_density[index][1]);
-
-  /* update momentum modes */
-  mode[1] += node_f.force_density[index][0];
-  mode[2] += node_f.force_density[index][1];
-  mode[3] += node_f.force_density[index][2];
-
-  /* update stress modes */
-  mode[4] += C[0] + C[2] + C[5];
-  mode[5] += C[0] - C[2];
-  mode[6] += C[0] + C[2] - 2.0f * C[5];
-  mode[7] += C[1];
-  mode[8] += C[3];
-  mode[9] += C[4];
-
-  reset_LB_force_densities(index, node_f);
-}
-
-__device__ Utils::Array<float, 19>
-stress_modes(LB_rho_v_gpu const &rho_v, const Utils::Array<float, 19> &modes) {
-  /* note that d_v[index].v[] already includes the 1/2 f term, accounting
-   * for the pre- and post-collisional average
-   */
-  auto const density = rho_v.rho;
-  Utils::Array<float, 3> j{density * rho_v.v[0], density * rho_v.v[1],
-                           density * rho_v.v[2]};
-  // equilibrium part of the stress modes, which comes from
-  // the equality between modes and stress tensor components
-
-  /* m4 = trace(pi) - rho
-     m5 = pi_xx - pi_yy
-     m6 = trace(pi) - 3 pi_zz
-     m7 = pi_xy
-     m8 = pi_xz
-     m9 = pi_yz */
-
-  // and plugging in the Euler stress for the equilibrium:
-  // pi_eq = rho_0*c_s^2*I3 + (j \otimes j)/rho
-  // with I3 the 3D identity matrix and
-  // rho = \trace(rho_0*c_s^2*I3), which yields
-
-  /* m4_from_pi_eq = j.j
-     m5_from_pi_eq = j_x*j_x - j_y*j_y
-     m6_from_pi_eq = j.j - 3*j_z*j_z
-     m7_from_pi_eq = j_x*j_y
-     m8_from_pi_eq = j_x*j_z
-     m9_from_pi_eq = j_y*j_z */
-
-  // where the / density term has been dropped. We thus obtain:
-  /* Now we must predict the outcome of the next collision */
-  /* We immediately average pre- and post-collision. */
-  /* TODO: need a reference for this. */
-  Utils::Array<float, 6> modes_from_pi_eq{
-      (j[0] * j[0] + j[1] * j[1] + j[2] * j[2]) / density,
-      (j[0] * j[0] - j[1] * j[1]) / density,
-      (j[0] * j[0] + j[1] * j[1] + j[2] * j[2] - 3.0f * j[2] * j[2]) / density,
-      j[0] * j[1] / density,
-      j[0] * j[2] / density,
-      j[1] * j[2] / density};
-  auto res = modes;
-  res[4] = modes_from_pi_eq[0] +
-           (0.5f + 0.5f * para->gamma_bulk) * (modes[4] - modes_from_pi_eq[0]);
-  res[5] = modes_from_pi_eq[1] +
-           (0.5f + 0.5f * para->gamma_shear) * (modes[5] - modes_from_pi_eq[1]);
-  res[6] = modes_from_pi_eq[2] +
-           (0.5f + 0.5f * para->gamma_shear) * (modes[6] - modes_from_pi_eq[2]);
-  res[7] = modes_from_pi_eq[3] +
-           (0.5f + 0.5f * para->gamma_shear) * (modes[7] - modes_from_pi_eq[3]);
-  res[8] = modes_from_pi_eq[4] +
-           (0.5f + 0.5f * para->gamma_shear) * (modes[8] - modes_from_pi_eq[4]);
-  res[9] = modes_from_pi_eq[5] +
-           (0.5f + 0.5f * para->gamma_shear) * (modes[9] - modes_from_pi_eq[5]);
-  return res;
-}
-
-/** Calculate the stress tensor.
- *  Transform the stress tensor components according to the modes that
- *  correspond to those used by U. Schiller. In terms of populations this
- *  expression then corresponds exactly to those in eq. (116)-(121) in
- *  @cite dunweg07a, when these are written out in populations.
- *  But to ensure this, the expression in Schiller's modes has to be
- *  different!
- *  @param[in]  modes   Local register values modes
- */
-__device__ Utils::Array<float, 6>
-stress_from_stress_modes(Utils::Array<float, 19> const &modes) {
-  return {(2.0f * (modes[0] + modes[4]) + modes[6] + 3.0f * modes[5]) / 6.0f,
-          modes[7],
-          (2.0f * (modes[0] + modes[4]) + modes[6] - 3.0f * modes[5]) / 6.0f,
-          modes[8],
-          modes[9],
-          (modes[0] + modes[4] - modes[6]) / 3.0f};
-}
-
-/** Calculate hydrodynamic fields in LB units
- *  @param[in]  n_a     Local node residing in array a for boundary flag
- *  @param[in]  modes   Local register values modes
- *  @param[out] d_p_v   Local print values
- *  @param[out] d_v     Local device values
- *  @param[in]  node_f  Local node force
- *  @param[in]  index   Node index / thread index
- *  @param[in]  print_index  Node index / thread index
- *  TODO: code duplication with \ref calc_values_from_m
- */
-__device__ void
-calc_values_in_LB_units(LB_nodes_gpu n_a, Utils::Array<float, 19> const &modes,
-                        LB_rho_v_pi_gpu *d_p_v, LB_rho_v_gpu *d_v,
-                        LB_node_force_density_gpu node_f, unsigned int index,
-                        unsigned int print_index) {
-
-  if (n_a.boundary[index] == 0) {
-    /* Ensure we are working with the current values of d_v */
-    update_rho_v(modes, index, node_f, d_v);
-
-    d_p_v[print_index].rho = d_v[index].rho;
-
-    d_p_v[print_index].v = d_v[index].v;
-    auto const modes_tmp = stress_modes(d_v[index], modes);
-
-    d_p_v[print_index].pi = stress_from_stress_modes(modes_tmp);
-
-  } else {
-    d_p_v[print_index].rho = 0.0f;
-    d_p_v[print_index].v = {};
-    d_p_v[print_index].pi = {};
-  }
-}
-
-/** Calculate hydrodynamic fields in MD units
- *  @param[out] mode_single   Local register values mode
- *  @param[in]  d_v_single    Local device values
- *  @param[out] rho_out       Density
- *  @param[out] j_out         Momentum
- *  @param[out] pi_out        Pressure tensor
- */
-__device__ void calc_values_from_m(Utils::Array<float, 19> const &mode_single,
-                                   LB_rho_v_gpu const &d_v_single,
-                                   float *rho_out, float *j_out,
-                                   Utils::Array<float, 6> &pi_out) {
-  *rho_out = d_v_single.rho;
-  float Rho = d_v_single.rho;
-  j_out[0] = Rho * d_v_single.v[0];
-  j_out[1] = Rho * d_v_single.v[1];
-  j_out[2] = Rho * d_v_single.v[2];
-
-  // Now we must predict the outcome of the next collision
-  // We immediately average pre- and post-collision.
-  // Transform the stress tensor components according to the mode_singles.
-  pi_out = stress_from_stress_modes(stress_modes(d_v_single, mode_single));
-}
-
-/** Interpolation kernel.
- *  See @cite dunweg09a
- *  @param u Distance to grid point in units of agrid
- *  @retval Value for the interpolation function.
- */
-__device__ __inline__ float
-three_point_polynomial_smallerequal_than_half(float u) {
-  return 1.f / 3.f * (1.f + sqrtf(1.f - 3.f * u * u));
-}
-
-/** Interpolation kernel.
- *  See @cite dunweg09a
- *  @param u Distance to grid point in units of agrid
- *  @retval Value for the interpolation function.
- */
-__device__ __inline__ float three_point_polynomial_larger_than_half(float u) {
-  return 1.f / 6.f *
-         (5.f + -3 * fabsf(u) - sqrtf(-2.f + 6.f * fabsf(u) - 3.f * u * u));
-}
-
-/**
- * @brief Get velocity of at index.
- */
-__device__ __inline__ float3 node_velocity(float rho_eq, LB_nodes_gpu n_a,
-                                           unsigned index) {
-  auto const boundary_index = n_a.boundary[index];
-
-  if (boundary_index) {
-    auto const inv_lattice_speed = para->tau / para->agrid;
-    auto const &u = n_a.boundary_velocity[index];
-    return make_float3(inv_lattice_speed * u[0], inv_lattice_speed * u[1],
-                       inv_lattice_speed * u[2]);
-  }
-
-  auto const rho = rho_eq + calc_mode_x_from_n(n_a.populations[index], 0);
-  return make_float3(calc_mode_x_from_n(n_a.populations[index], 1) / rho,
-                     calc_mode_x_from_n(n_a.populations[index], 2) / rho,
-                     calc_mode_x_from_n(n_a.populations[index], 3) / rho);
-}
-
-__device__ __inline__ float3
-velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position,
-                       Utils::Array<unsigned int, 27> &node_indices,
-                       Utils::Array<float, 27> &delta) {
-  Utils::Array<int, 3> center_node_index{};
-  Utils::Array<float3, 3> temp_delta{};
-
-  for (unsigned i = 0; i < 3; ++i) {
-    // position of particle in units of agrid.
-    auto const scaled_pos = particle_position[i] / para->agrid - 0.5f;
-    center_node_index[i] = static_cast<int>(rint(scaled_pos));
-    // distance to center node in agrid
-    auto const dist = scaled_pos - static_cast<float>(center_node_index[i]);
-    // distance to left node in agrid
-    auto const dist_m1 =
-        scaled_pos - static_cast<float>(center_node_index[i] - 1);
-    // distance to right node in agrid
-    auto const dist_p1 =
-        scaled_pos - static_cast<float>(center_node_index[i] + 1);
-    if (i == 0) {
-      temp_delta[0].x = three_point_polynomial_larger_than_half(dist_m1);
-      temp_delta[1].x = three_point_polynomial_smallerequal_than_half(dist);
-      temp_delta[2].x = three_point_polynomial_larger_than_half(dist_p1);
-    } else if (i == 1) {
-      temp_delta[0].y = three_point_polynomial_larger_than_half(dist_m1);
-      temp_delta[1].y = three_point_polynomial_smallerequal_than_half(dist);
-      temp_delta[2].y = three_point_polynomial_larger_than_half(dist_p1);
-    } else if (i == 2) {
-      temp_delta[0].z = three_point_polynomial_larger_than_half(dist_m1);
-      temp_delta[1].z = three_point_polynomial_smallerequal_than_half(dist);
-      temp_delta[2].z = three_point_polynomial_larger_than_half(dist_p1);
-    }
-  }
-
-  auto fold_if_necessary = [](int ind, int dim) {
-    if (ind >= dim) {
-      return ind - dim;
-    }
-    if (ind < 0) {
-      return ind + dim;
-    }
-    return ind;
-  };
-
-  unsigned cnt = 0;
-  float3 interpolated_u{0.0f, 0.0f, 0.0f};
-#pragma unroll 1
-  for (int i = 0; i < 3; ++i) {
-#pragma unroll 1
-    for (int j = 0; j < 3; ++j) {
-#pragma unroll 1
-      for (int k = 0; k < 3; ++k) {
-        auto const x = fold_if_necessary(center_node_index[0] - 1 + i,
-                                         static_cast<int>(para->dim[0]));
-        auto const y = fold_if_necessary(center_node_index[1] - 1 + j,
-                                         static_cast<int>(para->dim[1]));
-        auto const z = fold_if_necessary(center_node_index[2] - 1 + k,
-                                         static_cast<int>(para->dim[2]));
-        delta[cnt] = temp_delta[i].x * temp_delta[j].y * temp_delta[k].z;
-        auto const index = static_cast<unsigned>(xyz_to_index(x, y, z));
-        node_indices[cnt] = index;
-
-        auto const node_u = node_velocity(para->rho, n_a, index);
-        interpolated_u.x += delta[cnt] * node_u.x;
-        interpolated_u.y += delta[cnt] * node_u.y;
-        interpolated_u.z += delta[cnt] * node_u.z;
-
-        ++cnt;
-      }
-    }
-  }
-  return interpolated_u;
-}
-
-/** Velocity interpolation.
- *  Eq. (12) @cite ahlrichs99a.
- *  @param[in]  n_a                Local node residing in array a
- *  @param[in]  particle_position  Particle position
- *  @param[out] node_index         Node index around (8) particle
- *  @param[out] delta              Weighting of particle position
- *  @retval Interpolated velocity
- */
-__device__ __inline__ float3
-velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position,
-                       Utils::Array<unsigned int, 8> &node_index,
-                       Utils::Array<float, 8> &delta) {
-  Utils::Array<int, 3> left_node_index;
-  Utils::Array<float, 6> temp_delta;
-  // Eq. (10) and (11) in @cite ahlrichs99a page 8227
-#pragma unroll
-  for (unsigned i = 0; i < 3; ++i) {
-    auto const scaledpos = particle_position[i] / para->agrid - 0.5f;
-    left_node_index[i] = static_cast<int>(floorf(scaledpos));
-    temp_delta[3 + i] = scaledpos - static_cast<float>(left_node_index[i]);
-    temp_delta[i] = 1.0f - temp_delta[3 + i];
-  }
-
-  delta[0] = temp_delta[0] * temp_delta[1] * temp_delta[2];
-  delta[1] = temp_delta[3] * temp_delta[1] * temp_delta[2];
-  delta[2] = temp_delta[0] * temp_delta[4] * temp_delta[2];
-  delta[3] = temp_delta[3] * temp_delta[4] * temp_delta[2];
-  delta[4] = temp_delta[0] * temp_delta[1] * temp_delta[5];
-  delta[5] = temp_delta[3] * temp_delta[1] * temp_delta[5];
-  delta[6] = temp_delta[0] * temp_delta[4] * temp_delta[5];
-  delta[7] = temp_delta[3] * temp_delta[4] * temp_delta[5];
-
-  // modulo for negative numbers is strange at best, shift to make sure we are
-  // positive
-  int const x = (left_node_index[0] + static_cast<int>(para->dim[0])) %
-                static_cast<int>(para->dim[0]);
-  int const y = (left_node_index[1] + static_cast<int>(para->dim[1])) %
-                static_cast<int>(para->dim[1]);
-  int const z = (left_node_index[2] + static_cast<int>(para->dim[2])) %
-                static_cast<int>(para->dim[2]);
-  auto fold_if_necessary = [](int ind, int dim) {
-    return ind >= dim ? ind % dim : ind;
-  };
-  auto const xp1 = fold_if_necessary(x + 1, static_cast<int>(para->dim[0]));
-  auto const yp1 = fold_if_necessary(y + 1, static_cast<int>(para->dim[1]));
-  auto const zp1 = fold_if_necessary(z + 1, static_cast<int>(para->dim[2]));
-  node_index[0] = static_cast<unsigned>(xyz_to_index(x, y, z));
-  node_index[1] = static_cast<unsigned>(xyz_to_index(xp1, y, z));
-  node_index[2] = static_cast<unsigned>(xyz_to_index(x, yp1, z));
-  node_index[3] = static_cast<unsigned>(xyz_to_index(xp1, yp1, z));
-  node_index[4] = static_cast<unsigned>(xyz_to_index(x, y, zp1));
-  node_index[5] = static_cast<unsigned>(xyz_to_index(xp1, y, zp1));
-  node_index[6] = static_cast<unsigned>(xyz_to_index(x, yp1, zp1));
-  node_index[7] = static_cast<unsigned>(xyz_to_index(xp1, yp1, zp1));
-
-  float3 interpolated_u{0.0f, 0.0f, 0.0f};
-  for (unsigned i = 0; i < 8; ++i) {
-    auto const node_u = node_velocity(para->rho, n_a, node_index[i]);
-    interpolated_u.x += delta[i] * node_u.x;
-    interpolated_u.y += delta[i] * node_u.y;
-    interpolated_u.z += delta[i] * node_u.z;
-  }
-  return interpolated_u;
-}
-
-/** Calculate viscous force.
- *  Eq. (12) @cite ahlrichs99a.
- *  @param[in]  n_a                Local node residing in array a
- *  @param[out] delta              Weighting of particle position
- *  @param[out] delta_j            Weighting of particle momentum
- *  @param[in,out] particle_data   Particle position and velocity
- *  @param[in,out] particle_force  Particle force
- *  @param[in]  part_index         Particle id / thread id
- *  @param[out] node_index         Node index around (8) particle
- *  @param[in]  flag_cs            Determine if we are at the centre (0,
- *                                 typical) or at the source (1, swimmer only)
- *  @param[in]  philox_counter     Philox counter
- *  @param[in]  friction           Friction constant for the particle coupling
- *  @param[in]  time_step          MD time step
- *  @tparam no_of_neighbours       The number of neighbours to consider for
- *                                 interpolation
- */
-template <std::size_t no_of_neighbours>
-__device__ void calc_viscous_force(
-    LB_nodes_gpu n_a, Utils::Array<float, no_of_neighbours> &delta,
-    CUDA_particle_data *particle_data, float *particle_force,
-    unsigned int part_index, float *delta_j,
-    Utils::Array<unsigned int, no_of_neighbours> &node_index, bool flag_cs,
-    uint64_t philox_counter, float friction, float time_step) {
-  auto const flag_cs_float = static_cast<float>(flag_cs);
-  // Zero out workspace
-#pragma unroll
-  for (int jj = 0; jj < 3; ++jj) {
-    delta_j[jj] = 0.0f;
-  }
-
-  // Zero out only if we are at the centre of the particle <=> flag_cs = 0
-  particle_force[3 * part_index + 0] =
-      flag_cs_float * particle_force[3 * part_index + 0];
-  particle_force[3 * part_index + 1] =
-      flag_cs_float * particle_force[3 * part_index + 1];
-  particle_force[3 * part_index + 2] =
-      flag_cs_float * particle_force[3 * part_index + 2];
-
-  float position[3];
-  position[0] = particle_data[part_index].p[0];
-  position[1] = particle_data[part_index].p[1];
-  position[2] = particle_data[part_index].p[2];
-
-  float velocity[3];
-  velocity[0] = particle_data[part_index].v[0];
-  velocity[1] = particle_data[part_index].v[1];
-  velocity[2] = particle_data[part_index].v[2];
-
-#ifdef ENGINE
-  // First calculate interpolated velocity for dipole source,
-  // such that we don't overwrite mode, etc. for the rest of the function
-  float direction = float(particle_data[part_index].swim.push_pull) *
-                    particle_data[part_index].swim.dipole_length;
-  // Extrapolate position by dipole length if we are at the centre of the
-  // particle
-  position[0] +=
-      flag_cs_float * direction * particle_data[part_index].swim.director[0];
-  position[1] +=
-      flag_cs_float * direction * particle_data[part_index].swim.director[1];
-  position[2] +=
-      flag_cs_float * direction * particle_data[part_index].swim.director[2];
-#endif
-
-  float3 const interpolated_u =
-      velocity_interpolation(n_a, position, node_index, delta);
-
-#ifdef ENGINE
-  velocity[0] -= particle_data[part_index].swim.v_swim *
-                 particle_data[part_index].swim.director[0];
-  velocity[1] -= particle_data[part_index].swim.v_swim *
-                 particle_data[part_index].swim.director[1];
-  velocity[2] -= particle_data[part_index].swim.v_swim *
-                 particle_data[part_index].swim.director[2];
-
-  // The first three components are v_center, the last three v_source
-  // Do not use within LB, because these have already been converted back to MD
-  // units
-  particle_data[part_index].swim.v_cs[0 + 3 * flag_cs] =
-      interpolated_u.x * para->agrid / para->tau;
-  particle_data[part_index].swim.v_cs[1 + 3 * flag_cs] =
-      interpolated_u.y * para->agrid / para->tau;
-  particle_data[part_index].swim.v_cs[2 + 3 * flag_cs] =
-      interpolated_u.z * para->agrid / para->tau;
-#endif
-
-  /* take care to rescale velocities with time_step and transform to MD units
-   * (eq. (9) @cite ahlrichs99a) */
-
-  /* Viscous force */
-  float3 viscforce_density{0.0f, 0.0f, 0.0f};
-  viscforce_density.x -=
-      friction * (velocity[0] - interpolated_u.x * para->agrid / para->tau);
-  viscforce_density.y -=
-      friction * (velocity[1] - interpolated_u.y * para->agrid / para->tau);
-  viscforce_density.z -=
-      friction * (velocity[2] - interpolated_u.z * para->agrid / para->tau);
-
-#ifdef LB_ELECTROHYDRODYNAMICS
-  viscforce_density.x += friction * particle_data[part_index].mu_E[0];
-  viscforce_density.y += friction * particle_data[part_index].mu_E[1];
-  viscforce_density.z += friction * particle_data[part_index].mu_E[2];
-#endif
-
-  if (para->kT > 0.0) {
-    /* add stochastic force of zero mean (eq. (15) @cite ahlrichs99a) */
-    float4 random_floats = random_wrapper_philox(
-        static_cast<unsigned>(particle_data[part_index].identity), LBQ * 32,
-        philox_counter);
-    /* lb_coupl_pref is stored in MD units (force).
-     * Eq. (16) @cite ahlrichs99a.
-     * The factor 12 comes from the fact that we use random numbers
-     * from -0.5 to 0.5 (equally distributed) which have variance 1/12.
-     * time_step comes from the discretization.
-     */
-    float lb_coupl_pref = sqrtf(12.f * 2.f * friction * para->kT / time_step);
-    viscforce_density.x += lb_coupl_pref * (random_floats.w - 0.5f);
-    viscforce_density.y += lb_coupl_pref * (random_floats.x - 0.5f);
-    viscforce_density.z += lb_coupl_pref * (random_floats.y - 0.5f);
-  }
-  /* delta_j for transform momentum transfer to lattice units which is done
-     in calc_node_force (eq. (12) @cite ahlrichs99a) */
-
-  // only add to particle_force for particle centre <=> (1-flag_cs) = 1
-  particle_force[3 * part_index + 0] +=
-      (1 - flag_cs_float) * viscforce_density.x;
-  particle_force[3 * part_index + 1] +=
-      (1 - flag_cs_float) * viscforce_density.y;
-  particle_force[3 * part_index + 2] +=
-      (1 - flag_cs_float) * viscforce_density.z;
-
-  // only add to particle_force for particle centre <=> (1-flag_cs) = 1
-  delta_j[0] -= ((1 - flag_cs_float) * viscforce_density.x) * time_step *
-                para->tau / para->agrid;
-  delta_j[1] -= ((1 - flag_cs_float) * viscforce_density.y) * time_step *
-                para->tau / para->agrid;
-  delta_j[2] -= ((1 - flag_cs_float) * viscforce_density.z) * time_step *
-                para->tau / para->agrid;
-
-#ifdef ENGINE
-  // add swimming force to source position
-  delta_j[0] -= flag_cs_float * particle_data[part_index].swim.f_swim *
-                particle_data[part_index].swim.director[0] * time_step *
-                para->tau / para->agrid;
-  delta_j[1] -= flag_cs_float * particle_data[part_index].swim.f_swim *
-                particle_data[part_index].swim.director[1] * time_step *
-                para->tau / para->agrid;
-  delta_j[2] -= flag_cs_float * particle_data[part_index].swim.f_swim *
-                particle_data[part_index].swim.director[2] * time_step *
-                para->tau / para->agrid;
-#endif
-}
-
-/** Calculate the node force caused by the particles, with atomicAdd due to
- *  avoiding race conditions.
- *  Eq. (14) @cite ahlrichs99a.
- *  @param[in]  delta              Weighting of particle position
- *  @param[in]  delta_j            Weighting of particle momentum
- *  @param[in]  node_index         Node index around (8) particle
- *  @param[out] node_f             Node force
- *  @tparam no_of_neighbours       The number of neighbours to consider for
- *                                 interpolation
- */
-template <std::size_t no_of_neighbours>
-__device__ void
-calc_node_force(Utils::Array<float, no_of_neighbours> const &delta,
-                float const *delta_j,
-                Utils::Array<unsigned int, no_of_neighbours> const &node_index,
-                LB_node_force_density_gpu node_f) {
-  for (std::size_t node = 0; node < no_of_neighbours; ++node) {
-    for (unsigned i = 0; i < 3; ++i) {
-      atomicAdd(&(node_f.force_density[node_index[node]][i]),
-                delta[node] * delta_j[i]);
-    }
-  }
-}
-
-/*********************************************************/
-/** \name System setup and Kernel functions */
-/*********************************************************/
-
-/** Kernel to calculate local populations from hydrodynamic fields.
- *  The mapping is given in terms of the equilibrium distribution.
- *
- *  Eq. (2.15) @cite ladd94a.
- *  Eq. (4) in @cite usta05a.
- *
- *  @param[out] n_a        %Lattice site
- *  @param[out] gpu_check  Additional check if GPU kernel are executed
- *  @param[out] d_v        Local device values
- *  @param[in]  node_f     Node forces
- */
-__global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v,
-                                     LB_node_force_density_gpu node_f,
-                                     bool *gpu_check) {
-  /* TODO: this can handle only a uniform density, something similar, but local,
-           has to be called every time the fields are set by the user ! */
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-  if (index < para->number_of_nodes) {
-    Utils::Array<float, 19> mode;
-
-    gpu_check[0] = true;
-
-    /* default values for fields in lattice units */
-    float Rho = para->rho;
-    Utils::Array<float, 3> v{};
-    Utils::Array<float, 6> pi = {{Rho * D3Q19::c_sound_sq<float>, 0.0f,
-                                  Rho * D3Q19::c_sound_sq<float>, 0.0f, 0.0f,
-                                  Rho * D3Q19::c_sound_sq<float>}};
-    Utils::Array<float, 6> local_pi{};
-    float rhoc_sq = Rho * D3Q19::c_sound_sq<float>;
-    float avg_rho = para->rho;
-    float local_rho, trace;
-    Utils::Array<float, 3> local_j{};
-
-    local_rho = Rho;
-
-    local_j[0] = Rho * v[0];
-    local_j[1] = Rho * v[1];
-    local_j[2] = Rho * v[2];
-
-    local_pi = pi;
-
-    // reduce the pressure tensor to the part needed here.
-
-    local_pi[0] -= rhoc_sq;
-    local_pi[2] -= rhoc_sq;
-    local_pi[5] -= rhoc_sq;
-
-    trace = local_pi[0] + local_pi[2] + local_pi[5];
-
-    float rho_times_coeff;
-    float tmp1, tmp2;
-
-    /* update the q=0 sublattice */
-    n_a.populations[index][0] =
-        1.0f / 3.0f * (local_rho - avg_rho) - 1.0f / 2.0f * trace;
-
-    /* update the q=1 sublattice */
-    rho_times_coeff = 1.0f / 18.0f * (local_rho - avg_rho);
-
-    n_a.populations[index][1] = rho_times_coeff + 1.0f / 6.0f * local_j[0] +
-                                1.0f / 4.0f * local_pi[0] -
-                                1.0f / 12.0f * trace;
-    n_a.populations[index][2] = rho_times_coeff - 1.0f / 6.0f * local_j[0] +
-                                1.0f / 4.0f * local_pi[0] -
-                                1.0f / 12.0f * trace;
-    n_a.populations[index][3] = rho_times_coeff + 1.0f / 6.0f * local_j[1] +
-                                1.0f / 4.0f * local_pi[2] -
-                                1.0f / 12.0f * trace;
-    n_a.populations[index][4] = rho_times_coeff - 1.0f / 6.0f * local_j[1] +
-                                1.0f / 4.0f * local_pi[2] -
-                                1.0f / 12.0f * trace;
-    n_a.populations[index][5] = rho_times_coeff + 1.0f / 6.0f * local_j[2] +
-                                1.0f / 4.0f * local_pi[5] -
-                                1.0f / 12.0f * trace;
-    n_a.populations[index][6] = rho_times_coeff - 1.0f / 6.0f * local_j[2] +
-                                1.0f / 4.0f * local_pi[5] -
-                                1.0f / 12.0f * trace;
-
-    /* update the q=2 sublattice */
-    rho_times_coeff = 1.0f / 36.0f * (local_rho - avg_rho);
-
-    tmp1 = local_pi[0] + local_pi[2];
-    tmp2 = 2.0f * local_pi[1];
-    n_a.populations[index][7] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[1]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[index][8] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[0] + local_j[1]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[index][9] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[0] - local_j[1]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[index][10] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[1]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-
-    tmp1 = local_pi[0] + local_pi[5];
-    tmp2 = 2.0f * local_pi[3];
-
-    n_a.populations[index][11] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[2]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[index][12] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[0] + local_j[2]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[index][13] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[0] - local_j[2]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[index][14] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[2]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-
-    tmp1 = local_pi[2] + local_pi[5];
-    tmp2 = 2.0f * local_pi[4];
-
-    n_a.populations[index][15] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[1] + local_j[2]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[index][16] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[1] + local_j[2]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[index][17] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[1] - local_j[2]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[index][18] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[1] - local_j[2]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-
-    calc_m_from_n(n_a.populations[index], mode);
-    update_rho_v(mode, index, node_f, d_v);
-  }
-}
-
-__global__ void set_force_density(unsigned single_nodeindex,
-                                  float const *force_density,
-                                  LB_node_force_density_gpu node_f) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index == 0) {
-    node_f.force_density[single_nodeindex][0] = force_density[0];
-    node_f.force_density[single_nodeindex][1] = force_density[1];
-    node_f.force_density[single_nodeindex][2] = force_density[2];
-  }
-}
-
-/** Kernel to calculate local populations from hydrodynamic fields
- *  from given flow field velocities. The mapping is given in terms of
- *  the equilibrium distribution.
- *
- *  Eq. (2.15) @cite ladd94a.
- *  Eq. (4) in @cite usta05a.
- *
- *  @param[out] n_a               Current nodes array (double buffering!)
- *  @param[in]  single_nodeindex  Single node index
- *  @param[in]  velocity          Velocity
- *  @param[out] d_v               Local device values
- *  @param[in]  node_f            Node forces
- */
-__global__ void set_u_from_rho_v_pi(LB_nodes_gpu n_a, unsigned single_nodeindex,
-                                    float const *velocity, LB_rho_v_gpu *d_v,
-                                    LB_node_force_density_gpu node_f) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index == 0) {
-    float local_rho;
-    float local_j[3];
-    float local_pi[6];
-    float trace, avg_rho;
-    float rho_times_coeff;
-    float tmp1, tmp2;
-
-    Utils::Array<float, 19> mode_for_pi;
-    float rho_from_m;
-    float j_from_m[3];
-    Utils::Array<float, 6> pi_from_m;
-
-    // Calculate the modes for this node
-
-    calc_m_from_n(n_a.populations[single_nodeindex], mode_for_pi);
-
-    // Reset the d_v
-
-    update_rho_v(mode_for_pi, single_nodeindex, node_f, d_v);
-
-    // Calculate the density, velocity, and pressure tensor
-    // in LB unit for this node
-
-    calc_values_from_m(mode_for_pi, d_v[single_nodeindex], &rho_from_m,
-                       j_from_m, pi_from_m);
-
-    // Take LB component density and calculate the equilibrium part
-    local_rho = rho_from_m;
-    avg_rho = para->rho;
-
-    // Take LB component velocity and make it a momentum
-
-    local_j[0] = local_rho * velocity[0];
-    local_j[1] = local_rho * velocity[1];
-    local_j[2] = local_rho * velocity[2];
-    // Take LB component pressure tensor and put in equilibrium
-
-    local_pi[0] = pi_from_m[0];
-    local_pi[1] = pi_from_m[1];
-    local_pi[2] = pi_from_m[2];
-    local_pi[3] = pi_from_m[3];
-    local_pi[4] = pi_from_m[4];
-    local_pi[5] = pi_from_m[5];
-
-    trace = local_pi[0] + local_pi[2] + local_pi[5];
-
-    // update the q=0 sublattice
-
-    n_a.populations[single_nodeindex][0] =
-        1.0f / 3.0f * (local_rho - avg_rho) - 1.0f / 2.0f * trace;
-
-    // update the q=1 sublattice
-
-    rho_times_coeff = 1.0f / 18.0f * (local_rho - avg_rho);
-
-    n_a.populations[single_nodeindex][1] =
-        rho_times_coeff + 1.0f / 6.0f * local_j[0] + 1.0f / 4.0f * local_pi[0] -
-        1.0f / 12.0f * trace;
-    n_a.populations[single_nodeindex][2] =
-        rho_times_coeff - 1.0f / 6.0f * local_j[0] + 1.0f / 4.0f * local_pi[0] -
-        1.0f / 12.0f * trace;
-    n_a.populations[single_nodeindex][3] =
-        rho_times_coeff + 1.0f / 6.0f * local_j[1] + 1.0f / 4.0f * local_pi[2] -
-        1.0f / 12.0f * trace;
-    n_a.populations[single_nodeindex][4] =
-        rho_times_coeff - 1.0f / 6.0f * local_j[1] + 1.0f / 4.0f * local_pi[2] -
-        1.0f / 12.0f * trace;
-    n_a.populations[single_nodeindex][5] =
-        rho_times_coeff + 1.0f / 6.0f * local_j[2] + 1.0f / 4.0f * local_pi[5] -
-        1.0f / 12.0f * trace;
-    n_a.populations[single_nodeindex][6] =
-        rho_times_coeff - 1.0f / 6.0f * local_j[2] + 1.0f / 4.0f * local_pi[5] -
-        1.0f / 12.0f * trace;
-
-    // update the q=2 sublattice
-
-    rho_times_coeff = 1.0f / 36.0f * (local_rho - avg_rho);
-
-    tmp1 = local_pi[0] + local_pi[2];
-    tmp2 = 2.0f * local_pi[1];
-
-    n_a.populations[single_nodeindex][7] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[1]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[single_nodeindex][8] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[0] + local_j[1]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[single_nodeindex][9] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[0] - local_j[1]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[single_nodeindex][10] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[1]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-
-    tmp1 = local_pi[0] + local_pi[5];
-    tmp2 = 2.0f * local_pi[3];
-
-    n_a.populations[single_nodeindex][11] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[2]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[single_nodeindex][12] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[0] + local_j[2]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[single_nodeindex][13] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[0] - local_j[2]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[single_nodeindex][14] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[2]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-
-    tmp1 = local_pi[2] + local_pi[5];
-    tmp2 = 2.0f * local_pi[4];
-
-    n_a.populations[single_nodeindex][15] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[1] + local_j[2]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[single_nodeindex][16] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[1] + local_j[2]) +
-        1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[single_nodeindex][17] =
-        rho_times_coeff + 1.0f / 12.0f * (local_j[1] - local_j[2]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-    n_a.populations[single_nodeindex][18] =
-        rho_times_coeff - 1.0f / 12.0f * (local_j[1] - local_j[2]) +
-        1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace;
-
-    // Calculate the modes for this node
-
-    calc_m_from_n(n_a.populations[single_nodeindex], mode_for_pi);
-
-    // Update the density and velocity field for this mode
-
-    update_rho_v(mode_for_pi, single_nodeindex, node_f, d_v);
-  }
-}
-
-/** Calculate the mass of the whole fluid kernel
- *  @param[out] sum  Resulting mass
- *  @param[in]  n_a  Local node residing in array a
- */
-__global__ void calc_mass(LB_nodes_gpu n_a, float *sum) {
-
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index < para->number_of_nodes) {
-    Utils::Array<float, 4> mode;
-    calc_mass_and_momentum_mode(mode, n_a, index);
-    float Rho = mode[0] + para->rho;
-    atomicAdd(&(sum[0]), Rho);
-  }
-}
-
-/** (Re-)initialize the node force density / set the external force
- *  density in lb units
- *  @param[out] node_f  Local node force density
- */
-__global__ void reinit_node_force(LB_node_force_density_gpu node_f) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index < para->number_of_nodes) {
-    node_f.force_density[index][0] = para->ext_force_density[0];
-    node_f.force_density[index][1] = para->ext_force_density[1];
-    node_f.force_density[index][2] = para->ext_force_density[2];
-  }
-}
-
-/** Kernel to set the local density
- *
- *  @param[out] n_a              Current nodes array (double buffering!)
- *  @param[in] single_nodeindex  Node to set the velocity for
- *  @param[in] rho               Density to set
- *  @param[in] d_v               Local modes
- */
-__global__ void set_rho(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v,
-                        unsigned single_nodeindex, float rho) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-  /* Note: this sets the velocities to zero */
-  if (index == 0) {
-    float local_rho;
-
-    /* default values for fields in lattice units */
-    local_rho = (rho - para->rho);
-    d_v[single_nodeindex].rho = rho;
-
-    n_a.populations[single_nodeindex][0] = 1.0f / 3.0f * local_rho;
-    n_a.populations[single_nodeindex][1] = 1.0f / 18.0f * local_rho;
-    n_a.populations[single_nodeindex][2] = 1.0f / 18.0f * local_rho;
-    n_a.populations[single_nodeindex][3] = 1.0f / 18.0f * local_rho;
-    n_a.populations[single_nodeindex][4] = 1.0f / 18.0f * local_rho;
-    n_a.populations[single_nodeindex][5] = 1.0f / 18.0f * local_rho;
-    n_a.populations[single_nodeindex][6] = 1.0f / 18.0f * local_rho;
-    n_a.populations[single_nodeindex][7] = 1.0f / 36.0f * local_rho;
-    n_a.populations[single_nodeindex][8] = 1.0f / 36.0f * local_rho;
-    n_a.populations[single_nodeindex][9] = 1.0f / 36.0f * local_rho;
-    n_a.populations[single_nodeindex][10] = 1.0f / 36.0f * local_rho;
-    n_a.populations[single_nodeindex][11] = 1.0f / 36.0f * local_rho;
-    n_a.populations[single_nodeindex][12] = 1.0f / 36.0f * local_rho;
-    n_a.populations[single_nodeindex][13] = 1.0f / 36.0f * local_rho;
-    n_a.populations[single_nodeindex][14] = 1.0f / 36.0f * local_rho;
-    n_a.populations[single_nodeindex][15] = 1.0f / 36.0f * local_rho;
-    n_a.populations[single_nodeindex][16] = 1.0f / 36.0f * local_rho;
-    n_a.populations[single_nodeindex][17] = 1.0f / 36.0f * local_rho;
-    n_a.populations[single_nodeindex][18] = 1.0f / 36.0f * local_rho;
-  }
-}
-
-/** Set the boundary flag for all boundary nodes
- *  @param[in]  boundary_node_list    Indices of the boundary nodes
- *  @param[in]  boundary_index_list   Flag for the corresponding boundary
- *  @param[in]  boundary_velocities   Boundary velocities
- *  @param[in]  number_of_boundnodes  Number of boundary nodes
- *  @param[in]  boundaries            Boundary information
- */
-__global__ void init_boundaries(int const *boundary_node_list,
-                                int const *boundary_index_list,
-                                float const *boundary_velocities,
-                                unsigned number_of_boundnodes,
-                                LB_boundaries_gpu boundaries) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index < number_of_boundnodes) {
-    auto const node_index = boundary_node_list[index];
-    auto const boundary_index = boundary_index_list[index];
-
-    Utils::Array<float, 3> v = {
-        boundary_velocities[3 * (boundary_index - 1) + 0],
-        boundary_velocities[3 * (boundary_index - 1) + 1],
-        boundary_velocities[3 * (boundary_index - 1) + 2]};
-
-    boundaries.index[node_index] = static_cast<unsigned>(boundary_index);
-    boundaries.velocity[node_index] = v;
-  }
-}
-
-/** Reset the boundary flag of every node */
-__global__ void reset_boundaries(LB_boundaries_gpu boundaries) {
-  std::size_t index = blockIdx.y * gridDim.x * blockDim.x +
-                      blockDim.x * blockIdx.x + threadIdx.x;
-  if (index < para->number_of_nodes) {
-    boundaries.index[index] = 0;
-  }
-}
-
-/** Integration step of the LB-fluid-solver
- *  @param[in]     n_a     Local node residing in array a
- *  @param[out]    n_b     Local node residing in array b
- *  @param[in,out] d_v     Local device values
- *  @param[in,out] node_f  Local node force density
- *  @param[in]     philox_counter  Philox counter
- */
-__global__ void integrate(LB_nodes_gpu n_a, LB_nodes_gpu n_b, LB_rho_v_gpu *d_v,
-                          LB_node_force_density_gpu node_f,
-                          uint64_t philox_counter) {
-  /* every node is connected to a thread via the index */
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-  /* the 19 moments (modes) are only temporary register values */
-  Utils::Array<float, 19> mode;
-
-  if (index < para->number_of_nodes) {
-    calc_m_from_n(n_a.populations[index], mode);
-    relax_modes(mode, index, node_f, d_v);
-    thermalize_modes(mode, index, philox_counter);
-    apply_forces(index, mode, node_f, d_v);
-    normalize_modes(mode);
-    calc_n_from_modes_push(n_b, mode, index);
-  }
-}
-
-/** Integration step of the LB-fluid-solver
- *  @param[in]     n_a     Local node residing in array a
- *  @param[out]    n_b     Local node residing in array b
- *  @param[in,out] d_v     Local device values
- *  @param[in,out] node_f  Local node force density
- */
-__global__ void integrate(LB_nodes_gpu n_a, LB_nodes_gpu n_b, LB_rho_v_gpu *d_v,
-                          LB_node_force_density_gpu node_f) {
-  /* every node is connected to a thread via the index */
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-  /* the 19 moments (modes) are only temporary register values */
-  Utils::Array<float, 19> mode;
-
-  if (index < para->number_of_nodes) {
-    calc_m_from_n(n_a.populations[index], mode);
-    relax_modes(mode, index, node_f, d_v);
-    apply_forces(index, mode, node_f, d_v);
-    normalize_modes(mode);
-    calc_n_from_modes_push(n_b, mode, index);
-  }
-}
-
-/** Particle interaction kernel
- *  @param[in]  n_a                 Local node residing in array a
- *  @param[in,out]  particle_data   Particle position and velocity
- *  @param[in,out]  particle_force  Particle force
- *  @param[out] node_f              Local node force
- *  @param[in]  couple_virtual      If true, virtual particles are also coupled
- *  @param[in]  philox_counter      Philox counter
- *  @param[in]  friction            Friction constant for the particle coupling
- *  @param[in]  time_step           MD time step
- *  @tparam     no_of_neighbours    The number of neighbours to consider for
- *                                  interpolation
- */
-template <std::size_t no_of_neighbours>
-__global__ void
-calc_fluid_particle_ia(LB_nodes_gpu n_a,
-                       Utils::Span<CUDA_particle_data> particle_data,
-                       float *particle_force, LB_node_force_density_gpu node_f,
-                       bool couple_virtual, uint64_t philox_counter,
-                       float friction, float time_step) {
-
-  unsigned int part_index = blockIdx.y * gridDim.x * blockDim.x +
-                            blockDim.x * blockIdx.x + threadIdx.x;
-  Utils::Array<unsigned int, no_of_neighbours> node_index;
-  Utils::Array<float, no_of_neighbours> delta;
-  float delta_j[3];
-  if (part_index < particle_data.size()) {
-#if defined(VIRTUAL_SITES)
-    if (!particle_data[part_index].is_virtual || couple_virtual)
-#endif
-    {
-      /* force acting on the particle. delta_j will be used later to compute the
-       * force that acts back onto the fluid. */
-      calc_viscous_force<no_of_neighbours>(
-          n_a, delta, particle_data.data(), particle_force, part_index, delta_j,
-          node_index, false, philox_counter, friction, time_step);
-      calc_node_force<no_of_neighbours>(delta, delta_j, node_index, node_f);
-
-#ifdef ENGINE
-      if (particle_data[part_index].swim.swimming) {
-        calc_viscous_force<no_of_neighbours>(
-            n_a, delta, particle_data.data(), particle_force, part_index,
-            delta_j, node_index, true, philox_counter, friction, time_step);
-        calc_node_force<no_of_neighbours>(delta, delta_j, node_index, node_f);
-      }
-#endif
-    }
-  }
-}
-
-#ifdef LB_BOUNDARIES_GPU
-/** Bounce back boundary kernel
- *  @param[in]  n_curr  Pointer to local node receiving the current node field
- *  @param[in]  boundaries  Constant velocity at the boundary, set by the user
- *  @param[out] lb_boundary_force     Force on the boundary nodes
- */
-__global__ void apply_boundaries(LB_nodes_gpu n_curr,
-                                 LB_boundaries_gpu boundaries,
-                                 float *lb_boundary_force) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index < para->number_of_nodes)
-    bounce_back_boundaries(n_curr, boundaries, index, lb_boundary_force);
-}
-
-#endif
-
-/** Get physical values of the nodes (density, velocity, ...)
- *  @param[in]  n_a     Local node residing in array a
- *  @param[out] p_v     Local print values
- *  @param[out] d_v     Local device values
- *  @param[in]  node_f  Local node force
- */
-__global__ void
-get_mesoscopic_values_in_LB_units(LB_nodes_gpu n_a, LB_rho_v_pi_gpu *p_v,
-                                  LB_rho_v_gpu *d_v,
-                                  LB_node_force_density_gpu node_f) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index < para->number_of_nodes) {
-    Utils::Array<float, 19> mode;
-    calc_m_from_n(n_a.populations[index], mode);
-    calc_values_in_LB_units(n_a, mode, p_v, d_v, node_f, index, index);
-  }
-}
-
-/** Get boundary flags
- *  @param[in]  n_a                 Local node residing in array a
- *  @param[out] device_bound_array  Local device values
- */
-__global__ void lb_get_boundaries(LB_nodes_gpu n_a,
-                                  unsigned int *device_bound_array) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index < para->number_of_nodes)
-    device_bound_array[index] = n_a.boundary[index];
-}
-
-/** Print single node values kernel
- *  @param[in]  single_nodeindex  Node index
- *  @param[out] d_p_v   Result
- *  @param[in]  n_a     Local node residing in array a
- *  @param[out] d_v     Local device values
- *  @param[in]  node_f  Local node force
- */
-__global__ void lb_print_node(unsigned int single_nodeindex,
-                              LB_rho_v_pi_gpu *d_p_v, LB_nodes_gpu n_a,
-                              LB_rho_v_gpu *d_v,
-                              LB_node_force_density_gpu node_f) {
-  Utils::Array<float, 19> mode;
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index == 0) {
-    calc_m_from_n(n_a.populations[single_nodeindex], mode);
-
-    /* the following actually copies rho and v from d_v, and calculates pi */
-    calc_values_in_LB_units(n_a, mode, d_p_v, d_v, node_f, single_nodeindex, 0);
-  }
-}
-
-__global__ void momentum(LB_nodes_gpu n_a, LB_node_force_density_gpu node_f,
-                         float *sum) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index < para->number_of_nodes) {
-    float j[3] = {0.0f, 0.0f, 0.0f};
-    Utils::Array<float, 4> mode{};
-
-    calc_mass_and_momentum_mode(mode, n_a, index);
-
-    j[0] += mode[1] + 0.5f * node_f.force_density[index][0];
-    j[1] += mode[2] + 0.5f * node_f.force_density[index][1];
-    j[2] += mode[3] + 0.5f * node_f.force_density[index][2];
-
-#ifdef LB_BOUNDARIES_GPU
-    if (n_a.boundary[index])
-      j[0] = j[1] = j[2] = 0.0f;
-#endif
-
-    atomicAdd(&(sum[0]), j[0]);
-    atomicAdd(&(sum[1]), j[1]);
-    atomicAdd(&(sum[2]), j[2]);
-  }
-}
-
-/** Print single node boundary flag
- *  @param[in]  single_nodeindex  Node index
- *  @param[out] device_flag       Result
- *  @param[in]  n_a               Local node residing in array a
- */
-__global__ void lb_get_boundary_flag(unsigned int single_nodeindex,
-                                     unsigned int *device_flag,
-                                     LB_nodes_gpu n_a) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-
-  if (index == 0)
-    device_flag[0] = n_a.boundary[single_nodeindex];
-}
-
-/**********************************************************************/
-/* Host functions to setup and call kernels*/
-/**********************************************************************/
-
-void lb_get_para_pointer(LB_parameters_gpu **pointer_address) {
-  auto const error = cudaGetSymbolAddress((void **)pointer_address, para);
-  if (error != cudaSuccess) {
-    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(error));
-    errexit();
-  }
-}
-
-void lb_get_boundary_force_pointer(float **pointer_address) {
-#ifdef LB_BOUNDARIES_GPU
-  *pointer_address = lb_boundary_force;
-#endif
-}
-
-/** Initialization for the lb gpu fluid called from host
- *  @param lbpar_gpu   Pointer to parameters to setup the lb field
- */
-void lb_init_GPU(const LB_parameters_gpu &lbpar_gpu) {
-#define free_realloc_and_clear(var, size)                                      \
-  {                                                                            \
-    if ((var) != nullptr)                                                      \
-      cuda_safe_mem(cudaFree((var)));                                          \
-    cuda_safe_mem(cudaMalloc((void **)&(var), size));                          \
-    cudaMemset(var, 0, size);                                                  \
-  }
-
-  /* Allocate structs in device memory*/
-  free_realloc_and_clear(device_rho_v,
-                         lbpar_gpu.number_of_nodes * sizeof(LB_rho_v_gpu));
-
-  /* TODO: this is almost a copy of device_rho_v; think about eliminating
-   * it, and maybe pi can be added to device_rho_v in this case */
-  free_realloc_and_clear(print_rho_v_pi,
-                         lbpar_gpu.number_of_nodes * sizeof(LB_rho_v_pi_gpu));
-  free_realloc_and_clear(nodes_a.populations,
-                         lbpar_gpu.number_of_nodes *
-                             sizeof(Utils::Array<float, 19>));
-  free_realloc_and_clear(nodes_b.populations,
-                         lbpar_gpu.number_of_nodes *
-                             sizeof(Utils::Array<float, 19>));
-  free_realloc_and_clear(node_f.force_density,
-                         lbpar_gpu.number_of_nodes *
-                             sizeof(Utils::Array<float, 3>));
-#if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) || defined(EK_DEBUG)
-  free_realloc_and_clear(node_f.force_density_buf,
-                         lbpar_gpu.number_of_nodes *
-                             sizeof(Utils::Array<float, 3>));
-#endif
-  free_realloc_and_clear(boundaries.index,
-                         lbpar_gpu.number_of_nodes * sizeof(unsigned int));
-  free_realloc_and_clear(boundaries.velocity,
-                         lbpar_gpu.number_of_nodes *
-                             sizeof(Utils::Array<float, 3>));
-
-  nodes_a.boundary = nodes_b.boundary = boundaries.index;
-  nodes_a.boundary_velocity = nodes_b.boundary_velocity = boundaries.velocity;
-
-  /* write parameters in const memory */
-  cuda_safe_mem(
-      cudaMemcpyToSymbol(para, &lbpar_gpu, sizeof(LB_parameters_gpu)));
-
-  free_realloc_and_clear(device_gpu_lb_initialized, sizeof(bool));
-
-  dim3 dim_grid =
-      calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(reset_boundaries, dim_grid, threads_per_block, boundaries);
-
-  /* calc of velocity densities from given parameters and initialize the
-   * Node_Force array with zero */
-  KERNELCALL(reinit_node_force, dim_grid, threads_per_block, (node_f));
-  KERNELCALL(calc_n_from_rho_j_pi, dim_grid, threads_per_block, nodes_a,
-             device_rho_v, node_f, device_gpu_lb_initialized);
-
-  intflag = true;
-  current_nodes = &nodes_a;
-  bool host_gpu_lb_initialized = false;
-  cuda_safe_mem(cudaMemcpy(&host_gpu_lb_initialized, device_gpu_lb_initialized,
-                           sizeof(bool), cudaMemcpyDeviceToHost));
-  cudaDeviceSynchronize();
-
-  if (!host_gpu_lb_initialized) {
-    fprintf(stderr, "initialization of LB GPU code failed!\n");
-    errexit();
-  }
-}
-
-/** Reinitialization for the lb gpu fluid called from host
- *  @param lbpar_gpu   Pointer to parameters to setup the lb field
- */
-void lb_reinit_GPU(LB_parameters_gpu *lbpar_gpu) {
-  /* write parameters in const memory */
-  cuda_safe_mem(cudaMemcpyToSymbol(para, lbpar_gpu, sizeof(LB_parameters_gpu)));
-
-  dim3 dim_grid =
-      calculate_dim_grid(lbpar_gpu->number_of_nodes, 4, threads_per_block);
-
-  /* calc of velocity densities from given parameters and initialize the
-   * Node_Force array with zero */
-  KERNELCALL(calc_n_from_rho_j_pi, dim_grid, threads_per_block, nodes_a,
-             device_rho_v, node_f, device_gpu_lb_initialized);
-}
-
-#ifdef LB_BOUNDARIES_GPU
-/** Setup and call boundaries from the host
- *  @param host_n_lb_boundaries        Number of LB boundaries
- *  @param number_of_boundnodes        Number of boundnodes
- *  @param host_boundary_node_list     The indices of the boundary nodes
- *  @param host_boundary_index_list    The flag representing the corresponding
- *                                     boundary
- *  @param host_lb_boundary_velocity   The constant velocity at the boundary,
- *                                     set by the user
- */
-void lb_init_boundaries_GPU(std::size_t host_n_lb_boundaries,
-                            unsigned number_of_boundnodes,
-                            int *host_boundary_node_list,
-                            int *host_boundary_index_list,
-                            float *host_lb_boundary_velocity) {
-
-  float *boundary_velocity = nullptr;
-  int *boundary_node_list = nullptr;
-  int *boundary_index_list = nullptr;
-
-  auto const size_of_boundindex = number_of_boundnodes * sizeof(int);
-  cuda_safe_mem(cudaMalloc((void **)&boundary_node_list, size_of_boundindex));
-  cuda_safe_mem(cudaMalloc((void **)&boundary_index_list, size_of_boundindex));
-  cuda_safe_mem(cudaMemcpy(boundary_index_list, host_boundary_index_list,
-                           size_of_boundindex, cudaMemcpyHostToDevice));
-  cuda_safe_mem(cudaMemcpy(boundary_node_list, host_boundary_node_list,
-                           size_of_boundindex, cudaMemcpyHostToDevice));
-  cuda_safe_mem(cudaMalloc((void **)&lb_boundary_force,
-                           3 * host_n_lb_boundaries * sizeof(float)));
-  cuda_safe_mem(cudaMalloc((void **)&boundary_velocity,
-                           3 * host_n_lb_boundaries * sizeof(float)));
-  cuda_safe_mem(
-      cudaMemcpy(boundary_velocity, host_lb_boundary_velocity,
-                 3 * LBBoundaries::lbboundaries.size() * sizeof(float),
-                 cudaMemcpyHostToDevice));
-
-  /* values for the kernel call */
-  dim3 dim_grid =
-      calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(reset_boundaries, dim_grid, threads_per_block, boundaries);
-
-  if (LBBoundaries::lbboundaries.empty()) {
-    cudaDeviceSynchronize();
-    return;
-  }
-
-  if (number_of_boundnodes == 0) {
-    fprintf(stderr,
-            "WARNING: boundary cmd executed but no boundary node found!\n");
-  } else {
-    dim3 dim_grid_bound =
-        calculate_dim_grid(number_of_boundnodes, 4, threads_per_block);
-
-    KERNELCALL(init_boundaries, dim_grid_bound, threads_per_block,
-               boundary_node_list, boundary_index_list, boundary_velocity,
-               number_of_boundnodes, boundaries);
-  }
-
-  cudaFree(boundary_velocity);
-  cudaFree(boundary_node_list);
-  cudaFree(boundary_index_list);
-
-  cudaDeviceSynchronize();
-}
-#endif
-/** Setup and call extern single node force initialization from the host
- *  @param lbpar_gpu    Host parameter struct
- */
-void lb_reinit_extern_nodeforce_GPU(LB_parameters_gpu *lbpar_gpu) {
-  cuda_safe_mem(cudaMemcpyToSymbol(para, lbpar_gpu, sizeof(LB_parameters_gpu)));
-
-  dim3 dim_grid =
-      calculate_dim_grid(lbpar_gpu->number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(reinit_node_force, dim_grid, threads_per_block, node_f);
-}
-
-/** Setup and call particle kernel from the host
- *  @tparam no_of_neighbours       The number of neighbours to consider for
- *                                 interpolation
- */
-template <std::size_t no_of_neighbours>
-void lb_calc_particle_lattice_ia_gpu(bool couple_virtual, double friction,
-                                     double time_step) {
-  auto device_particles = gpu_get_particle_pointer();
-
-  if (device_particles.empty()) {
-    return;
-  }
-
-  dim3 dim_grid = calculate_dim_grid(
-      static_cast<unsigned>(device_particles.size()), 4, threads_per_block);
-  if (lbpar_gpu.kT > 0.f) {
-    assert(rng_counter_coupling_gpu);
-    KERNELCALL(calc_fluid_particle_ia<no_of_neighbours>, dim_grid,
-               threads_per_block, *current_nodes, device_particles,
-               gpu_get_particle_force_pointer(), node_f, couple_virtual,
-               rng_counter_coupling_gpu->value(), static_cast<float>(friction),
-               static_cast<float>(time_step));
-  } else {
-    // We use a dummy value for the RNG counter if no temperature is set.
-    KERNELCALL(calc_fluid_particle_ia<no_of_neighbours>, dim_grid,
-               threads_per_block, *current_nodes, device_particles,
-               gpu_get_particle_force_pointer(), node_f, couple_virtual, 0,
-               static_cast<float>(friction), static_cast<float>(time_step));
-  }
-}
-template void lb_calc_particle_lattice_ia_gpu<8>(bool couple_virtual,
-                                                 double friction,
-                                                 double time_step);
-template void lb_calc_particle_lattice_ia_gpu<27>(bool couple_virtual,
-                                                  double friction,
-                                                  double time_step);
-
-/** Setup and call kernel for getting macroscopic fluid values of all nodes
- *  @param host_values   struct to save the gpu values
- */
-void lb_get_values_GPU(LB_rho_v_pi_gpu *host_values) {
-  dim3 dim_grid =
-      calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(get_mesoscopic_values_in_LB_units, dim_grid, threads_per_block,
-             *current_nodes, print_rho_v_pi, device_rho_v, node_f);
-  cuda_safe_mem(cudaMemcpy(host_values, print_rho_v_pi,
-                           lbpar_gpu.number_of_nodes * sizeof(LB_rho_v_pi_gpu),
-                           cudaMemcpyDeviceToHost));
-}
-
-/** Get all the boundary flags for all nodes
- *  @param host_bound_array   here go the values of the boundary flag
- */
-void lb_get_boundary_flags_GPU(unsigned int *host_bound_array) {
-  unsigned int *device_bound_array;
-  cuda_safe_mem(cudaMalloc((void **)&device_bound_array,
-                           lbpar_gpu.number_of_nodes * sizeof(unsigned int)));
-
-  dim3 dim_grid =
-      calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(lb_get_boundaries, dim_grid, threads_per_block, *current_nodes,
-             device_bound_array);
-
-  cuda_safe_mem(cudaMemcpy(host_bound_array, device_bound_array,
-                           lbpar_gpu.number_of_nodes * sizeof(unsigned int),
-                           cudaMemcpyDeviceToHost));
-
-  cudaFree(device_bound_array);
-}
-
-/** Setup and call kernel for getting macroscopic fluid values of a single
- *  node
- */
-void lb_print_node_GPU(unsigned single_nodeindex,
-                       LB_rho_v_pi_gpu *host_print_values) {
-  LB_rho_v_pi_gpu *device_print_values;
-  cuda_safe_mem(
-      cudaMalloc((void **)&device_print_values, sizeof(LB_rho_v_pi_gpu)));
-  unsigned threads_per_block_print = 1;
-  unsigned blocks_per_grid_print_y = 1;
-  unsigned blocks_per_grid_print_x = 1;
-  dim3 dim_grid_print =
-      make_uint3(blocks_per_grid_print_x, blocks_per_grid_print_y, 1);
-
-  KERNELCALL(lb_print_node, dim_grid_print, threads_per_block_print,
-             single_nodeindex, device_print_values, *current_nodes,
-             device_rho_v, node_f);
-
-  cuda_safe_mem(cudaMemcpy(host_print_values, device_print_values,
-                           sizeof(LB_rho_v_pi_gpu), cudaMemcpyDeviceToHost));
-  cudaFree(device_print_values);
-}
-
-/** Setup and call kernel to calculate the total momentum of the hole fluid
- *  @param mass   value of the mass calculated on the GPU
- */
-void lb_calc_fluid_mass_GPU(double *mass) {
-  float *tot_mass;
-  float cpu_mass = 0.0f;
-  cuda_safe_mem(cudaMalloc((void **)&tot_mass, sizeof(float)));
-  cuda_safe_mem(
-      cudaMemcpy(tot_mass, &cpu_mass, sizeof(float), cudaMemcpyHostToDevice));
-
-  dim3 dim_grid =
-      calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(calc_mass, dim_grid, threads_per_block, *current_nodes, tot_mass);
-
-  cuda_safe_mem(
-      cudaMemcpy(&cpu_mass, tot_mass, sizeof(float), cudaMemcpyDeviceToHost));
-
-  cudaFree(tot_mass);
-  mass[0] = (double)(cpu_mass);
-}
-
-/** Setup and call kernel to calculate the total momentum of the whole fluid
- *  @param host_mom   value of the momentum calculated on the GPU
- */
-void lb_calc_fluid_momentum_GPU(double *host_mom) {
-  float *tot_momentum;
-  float host_momentum[3] = {0.0f, 0.0f, 0.0f};
-  cuda_safe_mem(cudaMalloc((void **)&tot_momentum, 3 * sizeof(float)));
-  cuda_safe_mem(cudaMemcpy(tot_momentum, host_momentum, 3 * sizeof(float),
-                           cudaMemcpyHostToDevice));
-
-  dim3 dim_grid =
-      calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block);
-
-  KERNELCALL(momentum, dim_grid, threads_per_block, *current_nodes, node_f,
-             tot_momentum);
-
-  cuda_safe_mem(cudaMemcpy(host_momentum, tot_momentum, 3 * sizeof(float),
-                           cudaMemcpyDeviceToHost));
-
-  cudaFree(tot_momentum);
-  auto const lattice_speed = lbpar_gpu.agrid / lbpar_gpu.tau;
-  host_mom[0] = static_cast<double>(host_momentum[0] * lattice_speed);
-  host_mom[1] = static_cast<double>(host_momentum[1] * lattice_speed);
-  host_mom[2] = static_cast<double>(host_momentum[2] * lattice_speed);
-}
-
-/** Setup and call kernel for getting macroscopic fluid values of all nodes
- *  @param[out] host_checkpoint_vd   LB populations
- */
-void lb_save_checkpoint_GPU(float *const host_checkpoint_vd) {
-  cuda_safe_mem(cudaMemcpy(host_checkpoint_vd, current_nodes->populations,
-                           lbpar_gpu.number_of_nodes * 19 * sizeof(float),
-                           cudaMemcpyDeviceToHost));
-}
-
-/** Setup and call kernel for getting macroscopic fluid values of all nodes
- *  @param[in] host_checkpoint_vd    LB populations
- */
-void lb_load_checkpoint_GPU(float const *const host_checkpoint_vd) {
-  current_nodes = &nodes_a;
-  intflag = true;
-
-  cuda_safe_mem(
-      cudaMemcpy(current_nodes->populations, host_checkpoint_vd,
-                 lbpar_gpu.number_of_nodes * sizeof(Utils::Array<float, 19>),
-                 cudaMemcpyHostToDevice));
-}
-
-/** Setup and call kernel to get the boundary flag of a single node
- *  @param single_nodeindex   number of the node to get the flag for
- *  @param host_flag          here goes the value of the boundary flag
- */
-void lb_get_boundary_flag_GPU(unsigned int single_nodeindex,
-                              unsigned int *host_flag) {
-  unsigned int *device_flag;
-  cuda_safe_mem(cudaMalloc((void **)&device_flag, sizeof(unsigned int)));
-  unsigned threads_per_block_flag = 1;
-  unsigned blocks_per_grid_flag_y = 1;
-  unsigned blocks_per_grid_flag_x = 1;
-  dim3 dim_grid_flag =
-      make_uint3(blocks_per_grid_flag_x, blocks_per_grid_flag_y, 1);
-
-  KERNELCALL(lb_get_boundary_flag, dim_grid_flag, threads_per_block_flag,
-             single_nodeindex, device_flag, *current_nodes);
-
-  cuda_safe_mem(cudaMemcpy(host_flag, device_flag, sizeof(unsigned int),
-                           cudaMemcpyDeviceToHost));
-
-  cudaFree(device_flag);
-}
-
-/** Set the density at a single node
- *  @param single_nodeindex   the node to set the velocity for
- *  @param host_rho           the density to set
- */
-void lb_set_node_rho_GPU(unsigned single_nodeindex, float host_rho) {
-  unsigned threads_per_block_flag = 1;
-  unsigned blocks_per_grid_flag_y = 1;
-  unsigned blocks_per_grid_flag_x = 1;
-  dim3 dim_grid_flag =
-      make_uint3(blocks_per_grid_flag_x, blocks_per_grid_flag_y, 1);
-  KERNELCALL(set_rho, dim_grid_flag, threads_per_block_flag, *current_nodes,
-             device_rho_v, single_nodeindex, host_rho);
-}
-
-/** Set the net velocity at a single node
- *  @param single_nodeindex   the node to set the velocity for
- *  @param host_velocity      the velocity to set
- */
-void lb_set_node_velocity_GPU(unsigned single_nodeindex, float *host_velocity) {
-  float *device_velocity;
-  cuda_safe_mem(cudaMalloc((void **)&device_velocity, 3 * sizeof(float)));
-  cuda_safe_mem(cudaMemcpy(device_velocity, host_velocity, 3 * sizeof(float),
-                           cudaMemcpyHostToDevice));
-  unsigned threads_per_block_flag = 1;
-  unsigned blocks_per_grid_flag_y = 1;
-  unsigned blocks_per_grid_flag_x = 1;
-  dim3 dim_grid_flag =
-      make_uint3(blocks_per_grid_flag_x, blocks_per_grid_flag_y, 1);
-
-  KERNELCALL(set_u_from_rho_v_pi, dim_grid_flag, threads_per_block_flag,
-             *current_nodes, single_nodeindex, device_velocity, device_rho_v,
-             node_f);
-  float force_density[3] = {0.0f, 0.0f, 0.0f};
-  float *device_force_density;
-  cuda_safe_mem(cudaMalloc((void **)&device_force_density, 3 * sizeof(float)));
-  cuda_safe_mem(cudaMemcpy(device_force_density, force_density,
-                           3 * sizeof(float), cudaMemcpyHostToDevice));
-  KERNELCALL(set_force_density, dim_grid_flag, threads_per_block_flag,
-             single_nodeindex, device_force_density, node_f);
-  cudaFree(device_velocity);
-  cudaFree(device_force_density);
-}
-
-/** Reinitialize parameters
- *  @param lbpar_gpu   struct containing the parameters of the fluid
- */
-void reinit_parameters_GPU(LB_parameters_gpu *lbpar_gpu) {
-  /* write parameters in const memory */
-  cuda_safe_mem(cudaMemcpyToSymbol(para, lbpar_gpu, sizeof(LB_parameters_gpu)));
-}
-
-/** Integration kernel for the lb gpu fluid update called from host */
-void lb_integrate_GPU() {
-  dim3 dim_grid =
-      calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block);
-#ifdef LB_BOUNDARIES_GPU
-  if (!LBBoundaries::lbboundaries.empty()) {
-    cuda_safe_mem(
-        cudaMemset(lb_boundary_force, 0,
-                   3 * LBBoundaries::lbboundaries.size() * sizeof(float)));
-  }
-#endif
-
-  /* call of fluid step */
-  if (intflag) {
-    if (lbpar_gpu.kT > 0.0) {
-      assert(rng_counter_fluid_gpu);
-      KERNELCALL(integrate, dim_grid, threads_per_block, nodes_a, nodes_b,
-                 device_rho_v, node_f, rng_counter_fluid_gpu->value());
-    } else {
-      KERNELCALL(integrate, dim_grid, threads_per_block, nodes_a, nodes_b,
-                 device_rho_v, node_f);
-    }
-    current_nodes = &nodes_b;
-    intflag = false;
-  } else {
-    if (lbpar_gpu.kT > 0.0) {
-      assert(rng_counter_fluid_gpu);
-      KERNELCALL(integrate, dim_grid, threads_per_block, nodes_b, nodes_a,
-                 device_rho_v, node_f, rng_counter_fluid_gpu->value());
-    } else {
-      KERNELCALL(integrate, dim_grid, threads_per_block, nodes_b, nodes_a,
-                 device_rho_v, node_f);
-    }
-    current_nodes = &nodes_a;
-    intflag = true;
-  }
-
-#ifdef LB_BOUNDARIES_GPU
-  if (!LBBoundaries::lbboundaries.empty()) {
-    KERNELCALL(apply_boundaries, dim_grid, threads_per_block, *current_nodes,
-               boundaries, lb_boundary_force);
-  }
-#endif
-}
-
-void lb_gpu_get_boundary_forces(std::vector<double> &forces) {
-#ifdef LB_BOUNDARIES_GPU
-  std::vector<float> temp(3 * LBBoundaries::lbboundaries.size());
-  cuda_safe_mem(cudaMemcpy(temp.data(), lb_boundary_force,
-                           temp.size() * sizeof(float),
-                           cudaMemcpyDeviceToHost));
-  std::transform(temp.begin(), temp.end(), forces.begin(),
-                 [](float val) { return -static_cast<double>(val); });
-#endif
-}
-
-struct lb_lbfluid_mass_of_particle {
-  __host__ __device__ float operator()(CUDA_particle_data particle) const {
-#ifdef MASS
-    return particle.mass;
-#else
-    return 1.f;
-#endif
-  }
-};
-
-/** Set the populations of a specific node on the GPU
- *  @param[out] n_a         Local node residing in array a
- *  @param[in]  population  New population
- *  @param[in]  x           x-coordinate of node
- *  @param[in]  y           y-coordinate of node
- *  @param[in]  z           z-coordinate of node
- */
-__global__ void lb_lbfluid_set_population_kernel(LB_nodes_gpu n_a,
-                                                 float const population[LBQ],
-                                                 int x, int y, int z) {
-  auto const index = static_cast<unsigned>(xyz_to_index(x, y, z));
-
-  for (unsigned i = 0; i < LBQ; ++i) {
-    n_a.populations[index][i] = population[i];
-  }
-}
-
-/** Interface to set the populations of a specific node for the GPU
- *  @param[in] xyz              Node coordinates
- *  @param[in] population_host  Population
- */
-void lb_lbfluid_set_population(const Utils::Vector3i &xyz,
-                               float population_host[LBQ]) {
-  float *population_device;
-  cuda_safe_mem(cudaMalloc((void **)&population_device, LBQ * sizeof(float)));
-  cuda_safe_mem(cudaMemcpy(population_device, population_host,
-                           LBQ * sizeof(float), cudaMemcpyHostToDevice));
-
-  dim3 dim_grid = make_uint3(1, 1, 1);
-  KERNELCALL(lb_lbfluid_set_population_kernel, dim_grid, 1, *current_nodes,
-             population_device, xyz[0], xyz[1], xyz[2]);
-
-  cuda_safe_mem(cudaFree(population_device));
-}
-
-/** Get the populations of a specific node on the GPU
- *  @param[in]  n_a         Local node residing in array a
- *  @param[out] population  Population
- *  @param[in]  x           x-coordinate of node
- *  @param[in]  y           y-coordinate of node
- *  @param[in]  z           z-coordinate of node
- */
-__global__ void lb_lbfluid_get_population_kernel(LB_nodes_gpu n_a,
-                                                 float population[LBQ], int x,
-                                                 int y, int z) {
-  auto const index = static_cast<unsigned>(xyz_to_index(x, y, z));
-
-  for (unsigned i = 0; i < LBQ; ++i) {
-    population[i] = n_a.populations[index][i];
-  }
-}
-
-/** Interface to get the populations of a specific node for the GPU
- *  @param[in]  xyz              Node coordinates
- *  @param[out] population_host  Population
- */
-void lb_lbfluid_get_population(const Utils::Vector3i &xyz,
-                               float population_host[LBQ]) {
-  float *population_device;
-  cuda_safe_mem(cudaMalloc((void **)&population_device, LBQ * sizeof(float)));
-
-  dim3 dim_grid = make_uint3(1, 1, 1);
-  KERNELCALL(lb_lbfluid_get_population_kernel, dim_grid, 1, *current_nodes,
-             population_device, xyz[0], xyz[1], xyz[2]);
-
-  cuda_safe_mem(cudaMemcpy(population_host, population_device,
-                           LBQ * sizeof(float), cudaMemcpyDeviceToHost));
-
-  cuda_safe_mem(cudaFree(population_device));
-}
-
-/**
- * @brief Velocity interpolation functor
- * @tparam no_of_neighbours     The number of neighbours to consider for
- *                              interpolation
- */
-template <std::size_t no_of_neighbours> struct interpolation {
-  LB_nodes_gpu current_nodes_gpu;
-  LB_rho_v_gpu *d_v_gpu;
-  interpolation(LB_nodes_gpu _current_nodes_gpu, LB_rho_v_gpu *_d_v_gpu)
-      : current_nodes_gpu(_current_nodes_gpu), d_v_gpu(_d_v_gpu) {}
-  __device__ float3 operator()(const float3 &position) const {
-    float _position[3] = {position.x, position.y, position.z};
-    Utils::Array<unsigned int, no_of_neighbours> node_indices;
-    Utils::Array<float, no_of_neighbours> delta;
-    return velocity_interpolation(current_nodes_gpu, _position, node_indices,
-                                  delta);
-  }
-};
-
-struct Plus : public thrust::binary_function<Utils::Array<float, 6>,
-                                             Utils::Array<float, 6>,
-                                             Utils::Array<float, 6>> {
-
-  __device__ Utils::Array<float, 6>
-  operator()(Utils::Array<float, 6> const &a, Utils::Array<float, 6> const &b) {
-    return {a[0] + b[0], a[1] + b[1], a[2] + b[2],
-            a[3] + b[3], a[4] + b[4], a[5] + b[5]};
-  }
-};
-
-struct Stress {
-  template <typename T>
-  __device__ Utils::Array<float, 6> operator()(T const &t) const {
-    Utils::Array<float, 19> modes;
-    calc_m_from_n(thrust::get<0>(t), modes); // NOLINT
-    return stress_from_stress_modes(stress_modes(thrust::get<1>(t), modes));
-  }
-};
-
-Utils::Array<float, 6> stress_tensor_GPU() {
-  if (not current_nodes->populations or not device_rho_v)
-    throw std::runtime_error("LB not initialized");
-
-  auto pop_begin = thrust::device_pointer_cast(current_nodes->populations);
-  auto rho_v_begin = thrust::device_pointer_cast(device_rho_v);
-  auto begin =
-      thrust::make_zip_iterator(thrust::make_tuple(pop_begin, rho_v_begin));
-
-  auto pop_end =
-      thrust::device_pointer_cast(pop_begin + lbpar_gpu.number_of_nodes);
-  auto rho_v_end =
-      thrust::device_pointer_cast(rho_v_begin + lbpar_gpu.number_of_nodes);
-  auto end = thrust::make_zip_iterator(thrust::make_tuple(pop_end, rho_v_end));
-
-  return thrust::transform_reduce(begin, end, Stress(),
-                                  Utils::Array<float, 6>{}, Plus());
-};
-
-template <std::size_t no_of_neighbours>
-void lb_get_interpolated_velocity_gpu(double const *positions,
-                                      double *velocities, int length) {
-  auto const size = static_cast<unsigned>(length);
-  thrust::host_vector<float3> positions_host(size);
-  for (unsigned p = 0; p < 3 * size; p += 3) {
-    // Cast double coming from python to float.
-    positions_host[p / 3].x = static_cast<float>(positions[p]);
-    positions_host[p / 3].y = static_cast<float>(positions[p + 1]);
-    positions_host[p / 3].z = static_cast<float>(positions[p + 2]);
-  }
-  thrust::device_vector<float3> positions_device = positions_host;
-  thrust::device_vector<float3> velocities_device(size);
-  thrust::transform(
-      positions_device.begin(), positions_device.end(),
-      velocities_device.begin(),
-      interpolation<no_of_neighbours>(*current_nodes, device_rho_v));
-  thrust::host_vector<float3> velocities_host = velocities_device;
-  unsigned index = 0;
-  for (auto v : velocities_host) {
-    velocities[index] = static_cast<double>(v.x);
-    velocities[index + 1] = static_cast<double>(v.y);
-    velocities[index + 2] = static_cast<double>(v.z);
-    index += 3;
-  }
-}
-template void lb_get_interpolated_velocity_gpu<8>(double const *positions,
-                                                  double *velocities,
-                                                  int length);
-template void lb_get_interpolated_velocity_gpu<27>(double const *positions,
-                                                   double *velocities,
-                                                   int length);
-
-void linear_velocity_interpolation(double const *positions, double *velocities,
-                                   int length) {
-  return lb_get_interpolated_velocity_gpu<8>(positions, velocities, length);
-}
-
-void quadratic_velocity_interpolation(double const *positions,
-                                      double *velocities, int length) {
-  return lb_get_interpolated_velocity_gpu<27>(positions, velocities, length);
-}
-
-void lb_coupling_set_rng_state_gpu(uint64_t counter) {
-  rng_counter_coupling_gpu = Utils::Counter<uint64_t>(counter);
-}
-
-void lb_fluid_set_rng_state_gpu(uint64_t counter) {
-  rng_counter_fluid_gpu = Utils::Counter<uint64_t>(counter);
-}
-
-uint64_t lb_coupling_get_rng_state_gpu() {
-  assert(rng_counter_coupling_gpu);
-  return rng_counter_coupling_gpu->value();
-}
-uint64_t lb_fluid_get_rng_state_gpu() {
-  assert(rng_counter_fluid_gpu);
-  return rng_counter_fluid_gpu->value();
-}
-
-#endif /* CUDA */
diff --git a/src/core/integrate.cpp b/src/core/integrate.cpp
index a229ccecde8..8bb284e44bb 100644
--- a/src/core/integrate.cpp
+++ b/src/core/integrate.cpp
@@ -44,6 +44,7 @@
 #include "event.hpp"
 #include "forces.hpp"
 #include "grid.hpp"
+#include "grid_based_algorithms/ek_container.hpp"
 #include "grid_based_algorithms/lb_interface.hpp"
 #include "grid_based_algorithms/lb_particle_coupling.hpp"
 #include "interactions.hpp"
@@ -73,6 +74,12 @@
 #include <callgrind.h>
 #endif
 
+#ifdef WALBERLA
+#ifdef WALBERLA_STATIC_ASSERT
+#error "waLberla headers should not be visible to the ESPResSo core"
+#endif
+#endif
+
 int integ_switch = INTEG_METHOD_NVT;
 
 /** Time step for the integration. */
@@ -92,6 +99,7 @@ bool recalc_forces = true;
 static double verlet_reuse = 0.0;
 
 static int fluid_step = 0;
+static int ek_step = 0;
 
 namespace {
 volatile std::sig_atomic_t ctrl_C = 0;
@@ -101,6 +109,8 @@ namespace LeesEdwards {
 /** @brief Currently active Lees-Edwards protocol. */
 static std::shared_ptr<ActiveProtocol> protocol = nullptr;
 
+std::weak_ptr<ActiveProtocol> get_protocol() { return protocol; }
+
 /**
  * @brief Update the Lees-Edwards parameters of the box geometry
  * for the current simulation time.
@@ -348,7 +358,7 @@ int integrate(int n_steps, int reuse_forces) {
     force_calc(cell_structure, time_step, temperature);
 
 #ifdef VIRTUAL_SITES
-    virtual_sites()->after_force_calc();
+    virtual_sites()->after_force_calc(time_step);
 #endif
     integrator_step_2(particles, temperature);
     LeesEdwards::run_kernel<LeesEdwards::UpdateOffset>();
@@ -361,16 +371,48 @@ int integrate(int n_steps, int reuse_forces) {
 
     // propagate one-step functionalities
     if (integ_switch != INTEG_METHOD_STEEPEST_DESCENT) {
-      if (lb_lbfluid_get_lattice_switch() != ActiveLB::NONE) {
-        auto const tau = lb_lbfluid_get_tau();
-        auto const lb_steps_per_md_step =
-            static_cast<int>(std::round(tau / time_step));
+      auto const lb_active = LB::get_lattice_switch() != ActiveLB::NONE;
+#ifdef WALBERLA
+      auto const ek_active = not EK::ek_container.empty();
+#else
+      auto constexpr ek_active = false;
+#endif
+
+      if (lb_active and ek_active) {
+        // assume that they are coupled, which is not necessarily true
+        auto const lb_steps_per_md_step = LB::get_steps_per_md_step(time_step);
+        auto const ek_steps_per_md_step = EK::get_steps_per_md_step(time_step);
+
+        if (lb_steps_per_md_step != ek_steps_per_md_step) {
+          runtimeErrorMsg()
+              << "LB and EK are active but with different time steps.";
+        }
+
+        // only use fluid_step in this case
+        assert(fluid_step == ek_step);
+
+        fluid_step += 1;
+        if (fluid_step >= lb_steps_per_md_step) {
+          fluid_step = 0;
+          LB::propagate();
+          EK::propagate();
+        }
+        lb_lbcoupling_propagate();
+      } else if (lb_active) {
+        auto const lb_steps_per_md_step = LB::get_steps_per_md_step(time_step);
         fluid_step += 1;
         if (fluid_step >= lb_steps_per_md_step) {
           fluid_step = 0;
-          lb_lbfluid_propagate();
+          LB::propagate();
         }
         lb_lbcoupling_propagate();
+      } else if (ek_active) {
+        auto const ek_steps_per_md_step = EK::get_steps_per_md_step(time_step);
+        ek_step += 1;
+        if (ek_step >= ek_steps_per_md_step) {
+          ek_step = 0;
+          EK::propagate();
+        }
       }
 
 #ifdef VIRTUAL_SITES
@@ -507,8 +549,9 @@ void increment_sim_time(double amount) { sim_time += amount; }
 void set_time_step(double value) {
   if (value <= 0.)
     throw std::domain_error("time_step must be > 0.");
-  if (lb_lbfluid_get_lattice_switch() != ActiveLB::NONE)
-    check_tau_time_step_consistency(lb_lbfluid_get_tau(), value);
+  if (LB::get_lattice_switch() != ActiveLB::NONE) {
+    LB::check_tau_time_step_consistency(LB::get_tau(), value);
+  }
   ::time_step = value;
   on_timestep_change();
 }
diff --git a/src/core/lees_edwards/lees_edwards.hpp b/src/core/lees_edwards/lees_edwards.hpp
index abadb0966d5..629fd4cfed9 100644
--- a/src/core/lees_edwards/lees_edwards.hpp
+++ b/src/core/lees_edwards/lees_edwards.hpp
@@ -26,6 +26,7 @@
 #include <cmath>
 #include <memory>
 
+#include <iostream>
 namespace LeesEdwards {
 class UpdateOffset {
 protected:
@@ -36,9 +37,6 @@ class UpdateOffset {
 
   void operator()(Particle &p, double pos_prefactor = 1.0) const {
     // Disabled as long as we do not use a two step LE update
-    //    p.lees_edwards_offset() -= pos_prefactor *
-    //                               static_cast<double>(p.lees_edwards_flag())
-    //                               * m_le.pos_offset / 2;
   }
 };
 
@@ -92,6 +90,9 @@ inline Utils::Vector3d verlet_list_offset(BoxGeometry const &box,
   return {};
 }
 
+/** @brief Get currently active Lees-Edwards protocol. */
+std::weak_ptr<ActiveProtocol> get_protocol();
+
 /** @brief Set a new Lees-Edwards protocol. */
 void set_protocol(std::shared_ptr<ActiveProtocol> new_protocol);
 
diff --git a/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp b/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp
index a49718719d9..0d8cea95aaa 100644
--- a/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp
+++ b/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp
@@ -39,9 +39,8 @@ CylindricalLBFluxDensityProfileAtParticlePositions::evaluate(
 
   for (auto p : particles) {
     auto const pos = folded_position(traits.position(p), box_geo);
-    auto const v = lb_lbfluid_get_interpolated_velocity(pos) *
-                   lb_lbfluid_get_lattice_speed();
-    auto const flux_dens = lb_lbfluid_get_interpolated_density(pos) * v;
+    auto const v = LB::get_interpolated_velocity(pos) * LB::get_lattice_speed();
+    auto const flux_dens = LB::get_interpolated_density(pos) * v;
 
     histogram.update(Utils::transform_coordinate_cartesian_to_cylinder(
                          pos - transform_params->center(),
diff --git a/src/core/observables/CylindricalLBVelocityProfile.cpp b/src/core/observables/CylindricalLBVelocityProfile.cpp
index 068184b16c4..b0b98bc7be3 100644
--- a/src/core/observables/CylindricalLBVelocityProfile.cpp
+++ b/src/core/observables/CylindricalLBVelocityProfile.cpp
@@ -33,8 +33,8 @@ namespace Observables {
 std::vector<double> CylindricalLBVelocityProfile::operator()() const {
   Utils::CylindricalHistogram<double, 3> histogram(n_bins(), limits());
   for (auto const &p : sampling_positions) {
-    auto const velocity = lb_lbfluid_get_interpolated_velocity(p) *
-                          lb_lbfluid_get_lattice_speed();
+    auto const velocity =
+        LB::get_interpolated_velocity(p) * LB::get_lattice_speed();
     auto const pos_shifted = p - transform_params->center();
     auto const pos_cyl = Utils::transform_coordinate_cartesian_to_cylinder(
         pos_shifted, transform_params->axis(), transform_params->orientation());
diff --git a/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp b/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp
index 35bbb4f8bb0..d7ec93b4c0c 100644
--- a/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp
+++ b/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp
@@ -37,8 +37,7 @@ std::vector<double> CylindricalLBVelocityProfileAtParticlePositions::evaluate(
 
   for (auto const &p : particles) {
     auto const pos = folded_position(traits.position(p), box_geo);
-    auto const v = lb_lbfluid_get_interpolated_velocity(pos) *
-                   lb_lbfluid_get_lattice_speed();
+    auto const v = LB::get_interpolated_velocity(pos) * LB::get_lattice_speed();
 
     histogram.update(
         Utils::transform_coordinate_cartesian_to_cylinder(
diff --git a/src/core/observables/LBFluidPressureTensor.hpp b/src/core/observables/LBFluidPressureTensor.hpp
index 758278239f5..933d4c17c11 100644
--- a/src/core/observables/LBFluidPressureTensor.hpp
+++ b/src/core/observables/LBFluidPressureTensor.hpp
@@ -33,12 +33,9 @@ class LBFluidPressureTensor : public Observable {
   std::vector<std::size_t> shape() const override { return {3, 3}; }
   std::vector<double> operator()() const override {
     auto const unit_conversion =
-        1. / (lb_lbfluid_get_agrid() * Utils::sqr(lb_lbfluid_get_tau()));
-    auto const lower_triangle =
-        lb_lbfluid_get_pressure_tensor() * unit_conversion;
-    return {lower_triangle[0], lower_triangle[1], lower_triangle[3],
-            lower_triangle[1], lower_triangle[2], lower_triangle[4],
-            lower_triangle[3], lower_triangle[4], lower_triangle[5]};
+        1. / (LB::get_agrid() * Utils::sqr(LB::get_tau()));
+    auto const tensor = LB::get_pressure_tensor() * unit_conversion;
+    return tensor.as_vector();
   }
 };
 
diff --git a/src/core/observables/LBVelocityProfile.cpp b/src/core/observables/LBVelocityProfile.cpp
index 2471dfb9ebc..4c8b2740d37 100644
--- a/src/core/observables/LBVelocityProfile.cpp
+++ b/src/core/observables/LBVelocityProfile.cpp
@@ -32,8 +32,7 @@ namespace Observables {
 std::vector<double> LBVelocityProfile::operator()() const {
   Utils::Histogram<double, 3> histogram(n_bins(), limits());
   for (auto const &p : sampling_positions) {
-    const auto v = lb_lbfluid_get_interpolated_velocity(p) *
-                   lb_lbfluid_get_lattice_speed();
+    const auto v = LB::get_interpolated_velocity(p) * LB::get_lattice_speed();
     histogram.update(p, v);
   }
   auto hist_tmp = histogram.get_histogram();
diff --git a/src/core/thermostat.cpp b/src/core/thermostat.cpp
index bc56e228838..e0780821818 100644
--- a/src/core/thermostat.cpp
+++ b/src/core/thermostat.cpp
@@ -28,6 +28,7 @@
 #include "bonded_interactions/thermalized_bond_utils.hpp"
 #include "communication.hpp"
 #include "dpd.hpp"
+#include "errorhandling.hpp"
 #include "event.hpp"
 #include "integrate.hpp"
 #include "npt.hpp"
@@ -196,7 +197,11 @@ void mpi_set_thermo_virtual(bool thermo_virtual) {
 
 void mpi_set_temperature_local(double temperature) {
   ::temperature = temperature;
-  on_temperature_change();
+  try {
+    on_temperature_change();
+  } catch (std::exception const &err) {
+    runtimeErrorMsg() << err.what();
+  }
   on_thermostat_param_change();
 }
 
@@ -228,4 +233,4 @@ REGISTER_CALLBACK(mpi_set_nptiso_gammas_local)
 void mpi_set_nptiso_gammas(double gamma0, double gammav) {
   mpi_call_all(mpi_set_nptiso_gammas_local, gamma0, gammav);
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/src/core/unit_tests/CMakeLists.txt b/src/core/unit_tests/CMakeLists.txt
index 2ece0659ee7..f623143c189 100644
--- a/src/core/unit_tests/CMakeLists.txt
+++ b/src/core/unit_tests/CMakeLists.txt
@@ -59,8 +59,6 @@ unit_test(NAME lees_edwards_test SRC lees_edwards_test.cpp DEPENDS
           espresso::core)
 unit_test(NAME BoxGeometry_test SRC BoxGeometry_test.cpp DEPENDS espresso::core)
 unit_test(NAME LocalBox_test SRC LocalBox_test.cpp DEPENDS espresso::core)
-unit_test(NAME Lattice_test SRC Lattice_test.cpp DEPENDS espresso::core)
-unit_test(NAME lb_exceptions SRC lb_exceptions.cpp DEPENDS espresso::core)
 unit_test(NAME Verlet_list_test SRC Verlet_list_test.cpp DEPENDS espresso::core
           NUM_PROC 4)
 unit_test(NAME VerletCriterion_test SRC VerletCriterion_test.cpp DEPENDS
@@ -79,3 +77,14 @@ if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
   unit_test(NAME specfunc_test SRC specfunc_test.cpp DEPENDS espresso::utils
             espresso::core)
 endif()
+unit_test(NAME lb_particle_coupling_test SRC lb_particle_coupling_test.cpp
+          DEPENDS espresso::core Boost::mpi MPI::MPI_CXX NUM_PROC 2)
+unit_test(NAME ek_interface_test SRC ek_interface_test.cpp DEPENDS
+          espresso::core Boost::mpi MPI::MPI_CXX NUM_PROC 2)
+if(ESPRESSO_BUILD_WITH_WALBERLA)
+  target_link_libraries(
+    lb_particle_coupling_test PRIVATE espresso::walberla
+                                      espresso::walberla::cpp_flags)
+  target_link_libraries(ek_interface_test PRIVATE espresso::walberla
+                                                  espresso::walberla::cpp_flags)
+endif()
diff --git a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp
index f1e156175ab..f0ef4040df5 100644
--- a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp
+++ b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp
@@ -25,6 +25,7 @@
 namespace utf = boost::unit_test;
 
 #include "ParticleFactory.hpp"
+#include "particle_management.hpp"
 
 #include "EspressoSystemStandAlone.hpp"
 #include "Particle.hpp"
@@ -72,25 +73,6 @@ static void remove_translational_motion() {
   Galilei{}.kill_particle_motion(false);
 }
 
-static auto copy_particle_to_head_node(boost::mpi::communicator const &comm,
-                                       int p_id) {
-  boost::optional<Particle> result{};
-  auto p = ::cell_structure.get_local_particle(p_id);
-  if (p and not p->is_ghost()) {
-    if (comm.rank() == 0) {
-      result = *p;
-    } else {
-      comm.send(0, p_id, *p);
-    }
-  }
-  if (comm.rank() == 0 and not result) {
-    Particle p{};
-    comm.recv(boost::mpi::any_source, p_id, p);
-    result = p;
-  }
-  return result;
-}
-
 BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) {
   auto constexpr tol = 8. * 100. * std::numeric_limits<double>::epsilon();
   auto const comm = boost::mpi::communicator();
diff --git a/src/core/unit_tests/Lattice_test.cpp b/src/core/unit_tests/Lattice_test.cpp
deleted file mode 100644
index f44ebe114e0..00000000000
--- a/src/core/unit_tests/Lattice_test.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (C) 2021-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define BOOST_TEST_MODULE Lattice class tests
-#define BOOST_TEST_DYN_LINK
-#include <boost/test/unit_test.hpp>
-
-#include "grid_based_algorithms/lattice.hpp"
-
-#include <utils/Vector.hpp>
-
-#include <cstddef>
-#include <limits>
-#include <stdexcept>
-
-BOOST_AUTO_TEST_CASE(test_basic_lattice) {
-  // create a lattice for the second domain of a 2x1x1 partition of the box
-  auto const halo_size = Lattice::index_t{1};
-  auto const agrid = 0.5;
-  auto const offset = 0.5;
-  auto const box_length = Utils::Vector3d{{6., 6., 6.}};
-  auto const local_box = Utils::Vector3d{{3., 6., 6.}};
-  auto const node_pos = Utils::Vector3i{{1, 0, 0}};
-  auto const node_grid = Utils::Vector3i{{2, 1, 1}};
-  Lattice lattice(agrid, offset, halo_size, local_box, box_length, box_length,
-                  node_pos, node_grid);
-
-  // check struct members
-  BOOST_CHECK_EQUAL(lattice.halo_size, halo_size);
-  BOOST_CHECK_EQUAL(lattice.agrid, agrid);
-  BOOST_CHECK_EQUAL(lattice.offset, offset);
-  BOOST_CHECK_EQUAL(lattice.halo_grid_volume, (6 + 2) * (12 + 2) * (12 + 2));
-  auto const elementwise = boost::test_tools::per_element();
-  auto const ref_grid = Utils::Vector3i{{6, 12, 12}};
-  auto const ref_global_grid = Utils::hadamard_product(node_grid, ref_grid);
-  auto const local_index_offset = Utils::hadamard_product(node_pos, ref_grid);
-  BOOST_TEST(lattice.local_box == local_box, elementwise);
-  BOOST_TEST(lattice.node_grid == node_grid, elementwise);
-  BOOST_TEST(lattice.grid == ref_grid, elementwise);
-  BOOST_TEST(lattice.global_grid == ref_global_grid, elementwise);
-  BOOST_TEST(lattice.local_index_offset == local_index_offset, elementwise);
-
-  // check methods
-  BOOST_CHECK(lattice.is_local({11, 11, 11}));
-  BOOST_CHECK(lattice.is_local({6, 11, 11}));
-  BOOST_CHECK(!lattice.is_local({5, 11, 11}));
-  BOOST_CHECK(!lattice.is_local({12, 12, 12}));
-  BOOST_CHECK(!lattice.is_local({0, 0, 0}));
-  auto const global_index = Utils::Vector3i{{11, 11, 11}};
-  auto const local_index = Utils::Vector3i{{6, 12, 12}};
-  BOOST_TEST(lattice.local_index(global_index) == local_index, elementwise);
-}
-
-BOOST_AUTO_TEST_CASE(test_map_position_to_lattice) {
-  using boost::test_tools::per_element;
-  auto const halo_size = Lattice::index_t{1};
-  auto const agrid = 1.0;
-  auto const offset = 0.5;
-  auto const box_l = Utils::Vector3d{{6., 6., 6.}};
-  auto const local_box = Utils::Vector3d{{6., 6., 6.}};
-  auto const node_pos = Utils::Vector3i{{0, 0, 0}};
-  auto const node_grid = Utils::Vector3i{{1, 1, 1}};
-  Lattice lattice(agrid, offset, halo_size, local_box, box_l, box_l, node_pos,
-                  node_grid);
-
-  // check methods
-  auto const slice_x = 6u + 2u;
-  auto const slice_xy = slice_x * slice_x;
-  auto const slice_xyz = 2u * 6u * 6u;
-  Utils::Vector<std::size_t, 8> const origin_index = {
-      0u,        1u,
-      slice_x,   slice_x + 1u,
-      slice_xy,  slice_xy + 1u,
-      slice_xyz, slice_xyz + 1u};
-  auto const delta1_ref = Utils::Vector6d{{.5, .5, .5, .5, .5, .5}};
-  auto const delta2_ref = Utils::Vector6d{{1., 1., 1., 0., 0., 0.}};
-  Utils::Vector<std::size_t, 8> node_index1;
-  Utils::Vector<std::size_t, 8> node_index2;
-  Utils::Vector<std::size_t, 8> idx;
-  Utils::Vector6d delta1;
-  Utils::Vector6d delta2;
-  Utils::Vector6d dx;
-
-  // check inside local domain (edge cases)
-  auto const my_origin = Utils::Vector3d::broadcast(0.);
-  auto const my_lb_left = Utils::Vector3d::broadcast(-offset);
-  auto const my_lb_right = Utils::Vector3d::broadcast(offset - 1e-12) + box_l;
-  lattice.map_position_to_lattice(my_origin, node_index1, delta1);
-  lattice.map_position_to_lattice(my_lb_left, node_index2, delta2);
-  lattice.map_position_to_lattice(my_lb_right, idx, dx);
-  BOOST_TEST(node_index1 == origin_index, per_element());
-  BOOST_TEST(node_index2 == origin_index, per_element());
-  BOOST_TEST(delta1 == delta1_ref, per_element());
-  BOOST_TEST(delta2 == delta2_ref, per_element());
-
-  // check almost inside local domain
-  auto constexpr epsilon = std::numeric_limits<double>::epsilon();
-  if (epsilon != epsilon / 2.) { // check for machine precision
-    auto const outside = Utils::Vector3d::broadcast(-offset - epsilon / 2.);
-    lattice.map_position_to_lattice(outside, node_index2, delta2);
-    BOOST_TEST(node_index2 == origin_index, per_element());
-  }
-
-  // check outside local domain
-  BOOST_CHECK_THROW(lattice.map_position_to_lattice({-2., -2., -2.}, idx, dx),
-                    std::runtime_error);
-  BOOST_CHECK_THROW(lattice.map_position_to_lattice({6.5, 6.5, 6.5}, idx, dx),
-                    std::runtime_error);
-}
diff --git a/src/core/unit_tests/Verlet_list_test.cpp b/src/core/unit_tests/Verlet_list_test.cpp
index 2e8533f9ee0..57b341df586 100644
--- a/src/core/unit_tests/Verlet_list_test.cpp
+++ b/src/core/unit_tests/Verlet_list_test.cpp
@@ -33,6 +33,7 @@ namespace utf = boost::unit_test;
 namespace bdata = boost::unit_test::data;
 
 #include "ParticleFactory.hpp"
+#include "particle_management.hpp"
 
 #include "EspressoSystemStandAlone.hpp"
 #include "Particle.hpp"
@@ -61,25 +62,6 @@ namespace espresso {
 static std::unique_ptr<EspressoSystemStandAlone> system;
 } // namespace espresso
 
-static auto copy_particle_to_head_node(boost::mpi::communicator const &comm,
-                                       int p_id) {
-  boost::optional<Particle> result{};
-  auto p = ::cell_structure.get_local_particle(p_id);
-  if (p and not p->is_ghost()) {
-    if (comm.rank() == 0) {
-      result = *p;
-    } else {
-      comm.send(0, p_id, *p);
-    }
-  }
-  if (comm.rank() == 0 and not result) {
-    Particle p{};
-    comm.recv(boost::mpi::any_source, p_id, p);
-    result = p;
-  }
-  return result;
-}
-
 namespace Testing {
 /**
  * Helper class to setup an integrator and particle properties such that the
diff --git a/src/core/unit_tests/ek_interface_test.cpp b/src/core/unit_tests/ek_interface_test.cpp
new file mode 100644
index 00000000000..1d01ef76b59
--- /dev/null
+++ b/src/core/unit_tests/ek_interface_test.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define BOOST_TEST_MODULE LB particle coupling test
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_NO_MAIN
+#include <boost/test/unit_test.hpp>
+
+#include "EspressoSystemStandAlone.hpp"
+#include "config/config.hpp"
+#include "errorhandling.hpp"
+#include "grid_based_algorithms/ek_container.hpp"
+#include "grid_based_algorithms/ek_reactions.hpp"
+
+#include <utils/Vector.hpp>
+
+#include <boost/mpi/communicator.hpp>
+
+#include <memory>
+#include <stdexcept>
+#include <string>
+
+static struct {
+  double kT = 1.3E-4;
+  double density = 1.4;
+  double diffusion = 3e-3;
+  double valency = 1.;
+  bool advection = true;
+  bool friction_coupling = true;
+  double tau = 0.01;
+  double time_step = 0.01;
+  double agrid = 1.;
+  double skin = 0.51;
+  Utils::Vector3d ext_efield = Utils::Vector3d{{0.01, 0.02, 0.03}};
+  Utils::Vector3d box_dimensions = Utils::Vector3d::broadcast(8.);
+  Utils::Vector3i grid_dimensions = Utils::Vector3i::broadcast(8);
+} params;
+
+namespace espresso {
+// ESPResSo system instance
+static std::unique_ptr<EspressoSystemStandAlone> system;
+} // namespace espresso
+
+static auto get_n_runtime_errors() { return check_runtime_errors_local(); }
+
+#ifdef WALBERLA
+
+#include "grid.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/EKContainer.hpp>
+#include <walberla_bridge/electrokinetics/EKinWalberlaBase.hpp>
+#include <walberla_bridge/electrokinetics/ek_poisson_none_init.hpp>
+#include <walberla_bridge/electrokinetics/ek_walberla_init.hpp>
+
+BOOST_AUTO_TEST_CASE(ek_interface_walberla) {
+  {
+    // tau setters and getters
+    BOOST_CHECK_EQUAL(EK::ek_container.get_tau(), 0.);
+    BOOST_CHECK_EQUAL(EK::get_tau(), 0.);
+    BOOST_CHECK_EQUAL(EK::get_steps_per_md_step(1.), 0);
+    EK::ek_container.set_tau(2.);
+    BOOST_CHECK_EQUAL(EK::ek_container.get_tau(), 2.);
+    BOOST_CHECK_EQUAL(EK::get_tau(), 2.);
+    BOOST_CHECK_EQUAL(EK::get_steps_per_md_step(1.), 2);
+    BOOST_CHECK_EQUAL(EK::get_steps_per_md_step(2.), 1);
+    BOOST_CHECK_EQUAL(EK::get_steps_per_md_step(5.), 0);
+  }
+
+  {
+    // setup a minimal EK model without coupling to LB
+    auto constexpr n_ghost_layers = 1u;
+    auto constexpr single_precision = true;
+    auto ek_lattice = std::make_shared<LatticeWalberla>(
+        params.grid_dimensions, ::node_grid, n_ghost_layers);
+    auto ek_species = new_ek_walberla(
+        ek_lattice, params.diffusion, params.kT, params.valency,
+        params.ext_efield, params.density, false, false, single_precision);
+    auto ek_solver_none = new_ek_poisson_none(ek_lattice, single_precision);
+
+    BOOST_REQUIRE(EK::ek_reactions.empty());
+    BOOST_REQUIRE(EK::ek_container.empty());
+    BOOST_REQUIRE(not EK::ek_container.is_poisson_solver_set());
+    EK::propagate(); // no-op
+    BOOST_REQUIRE_EQUAL(get_n_runtime_errors(), 0);
+    EK::ek_container.set_poisson_solver(ek_solver_none);
+    BOOST_REQUIRE(EK::ek_container.is_poisson_solver_set());
+    BOOST_REQUIRE(EK::ek_container.empty());
+    EK::ek_container.set_tau(0.);
+    BOOST_CHECK_THROW(EK::ek_container.add(ek_species), std::runtime_error);
+    EK::ek_container.set_tau(2.);
+    EK::ek_container.add(ek_species);
+    BOOST_REQUIRE(not EK::ek_container.empty());
+    EK::propagate(); // no-op
+    BOOST_REQUIRE_EQUAL(get_n_runtime_errors(), 0);
+    EK::ek_container.remove(ek_species);
+    BOOST_REQUIRE(EK::ek_container.empty());
+    EK::propagate(); // no-op
+    BOOST_REQUIRE_EQUAL(get_n_runtime_errors(), 0);
+  }
+}
+
+#else // WALBERLA
+
+BOOST_AUTO_TEST_CASE(ek_interface) {
+  {
+    EK::propagate(); // no-op
+    BOOST_CHECK_THROW(EK::get_tau(), NoEKActive);
+    BOOST_CHECK_THROW(EK::get_tau(), std::exception);
+    BOOST_CHECK_THROW(EK::get_steps_per_md_step(1.), std::exception);
+    auto const err_msg = std::string(NoEKActive().what());
+    auto const ref_msg = std::string("EK not activated");
+    BOOST_CHECK_EQUAL(err_msg, ref_msg);
+  }
+}
+
+#endif // WALBERLA
+
+int main(int argc, char **argv) {
+  espresso::system = std::make_unique<EspressoSystemStandAlone>(argc, argv);
+  espresso::system->set_box_l(params.box_dimensions);
+  espresso::system->set_time_step(params.time_step);
+  espresso::system->set_skin(params.skin);
+
+  boost::mpi::communicator world;
+  assert(world.size() <= 2);
+
+  return boost::unit_test::unit_test_main(init_unit_test, argc, argv);
+}
diff --git a/src/core/unit_tests/lb_exceptions.cpp b/src/core/unit_tests/lb_exceptions.cpp
deleted file mode 100644
index 6e1b4232644..00000000000
--- a/src/core/unit_tests/lb_exceptions.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 2021-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define BOOST_TEST_MODULE LB exception mechanism
-#define BOOST_TEST_DYN_LINK
-#include <boost/test/unit_test.hpp>
-
-#include "grid_based_algorithms/lb.hpp"
-#include "grid_based_algorithms/lb_interface.hpp"
-#include "grid_based_algorithms/lb_interpolation.hpp"
-#include "grid_based_algorithms/lb_particle_coupling.hpp"
-
-#include <stdexcept>
-
-BOOST_AUTO_TEST_CASE(exceptions) {
-  // getters and setters
-  BOOST_CHECK_THROW(lb_lbfluid_get_rng_state(), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_set_rng_state(0u), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_set_density(-1.), std::invalid_argument);
-  BOOST_CHECK_THROW(lb_lbfluid_set_density(1.), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_density(), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_set_viscosity(-1.), std::invalid_argument);
-  BOOST_CHECK_THROW(lb_lbfluid_set_viscosity(1.), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_viscosity(), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_set_bulk_viscosity(-1.), std::invalid_argument);
-  BOOST_CHECK_THROW(lb_lbfluid_set_bulk_viscosity(1.), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_bulk_viscosity(), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_set_gamma_odd(2.), std::invalid_argument);
-  BOOST_CHECK_THROW(lb_lbfluid_set_gamma_odd({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_gamma_odd(), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_set_gamma_even(2.), std::invalid_argument);
-  BOOST_CHECK_THROW(lb_lbfluid_set_gamma_even({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_gamma_even(), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_set_agrid(-1.), std::invalid_argument);
-  BOOST_CHECK_THROW(lb_lbfluid_set_agrid(1.), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_agrid(), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_set_ext_force_density({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_ext_force_density(), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_set_tau(-1.), std::invalid_argument);
-  BOOST_CHECK_THROW(lb_lbfluid_set_tau(1.), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_tau(), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_set_kT({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_kT(), std::exception);
-  BOOST_CHECK_THROW(lb_lbnode_get_boundary({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbnode_set_density({}, {}), std::exception);
-  BOOST_CHECK_THROW(lb_lbnode_get_density({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbnode_set_velocity({}, {}), std::exception);
-  BOOST_CHECK_THROW(lb_lbnode_get_velocity({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbnode_set_pop({}, {}), std::exception);
-  BOOST_CHECK_THROW(lb_lbnode_get_pop({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbnode_get_pressure_tensor({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbnode_get_pressure_tensor_neq({}), std::exception);
-  // particle coupling and interpolation
-  BOOST_CHECK_EQUAL(lb_lbcoupling_get_rng_state(), 0u);
-  BOOST_CHECK_THROW(lb_lbfluid_get_interpolated_velocity({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_interpolated_density({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_shape(), std::exception);
-  BOOST_CHECK_EQUAL(lb_lbfluid_calc_fluid_momentum(), Utils::Vector3d{});
-  BOOST_CHECK_THROW(lb_lbfluid_set_lattice_switch(static_cast<ActiveLB>(100)),
-                    std::invalid_argument);
-  ::lattice_switch = ActiveLB::CPU;
-  mpi_set_interpolation_order_local(InterpolationOrder::quadratic);
-  BOOST_CHECK_THROW(lb_lbfluid_get_interpolated_density({}), std::exception);
-  BOOST_CHECK_THROW(lb_lbfluid_get_interpolated_velocity({}),
-                    std::runtime_error);
-  BOOST_CHECK_THROW(lb_lbinterpolation_add_force_density({}, {}),
-                    std::runtime_error);
-  ::lattice_switch = ActiveLB::GPU;
-  BOOST_CHECK_THROW(lb_lbfluid_get_interpolated_density({}), std::exception);
-  ::lattice_switch = ActiveLB::NONE;
-  mpi_set_interpolation_order_local(InterpolationOrder::linear);
-#ifdef ADDITIONAL_CHECKS
-  {
-    std::stringstream stream_xy{};
-    log_buffer_diff(stream_xy, 0, 1, 2, 3, -1);
-    BOOST_CHECK_EQUAL(stream_xy.str(),
-                      "buffers differ in dir=0 at node index=1 x=2 y=3\n");
-    std::stringstream stream_xyz{};
-    log_buffer_diff(stream_xyz, 0, 1, 2, 3, 4);
-    BOOST_CHECK_EQUAL(stream_xyz.str(),
-                      "buffers differ in dir=0 at node index=1 x=2 y=3 z=4\n");
-  }
-#endif // ADDITIONAL_CHECKS
-}
diff --git a/src/core/unit_tests/lb_particle_coupling_test.cpp b/src/core/unit_tests/lb_particle_coupling_test.cpp
new file mode 100644
index 00000000000..4e308d089d5
--- /dev/null
+++ b/src/core/unit_tests/lb_particle_coupling_test.cpp
@@ -0,0 +1,612 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define BOOST_TEST_MODULE LB particle coupling test
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_NO_MAIN
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/test/unit_test.hpp>
+namespace bdata = boost::unit_test::data;
+namespace utf = boost::unit_test;
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "ParticleFactory.hpp"
+#include "particle_management.hpp"
+
+#include "EspressoSystemStandAlone.hpp"
+#include "Particle.hpp"
+#include "cells.hpp"
+#include "errorhandling.hpp"
+#include "event.hpp"
+#include "grid.hpp"
+#include "grid_based_algorithms/lb_interface.hpp"
+#include "grid_based_algorithms/lb_interpolation.hpp"
+#include "grid_based_algorithms/lb_particle_coupling.hpp"
+#include "grid_based_algorithms/lb_walberla_instance.hpp"
+#include "particle_node.hpp"
+#include "random.hpp"
+#include "thermostat.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+#include <walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+#include <utils/math/sqr.hpp>
+
+#include <boost/mpi.hpp>
+
+#include <array>
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+#include <limits>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+// multiply by 6 to account for error accumulation
+auto constexpr eps = 6. * std::numeric_limits<double>::epsilon();
+
+static struct {
+  unsigned int seed = 23u;
+  double kT = 0.;
+  double viscosity = 1e-3;
+  double density = 0.5;
+  double tau = 0.01;
+  double time_step = 0.01;
+  double agrid = 1.;
+  double skin = 0.5;
+  Utils::Vector3d box_dimensions = Utils::Vector3d::broadcast(8.);
+  Utils::Vector3i grid_dimensions = Utils::Vector3i::broadcast(8);
+  auto force_md_to_lb(Utils::Vector3d const &md_force) const {
+    return (-this->time_step * this->tau / this->agrid) * md_force;
+  }
+} params;
+
+/** Boost unit test dataset */
+std::vector<double> const kTs{0., 1E-4};
+
+namespace espresso {
+// ESPResSo system instance
+static std::unique_ptr<EspressoSystemStandAlone> system;
+// ESPResSo actors
+static std::shared_ptr<LBWalberlaParams> lb_params;
+static std::shared_ptr<LatticeWalberla> lb_lattice;
+static std::shared_ptr<LBWalberlaBase> lb_fluid;
+
+static auto make_lb_actor() {
+  auto constexpr n_ghost_layers = 1u;
+  auto constexpr single_precision = false;
+  lb_params = std::make_shared<LBWalberlaParams>(params.agrid, params.tau);
+  lb_lattice = std::make_shared<LatticeWalberla>(params.grid_dimensions,
+                                                 ::node_grid, n_ghost_layers);
+  lb_fluid = new_lb_walberla(lb_lattice, params.viscosity, params.density,
+                             single_precision);
+  lb_fluid->set_collision_model(params.kT, params.seed);
+  lb_fluid->ghost_communication();
+}
+
+static void add_lb_actor() { activate_lb_walberla(lb_fluid, lb_params); }
+
+static void remove_lb_actor() { deactivate_lb_walberla(); }
+
+static void set_lb_kT(double kT) {
+  lb_fluid->set_collision_model(kT, params.seed);
+}
+} // namespace espresso
+
+namespace LB {
+static auto get_force_to_be_applied(Utils::Vector3d const &pos) {
+  auto const agrid = espresso::lb_params->get_agrid();
+  auto const ind = Utils::Vector3i{static_cast<int>(pos[0] / agrid),
+                                   static_cast<int>(pos[1] / agrid),
+                                   static_cast<int>(pos[2] / agrid)};
+  auto const res = espresso::lb_fluid->get_node_force_to_be_applied(ind);
+  if (!res) {
+    auto const comm = boost::mpi::communicator();
+    std::stringstream what;
+    what << "Force to be applied could not be obtained from Walberla "
+         << "on MPI rank " << comm.rank() << ": position = [" << pos << "]";
+    throw std::runtime_error(what.str());
+  }
+  return *res;
+}
+} // namespace LB
+
+/** Fixture to manage the lifetime of the LB actor. */
+struct CleanupActorLB : public ParticleFactory {
+  CleanupActorLB() : ParticleFactory() {
+    params.kT = 0.;
+    espresso::make_lb_actor();
+    espresso::add_lb_actor();
+  }
+
+  // NOLINTNEXTLINE(bugprone-exception-escape)
+  ~CleanupActorLB() { espresso::remove_lb_actor(); }
+};
+
+BOOST_FIXTURE_TEST_SUITE(suite, CleanupActorLB)
+
+static void lb_lbcoupling_broadcast() {
+  boost::mpi::communicator world;
+  boost::mpi::broadcast(world, lb_particle_coupling, 0);
+}
+
+BOOST_AUTO_TEST_CASE(activate) {
+  lb_lbcoupling_deactivate();
+  lb_lbcoupling_broadcast();
+  lb_lbcoupling_activate();
+  lb_lbcoupling_broadcast();
+  BOOST_CHECK(lb_particle_coupling.couple_to_md);
+}
+
+BOOST_AUTO_TEST_CASE(de_activate) {
+  lb_lbcoupling_activate();
+  lb_lbcoupling_broadcast();
+  lb_lbcoupling_deactivate();
+  lb_lbcoupling_broadcast();
+  BOOST_CHECK(not lb_particle_coupling.couple_to_md);
+}
+
+BOOST_AUTO_TEST_CASE(rng_initial_state) {
+  BOOST_CHECK(lb_lbcoupling_is_seed_required());
+  BOOST_CHECK(!lb_particle_coupling.rng_counter_coupling);
+}
+
+BOOST_AUTO_TEST_CASE(rng) {
+  lb_lbcoupling_set_rng_state(17);
+  BOOST_REQUIRE(lb_particle_coupling.rng_counter_coupling);
+  BOOST_CHECK_EQUAL(lb_lbcoupling_get_rng_state(), 17);
+  BOOST_CHECK(not lb_lbcoupling_is_seed_required());
+  auto const step1_random1 = lb_particle_coupling_noise(
+      true, 1, lb_particle_coupling.rng_counter_coupling);
+  auto const step1_random2 = lb_particle_coupling_noise(
+      true, 4, lb_particle_coupling.rng_counter_coupling);
+  auto const step1_random2_try2 = lb_particle_coupling_noise(
+      true, 4, lb_particle_coupling.rng_counter_coupling);
+  BOOST_CHECK(step1_random1 != step1_random2);
+  BOOST_CHECK(step1_random2 == step1_random2_try2);
+
+  // Propagation queries kT from LB, so LB needs to be initialized
+  espresso::set_lb_kT(1E-4);
+  lb_lbcoupling_propagate();
+
+  BOOST_REQUIRE(lb_particle_coupling.rng_counter_coupling);
+  BOOST_CHECK_EQUAL(lb_lbcoupling_get_rng_state(), 18);
+  auto const step2_random1 = lb_particle_coupling_noise(
+      true, 1, lb_particle_coupling.rng_counter_coupling);
+  auto const step2_random2 = lb_particle_coupling_noise(
+      true, 4, lb_particle_coupling.rng_counter_coupling);
+  BOOST_CHECK(step1_random1 != step2_random1);
+  BOOST_CHECK(step1_random1 != step2_random2);
+
+  auto const step3_norandom = lb_particle_coupling_noise(
+      false, 4, lb_particle_coupling.rng_counter_coupling);
+  BOOST_CHECK((step3_norandom == Utils::Vector3d{0., 0., 0.}));
+}
+
+BOOST_AUTO_TEST_CASE(access_outside_domain) {
+  auto const invalid_pos = 2 * params.box_dimensions;
+  BOOST_CHECK_THROW(lb_lbinterpolation_get_interpolated_velocity(invalid_pos),
+                    std::runtime_error);
+  BOOST_CHECK_THROW(lb_lbinterpolation_add_force_density(invalid_pos, {}),
+                    std::runtime_error);
+}
+
+BOOST_AUTO_TEST_CASE(drift_vel_offset) {
+  Particle p{};
+  BOOST_CHECK_EQUAL(lb_particle_coupling_drift_vel_offset(p).norm(), 0);
+  Utils::Vector3d expected{};
+#ifdef ENGINE
+  p.swimming().swimming = true;
+  p.swimming().v_swim = 2.;
+  expected += p.swimming().v_swim * p.calc_director();
+#endif
+#ifdef LB_ELECTROHYDRODYNAMICS
+  p.mu_E() = Utils::Vector3d{-2., 1.5, 1.};
+  expected += p.mu_E();
+#endif
+  BOOST_CHECK_SMALL(
+      (lb_particle_coupling_drift_vel_offset(p) - expected).norm(), eps);
+}
+
+BOOST_DATA_TEST_CASE(drag_force, bdata::make(kTs), kT) {
+  espresso::set_lb_kT(kT);
+  Particle p{};
+  p.v() = {-2.5, 1.5, 2.};
+  p.pos() = lb_walberla()->get_lattice().get_local_domain().first;
+  lb_lbcoupling_set_gamma(0.2);
+  Utils::Vector3d drift_offset{-1., 1., 1.};
+
+  // Drag force in quiescent fluid
+  {
+    auto const observed = lb_drag_force(p, p.pos(), drift_offset);
+    const Utils::Vector3d expected{0.3, -0.1, -.2};
+    BOOST_CHECK_SMALL((observed - expected).norm(), eps);
+  }
+}
+
+#ifdef ENGINE
+BOOST_DATA_TEST_CASE(swimmer_force, bdata::make(kTs), kT) {
+  espresso::set_lb_kT(kT);
+  auto const first_lb_node =
+      lb_walberla()->get_lattice().get_local_domain().first;
+  Particle p{};
+  p.swimming().swimming = true;
+  p.swimming().f_swim = 2.;
+  p.swimming().dipole_length = 3.;
+  p.swimming().push_pull = 1;
+  p.pos() = first_lb_node + Utils::Vector3d::broadcast(0.5);
+
+  auto const coupling_pos =
+      p.pos() +
+      Utils::Vector3d{0., 0., p.swimming().dipole_length / params.agrid};
+
+  // swimmer coupling
+  {
+    if (in_local_halo(p.pos())) {
+      add_swimmer_force(p, params.time_step);
+    }
+    if (in_local_halo(coupling_pos)) {
+      auto const interpolated = LB::get_force_to_be_applied(coupling_pos);
+      auto const expected =
+          params.force_md_to_lb(Utils::Vector3d{0., 0., p.swimming().f_swim});
+
+      // interpolation happened on the expected LB cell
+      BOOST_CHECK_SMALL((interpolated - expected).norm(), eps);
+    }
+
+    // all other LB cells have no force
+    for (int i = 0; i < params.grid_dimensions[0]; ++i) {
+      for (int j = 0; j < params.grid_dimensions[1]; ++j) {
+        for (int k = 0; k < params.grid_dimensions[2]; ++k) {
+          auto const pos = Utils::Vector3d{
+              0.5 + static_cast<double>(i) * params.agrid,
+              0.5 + static_cast<double>(j) * params.agrid,
+              0.5 + static_cast<double>(k) * params.agrid,
+          };
+          if ((pos - coupling_pos).norm() < 1e-6)
+            continue;
+          if (in_local_halo(pos)) {
+            auto const interpolated = LB::get_force_to_be_applied(pos);
+            BOOST_CHECK_SMALL(interpolated.norm(), eps);
+          }
+        }
+      }
+    }
+  }
+
+  // remove force of the particle from the fluid
+  {
+    if (in_local_halo(coupling_pos)) {
+      add_md_force(coupling_pos, -Utils::Vector3d{0., 0., p.swimming().f_swim},
+                   params.time_step);
+      auto const reset = LB::get_force_to_be_applied(coupling_pos);
+      BOOST_REQUIRE_SMALL(reset.norm(), eps);
+    }
+  }
+}
+#endif // ENGINE
+
+BOOST_DATA_TEST_CASE(particle_coupling, bdata::make(kTs), kT) {
+  espresso::set_lb_kT(kT);
+  lb_lbcoupling_set_rng_state(17);
+  auto const first_lb_node =
+      lb_walberla()->get_lattice().get_local_domain().first;
+  auto const gamma = 0.2;
+  auto const noise =
+      (kT > 0.) ? std::sqrt(24. * gamma * kT / params.time_step) : 0.0;
+  auto &rng = lb_particle_coupling.rng_counter_coupling;
+  Particle p{};
+  Utils::Vector3d expected = noise * Random::noise_uniform<RNGSalt::PARTICLES>(
+                                         rng->value(), 0, p.id());
+#ifdef ENGINE
+  p.swimming().swimming = true;
+  p.swimming().v_swim = 2.;
+  p.swimming().push_pull = 1;
+  expected += gamma * p.swimming().v_swim * p.calc_director();
+#endif
+#ifdef LB_ELECTROHYDRODYNAMICS
+  p.mu_E() = Utils::Vector3d{-2., 1.5, 1.};
+  expected += gamma * p.mu_E();
+#endif
+  p.pos() = first_lb_node + Utils::Vector3d::broadcast(0.5);
+  lb_lbcoupling_set_gamma(gamma);
+
+  // coupling
+  {
+    if (in_local_halo(p.pos())) {
+      couple_particle(p, false, noise, rng, params.time_step);
+      BOOST_CHECK_SMALL((p.force() - expected).norm(), eps);
+
+      auto const interpolated = LB::get_force_to_be_applied(p.pos());
+      BOOST_CHECK_SMALL((interpolated - params.force_md_to_lb(expected)).norm(),
+                        eps);
+    }
+  }
+
+  // remove force of the particle from the fluid
+  {
+    if (in_local_halo(p.pos())) {
+      add_md_force(p.pos(), -expected, params.time_step);
+    }
+  }
+}
+
+BOOST_DATA_TEST_CASE_F(CleanupActorLB, coupling_particle_lattice_ia,
+                       bdata::make(kTs), kT) {
+  auto const comm = boost::mpi::communicator();
+  auto const rank = comm.rank();
+  espresso::set_lb_kT(kT);
+  lb_lbcoupling_set_rng_state(17);
+  auto const first_lb_node =
+      lb_walberla()->get_lattice().get_local_domain().first;
+  auto const gamma = 0.2;
+  auto const noise = std::sqrt(24. * gamma * kT / params.time_step *
+                               Utils::sqr(params.agrid / params.tau));
+  auto &rng = lb_particle_coupling.rng_counter_coupling;
+
+  auto const pid = 0;
+  auto const skin = params.skin;
+  auto const &box_l = params.box_dimensions;
+  create_particle({box_l[0] / 2. - skin * 2., skin * 2., skin * 2.}, 0, 0);
+
+  // sanity checks
+  BOOST_REQUIRE_EQUAL(get_particle_node_parallel(pid), rank ? -1 : 0);
+  BOOST_REQUIRE_EQUAL(
+      ErrorHandling::mpi_gather_runtime_errors_all(rank == 0).size(), 0);
+
+#ifdef ENGINE
+  set_particle_property(pid, &Particle::swimming,
+                        ParticleParametersSwimming{true, 0., 2., 1, 3.});
+#endif
+#ifdef LB_ELECTROHYDRODYNAMICS
+  set_particle_property(pid, &Particle::mu_E, Utils::Vector3d{-2., 1.5, 1.});
+#endif
+
+  auto expected =
+      noise * Random::noise_uniform<RNGSalt::PARTICLES>(rng->value(), 0, pid);
+  auto const p_opt = copy_particle_to_head_node(comm, pid);
+  if (rank == 0) {
+    auto const &p = *p_opt;
+#ifdef ENGINE
+    expected += gamma * p.swimming().v_swim * p.calc_director();
+#endif
+#ifdef LB_ELECTROHYDRODYNAMICS
+    expected += gamma * p.mu_E();
+#endif
+  }
+  boost::mpi::broadcast(comm, expected, 0);
+  auto const p_pos = first_lb_node + Utils::Vector3d::broadcast(0.5);
+  set_particle_pos(pid, p_pos);
+  lb_lbcoupling_set_gamma(gamma);
+
+  for (bool with_ghosts : {false, true}) {
+    {
+      if (with_ghosts) {
+        cells_update_ghosts(global_ghost_flags());
+      }
+      if (rank == 0) {
+        auto const particles = ::cell_structure.local_particles();
+        auto const ghost_particles = ::cell_structure.ghost_particles();
+        BOOST_REQUIRE_GE(particles.size(), 1);
+        BOOST_REQUIRE_GE(ghost_particles.size(), static_cast<int>(with_ghosts));
+      }
+    }
+
+    // check box shifts
+    if (rank == 0) {
+      auto constexpr reference_shifts =
+          std::array<Utils::Vector3i, 8>{{{{0, 0, 0}},
+                                          {{0, 0, 8}},
+                                          {{0, 8, 0}},
+                                          {{0, 8, 8}},
+                                          {{8, 0, 0}},
+                                          {{8, 0, 8}},
+                                          {{8, 8, 0}},
+                                          {{8, 8, 8}}}};
+      boost::mpi::communicator world;
+      assert(world.size() <= 4);
+      auto const cutoff = 8 / world.size();
+      {
+        auto const shifts = positions_in_halo({0., 0., 0.}, box_geo);
+        BOOST_REQUIRE_EQUAL(shifts.size(), cutoff);
+        for (std::size_t i = 0; i < shifts.size(); ++i) {
+          BOOST_REQUIRE_EQUAL(shifts[i], reference_shifts[i]);
+        }
+      }
+      {
+        auto const reference_shift = Utils::Vector3d{1., 1., 1.};
+        auto const shifts = positions_in_halo({1., 1., 1.}, box_geo);
+        BOOST_REQUIRE_EQUAL(shifts.size(), 1);
+        BOOST_REQUIRE_EQUAL(shifts[0], reference_shift);
+      }
+      {
+        auto const reference_origin = Utils::Vector3d{1., 2., 0.};
+        auto const reference_shift = Utils::Vector3d{1., 2., 8.};
+        auto const shifts = positions_in_halo({1., 2., 0.}, box_geo);
+        BOOST_REQUIRE_EQUAL(shifts.size(), 2);
+        BOOST_REQUIRE_EQUAL(shifts[0], reference_origin);
+        BOOST_REQUIRE_EQUAL(shifts[1], reference_shift);
+      }
+    }
+
+    // check without LB coupling
+    {
+      lb_lbcoupling_deactivate();
+      lb_lbcoupling_broadcast();
+      auto const particles = ::cell_structure.local_particles();
+      auto const ghost_particles = ::cell_structure.ghost_particles();
+      lb_lbcoupling_calc_particle_lattice_ia(thermo_virtual, particles,
+                                             ghost_particles, params.time_step);
+      auto const p_opt = copy_particle_to_head_node(comm, pid);
+      if (rank == 0) {
+        auto const &p = *p_opt;
+        BOOST_CHECK_EQUAL(p.force().norm(), 0.);
+      }
+    }
+
+    // check with LB coupling
+    {
+      lb_lbcoupling_activate();
+      lb_lbcoupling_broadcast();
+      auto const particles = ::cell_structure.local_particles();
+      auto const ghost_particles = ::cell_structure.ghost_particles();
+      Utils::Vector3d lb_before{};
+      {
+        auto const p_opt = copy_particle_to_head_node(comm, pid);
+        if (rank == 0) {
+          auto const &p = *p_opt;
+          // get original LB force
+          lb_before = LB::get_force_to_be_applied(p.pos());
+        }
+      }
+      // couple particle to LB
+      lb_lbcoupling_calc_particle_lattice_ia(thermo_virtual, particles,
+                                             ghost_particles, params.time_step);
+      {
+        auto const p_opt = copy_particle_to_head_node(comm, pid);
+        if (rank == 0) {
+          auto const &p = *p_opt;
+          // check particle force
+          BOOST_CHECK_SMALL((p.force() - expected).norm(), eps);
+          // check LB force
+          auto const lb_after = LB::get_force_to_be_applied(p.pos());
+          auto const lb_expected = params.force_md_to_lb(expected) + lb_before;
+          BOOST_CHECK_SMALL((lb_after - lb_expected).norm(), eps);
+        }
+      }
+      // remove force of the particle from the fluid
+      set_particle_property(pid, &Particle::force, Utils::Vector3d{});
+      add_md_force(p_pos, -expected, params.time_step);
+    }
+  }
+
+  // clean-up and sanity checks
+  {
+    boost::mpi::communicator world;
+    auto const error_message_ref = std::string(
+        "Recalculating forces, so the LB coupling forces are not included in "
+        "the particle force the first time step. This only matters if it "
+        "happens frequently during sampling.");
+    auto const error_messages =
+        ErrorHandling::mpi_gather_runtime_errors_all(world.rank() == 0);
+    for (auto const &error_message : error_messages) {
+      BOOST_CHECK_EQUAL(error_message.what(), error_message_ref);
+    }
+  }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+bool test_lb_domain_mismatch_local() {
+  boost::mpi::communicator world;
+  auto const node_grid_original = ::node_grid;
+  auto const node_grid_reversed =
+      Utils::Vector3i{{::node_grid[2], ::node_grid[1], ::node_grid[0]}};
+  auto const n_ghost_layers = 1u;
+  auto const params = LBWalberlaParams(0.5, 0.01);
+  ::node_grid = node_grid_reversed;
+  auto const lattice = std::make_shared<LatticeWalberla>(
+      Utils::Vector3i{12, 12, 12}, node_grid_original, n_ghost_layers);
+  auto const ptr = new_lb_walberla(lattice, 1.0, 1.0, false);
+  ptr->set_collision_model(0.0, 0);
+  ::node_grid = node_grid_original;
+  if (world.rank() == 0) {
+    try {
+      lb_sanity_checks(*ptr, params, params.get_tau());
+    } catch (std::runtime_error const &err) {
+      auto const what_ref = std::string("waLBerla and ESPResSo disagree "
+                                        "about domain decomposition.");
+      return err.what() == what_ref;
+    }
+  }
+  return false;
+}
+
+BOOST_AUTO_TEST_CASE(exceptions) {
+  {
+    using std::exception;
+    // accessing uninitialized pointers is not allowed
+    BOOST_CHECK_THROW(lb_walberla(), std::runtime_error);
+    BOOST_CHECK_THROW(lb_walberla_params(), std::runtime_error);
+    // getters and setters
+    BOOST_CHECK_THROW(LB::get_agrid(), exception);
+    BOOST_CHECK_THROW(LB::get_tau(), exception);
+    BOOST_CHECK_THROW(LB::get_kT(), exception);
+    BOOST_CHECK_THROW(LB::get_pressure_tensor(), exception);
+    BOOST_CHECK_THROW(LB::get_force_to_be_applied({-10., -10., -10.}),
+                      std::runtime_error);
+    // coupling, interpolation, boundaries
+    BOOST_CHECK_THROW(lb_lbcoupling_get_rng_state(), std::runtime_error);
+    BOOST_CHECK_THROW(lb_lbcoupling_set_rng_state(0ul), std::runtime_error);
+    BOOST_CHECK_THROW(lb_particle_coupling_noise(true, 0, OptionalCounter{}),
+                      std::runtime_error);
+    BOOST_CHECK_THROW(lb_lbinterpolation_get_interpolated_velocity({}),
+                      std::runtime_error);
+    BOOST_CHECK_THROW(lb_lbinterpolation_add_force_density({}, {}),
+                      std::runtime_error);
+    BOOST_CHECK_THROW(LB::get_interpolated_velocity({}), exception);
+    BOOST_CHECK_THROW(LB::get_interpolated_density({}), exception);
+    BOOST_CHECK_THROW(LB::calc_fluid_momentum(), exception);
+  }
+
+  // waLBerla and ESPResSo must agree on domain decomposition
+  {
+    boost::mpi::communicator world;
+    auto const has_thrown_correct_exception = test_lb_domain_mismatch_local();
+    auto const n_errors = check_runtime_errors_local();
+    auto const error_queue =
+        ErrorHandling::mpi_gather_runtime_errors_all(world.rank() == 0);
+    if (world.rank() == 0) {
+      BOOST_TEST_REQUIRE(has_thrown_correct_exception);
+      BOOST_REQUIRE_EQUAL(n_errors, 1);
+      BOOST_REQUIRE_EQUAL(error_queue.size(), 1);
+      auto const what_ref = std::string("MPI rank 0: left ESPResSo: [0, 0, 0], "
+                                        "left waLBerla: [0, 0, 0]");
+      for (auto const &error : error_queue) {
+        auto const error_what = error.what().substr(1, what_ref.size());
+        BOOST_CHECK_EQUAL(error_what, what_ref);
+      }
+    }
+  }
+}
+
+int main(int argc, char **argv) {
+  espresso::system = std::make_unique<EspressoSystemStandAlone>(argc, argv);
+  espresso::system->set_box_l(params.box_dimensions);
+  espresso::system->set_time_step(params.time_step);
+  espresso::system->set_skin(params.skin);
+
+  boost::mpi::communicator world;
+  assert(world.size() <= 2);
+
+  return boost::unit_test::unit_test_main(init_unit_test, argc, argv);
+}
+
+#else // WALBERLA
+int main(int argc, char **argv) {}
+#endif
diff --git a/src/core/unit_tests/particle_management.hpp b/src/core/unit_tests/particle_management.hpp
new file mode 100644
index 00000000000..faf3d9565db
--- /dev/null
+++ b/src/core/unit_tests/particle_management.hpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef ESPRESSO_SRC_CORE_UNIT_TESTS_PARTICLE_MANAGEMENT_HPP
+#define ESPRESSO_SRC_CORE_UNIT_TESTS_PARTICLE_MANAGEMENT_HPP
+
+#include "Particle.hpp"
+#include "cells.hpp"
+
+#include <boost/mpi/communicator.hpp>
+#include <boost/optional.hpp>
+
+inline auto copy_particle_to_head_node(boost::mpi::communicator const &comm,
+                                       int p_id) {
+  boost::optional<Particle> result{};
+  auto p = ::cell_structure.get_local_particle(p_id);
+  if (p and not p->is_ghost()) {
+    if (comm.rank() == 0) {
+      result = *p;
+    } else {
+      comm.send(0, p_id, *p);
+    }
+  }
+  if (comm.rank() == 0 and not result) {
+    Particle p{};
+    comm.recv(boost::mpi::any_source, p_id, p);
+    result = p;
+  }
+  return result;
+}
+
+#endif // ESPRESSO_SRC_CORE_UNIT_TESTS_PARTICLE_MANAGEMENT_HPP
diff --git a/src/core/virtual_sites/CMakeLists.txt b/src/core/virtual_sites/CMakeLists.txt
index 81a2cc09fcd..13401606e58 100644
--- a/src/core/virtual_sites/CMakeLists.txt
+++ b/src/core/virtual_sites/CMakeLists.txt
@@ -19,7 +19,5 @@
 
 target_sources(
   espresso_core
-  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/lb_inertialess_tracers.cpp
-          ${CMAKE_CURRENT_SOURCE_DIR}/lb_inertialess_tracers_cuda_interface.cpp
-          ${CMAKE_CURRENT_SOURCE_DIR}/VirtualSitesInertialessTracers.cpp
+  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/VirtualSitesInertialessTracers.cpp
           ${CMAKE_CURRENT_SOURCE_DIR}/VirtualSitesRelative.cpp)
diff --git a/src/core/virtual_sites/VirtualSites.hpp b/src/core/virtual_sites/VirtualSites.hpp
index 8a80741e201..0d172059d22 100644
--- a/src/core/virtual_sites/VirtualSites.hpp
+++ b/src/core/virtual_sites/VirtualSites.hpp
@@ -51,7 +51,7 @@ class VirtualSites {
   /** Back-transfer forces (and torques) to non-virtual particles. */
   virtual void back_transfer_forces_and_torques() const {}
   /** @brief Called after force calculation (and before rattle/shake) */
-  virtual void after_force_calc() {}
+  virtual void after_force_calc(double) {}
   virtual void after_lb_propagation(double) {}
   /** @brief Pressure contribution. */
   virtual Utils::Matrix<double, 3, 3> pressure_tensor() const { return {}; }
diff --git a/src/core/virtual_sites/VirtualSitesInertialessTracers.cpp b/src/core/virtual_sites/VirtualSitesInertialessTracers.cpp
index f22415d1499..6c06d9d6657 100644
--- a/src/core/virtual_sites/VirtualSitesInertialessTracers.cpp
+++ b/src/core/virtual_sites/VirtualSitesInertialessTracers.cpp
@@ -23,43 +23,85 @@
 #include "VirtualSitesInertialessTracers.hpp"
 
 #include "cells.hpp"
-#include "communication.hpp"
 #include "errorhandling.hpp"
+#include "forces.hpp"
 #include "grid_based_algorithms/lb_interface.hpp"
-#include "virtual_sites/lb_inertialess_tracers.hpp"
+#include "grid_based_algorithms/lb_interpolation.hpp"
+#include "grid_based_algorithms/lb_particle_coupling.hpp"
+#include "integrate.hpp"
 
-#include <algorithm>
+#include <unordered_set>
 
-static void check_no_vs_exist(char const *const message) {
-  if (std::any_of(cell_structure.local_particles().begin(),
-                  cell_structure.local_particles().end(),
-                  [](Particle const &p) { return p.is_virtual(); })) {
-    runtimeErrorMsg() << "Inertialess Tracers: " << message;
+static bool lb_active_check() {
+  if (lattice_switch == ActiveLB::NONE) {
+    runtimeErrorMsg() << "LB needs to be active for inertialess tracers.";
+    return false;
   }
+  return true;
 }
 
-void VirtualSitesInertialessTracers::after_force_calc() {
-  // Now the forces are computed and need to go into the LB fluid
-  if (lattice_switch == ActiveLB::CPU) {
-    IBM_ForcesIntoFluid_CPU();
-    return;
+void VirtualSitesInertialessTracers::after_force_calc(double time_step) {
+  auto const to_lb_units =
+      (lattice_switch == ActiveLB::NONE) ? 0. : 1. / LB::get_agrid();
+
+  // Distribute summed-up forces from physical particles to ghosts
+  init_forces_ghosts(cell_structure.ghost_particles());
+  cells_update_ghosts(Cells::DATA_PART_FORCE);
+
+  // Set to store ghost particles (ids) that have already been coupled
+  std::unordered_set<int> coupled_ghost_particles;
+  // Apply particle forces to the LB fluid at particle positions
+  // For physical particles, also set particle velocity = fluid velocity
+  for (auto &p : cell_structure.local_particles()) {
+    if (!p.is_virtual())
+      continue;
+    if (!lb_active_check()) {
+      return;
+    }
+    if (should_be_coupled(p, coupled_ghost_particles)) {
+      for (auto pos : positions_in_halo(p.pos(), box_geo)) {
+        add_md_force(pos * to_lb_units, -p.force(), time_step);
+      }
+    }
   }
-#ifdef CUDA
-  if (lattice_switch == ActiveLB::GPU) {
-    IBM_ForcesIntoFluid_GPU(cell_structure.local_particles(), this_node);
-    if (comm_cart.size() != 1 and this_node != 0) {
-      check_no_vs_exist("The LB GPU method cannot integrate virtual sites when "
-                        "more than 1 MPI ranks are used. The particles on MPI "
-                        "rank >= 2 are now in an undeterminate state.");
+  for (auto const &p : cell_structure.ghost_particles()) {
+    if (!p.is_virtual())
+      continue;
+    if (!lb_active_check()) {
+      return;
+    }
+    if (should_be_coupled(p, coupled_ghost_particles)) {
+      for (auto pos : positions_in_halo(p.pos(), box_geo)) {
+        add_md_force(pos * to_lb_units, -p.force(), time_step);
+      }
     }
-    return;
   }
-#endif
-  check_no_vs_exist("No LB method was active but virtual sites present.");
+
+  // Clear ghost forces to avoid double counting later
+  init_forces_ghosts(cell_structure.ghost_particles());
 }
 
 void VirtualSitesInertialessTracers::after_lb_propagation(double time_step) {
-  IBM_UpdateParticlePositions(cell_structure.local_particles(), time_step,
-                              this_node);
+  auto const to_md_units =
+      (lattice_switch == ActiveLB::NONE) ? 0. : LB::get_lattice_speed();
+
+  // Advect particles
+  for (auto &p : cell_structure.local_particles()) {
+    if (!p.is_virtual())
+      continue;
+    if (!lb_active_check()) {
+      return;
+    }
+    p.v() = lb_lbinterpolation_get_interpolated_velocity(p.pos()) * to_md_units;
+    for (unsigned int i = 0; i < 3; i++) {
+      if (!p.is_fixed_along(i)) {
+        p.pos()[i] += p.v()[i] * time_step;
+      }
+    }
+    // Verlet list update check
+    if ((p.pos() - p.pos_at_last_verlet_update()).norm2() > skin * skin) {
+      cell_structure.set_resort_particles(Cells::RESORT_LOCAL);
+    }
+  }
 }
 #endif // VIRTUAL_SITES_INERTIALESS_TRACERS
diff --git a/src/core/virtual_sites/VirtualSitesInertialessTracers.hpp b/src/core/virtual_sites/VirtualSitesInertialessTracers.hpp
index 92abc09d7b7..0fbc25bb1be 100644
--- a/src/core/virtual_sites/VirtualSitesInertialessTracers.hpp
+++ b/src/core/virtual_sites/VirtualSitesInertialessTracers.hpp
@@ -29,7 +29,7 @@
  * instantaneously transferred to the fluid
  */
 class VirtualSitesInertialessTracers : public VirtualSites {
-  void after_force_calc() override;
+  void after_force_calc(double time_step) override;
   void after_lb_propagation(double time_step) override;
 };
 
diff --git a/src/core/virtual_sites/lb_inertialess_tracers.cpp b/src/core/virtual_sites/lb_inertialess_tracers.cpp
deleted file mode 100644
index 2fd0015a73c..00000000000
--- a/src/core/virtual_sites/lb_inertialess_tracers.cpp
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/// \file
-/// \brief Main of the Bayreuth Immersed-Boundary implementation
-
-#include "config/config.hpp"
-
-#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS
-
-#include "virtual_sites/lb_inertialess_tracers.hpp"
-
-#include "Particle.hpp"
-#include "cells.hpp"
-#include "grid.hpp"
-#include "grid_based_algorithms/lb.hpp"
-#include "grid_based_algorithms/lb_boundaries.hpp"
-#include "grid_based_algorithms/lb_interface.hpp"
-#include "grid_based_algorithms/lb_particle_coupling.hpp"
-#include "integrate.hpp"
-#include "lb_inertialess_tracers_cuda_interface.hpp"
-
-#include <utils/Vector.hpp>
-#include <utils/math/int_pow.hpp>
-#include <utils/math/sqr.hpp>
-
-#include <cstddef>
-#include <unordered_set>
-
-void CoupleIBMParticleToFluid(Particle const &p, Utils::Vector3d const &pos);
-void ParticleVelocitiesFromLB_CPU();
-bool IsHalo(std::size_t indexCheck);
-
-static bool *isHaloCache = nullptr;
-
-/** Put the calculated force stored on the ibm particles into the fluid by
- *  updating the @ref lbfields structure.
- *  Called from the integration loop right after the forces have been
- *  calculated.
- */
-void IBM_ForcesIntoFluid_CPU() {
-  // Update the forces on the ghost particles
-  cell_structure.ghosts_update(Cells::DATA_PART_FORCE);
-
-  // Loop over local cells
-  for (auto &p : cell_structure.local_particles()) {
-    if (p.is_virtual()) {
-      CoupleIBMParticleToFluid(p, p.pos());
-    }
-  }
-
-  for (auto &p : cell_structure.ghost_particles()) {
-    // for ghost particles we have to check if they lie
-    // in the range of the local lattice nodes
-    if (in_local_halo(p.pos())) {
-      if (p.is_virtual()) {
-        CoupleIBMParticleToFluid(p, p.pos());
-      }
-    }
-  }
-}
-
-/** Interpolate LB velocity at the particle positions and propagate the
- *  particles.
- *  Called from the integration loop right after the LB update.
- */
-void IBM_UpdateParticlePositions(ParticleRange const &particles,
-                                 double time_step, int this_node) {
-  // Get velocities
-  if (lattice_switch == ActiveLB::CPU)
-    ParticleVelocitiesFromLB_CPU();
-#ifdef CUDA
-  if (lattice_switch == ActiveLB::GPU)
-    ParticleVelocitiesFromLB_GPU(particles, this_node);
-#endif
-
-  // Euler integrator
-  for (auto &p : particles) {
-    if (p.is_virtual()) {
-      for (unsigned int axis = 0; axis < 3; axis++) {
-#ifdef EXTERNAL_FORCES
-        if (not p.is_fixed_along(axis))
-#endif
-          p.pos()[axis] += p.v()[axis] * time_step;
-      }
-    }
-  }
-
-  if (cell_structure.check_resort_required(particles, skin)) {
-    cell_structure.set_resort_particles(Cells::RESORT_LOCAL);
-  }
-}
-
-/** Put the momentum of a given particle into the LB fluid. */
-void CoupleIBMParticleToFluid(Particle const &p, Utils::Vector3d const &pos) {
-  // Convert units from MD to LB
-  auto const delta_j = p.force() * Utils::int_pow<4>(lbpar.tau) / lbpar.agrid;
-
-  // Get indices and weights of affected nodes using discrete delta function
-  Utils::Vector<std::size_t, 8> node_index{};
-  Utils::Vector6d delta{};
-  lblattice.map_position_to_lattice(pos, node_index, delta);
-
-  // Loop over all affected nodes
-  for (int z = 0; z < 2; z++) {
-    for (int y = 0; y < 2; y++) {
-      for (int x = 0; x < 2; x++) {
-        // Do not put force into a halo node
-        if (!IsHalo(static_cast<int>(node_index[(z * 2 + y) * 2 + x]))) {
-          // Add force into the lbfields structure
-          auto &local_f =
-              lbfields[node_index[(z * 2 + y) * 2 + x]].force_density;
-
-          local_f +=
-              delta[3 * x + 0] * delta[3 * y + 1] * delta[3 * z + 2] * delta_j;
-        }
-      }
-    }
-  }
-}
-
-/** Calculate the LB fluid velocity at a particle position.
- *  Very similar to the velocity interpolation done in standard ESPResSo,
- *  except that we add the f/2 contribution, cf. @cite guo02a.
- *  The fluid velocity is obtained by linear interpolation,
- *  cf. eq. (11) in @cite ahlrichs99a.
- */
-template <bool ReturnVelocity>
-Utils::Vector3d GetIBMInterpolatedVelocity(Utils::Vector3d const &pos) {
-  auto const f_ext =
-      lbpar.ext_force_density * Utils::sqr(lbpar.agrid * lbpar.tau);
-
-  /* determine elementary lattice cell surrounding the particle
-     and the relative position of the particle in this cell */
-  Utils::Vector<std::size_t, 8> node_index{};
-  Utils::Vector6d delta{};
-  lblattice.map_position_to_lattice(pos, node_index, delta);
-
-  // This for the f/2 contribution to the velocity
-  Utils::Vector3d force_added = {};
-  Utils::Vector3d interpolated_u = {};
-
-  for (int z = 0; z < 2; z++) {
-    for (int y = 0; y < 2; y++) {
-      for (int x = 0; x < 2; x++) {
-        auto const index = node_index[(z * 2 + y) * 2 + x];
-        auto const local_delta =
-            delta[3 * x + 0] * delta[3 * y + 1] * delta[3 * z + 2];
-        const auto &f = lbfields[index].force_density_buf;
-
-        double local_density;
-        Utils::Vector3d local_j;
-
-        // This can be done more easily without copying the code twice.
-        // We probably can even set the boundary velocity directly.
-#ifdef LB_BOUNDARIES
-        if (lbfields[index].boundary) {
-          if (ReturnVelocity) {
-            local_density = lbpar.density;
-            auto const i = lbfields[index].boundary - 1;
-            local_j = lbpar.density * LBBoundaries::lbboundaries[i]->velocity();
-          }
-        } else
-#endif
-        {
-          auto const modes = lb_calc_modes(static_cast<int>(index), lbfluid);
-          local_density = lbpar.density + modes[0];
-
-          if (ReturnVelocity) {
-            // Add the +f/2 contribution!!
-            local_j[0] = modes[1] + f[0] / 2.;
-            local_j[1] = modes[2] + f[1] / 2.;
-            local_j[2] = modes[3] + f[2] / 2.;
-          } else {
-            // Keep track of the forces that we added to the fluid.
-            // This is necessary for communication because this part is executed
-            // for real and ghost particles.
-            // Later on we sum the real and ghost contributions.
-            force_added += local_delta * (f - f_ext) / (2. * local_density);
-          }
-        }
-
-        // Interpolate velocity
-        if (ReturnVelocity) {
-          interpolated_u += local_j * (local_delta / local_density);
-        }
-      }
-    }
-  }
-
-  auto const unit_conversion = lbpar.agrid / lbpar.tau;
-  if (ReturnVelocity) {
-    return interpolated_u * unit_conversion;
-  }
-  return force_added * unit_conversion;
-}
-
-/** Build a cache structure which contains a flag for each LB node whether
- * that node is a halo node or not.
- */
-bool IsHalo(std::size_t indexCheck) {
-  // First call --> build cache
-  if (isHaloCache == nullptr) {
-    isHaloCache = new bool[lblattice.halo_grid_volume];
-    // Assume everything is a halo and correct in the next step
-    for (int i = 0; i < lblattice.halo_grid_volume; i++)
-      isHaloCache[i] = true;
-    // Loop through and check where indexCheck occurs
-    auto index = lblattice.halo_offset;
-    for (int z = 1; z <= lblattice.grid[2]; z++) {
-      for (int y = 1; y <= lblattice.grid[1]; y++) {
-        for (int x = 1; x <= lblattice.grid[0]; x++) {
-          isHaloCache[index] = false;
-          ++index;
-        }
-        index += 2; /* skip halo region */
-      }
-      index += 2 * lblattice.halo_grid[0]; /* skip halo region */
-    }
-  }
-
-  // Return
-  return isHaloCache[indexCheck];
-}
-
-/**
- * @brief Check if a position is within the local box + halo.
- *
- * @param pos Position to check
- * @param halo Halo
- *
- * @return True iff the point is inside of the box up to halo.
- */
-inline bool in_local_domain(Utils::Vector3d const &pos, double halo = 0.) {
-  auto const halo_vec = Utils::Vector3d::broadcast(halo);
-
-  return in_box(
-      pos, {local_geo.my_left() - halo_vec, local_geo.my_right() + halo_vec});
-}
-
-/** Get particle velocities from LB and set the velocity field in the
- * particles data structure.
- */
-void ParticleVelocitiesFromLB_CPU() {
-  std::unordered_set<int> coupled_ghost_particles;
-
-  // Loop over particles in local cells.
-  // Here all contributions are included: velocity, external force and
-  // particle force.
-  for (auto &p : cell_structure.local_particles()) {
-    if (p.is_virtual() and should_be_coupled(p, coupled_ghost_particles)) {
-      for (auto pos : positions_in_halo(p.pos(), box_geo)) {
-        if (in_local_domain(pos)) {
-          p.force() = GetIBMInterpolatedVelocity<true>(pos);
-          break;
-        }
-      }
-    }
-  }
-  // Loop over particles in ghost cells
-  // Here we only add the particle forces stemming from the ghosts
-  for (auto &p : cell_structure.ghost_particles()) {
-    if (p.is_virtual() and should_be_coupled(p, coupled_ghost_particles)) {
-      for (auto pos : positions_in_halo(p.pos(), box_geo)) {
-        if (in_local_domain(pos)) {
-          p.force() = GetIBMInterpolatedVelocity<true>(pos);
-          break;
-        }
-      }
-    } else {
-      p.force() = {};
-    }
-  }
-
-  // Now the local particles contain a velocity (stored in the force field)
-  // and the ghosts contain the rest of the velocity in their respective force
-  // fields.
-  // We need to add these. Since we have stored them in the force, not the
-  // velocity fields, we can use the standard force communicator and then
-  // transfer to the velocity afterwards.
-  // Note that this overwrites the actual force which would be a problem for
-  // real particles.
-  // This could be solved by keeping a backup of the local forces before this
-  // operation is attempted.
-  cell_structure.ghosts_reduce_forces();
-
-  // Transfer to velocity field
-  for (auto &p : cell_structure.local_particles()) {
-    if (p.is_virtual()) {
-      p.v() = p.force();
-    }
-  }
-}
-#endif // VIRTUAL_SITES_INERTIALESS_TRACERS
diff --git a/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu b/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu
deleted file mode 100644
index a1385036be2..00000000000
--- a/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu
+++ /dev/null
@@ -1,408 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-// This is an internal file of the IMMERSED BOUNDARY implementation
-// It should not be included by any main ESPResSo routines
-// Functions to be exported for ESPResSo are in ibm_main.hpp
-
-#include "config/config.hpp"
-
-#if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) && defined(CUDA)
-
-#include "virtual_sites/lb_inertialess_tracers.hpp"
-#include "virtual_sites/lb_inertialess_tracers_cuda_interface.hpp"
-
-#include "Particle.hpp"
-#include "cuda_interface.hpp"
-#include "cuda_utils.cuh"
-#include "grid_based_algorithms/lb_boundaries.hpp"
-#include "grid_based_algorithms/lbgpu.cuh"
-#include "grid_based_algorithms/lbgpu.hpp"
-
-#include <cuda.h>
-
-#include <cstddef>
-
-// Other functions for internal use
-void InitCUDA_IBM(std::size_t numParticles);
-
-// Our own global variables
-IBM_CUDA_ParticleDataInput *IBM_ParticleDataInput_device = nullptr;
-IBM_CUDA_ParticleDataOutput *IBM_ParticleDataOutput_device = nullptr;
-bool IBM_initialized = false;
-std::size_t IBM_numParticlesCache = 0; // To detect a change in particle number
-                                       // which requires reallocation of memory
-
-// These variables are defined in lbgpu_cuda.cu, but we also want them here
-extern LB_node_force_density_gpu node_f;
-extern LB_nodes_gpu *current_nodes;
-
-// These variables are static in lbgpu_cuda.cu, so we need to duplicate them
-// here. They are initialized in ForcesIntoFluid. The pointers are on the host,
-// but point into device memory.
-LB_parameters_gpu *para_gpu = nullptr;
-float *lb_boundary_velocity_IBM = nullptr;
-
-static constexpr unsigned int threads_per_block = 64;
-
-__global__ void
-ForcesIntoFluid_Kernel(const IBM_CUDA_ParticleDataInput *const particle_input,
-                       std::size_t number_of_particles,
-                       LB_node_force_density_gpu node_f,
-                       const LB_parameters_gpu *const paraP) {
-  const unsigned int particleIndex = blockIdx.y * gridDim.x * blockDim.x +
-                                     blockDim.x * blockIdx.x + threadIdx.x;
-  const LB_parameters_gpu &para = *paraP;
-
-  if (particleIndex < number_of_particles &&
-      particle_input[particleIndex].is_virtual) {
-    // MD to LB units: mass is not affected, length are scaled by agrid, times
-    // by para.tau
-    const float factor = 1 / para.agrid * para.tau * para.tau;
-    const float particleForce[3] = {particle_input[particleIndex].f[0] * factor,
-                                    particle_input[particleIndex].f[1] * factor,
-                                    particle_input[particleIndex].f[2] *
-                                        factor};
-    const float pos[3] = {particle_input[particleIndex].pos[0],
-                          particle_input[particleIndex].pos[1],
-                          particle_input[particleIndex].pos[2]};
-
-    // First part is the same as for interpolation --> merge into a single
-    // function
-    float temp_delta[6];
-    float delta[8];
-    int my_left[3];
-    unsigned int node_index[8];
-    for (int i = 0; i < 3; ++i) {
-      const float scaledpos = pos[i] / para.agrid - 0.5f;
-      my_left[i] = static_cast<int>(floorf(scaledpos));
-      temp_delta[3 + i] = scaledpos - static_cast<float>(my_left[i]);
-      temp_delta[i] = 1.f - temp_delta[3 + i];
-    }
-
-    delta[0] = temp_delta[0] * temp_delta[1] * temp_delta[2];
-    delta[1] = temp_delta[3] * temp_delta[1] * temp_delta[2];
-    delta[2] = temp_delta[0] * temp_delta[4] * temp_delta[2];
-    delta[3] = temp_delta[3] * temp_delta[4] * temp_delta[2];
-    delta[4] = temp_delta[0] * temp_delta[1] * temp_delta[5];
-    delta[5] = temp_delta[3] * temp_delta[1] * temp_delta[5];
-    delta[6] = temp_delta[0] * temp_delta[4] * temp_delta[5];
-    delta[7] = temp_delta[3] * temp_delta[4] * temp_delta[5];
-
-    // modulo for negative numbers is strange at best, shift to make sure we are
-    // positive
-    auto const x = static_cast<unsigned int>(my_left[0] + para.dim[0]);
-    auto const y = static_cast<unsigned int>(my_left[1] + para.dim[1]);
-    auto const z = static_cast<unsigned int>(my_left[2] + para.dim[2]);
-
-    node_index[0] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * (z % para.dim[2]);
-    node_index[1] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * (z % para.dim[2]);
-    node_index[2] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * (z % para.dim[2]);
-    node_index[3] = (x + 1) % para.dim[0] +
-                    para.dim[0] * ((y + 1) % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * (z % para.dim[2]);
-    node_index[4] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]);
-    node_index[5] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]);
-    node_index[6] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]);
-    node_index[7] = (x + 1) % para.dim[0] +
-                    para.dim[0] * ((y + 1) % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]);
-
-    for (int i = 0; i < 8; ++i) {
-      // Atomic add is essential because this runs in parallel!
-      atomicAdd(&(node_f.force_density[node_index[i]][0]),
-                (particleForce[0] * delta[i]));
-      atomicAdd(&(node_f.force_density[node_index[i]][1]),
-                (particleForce[1] * delta[i]));
-      atomicAdd(&(node_f.force_density[node_index[i]][2]),
-                (particleForce[2] * delta[i]));
-    }
-  }
-}
-
-__global__ void ParticleVelocitiesFromLB_Kernel(
-    LB_nodes_gpu n_curr,
-    const IBM_CUDA_ParticleDataInput *const particles_input,
-    std::size_t number_of_particles,
-    IBM_CUDA_ParticleDataOutput *const particles_output,
-    LB_node_force_density_gpu node_f, const float *const lb_boundary_velocity,
-    const LB_parameters_gpu *const paraP) {
-
-  const unsigned int particleIndex = blockIdx.y * gridDim.x * blockDim.x +
-                                     blockDim.x * blockIdx.x + threadIdx.x;
-
-  const LB_parameters_gpu &para = *paraP;
-
-  if (particleIndex < number_of_particles &&
-      particles_input[particleIndex].is_virtual) {
-
-    // Get position
-    float pos[3] = {particles_input[particleIndex].pos[0],
-                    particles_input[particleIndex].pos[1],
-                    particles_input[particleIndex].pos[2]};
-    float v[3] = {0};
-
-    // This part is copied from get_interpolated_velocity
-    // + we add the force + we consider boundaries
-
-    float temp_delta[6];
-    float delta[8];
-    int my_left[3];
-    unsigned int node_index[8];
-    Utils::Array<float, 4> mode;
-#pragma unroll
-    for (int i = 0; i < 3; ++i) {
-      const float scaledpos = pos[i] / para.agrid - 0.5f;
-      my_left[i] = static_cast<int>(floorf(scaledpos));
-      temp_delta[3 + i] = scaledpos - static_cast<float>(my_left[i]);
-      temp_delta[i] = 1.f - temp_delta[3 + i];
-    }
-
-    delta[0] = temp_delta[0] * temp_delta[1] * temp_delta[2];
-    delta[1] = temp_delta[3] * temp_delta[1] * temp_delta[2];
-    delta[2] = temp_delta[0] * temp_delta[4] * temp_delta[2];
-    delta[3] = temp_delta[3] * temp_delta[4] * temp_delta[2];
-    delta[4] = temp_delta[0] * temp_delta[1] * temp_delta[5];
-    delta[5] = temp_delta[3] * temp_delta[1] * temp_delta[5];
-    delta[6] = temp_delta[0] * temp_delta[4] * temp_delta[5];
-    delta[7] = temp_delta[3] * temp_delta[4] * temp_delta[5];
-
-    // modulo for negative numbers is strange at best, shift to make sure we are
-    // positive
-    auto const x = static_cast<unsigned int>(my_left[0] + para.dim[0]);
-    auto const y = static_cast<unsigned int>(my_left[1] + para.dim[1]);
-    auto const z = static_cast<unsigned int>(my_left[2] + para.dim[2]);
-
-    node_index[0] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * (z % para.dim[2]);
-    node_index[1] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * (z % para.dim[2]);
-    node_index[2] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * (z % para.dim[2]);
-    node_index[3] = (x + 1) % para.dim[0] +
-                    para.dim[0] * ((y + 1) % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * (z % para.dim[2]);
-    node_index[4] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]);
-    node_index[5] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]);
-    node_index[6] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]);
-    node_index[7] = (x + 1) % para.dim[0] +
-                    para.dim[0] * ((y + 1) % para.dim[1]) +
-                    para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]);
-
-    for (int i = 0; i < 8; ++i) {
-      double local_rho;
-      double local_j[3];
-#ifdef LB_BOUNDARIES_GPU
-      if (n_curr.boundary[node_index[i]]) {
-        // Boundary node
-        auto const boundary_index =
-            static_cast<int>(n_curr.boundary[node_index[i]]);
-
-        // lb_boundary_velocity is given in MD units --> convert to LB and
-        // reconvert back at the end of this function
-        local_rho = para.rho;
-        local_j[0] =
-            para.rho * lb_boundary_velocity[3 * (boundary_index - 1) + 0];
-        local_j[1] =
-            para.rho * lb_boundary_velocity[3 * (boundary_index - 1) + 1];
-        local_j[2] =
-            para.rho * lb_boundary_velocity[3 * (boundary_index - 1) + 2];
-
-      } else
-#endif
-      {
-        calc_mass_and_momentum_mode(mode, n_curr, node_index[i]);
-        local_rho = para.rho + mode[0];
-
-        // Add the +f/2 contribution!!
-        local_j[0] = mode[1] + node_f.force_density_buf[node_index[i]][0] / 2.f;
-        local_j[1] = mode[2] + node_f.force_density_buf[node_index[i]][1] / 2.f;
-        local_j[2] = mode[3] + node_f.force_density_buf[node_index[i]][2] / 2.f;
-      }
-
-      // Interpolate velocity
-      v[0] += static_cast<float>(delta[i] * local_j[0] / local_rho);
-      v[1] += static_cast<float>(delta[i] * local_j[1] / local_rho);
-      v[2] += static_cast<float>(delta[i] * local_j[2] / local_rho);
-    }
-
-    // Rescale and store output
-    particles_output[particleIndex].v[0] = v[0] * para.agrid / para.tau;
-    particles_output[particleIndex].v[1] = v[1] * para.agrid / para.tau;
-    particles_output[particleIndex].v[2] = v[2] * para.agrid / para.tau;
-  }
-}
-
-__global__ void ResetLBForces_Kernel(LB_node_force_density_gpu node_f,
-                                     const LB_parameters_gpu *const paraP) {
-
-  const std::size_t index = blockIdx.y * gridDim.x * blockDim.x +
-                            blockDim.x * blockIdx.x + threadIdx.x;
-  const LB_parameters_gpu &para = *paraP;
-
-  if (index < para.number_of_nodes) {
-    const float force_factor = powf(para.agrid, 2) * para.tau * para.tau;
-    if (para.external_force_density) {
-      node_f.force_density[index][0] = para.ext_force_density[0] * force_factor;
-      node_f.force_density[index][1] = para.ext_force_density[1] * force_factor;
-      node_f.force_density[index][2] = para.ext_force_density[2] * force_factor;
-    } else {
-      node_f.force_density[index] = {};
-    }
-  }
-}
-
-/** Transfer particle forces into the LB fluid.
- *  Called from @ref integrate.
- *  This must be the first CUDA-IBM function to be called because it also does
- *  some initialization.
- */
-void IBM_ForcesIntoFluid_GPU(ParticleRange const &particles, int this_node) {
-  // This function does
-  // (1) Gather forces from all particles via MPI
-  // (2) Copy forces to the GPU
-  // (3) interpolate on the LBM grid and spread forces
-
-  auto const numParticles = gpu_get_particle_pointer().size();
-
-  // Storage only needed on head node
-  if (this_node == 0 &&
-      (IBM_ParticleDataInput_host.empty() || !IBM_initialized ||
-       numParticles != IBM_numParticlesCache))
-    InitCUDA_IBM(numParticles);
-
-  // We gather particle positions and forces from all nodes
-  IBM_cuda_mpi_get_particles(particles);
-
-  // GPU only on head node
-  if (this_node == 0 && numParticles > 0) {
-
-    // Copy data to device
-    cuda_safe_mem(cudaMemcpy(IBM_ParticleDataInput_device,
-                             IBM_ParticleDataInput_host.data(),
-                             numParticles * sizeof(IBM_CUDA_ParticleDataInput),
-                             cudaMemcpyHostToDevice));
-
-    // Kernel call for spreading the forces on the LB grid
-    dim3 dim_grid = calculate_dim_grid(static_cast<unsigned>(numParticles), 4,
-                                       threads_per_block);
-    KERNELCALL(ForcesIntoFluid_Kernel, dim_grid, threads_per_block,
-               IBM_ParticleDataInput_device, numParticles, node_f, para_gpu);
-  }
-}
-
-void InitCUDA_IBM(std::size_t const numParticles) {
-
-  // Check if we have to delete
-  if (!IBM_ParticleDataInput_host.empty()) {
-    IBM_ParticleDataInput_host.clear();
-    IBM_ParticleDataOutput_host.clear();
-    cuda_safe_mem(cudaFree(IBM_ParticleDataInput_device));
-    cuda_safe_mem(cudaFree(IBM_ParticleDataOutput_device));
-    cuda_safe_mem(cudaFree(lb_boundary_velocity_IBM));
-  }
-
-  // Back and forth communication of positions and velocities
-  IBM_ParticleDataInput_host.resize(numParticles);
-  IBM_ParticleDataOutput_host.resize(numParticles);
-  cuda_safe_mem(cudaMalloc((void **)&IBM_ParticleDataInput_device,
-                           numParticles * sizeof(IBM_CUDA_ParticleDataInput)));
-  cuda_safe_mem(cudaMalloc((void **)&IBM_ParticleDataOutput_device,
-                           numParticles * sizeof(IBM_CUDA_ParticleDataOutput)));
-
-  // Use LB parameters
-  lb_get_para_pointer(&para_gpu);
-
-  // Copy boundary velocities to the GPU
-  // First put them into correct format
-#ifdef LB_BOUNDARIES_GPU
-  auto *host_lb_boundary_velocity =
-      new float[3 * (LBBoundaries::lbboundaries.size() + 1)];
-
-  for (int n = 0; n < LBBoundaries::lbboundaries.size(); n++) {
-    host_lb_boundary_velocity[3 * n + 0] =
-        static_cast<float>(LBBoundaries::lbboundaries[n]->velocity()[0]);
-    host_lb_boundary_velocity[3 * n + 1] =
-        static_cast<float>(LBBoundaries::lbboundaries[n]->velocity()[1]);
-    host_lb_boundary_velocity[3 * n + 2] =
-        static_cast<float>(LBBoundaries::lbboundaries[n]->velocity()[2]);
-  }
-
-  host_lb_boundary_velocity[3 * LBBoundaries::lbboundaries.size() + 0] = 0.0f;
-  host_lb_boundary_velocity[3 * LBBoundaries::lbboundaries.size() + 1] = 0.0f;
-  host_lb_boundary_velocity[3 * LBBoundaries::lbboundaries.size() + 2] = 0.0f;
-
-  cuda_safe_mem(
-      cudaMalloc((void **)&lb_boundary_velocity_IBM,
-                 3 * LBBoundaries::lbboundaries.size() * sizeof(float)));
-  cuda_safe_mem(
-      cudaMemcpy(lb_boundary_velocity_IBM, host_lb_boundary_velocity,
-                 3 * LBBoundaries::lbboundaries.size() * sizeof(float),
-                 cudaMemcpyHostToDevice));
-
-  delete[] host_lb_boundary_velocity;
-#endif
-
-  IBM_numParticlesCache = numParticles;
-  IBM_initialized = true;
-}
-
-/** Call a kernel function to interpolate the velocity at each IBM particle's
- *  position. Store velocity in the particle data structure.
- */
-void ParticleVelocitiesFromLB_GPU(ParticleRange const &particles,
-                                  int this_node) {
-  // This function performs three steps:
-  // (1) interpolate velocities on GPU
-  // (2) transfer velocities back to CPU
-  // (3) spread velocities to local cells via MPI
-
-  auto const numParticles = gpu_get_particle_pointer().size();
-
-  // GPU only on head node
-  if (this_node == 0 && numParticles > 0) {
-    // Kernel call
-    dim3 dim_grid = calculate_dim_grid(static_cast<unsigned>(numParticles), 4,
-                                       threads_per_block);
-    KERNELCALL(ParticleVelocitiesFromLB_Kernel, dim_grid, threads_per_block,
-               *current_nodes, IBM_ParticleDataInput_device, numParticles,
-               IBM_ParticleDataOutput_device, node_f, lb_boundary_velocity_IBM,
-               para_gpu);
-
-    // Copy velocities from device to host
-    cuda_safe_mem(cudaMemcpy(IBM_ParticleDataOutput_host.data(),
-                             IBM_ParticleDataOutput_device,
-                             numParticles * sizeof(IBM_CUDA_ParticleDataOutput),
-                             cudaMemcpyDeviceToHost));
-  }
-
-  // Scatter to all nodes
-  IBM_cuda_mpi_send_velocities(particles);
-}
-
-#endif
diff --git a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.cpp b/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.cpp
deleted file mode 100644
index 53ab47eb9eb..00000000000
--- a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-// This is an internal file of the IMMERSED BOUNDARY implementation
-// It should not be included by any main ESPResSo routines
-// Functions to be exported for ESPResSo are in ibm_main.hpp
-
-#include "config/config.hpp"
-
-#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS
-
-#include "Particle.hpp"
-#include "communication.hpp"
-#include "grid.hpp"
-#include "integrate.hpp"
-#include "serialization/ibm_cuda_particle_velocities_input.hpp"
-#include "virtual_sites/lb_inertialess_tracers_cuda_interface.hpp"
-
-#include <utils/mpi/gather_buffer.hpp>
-#include <utils/mpi/scatter_buffer.hpp>
-
-#include <vector>
-
-// Variables for communication
-std::vector<IBM_CUDA_ParticleDataInput> IBM_ParticleDataInput_host = {};
-std::vector<IBM_CUDA_ParticleDataOutput> IBM_ParticleDataOutput_host = {};
-
-static void pack_particles(ParticleRange const &particles,
-                           std::vector<IBM_CUDA_ParticleDataInput> &buffer) {
-
-  int i = 0;
-  for (auto const &part : particles) {
-    auto const pos = folded_position(part.pos(), box_geo);
-
-    buffer[i].pos[0] = static_cast<float>(pos[0]);
-    buffer[i].pos[1] = static_cast<float>(pos[1]);
-    buffer[i].pos[2] = static_cast<float>(pos[2]);
-
-    buffer[i].f[0] = static_cast<float>(part.force()[0]);
-    buffer[i].f[1] = static_cast<float>(part.force()[1]);
-    buffer[i].f[2] = static_cast<float>(part.force()[2]);
-
-    buffer[i].is_virtual = part.is_virtual();
-
-    i++;
-  }
-}
-
-/** Gather particle positions on the head node in order to communicate them
- *  to GPU. We transfer all particles (real and virtual), but actually we would
- *  only need the virtual ones. Room for improvement...
- *  Analogous to @ref cuda_mpi_get_particles.
- */
-void IBM_cuda_mpi_get_particles(ParticleRange const &particles) {
-  auto const n_part = particles.size();
-
-  if (this_node > 0) {
-    static std::vector<IBM_CUDA_ParticleDataInput> buffer;
-    buffer.resize(n_part);
-    /* pack local parts into buffer */
-    pack_particles(particles, buffer);
-
-    Utils::Mpi::gather_buffer(buffer, comm_cart);
-  } else {
-    /* Pack own particles */
-    pack_particles(particles, IBM_ParticleDataInput_host);
-
-    Utils::Mpi::gather_buffer(IBM_ParticleDataInput_host, comm_cart);
-  }
-}
-
-static void set_velocities(ParticleRange const &particles,
-                           std::vector<IBM_CUDA_ParticleDataOutput> &buffer) {
-  int i = 0;
-  for (auto &part : particles) {
-    if (part.is_virtual()) {
-      for (int j = 0; j < 3; j++)
-        part.v()[j] = static_cast<double>(buffer[i].v[j]);
-    }
-    i++;
-  }
-}
-
-/** Particle velocities have been communicated from GPU, now transmit to all
- *  nodes. Analogous to @ref cuda_mpi_send_forces.
- */
-void IBM_cuda_mpi_send_velocities(ParticleRange const &particles) {
-  auto const n_part = static_cast<int>(particles.size());
-
-  if (this_node > 0) {
-    static std::vector<IBM_CUDA_ParticleDataOutput> buffer;
-    /* Alloc buffer */
-    buffer.resize(n_part);
-
-    Utils::Mpi::scatter_buffer(buffer.data(), n_part, comm_cart);
-
-    set_velocities(particles, buffer);
-  } else {
-    /* Scatter forces */
-    Utils::Mpi::scatter_buffer(IBM_ParticleDataOutput_host.data(), n_part,
-                               comm_cart);
-
-    set_velocities(particles, IBM_ParticleDataOutput_host);
-  }
-}
-
-#endif
diff --git a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.hpp b/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.hpp
deleted file mode 100644
index ddac4997d20..00000000000
--- a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.hpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-// *******
-// This is an internal file of the IMMERSED BOUNDARY implementation
-// It should not be included by any main ESPResSo routines
-// Functions to be exported for ESPResSo are in ibm_main.hpp
-
-#ifndef IBM_CUDA_INTERFACE_HPP
-#define IBM_CUDA_INTERFACE_HPP
-
-#include "config/config.hpp"
-
-#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS
-
-#include "ParticleRange.hpp"
-
-#include <vector>
-
-// *********** Communication functions ********
-// Implemented in real C++, but called from the ibm_cuda.cu
-void IBM_cuda_mpi_send_velocities(ParticleRange const &particles);
-void IBM_cuda_mpi_get_particles(ParticleRange const &particles);
-
-void ParticleVelocitiesFromLB_GPU(ParticleRange const &particles,
-                                  int this_node);
-
-// ******** data types for CUDA and MPI communication ******
-struct IBM_CUDA_ParticleDataInput {
-  float pos[3];
-  float f[3];
-  bool is_virtual;
-};
-
-struct IBM_CUDA_ParticleDataOutput {
-  float v[3];
-};
-
-// ******** global variables for CUDA and MPI communication ******
-extern std::vector<IBM_CUDA_ParticleDataInput> IBM_ParticleDataInput_host;
-extern std::vector<IBM_CUDA_ParticleDataOutput> IBM_ParticleDataOutput_host;
-
-#endif
-
-#endif
diff --git a/src/python/espressomd/CMakeLists.txt b/src/python/espressomd/CMakeLists.txt
index 5ab4542b2fe..14b3031c224 100644
--- a/src/python/espressomd/CMakeLists.txt
+++ b/src/python/espressomd/CMakeLists.txt
@@ -47,6 +47,7 @@ file(GLOB cython_AUX *.py)
 set(cython_AUX "${cython_AUX}" CACHE INTERNAL "cython_AUX")
 
 add_subdirectory(io)
+add_subdirectory(detail)
 
 list(REMOVE_DUPLICATES cython_SRC)
 
diff --git a/src/script_interface/lbboundaries/CMakeLists.txt b/src/python/espressomd/detail/CMakeLists.txt
similarity index 81%
rename from src/script_interface/lbboundaries/CMakeLists.txt
rename to src/python/espressomd/detail/CMakeLists.txt
index c98d7a553e5..926a01b5f2e 100644
--- a/src/script_interface/lbboundaries/CMakeLists.txt
+++ b/src/python/espressomd/detail/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2020-2022 The ESPResSo project
+# Copyright (C) 2023 The ESPResSo project
 #
 # This file is part of ESPResSo.
 #
@@ -17,5 +17,5 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 
-target_sources(espresso_script_interface
-               PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/initialize.cpp)
+configure_file(__init__.py __init__.py COPYONLY)
+configure_file(walberla.py walberla.py COPYONLY)
diff --git a/src/python/espressomd/detail/__init__.py b/src/python/espressomd/detail/__init__.py
new file mode 100644
index 00000000000..0d4274ef1c8
--- /dev/null
+++ b/src/python/espressomd/detail/__init__.py
@@ -0,0 +1,18 @@
+#
+# Copyright (C) 2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
diff --git a/src/python/espressomd/detail/walberla.py b/src/python/espressomd/detail/walberla.py
new file mode 100644
index 00000000000..6ec64dc94a4
--- /dev/null
+++ b/src/python/espressomd/detail/walberla.py
@@ -0,0 +1,171 @@
+#
+# Copyright (C) 2020-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import itertools
+import numpy as np
+
+import espressomd.shapes
+import espressomd.code_features
+from espressomd.script_interface import ScriptInterfaceHelper, script_interface_register
+
+
+@script_interface_register
+class LatticeWalberla(ScriptInterfaceHelper):
+    """
+    Interface to a waBLerla lattice.
+    """
+    _so_name = "walberla::LatticeWalberla"
+    _so_creation_policy = "GLOBAL"
+
+    def __init__(self, *args, **kwargs):
+        if not espressomd.code_features.has_features("WALBERLA"):
+            raise NotImplementedError("Feature WALBERLA not compiled in")
+
+        if "sip" not in kwargs:
+            params = self.default_params()
+            params.update(kwargs)
+            super().__init__(*args, **params)
+            self._params = {k: getattr(self, k) for k in self.valid_keys()}
+        else:
+            super().__init__(**kwargs)
+
+    def valid_keys(self):
+        return {"agrid", "n_ghost_layers"}
+
+    def required_keys(self):
+        return self.valid_keys()
+
+    def default_params(self):
+        return {}
+
+    def get_node_indices_inside_shape(self, shape):
+        if not isinstance(shape, espressomd.shapes.Shape):
+            raise ValueError(
+                "Parameter 'shape' must be derived from espressomd.shapes.Shape")
+        agrid = self.agrid
+        idxs = itertools.product(*map(range, self.shape))
+        for idx in idxs:
+            pos = (np.asarray(idx) + 0.5) * agrid
+            if shape.is_inside(position=pos):
+                yield idx
+
+    def get_shape_bitmask(self, shape):
+        """Create a bitmask for the given shape."""
+        if not isinstance(shape, espressomd.shapes.Shape):
+            raise ValueError(
+                "Parameter 'shape' must be derived from espressomd.shapes.Shape")
+        mask_flat = shape.call_method("rasterize", grid_size=self.shape,
+                                      grid_spacing=self.agrid, grid_offset=0.5)
+        return np.reshape(mask_flat, self.shape).astype(bool)
+
+
+class LatticeModel:
+
+    def save_checkpoint(self, path, binary):
+        tmp_path = path + ".__tmp__"
+        self.call_method("save_checkpoint", path=tmp_path, mode=int(binary))
+        os.rename(tmp_path, path)
+
+    def load_checkpoint(self, path, binary):
+        return self.call_method("load_checkpoint", path=path, mode=int(binary))
+
+    def get_nodes_inside_shape(self, shape=None):
+        """
+        Provide a generator for iterating over all nodes inside the given shape.
+
+        Parameters
+        ----------
+        shape : :class:`espressomd.shapes.Shape`
+            Shape to use as filter.
+
+        """
+        for idx in self.lattice.get_node_indices_inside_shape(shape):
+            yield self[idx]
+
+    def get_shape_bitmask(self, shape=None):
+        """
+        Create a bitmask for the given shape.
+
+        Parameters
+        ----------
+        shape : :class:`espressomd.shapes.Shape`
+            Shape to rasterize.
+
+        """
+        return self.lattice.get_shape_bitmask(shape=shape)
+
+
+def get_slice_bounding_box(slices, grid_size):
+    shape = []
+    slice_lower_corner = []
+    slice_upper_corner = []
+    for i in range(3):
+        indices = np.arange(grid_size[i])
+        if isinstance(slices[i], slice):
+            if slices[i].step not in [None, 1]:
+                raise NotImplementedError(
+                    "Slices with step != 1 are not supported")
+            indices = indices[slices[i]]
+        else:
+            if isinstance(slices[i], (int, np.integer)):
+                indices = [indices[slices[i]]]
+            else:
+                raise NotImplementedError(
+                    "Tuple-based indexing is not supported")
+        if len(indices) == 0:
+            slice_lower_corner.append(0)
+            slice_upper_corner.append(0)
+            shape.append(0)
+        elif isinstance(slices[i], (int, np.integer)):
+            slice_lower_corner.append(indices[0])
+            slice_upper_corner.append(indices[0] + 1)
+        else:
+            slice_lower_corner.append(indices[0])
+            slice_upper_corner.append(indices[-1] + 1)
+            shape.append(len(indices))
+    return {"slice_lower_corner": slice_lower_corner,
+            "slice_upper_corner": slice_upper_corner,
+            "shape": shape}
+
+
+class VTKOutputBase(ScriptInterfaceHelper):
+
+    def __init__(self, *args, **kwargs):
+        if not espressomd.code_features.has_features("WALBERLA"):
+            raise NotImplementedError("Feature WALBERLA not compiled in")
+        if "sip" not in kwargs:
+            params = self.default_params()
+            params.update(kwargs)
+            if isinstance(params["observables"], str):
+                params["observables"] = [params["observables"]]
+            super().__init__(*args, **params)
+        else:
+            super().__init__(**kwargs)
+
+    def valid_observables(self):
+        return set(self.call_method("get_valid_observable_names"))
+
+    def valid_keys(self):
+        return {"delta_N", "execution_count", "observables", "identifier",
+                "base_folder", "prefix", "enabled"}
+
+    def default_params(self):
+        return {"delta_N": 0, "enabled": True, "execution_count": 0,
+                "base_folder": "vtk_out", "prefix": "simulation_step"}
diff --git a/src/python/espressomd/electrokinetics.pxd b/src/python/espressomd/electrokinetics.pxd
deleted file mode 100644
index ab268adab32..00000000000
--- a/src/python/espressomd/electrokinetics.pxd
+++ /dev/null
@@ -1,166 +0,0 @@
-# Copyright (C) 2010-2022 The ESPResSo project
-#
-# This file is part of ESPResSo.
-#
-# ESPResSo is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# ESPResSo is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-include "myconfig.pxi"
-from libcpp cimport bool
-
-IF ELECTROKINETICS and CUDA:
-    cdef extern from "grid_based_algorithms/electrokinetics.hpp":
-
-        DEF MAX_NUMBER_OF_SPECIES = 10
-
-        # EK data struct
-        IF EK_DEBUG:
-            ctypedef struct EKParameters:
-                float agrid
-                float time_step
-                float lb_density
-                unsigned int dim_x
-                unsigned int dim_y
-                unsigned int dim_z
-                unsigned int number_of_nodes
-                float viscosity
-                float bulk_viscosity
-                float gamma_odd
-                float gamma_even
-                float friction
-                float T
-                float prefactor
-                float lb_ext_force_density[3]
-                unsigned int number_of_species
-                int reaction_species[3]
-                float rho_reactant_reservoir
-                float rho_product0_reservoir
-                float rho_product1_reservoir
-                float reaction_ct_rate
-                float reaction_fraction_0
-                float reaction_fraction_1
-                float mass_reactant
-                float mass_product0
-                float mass_product1
-                int stencil
-                int number_of_boundary_nodes
-                float fluctuation_amplitude
-                bool fluctuations
-                bool advection
-                bool fluidcoupling_ideal_contribution
-                float * charge_potential
-                float * j
-                float * lb_force_density_previous
-                float * j_fluc
-                float * rho[MAX_NUMBER_OF_SPECIES]
-                int species_index[MAX_NUMBER_OF_SPECIES]
-                float density[MAX_NUMBER_OF_SPECIES]
-                float D[MAX_NUMBER_OF_SPECIES]
-                float d[MAX_NUMBER_OF_SPECIES]
-                float valency[MAX_NUMBER_OF_SPECIES]
-                float ext_force_density[3][MAX_NUMBER_OF_SPECIES]
-                char * node_is_catalyst
-                bool es_coupling
-                float * charge_potential_buffer
-                float * electric_field
-        ELSE:
-            ctypedef struct EKParameters:
-                float agrid
-                float time_step
-                float lb_density
-                unsigned int dim_x
-                unsigned int dim_y
-                unsigned int dim_z
-                unsigned int number_of_nodes
-                float viscosity
-                float bulk_viscosity
-                float gamma_odd
-                float gamma_even
-                float friction
-                float T
-                float prefactor
-                float lb_ext_force_density[3]
-                unsigned int number_of_species
-                int reaction_species[3]
-                float rho_reactant_reservoir
-                float rho_product0_reservoir
-                float rho_product1_reservoir
-                float reaction_ct_rate
-                float reaction_fraction_0
-                float reaction_fraction_1
-                float mass_reactant
-                float mass_product0
-                float mass_product1
-                int stencil
-                int number_of_boundary_nodes
-                float fluctuation_amplitude
-                bool fluctuations
-                bool advection
-                bool fluidcoupling_ideal_contribution
-                float * charge_potential
-                float * j
-                float * lb_force_density_previous
-                float * rho[MAX_NUMBER_OF_SPECIES]
-                int species_index[MAX_NUMBER_OF_SPECIES]
-                float density[MAX_NUMBER_OF_SPECIES]
-                float D[MAX_NUMBER_OF_SPECIES]
-                float d[MAX_NUMBER_OF_SPECIES]
-                float valency[MAX_NUMBER_OF_SPECIES]
-                float ext_force_density[3][MAX_NUMBER_OF_SPECIES]
-                char * node_is_catalyst
-                bool es_coupling
-                float * charge_potential_buffer
-                float * electric_field
-
-        cdef extern EKParameters ek_parameters
-
-        # EK functions
-        void ek_print_parameters()
-        void ek_print_lbpar()
-        unsigned int ek_calculate_boundary_mass()
-        int ek_print_vtk_density(int species, char * filename)
-        int ek_print_vtk_flux(int species, char * filename)
-        int ek_print_vtk_flux_fluc(int species, char * filename)
-        int ek_print_vtk_flux_link(int species, char * filename)
-        int ek_print_vtk_potential(char * filename)
-        int ek_print_vtk_lbforce_density(char * filename)
-        int ek_lb_print_vtk_density(char * filename)
-        int ek_lb_print_vtk_velocity(char * filename)
-        int ek_init()
-        void ek_set_agrid(float agrid) except +
-        void ek_set_lb_density(float lb_density) except +
-        void ek_set_viscosity(float viscosity) except +
-        void ek_set_friction(float friction) except +
-        void ek_set_lb_ext_force_density(float lb_ext_force_dens_x, float lb_ext_force_dens_y, float lb_ext_force_dens_z) except +
-        void ek_set_T(float T) except +
-        void ek_set_prefactor(float prefactor) except +
-        void ek_set_bulk_viscosity(float bulk_viscosity) except +
-        void ek_set_gamma_odd(float gamma_odd) except +
-        void ek_set_gamma_even(float gamma_even) except +
-        void ek_set_density(int species, float density)
-        void ek_set_D(int species, float D)
-        void ek_set_valency(int species, float valency)
-        void ek_set_ext_force_density(int species, float ext_force_density_x, float ext_force_density_y, float ext_force_density_z)
-        void ek_set_stencil(int stencil) except +
-        void ek_set_advection(bool advection) except +
-        void ek_set_fluctuations(bool fluctuations) except +
-        void ek_set_fluctuation_amplitude(float fluctuation_amplitude) except +
-        void ek_set_fluidcoupling(bool ideal_contribution) except +
-        void ek_set_electrostatics_coupling(bool electrostatics_coupling) except +
-        int ek_node_get_density(int species, int x, int y, int z, double * density)
-        int ek_node_get_flux(int species, int x, int y, int z, double * flux)
-        int ek_node_get_potential(int x, int y, int z, double * potential)
-        int ek_node_set_density(int species, int x, int y, int z, double density)
-        float ek_calculate_net_charge()
-        int ek_neutralize_system(int species)
-
-        int ek_print_vtk_particle_potential(char * filename)
diff --git a/src/python/espressomd/electrokinetics.py b/src/python/espressomd/electrokinetics.py
new file mode 100644
index 00000000000..48532405f8f
--- /dev/null
+++ b/src/python/espressomd/electrokinetics.py
@@ -0,0 +1,700 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import itertools
+import numpy as np
+
+from . import utils
+from .detail.walberla import VTKOutputBase, LatticeWalberla  # pylint: disable=unused-import
+from .script_interface import ScriptInterfaceHelper, script_interface_register, ScriptObjectList, array_variant
+import espressomd.detail.walberla
+import espressomd.shapes
+import espressomd.code_features
+
+
+@script_interface_register
+class EKFFT(ScriptInterfaceHelper):
+    """
+    A FFT-based Poisson solver.
+
+    """
+
+    _so_name = "walberla::EKFFT"
+    _so_creation_policy = "GLOBAL"
+
+    def __init__(self, *args, **kwargs):
+        if not espressomd.code_features.has_features("WALBERLA_FFT"):
+            raise NotImplementedError("Feature WALBERLA not compiled in")
+
+        super().__init__(*args, **kwargs)
+
+
+@script_interface_register
+class EKNone(ScriptInterfaceHelper):
+    """
+    The default Poisson solver.
+    Imposes a null electrostatic potential everywhere.
+
+    """
+    _so_name = "walberla::EKNone"
+    _so_creation_policy = "GLOBAL"
+
+    def __init__(self, *args, **kwargs):
+        if not espressomd.code_features.has_features("WALBERLA"):
+            raise NotImplementedError("Feature WALBERLA not compiled in")
+
+        super().__init__(*args, **kwargs)
+
+
+@script_interface_register
+class EKContainer(ScriptObjectList):
+    _so_name = "walberla::EKContainer"
+
+    def __init__(self, *args, **kwargs):
+        if not espressomd.code_features.has_features("WALBERLA"):
+            raise NotImplementedError("Feature WALBERLA not compiled in")
+
+        super().__init__(*args, **kwargs)
+
+    def add(self, ekspecies):
+        self.call_method("add", object=ekspecies)
+
+    def remove(self, ekspecies):
+        self.call_method("remove", object=ekspecies)
+
+    def clear(self):
+        self.call_method("clear")
+
+
+@script_interface_register
+class EKSpecies(ScriptInterfaceHelper,
+                espressomd.detail.walberla.LatticeModel):
+    """
+    The advection-diffusion-reaction method for chemical species using waLBerla.
+
+    Parameters
+    ----------
+    lattice : :obj:`espressomd.electrokinetics.LatticeWalberla <espressomd.detail.walberla.LatticeWalberla>`
+        Lattice object.
+    tau : :obj:`float`
+        EK time step, must be an integer multiple of the MD time step.
+    density : :obj:`float`
+        Species density.
+    diffusion : :obj:`float`
+        Species diffusion coefficient.
+    valency : :obj:`float`
+        Species valency.
+    advection : :obj:`bool`
+        Whether to enable advection.
+    friction_coupling : :obj:`bool`
+        Whether to enable friction coupling.
+    ext_efield : (3,) array_like of :obj:`float`, optional
+        External electrical field.
+    kT : :obj:`float`, optional
+        Thermal energy of the simulated heat bath (for thermalized species).
+        Set it to 0 for an unthermalized species.
+    single_precision : :obj:`bool`, optional
+        Use single-precision floating-point arithmetic.
+
+    Methods
+    -------
+    clear_density_boundaries()
+        Remove density boundary conditions.
+
+    clear_flux_boundaries()
+        Remove flux boundary conditions.
+
+    clear_boundaries()
+        Remove all boundary conditions.
+
+    save_checkpoint()
+        Write EK densities and boundary conditions to a file.
+
+        Parameters
+        ----------
+        path : :obj:`str`
+            Destination file path.
+        binary : :obj:`bool`
+            Whether to write in binary or ASCII mode.
+
+    load_checkpoint()
+        Load EK densities and boundary conditions from a file.
+
+        Parameters
+        ----------
+        path : :obj:`str`
+            File path to read from.
+        binary : :obj:`bool`
+            Whether to read in binary or ASCII mode.
+
+    add_vtk_writer()
+        Attach a VTK writer.
+
+        Parameters
+        ----------
+        vtk : :class:`espressomd.electrokinetics.VTKOutput`
+            VTK writer.
+
+    remove_vtk_writer()
+        Detach a VTK writer.
+
+        Parameters
+        ----------
+        vtk : :class:`espressomd.electrokinetics.VTKOutput`
+            VTK writer.
+
+    clear_vtk_writers()
+        Detach all VTK writers.
+
+    """
+
+    _so_name = "walberla::EKSpecies"
+    _so_creation_policy = "GLOBAL"
+    _so_bind_methods = (
+        "clear_density_boundaries",
+        "clear_flux_boundaries",
+        "clear_boundaries",
+        "add_vtk_writer",
+        "remove_vtk_writer",
+        "clear_vtk_writers",
+    )
+
+    def __init__(self, *args, **kwargs):
+        if not espressomd.code_features.has_features("WALBERLA"):
+            raise NotImplementedError("Feature WALBERLA not compiled in")
+
+        if "sip" not in kwargs:
+            params = self.default_params()
+            params.update(kwargs)
+            super().__init__(*args, **params)
+        else:
+            super().__init__(**kwargs)
+
+    def default_params(self):
+        return {"single_precision": False,
+                "kT": 0., "ext_efield": [0., 0., 0.]}
+
+    def __getitem__(self, key):
+        if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3:
+            if any(isinstance(item, slice) for item in key):
+                return EKSpeciesSlice(
+                    parent_sip=self, slice_range=key, node_grid=self.shape)
+            else:
+                return EKSpeciesNode(parent_sip=self, index=np.array(key))
+
+        raise TypeError(
+            f"{key} is not a valid index. Should be a point on the "
+            "nodegrid e.g. ek[0,0,0], or a slice, e.g. ek[:,0,0]")
+
+    def add_boundary_from_shape(self, shape, value, boundary_type):
+        """
+        Set boundary conditions from a shape.
+
+        Parameters
+        ----------
+        shape : :obj:`espressomd.shapes.Shape`
+            Shape to rasterize.
+        value : (O,) or (L, M, N, O) array_like of :obj:`float`, optional
+            Boundary numerical value. If a single value of shape ``(O,)``
+            is given, it will be broadcast to all nodes inside the shape,
+            otherwise ``L, M, N`` must be equal to the EK grid dimensions.
+        boundary_type : Union[:class:`~espressomd.electrokinetics.DensityBoundary`,
+                              :class:`~espressomd.electrokinetics.FluxBoundary`] (optional)
+            Type of the boundary condition.
+
+        """
+        if not issubclass(boundary_type, (FluxBoundary, DensityBoundary)):
+            raise TypeError(
+                "Parameter 'boundary_type' must be a subclass of FluxBoundary or DensityBoundary")
+
+        if not hasattr(value, "__iter__"):
+            value = (value, )
+
+        value = np.array(value, dtype=float)
+        utils.check_type_or_throw_except(
+            shape, 1, espressomd.shapes.Shape, "expected an espressomd.shapes.Shape")
+        if issubclass(boundary_type, FluxBoundary):
+            if np.shape(value) not in [(3,), tuple(self.shape) + (3,)]:
+                raise ValueError(
+                    f"Cannot process flux value grid of shape {np.shape(value)}")
+        if issubclass(boundary_type, DensityBoundary):
+            if np.shape(value) not in [(1,), tuple(self.shape) + (1,)]:
+                raise ValueError(
+                    f"Cannot process density value grid of shape {np.shape(value)}")
+
+        mask = self.get_shape_bitmask(shape=shape).astype(int)
+        if issubclass(boundary_type, FluxBoundary):
+            boundaries_update_method = "update_flux_boundary_from_shape"
+        else:
+            boundaries_update_method = "update_density_boundary_from_shape"
+        self.call_method(
+            boundaries_update_method,
+            raster=array_variant(mask.flatten()),
+            values=array_variant(value.flatten()))
+
+
+class FluxBoundary:
+    """
+    Hold flux information for the flux boundary
+    condition at a single node.
+
+    """
+
+    def __init__(self, flux):
+        utils.check_type_or_throw_except(
+            flux, 3, float, "FluxBoundary flux must be three floats")
+        self.flux = flux
+
+
+class DensityBoundary:
+    """
+    Hold density information for the density boundary
+    condition at a single node.
+
+    """
+
+    def __init__(self, density):
+        utils.check_type_or_throw_except(
+            density, 1, float, "DensityBoundary flux must be one float")
+        self.density = density
+
+
+@script_interface_register
+class EKSpeciesNode(ScriptInterfaceHelper):
+    _so_name = "walberla::EKSpeciesNode"
+    _so_creation_policy = "GLOBAL"
+
+    def required_keys(self):
+        return {"parent_sip", "index"}
+
+    def validate_params(self, params):
+        utils.check_required_keys(self.required_keys(), params.keys())
+        utils.check_type_or_throw_except(
+            params["index"], 3, int, "The index of an EK species node consists of three integers.")
+
+    def __init__(self, *args, **kwargs):
+        if "sip" not in kwargs:
+            self.validate_params(kwargs)
+            super().__init__(*args, **kwargs)
+            utils.handle_errors("EKSpeciesNode instantiation failed")
+        else:
+            super().__init__(**kwargs)
+
+    def __reduce__(self):
+        raise NotImplementedError("Cannot serialize EK species node objects")
+
+    def __eq__(self, obj):
+        return isinstance(obj, EKSpeciesNode) and self.index == obj.index
+
+    def __hash__(self):
+        return hash(self.index)
+
+    @property
+    def index(self):
+        return tuple(self._index)
+
+    @index.setter
+    def index(self, value):
+        raise RuntimeError("Parameter 'index' is read-only.")
+
+    @property
+    def density(self):
+        return self.call_method("get_density")
+
+    @density.setter
+    def density(self, value):
+        self.call_method("set_density", value=value)
+
+    @property
+    def is_boundary(self):
+        return self.call_method("get_is_boundary")
+
+    @is_boundary.setter
+    def is_boundary(self, value):
+        raise RuntimeError("Property 'is_boundary' is read-only.")
+
+    @property
+    def density_boundary(self):
+        """
+        Returns
+        -------
+        :class:`~espressomd.electrokinetics.DensityBoundary`
+            If the node is a boundary node
+        ``None``
+            If the node is not a boundary node
+        """
+        density = self.call_method("get_node_density_at_boundary")
+        if density is not None:
+            return DensityBoundary(density)
+        return None
+
+    @density_boundary.setter
+    def density_boundary(self, value):
+        """
+        Parameters
+        ----------
+        value : :class:`~espressomd.electrokinetics.DensityBoundary` or ``None``
+            If value is :class:`~espressomd.EkSpecies.DensityBoundary`,
+            set the node to be a boundary node with the specified density.
+            If value is ``None``, the node will become a domain node.
+
+        """
+
+        if isinstance(value, DensityBoundary):
+            value = value.density
+        elif value is not None:
+            raise TypeError(
+                "Parameter 'value' must be an instance of DensityBoundary or None")
+        self.call_method("set_node_density_at_boundary", value=value)
+
+    @property
+    def flux_boundary(self):
+        """
+        Returns
+        -------
+        :class:`~espressomd.electrokinetics.FluxBoundary`
+            If the node is a boundary node
+        ``None``
+            If the node is not a boundary node
+
+        """
+        flux = self.call_method("get_node_flux_at_boundary")
+        if flux is not None:
+            return FluxBoundary(flux)
+        return None
+
+    @flux_boundary.setter
+    def flux_boundary(self, value):
+        """
+        Parameters
+        ----------
+        value : :class:`~espressomd.electrokinetics.FluxBoundary` or ``None``
+            If value is :class:`~espressomd.EkSpecies.FluxBoundary`,
+            set the node to be a boundary node with the specified flux.
+            If value is ``None``, the node will become a domain node.
+
+        """
+
+        if isinstance(value, FluxBoundary):
+            value = value.flux
+        elif value is not None:
+            raise TypeError(
+                "Parameter 'value' must be an instance of FluxBoundary or None")
+        self.call_method("set_node_flux_at_boundary", value=value)
+
+
+@script_interface_register
+class EKSpeciesSlice(ScriptInterfaceHelper):
+    _so_name = "walberla::EKSpeciesSlice"
+    _so_creation_policy = "GLOBAL"
+
+    def required_keys(self):
+        return {"parent_sip", "slice_range"}
+
+    def validate_params(self, params):
+        utils.check_required_keys(self.required_keys(), params.keys())
+
+    def __init__(self, *args, **kwargs):
+        if "sip" in kwargs:
+            super().__init__(**kwargs)
+        else:
+            self.validate_params(kwargs)
+            slice_range = kwargs.pop("slice_range")
+            grid_size = kwargs["parent_sip"].shape
+            extra_kwargs = espressomd.detail.walberla.get_slice_bounding_box(
+                slice_range, grid_size)
+            node = EKSpeciesNode(index=np.array([0, 0, 0]), **kwargs)
+            super().__init__(*args, node_sip=node, **kwargs, **extra_kwargs)
+            utils.handle_errors("EKSpeciesSlice instantiation failed")
+
+    def __iter__(self):
+        lower, upper = self.call_method("get_slice_ranges")
+        indices = [list(range(lower[i], upper[i])) for i in range(3)]
+        lb_sip = self.call_method("get_ek_sip")
+        for index in itertools.product(*indices):
+            yield EKSpeciesNode(parent_sip=lb_sip, index=np.array(index))
+
+    def __reduce__(self):
+        raise NotImplementedError("Cannot serialize EK species slice objects")
+
+    def _getter(self, attr):
+        value_grid, shape = self.call_method(f"get_{attr}")
+        if attr == "flux_at_boundary":
+            value_grid = [
+                None if x is None else FluxBoundary(x) for x in value_grid]
+        elif attr == "density_at_boundary":
+            value_grid = [
+                None if x is None else DensityBoundary(x) for x in value_grid]
+        return utils.array_locked(np.reshape(value_grid, shape))
+
+    def _setter(self, attr, values):
+        dimensions = self.call_method("get_slice_size")
+        if 0 in dimensions:
+            raise AttributeError(
+                f"Cannot set properties of an empty '{self.__class__.__name__}' object")
+
+        values = np.copy(values)
+        value_shape = tuple(self.call_method("get_value_shape", name=attr))
+        target_shape = (*dimensions, *value_shape)
+
+        # broadcast if only one element was provided
+        if values.shape == value_shape or values.shape == () and value_shape == (1,):
+            values = np.full(target_shape, values)
+
+        def shape_squeeze(shape):
+            return tuple(x for x in shape if x != 1)
+
+        if shape_squeeze(values.shape) != shape_squeeze(target_shape):
+            raise ValueError(
+                f"Input-dimensions of '{attr}' array {values.shape} does not match slice dimensions {target_shape}")
+
+        self.call_method(f"set_{attr}", values=values.flatten())
+
+    @property
+    def density(self):
+        return self._getter("density",)
+
+    @density.setter
+    def density(self, value):
+        self._setter("density", value)
+
+    @property
+    def is_boundary(self):
+        return self._getter("is_boundary")
+
+    @is_boundary.setter
+    def is_boundary(self, value):
+        raise RuntimeError("Property 'is_boundary' is read-only.")
+
+    @property
+    def density_boundary(self):
+        """
+        Returns
+        -------
+        (N, M, L) array_like of :class:`~espressomd.electrokinetics.DensityBoundary`
+            If the nodes are boundary nodes
+        (N, M, L) array_like of ``None``
+            If the nodes are not boundary nodes
+
+        """
+
+        return self._getter("density_at_boundary")
+
+    @density_boundary.setter
+    def density_boundary(self, values):
+        """
+        Parameters
+        ----------
+        values : (N, M, L) array_like of :class:`~espressomd.electrokinetics.DensityBoundary` or obj:`None`
+            If values are :class:`~espressomd.electrokinetics.DensityBoundary`,
+            set the nodes to be boundary nodes with the specified density.
+            If values are obj:`None`, the nodes will become domain nodes.
+
+        """
+
+        type_error_msg = "Parameter 'values' must be an array_like of DensityBoundary or None"
+        values = np.copy(values)
+        if values.dtype != np.dtype("O"):
+            raise TypeError(type_error_msg)
+        for index in np.ndindex(*values.shape):
+            if values[index] is not None:
+                if not isinstance(values[index], DensityBoundary):
+                    raise TypeError(type_error_msg)
+                values[index] = np.array(values[index].density)
+        self._setter("density_at_boundary", values=values)
+
+    @property
+    def flux_boundary(self):
+        """
+        Returns
+        -------
+        (N, M, L) array_like of :class:`~espressomd.electrokinetics.FluxBoundary`
+            If the nodes are boundary nodes
+        (N, M, L) array_like of `None``
+            If the nodes are not boundary nodes
+
+        """
+
+        return self._getter("flux_at_boundary")
+
+    @flux_boundary.setter
+    def flux_boundary(self, values):
+        """
+        Parameters
+        ----------
+        values : (N, M, L) array_like of :class:`~espressomd.electrokinetics.FluxBoundary` or obj:`None`
+            If values are :class:`~espressomd.lb.FluxBoundary`,
+            set the nodes to be boundary nodes with the specified flux.
+            If values are obj:`None`, the nodes will become domain nodes.
+
+        """
+
+        type_error_msg = "Parameter 'values' must be an array_like of FluxBoundary or None"
+        values = np.copy(values)
+        if values.dtype != np.dtype("O"):
+            raise TypeError(type_error_msg)
+        for index in np.ndindex(*values.shape):
+            if values[index] is not None:
+                if not isinstance(values[index], FluxBoundary):
+                    raise TypeError(type_error_msg)
+                values[index] = np.array(values[index].flux)
+        self._setter("flux_at_boundary", values=values)
+
+
+@script_interface_register
+class VTKOutput(VTKOutputBase):
+    """
+    Create a VTK writer.
+
+    Files are written to ``<base_folder>/<identifier>/<prefix>_*.vtu``.
+    Summary is written to ``<base_folder>/<identifier>.pvd``.
+
+    Manual VTK callbacks can be called at any time to take a snapshot
+    of the current state of the EK species.
+
+    Automatic VTK callbacks can be disabled at any time and re-enabled later.
+    Please note that the internal VTK counter is no longer incremented when
+    an automatic callback is disabled, which means the number of EK steps
+    between two frames will not always be an integer multiple of ``delta_N``.
+
+    Parameters
+    ----------
+    identifier : :obj:`str`
+        Name of the VTK writer.
+    observables : :obj:`list`, {'density',}
+        List of observables to write to the VTK files.
+    delta_N : :obj:`int`
+        Write frequency. If this value is 0 (default), the object is a
+        manual VTK callback that must be triggered manually. Otherwise,
+        it is an automatic callback that is added to the time loop and
+        writes every ``delta_N`` EK steps.
+    base_folder : :obj:`str` (optional), default is 'vtk_out'
+        Path to the output VTK folder.
+    prefix : :obj:`str` (optional), default is 'simulation_step'
+        Prefix for VTK files.
+
+    """
+    _so_name = "walberla::EKVTKHandle"
+    _so_creation_policy = "GLOBAL"
+    _so_bind_methods = ("enable", "disable", "write")
+
+    def required_keys(self):
+        return self.valid_keys() - self.default_params().keys()
+
+    def __repr__(self):
+        class_id = f"{self.__class__.__module__}.{self.__class__.__name__}"
+        if self.delta_N:
+            write_when = f"every {self.delta_N} EK steps"
+            if not self.enabled:
+                write_when += " (disabled)"
+        else:
+            write_when = "on demand"
+        return f"<{class_id}: write to '{self.vtk_uid}' {write_when}>"
+
+
+@script_interface_register
+class EKReactant(ScriptInterfaceHelper):
+    _so_name = "walberla::EKReactant"
+    _so_creation_policy = "GLOBAL"
+
+
+class EKBulkReaction(ScriptInterfaceHelper):
+    _so_name = "walberla::EKBulkReaction"
+    _so_creation_policy = "GLOBAL"
+
+
+class EKIndexedReaction(ScriptInterfaceHelper):
+    _so_name = "walberla::EKIndexedReaction"
+    _so_creation_policy = "GLOBAL"
+
+    def add_node_to_index(self, node):
+        self.call_method("set_node_is_boundary", node=node, is_boundary=True)
+
+    def remove_node_from_index(self, node):
+        self.call_method("set_node_is_boundary", node=node, is_boundary=False)
+
+    def __getitem__(self, key):
+        if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3:
+            if any(isinstance(typ, slice) for typ in key):
+                shape = self.shape
+
+                indices = [np.atleast_1d(np.arange(shape[i])[key[i]])
+                           for i in range(3)]
+                dimensions = [ind.size for ind in indices]
+
+                value_grid = np.zeros((*dimensions,), dtype=bool)
+                indices = itertools.product(*map(enumerate, indices))
+                for (i, x), (j, y), (k, z) in indices:
+                    value_grid[i, j, k] = self.call_method(
+                        "get_node_is_boundary", node=(x, y, z))
+
+                return utils.array_locked(value_grid)
+            else:
+                return self.call_method("get_node_is_boundary", node=key)
+        raise TypeError(
+            f"{key} is not a valid index. Should be a point on the nodegrid or a slice")
+
+    def __setitem__(self, key, values):
+        if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3:
+            if any(isinstance(typ, slice) for typ in key):
+                shape = self.shape
+
+                indices = [np.atleast_1d(np.arange(shape[i])[key[i]])
+                           for i in range(3)]
+                dimensions = tuple(ind.size for ind in indices)
+
+                values = np.copy(values)
+
+                # broadcast if only one element was provided
+                if values.shape == ():
+                    values = np.full(dimensions, values)
+                if values.shape != dimensions:
+                    raise ValueError(
+                        f"Input-dimensions of array {values.shape} does not match slice dimensions {dimensions}.")
+
+                indices = itertools.product(*map(enumerate, indices))
+                for (i, x), (j, y), (k, z) in indices:
+                    self.call_method("set_node_is_boundary", node=(
+                        x, y, z), is_boundary=bool(values[i, j, k]))
+            else:
+                return self.call_method(
+                    "set_node_is_boundary", node=key, is_boundary=values)
+        else:
+            raise TypeError(
+                f"{key} is not a valid index. Should be a point on the nodegrid or a slice")
+
+
+@script_interface_register
+class EKReactions(ScriptObjectList):
+    _so_name = "walberla::EKReactions"
+    _so_creation_policy = "GLOBAL"
+
+    def add(self, reaction):
+        if not isinstance(reaction, (EKBulkReaction, EKIndexedReaction)):
+            raise TypeError("reaction object is not of correct type.")
+
+        self.call_method("add", object=reaction)
+
+        return reaction
+
+    def remove(self, reaction):
+        self.call_method("remove", object=reaction)
+
+    def clear(self):
+        self.call_method("clear")
diff --git a/src/python/espressomd/electrokinetics.pyx b/src/python/espressomd/electrokinetics.pyx
deleted file mode 100644
index 0f5dea2c841..00000000000
--- a/src/python/espressomd/electrokinetics.pyx
+++ /dev/null
@@ -1,511 +0,0 @@
-# Copyright (C) 2010-2022 The ESPResSo project
-#
-# This file is part of ESPResSo.
-#
-# ESPResSo is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# ESPResSo is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-include "myconfig.pxi"
-IF CUDA:
-    from .lb cimport HydrodynamicInteraction
-    from .lb cimport LBFluidRoutines
-    from .lb cimport lb_lbfluid_print_vtk_boundary
-    from .lb cimport lb_lbnode_is_index_valid
-    from .lb cimport lb_lbfluid_set_lattice_switch
-    from .lb cimport GPU
-from . import utils
-from .utils cimport Vector3i
-import numpy as np
-
-IF ELECTROKINETICS:
-    cdef class Electrokinetics(HydrodynamicInteraction):
-        """
-        Creates the electrokinetic method using the GPU unit.
-
-        """
-
-        def __getitem__(self, key):
-            if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3:
-                return ElectrokineticsRoutines(np.array(key))
-            raise ValueError(
-                f"{key} is not a valid key. Should be a point on the nodegrid e.g. ek[0,0,0].")
-
-        def validate_params(self):
-            """
-            Checks if the parameters for "stencil" and "fluid_coupling" are valid.
-
-            """
-            default_params = self.default_params()
-
-            if self._params["stencil"] not in ["linkcentered", "nodecentered"]:
-                raise ValueError(
-                    "stencil has to be 'linkcentered' or 'nodecentered'.")
-
-            if self._params["fluid_coupling"] not in ["friction", "estatics"]:
-                raise ValueError(
-                    "fluid_coupling has to be 'friction' or 'estatics'.")
-
-        def valid_keys(self):
-            """
-            Returns the valid options used for the electrokinetic method.
-            """
-
-            return ["agrid", "lb_density", "viscosity", "friction",
-                    "bulk_viscosity", "gamma_even", "gamma_odd", "T", "ext_force_density",
-                    "prefactor", "stencil", "advection", "fluid_coupling",
-                    "fluctuations", "fluctuation_amplitude", "es_coupling",
-                    "species"]
-
-        def required_keys(self):
-            """
-            Returns the necessary options to initialize the electrokinetic method.
-
-            """
-            return ["agrid", "lb_density", "viscosity",
-                    "friction", "T", "prefactor"]
-
-        def default_params(self):
-            """
-            Returns the default parameters.
-
-            """
-            return {"agrid": -1,
-                    "lb_density": -1,
-                    "viscosity": -1,
-                    "bulk_viscosity": -1,
-                    "gamma_odd": 0.0,
-                    "gamma_even": 0.0,
-                    "ext_force_density": [0., 0., 0.],
-                    "friction": 0.0,
-                    "T": -1,
-                    "prefactor": -1,
-                    "stencil": "linkcentered",
-                    "advection": True,
-                    "fluid_coupling": "friction",
-                    "fluctuations": False,
-                    "fluctuation_amplitude": 0.0,
-                    "es_coupling": False,
-                    "species": []}
-
-        def _get_params_from_es_core(self):
-            if ek_parameters.stencil == 0:
-                stencil = "linkcentered"
-            elif ek_parameters.stencil == 1:
-                stencil = "nodecentered"
-            else:
-                raise RuntimeError("Value of stencil could not be identified.")
-
-            if ek_parameters.fluidcoupling_ideal_contribution:
-                fluid_coupling = "friction"
-            else:
-                fluid_coupling = "estatics"
-
-            return {"agrid": ek_parameters.agrid,
-                    "lb_density": ek_parameters.lb_density,
-                    "viscosity": ek_parameters.viscosity,
-                    "bulk_viscosity": ek_parameters.bulk_viscosity,
-                    "gamma_odd": ek_parameters.gamma_odd,
-                    "gamma_even": ek_parameters.gamma_even,
-                    "ext_force_density": ek_parameters.lb_ext_force_density,
-                    "friction": ek_parameters.friction,
-                    "T": ek_parameters.T,
-                    "prefactor": ek_parameters.prefactor,
-                    "stencil": stencil,
-                    "advection": ek_parameters.advection,
-                    "fluid_coupling": fluid_coupling,
-                    "fluctuations": ek_parameters.fluctuations,
-                    "fluctuation_amplitude":
-                        ek_parameters.fluctuation_amplitude,
-                    "es_coupling": ek_parameters.es_coupling}
-
-        def _set_params_in_es_core(self):
-            if self._params["stencil"] == "linkcentered":
-                ek_set_stencil(0)
-            elif self._params["stencil"] == "nodecentered":
-                ek_set_stencil(1)
-
-            if self._params["fluid_coupling"] == "friction":
-                ek_set_fluidcoupling(True)
-            elif self._params["fluid_coupling"] == "estatics":
-                ek_set_fluidcoupling(False)
-
-            ek_set_agrid(self._params["agrid"])
-            ek_set_lb_density(self._params["lb_density"])
-            ek_set_viscosity(self._params["viscosity"])
-            ek_set_friction(self._params["friction"])
-            ek_set_lb_ext_force_density(self._params["ext_force_density"][0],
-                                        self._params["ext_force_density"][1],
-                                        self._params["ext_force_density"][2])
-            ek_set_T(self._params["T"])
-            ek_set_prefactor(self._params["prefactor"])
-            ek_set_bulk_viscosity(self._params["bulk_viscosity"])
-            ek_set_gamma_odd(self._params["gamma_odd"])
-            ek_set_gamma_even(self._params["gamma_even"])
-            ek_set_advection(self._params["advection"])
-            ek_set_fluctuations(self._params["fluctuations"])
-            ek_set_fluctuation_amplitude(self._params["fluctuation_amplitude"])
-            ek_set_electrostatics_coupling(self._params["es_coupling"])
-
-        def set_density(self, species=None, density=None, node=None):
-            """
-            Sets the density of a species at a specific node.
-            If no node is given the density will be set global for the species.
-
-            Parameters
-            ----------
-            species : :obj:`int`
-                species for which the density will apply.
-            density : :obj:`float`
-                The value to which the density will be set to.
-            node : numpy-array of type :obj:`int` of length (3)
-                If set the density will be only applied on this specific node.
-
-            """
-
-            if species is None or density is None:
-                raise ValueError("species and density have to be set.")
-            utils.check_type_or_throw_except(
-                species, 1, float, "species needs to be an integer.")
-            if node is None:
-                ek_set_density(species, density)
-            else:
-                utils.check_type_or_throw_except(
-                    species, node, int, "node has to be an array of 3 integers")
-                ek_node_set_density(
-                    species, node[0], node[1], node[2], density)
-
-        def _activate_method(self):
-            self._set_params_in_es_core()
-            for species in self._params["species"]:
-                species._activate_method()
-            lb_lbfluid_set_lattice_switch(GPU)
-            self.ek_init()
-
-        def neutralize_system(self, species):
-            """
-            Sets the global density of a species to a specific value
-            for which the whole system will have no net charge.
-
-            .. note :: The previous density of the species will be ignored and
-                       it will be homogeneous distributed over the whole system
-                       The species must be charged to begin with. If the
-                       neutralization would lead to a negative species density
-                       an exception will be raised.
-
-            Parameters
-            ----------
-            species : :obj:`int`
-                The species which will be changed to neutralize the system.
-
-            """
-            err = ek_neutralize_system(species.id)
-
-            if err == 1:
-                raise RuntimeError(
-                    'Species used for neutralization must be added to electrokinetics')
-            elif err == 2:
-                raise RuntimeError(
-                    'Species used for neutralization must be charged')
-            elif err == 3:
-                raise RuntimeError(
-                    'Neutralization with specified species would result in negative density')
-            elif err != 0:
-                raise RuntimeError('Unknown error')
-
-            self.ek_init()
-
-        def ek_init(self):
-            """
-            Initializes the electrokinetic system.
-            This automatically initializes the lattice-Boltzmann method on the GPU.
-
-            """
-            err = ek_init()
-            if err:
-                raise RuntimeError('EK init failed')
-
-        def add_species(self, species):
-            """
-            Initializes a new species for the electrokinetic method.
-
-            Parameters
-            ----------
-            species : :obj:`int`
-                Species to be initialized.
-
-            """
-            self._params["species"].append(species)
-
-        def get_params(self):
-            """
-            Prints out the parameters of the electrokinetic system.
-
-            """
-            self._params.update(self._get_params_from_es_core())
-            return self._params
-
-        def write_vtk_boundary(self, path):
-            """
-            Writes the boundary information into a vtk-file.
-
-            Parameters
-            ----------
-            path : :obj:`str`
-                Path of the .vtk file the boundary is written to.
-
-            """
-            lb_lbfluid_print_vtk_boundary(utils.to_char_pointer(path))
-
-        def write_vtk_velocity(self, path):
-            """
-            Writes the lattice-Boltzmann velocity information into a vtk-file.
-
-            Parameters
-            ----------
-            path : :obj:`str`
-                Path of the .vtk file the velocity is written to.
-
-            """
-            ek_lb_print_vtk_velocity(utils.to_char_pointer(path))
-
-        def write_vtk_density(self, path):
-            """
-            Writes the LB density information into a vtk-file.
-
-            Parameters
-            ----------
-            path : :obj:`str`
-                Path of the .vtk file the LB density is written to.
-
-            """
-            ek_lb_print_vtk_density(utils.to_char_pointer(path))
-
-        def write_vtk_potential(self, path):
-            """
-            Writes the electrostatic potential into a vtk-file.
-
-            Parameters
-            ----------
-            path : :obj:`str`
-                Path of the .vtk file the electrostatic potential is written to.
-
-            """
-            ek_print_vtk_potential(utils.to_char_pointer(path))
-
-        def write_vtk_lbforce(self, path):
-            """
-            Writes the LB force information into a vtk-file.
-
-            Parameters
-            ----------
-            path : :obj:`str`
-                Path of the .vtk file the LB force is written to.
-
-            """
-            ek_print_vtk_lbforce_density(utils.to_char_pointer(path))
-
-        def write_vtk_particle_potential(self, path):
-            """
-            Writes the electrostatic particle potential into a vtk-file.
-
-            .. note :: This only works if 'es_coupling' is active.
-
-            Parameters
-            ----------
-            path : :obj:`str`
-                Path of the .vtk file the electrostatic potential is written to.
-
-            """
-
-            if self._params["es_coupling"]:
-                ek_print_vtk_particle_potential(utils.to_char_pointer(path))
-            else:
-                raise RuntimeError("'es_coupling' is not active.")
-
-        def save_checkpoint(self, path):
-            raise RuntimeError("EK does not support checkpointing")
-
-        def load_checkpoint(self, path):
-            raise RuntimeError("EK does not support checkpointing")
-
-        def add_reaction(self, shape):
-            raise NotImplementedError("This method is not implemented yet.")
-
-        def add_boundary(self, shape):
-            raise NotImplementedError("This method is not implemented yet.")
-
-    cdef class ElectrokineticsRoutines(LBFluidRoutines):
-
-        property potential:
-            def __get__(self):
-                cdef double potential
-                ek_node_get_potential(self.node[0], self.node[1], self.node[2], & potential)
-                return potential
-
-            def __set__(self, value):
-                raise Exception("Potential can not be set.")
-
-    class Species:
-
-        """
-        Creates a species object that is passed to the ek instance.
-
-        """
-
-        py_number_of_species = 0
-        id = -1
-        _params = {}
-
-        # __getstate__ and __setstate__ define the pickle interaction
-        def __getstate__(self):
-            raise RuntimeError("EK does not support checkpointing")
-
-        def __setstate__(self, params):
-            raise RuntimeError("EK does not support checkpointing")
-
-        def __str__(self):
-            return f"{self.__class__.__name__}({self.get_params()})"
-
-        def __getitem__(self, key):
-            if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3:
-                return SpecieRoutines(np.array(key), self.id)
-            raise ValueError(
-                f"{key} is not a valid key. Should be a point on the nodegrid e.g. species[0,0,0].")
-
-        def __init__(self, **kwargs):
-            Species.py_number_of_species += 1
-            self.id = Species.py_number_of_species
-            utils.check_required_keys(self.required_keys(), kwargs.keys())
-            utils.check_valid_keys(self.valid_keys(), kwargs.keys())
-            self._params = self.default_params()
-            self._params.update(kwargs)
-
-        def valid_keys(self):
-            """
-            Returns the valid keys for the species.
-
-            """
-            return {"density", "D", "valency", "ext_force_density"}
-
-        def required_keys(self):
-            """
-            Returns the required keys for the species.
-
-            """
-            return {"density", "D", "valency"}
-
-        def default_params(self):
-            """
-            Returns the default parameters for the species.
-
-            """
-            return {"ext_force_density": [0, 0, 0]}
-
-        def _get_params_from_es_core(self):
-            return {
-                "density": ek_parameters.density[
-                    ek_parameters.species_index[self.id]],
-                "D": ek_parameters.D[ek_parameters.species_index[self.id]],
-                "valency": ek_parameters.valency[
-                    ek_parameters.species_index[self.id]],
-                "ext_force_density":
-                    [ek_parameters.ext_force_density[0][ek_parameters.species_index[self.id]],
-                     ek_parameters.ext_force_density[1][ek_parameters.species_index[self.id]],
-                     ek_parameters.ext_force_density[2][ek_parameters.species_index[self.id]]]}
-
-        def _set_params_in_es_core(self):
-            ek_set_D(self.id, self._params["D"])
-            ek_set_valency(self.id, self._params["valency"])
-            ek_set_density(self.id, self._params["density"])
-            ek_set_ext_force_density(self.id,
-                                     self._params["ext_force_density"][0],
-                                     self._params["ext_force_density"][1],
-                                     self._params["ext_force_density"][2])
-
-        def _activate_method(self):
-            self._set_params_in_es_core()
-
-        def get_params(self):
-            """
-            Returns the parameters of the species.
-
-            """
-            self._params.update(self._get_params_from_es_core())
-            return self._params
-
-        def write_vtk_density(self, path):
-            """
-            Writes the species density into a vtk-file.
-
-            Parameters
-            ----------
-            path : :obj:`str`
-                Path of the .vtk file the species density is written to.
-
-            """
-            ek_print_vtk_density(self.id, utils.to_char_pointer(path))
-
-        def write_vtk_flux(self, path):
-            """
-            Writes the species flux into a vtk-file.
-
-            Parameters
-            ----------
-            path : :obj:`str`
-                Path of the .vtk file the species flux is written to.
-
-            """
-            ek_print_vtk_flux(self.id, utils.to_char_pointer(path))
-
-        def write_vtk_flux_fluc(self, path):
-            ek_print_vtk_flux_fluc(self.id, utils.to_char_pointer(path))
-
-        def write_vtk_flux_link(self, path):
-            ek_print_vtk_flux_link(self.id, utils.to_char_pointer(path))
-
-    cdef class SpecieRoutines:
-        cdef Vector3i node
-        cdef int id
-
-        def __init__(self, key, id):
-            self.node[0] = key[0]
-            self.node[1] = key[1]
-            self.node[2] = key[2]
-            self.id = id
-            if not lb_lbnode_is_index_valid(self.node):
-                raise IndexError("LB node index out of bounds")
-
-        property density:
-            def __set__(self, value):
-                utils.check_type_or_throw_except(
-                    value, 1, float, "Property 'density' has to be a float")
-                if ek_node_set_density(
-                        self.id, self.node[0], self.node[1], self.node[2], value) != 0:
-                    raise RuntimeError("Species has not been added to EK.")
-
-            def __get__(self):
-                cdef double density
-                if ek_node_get_density(self.id, self.node[0], self.node[1], self.node[2], & density) != 0:
-                    raise RuntimeError("Species has not been added to EK.")
-                return density
-
-        property flux:
-            def __set__(self, value):
-                raise ValueError("Node flux is not settable.")
-
-            def __get__(self):
-                cdef double flux[3]
-                if ek_node_get_flux(
-                        self.id, self.node[0], self.node[1], self.node[2], flux) != 0:
-                    raise RuntimeError("Species has not been added to EK.")
-
-                return np.array([flux[0], flux[1], flux[2]])
diff --git a/src/python/espressomd/io/CMakeLists.txt b/src/python/espressomd/io/CMakeLists.txt
index 4023895074b..f6d2f170963 100644
--- a/src/python/espressomd/io/CMakeLists.txt
+++ b/src/python/espressomd/io/CMakeLists.txt
@@ -18,6 +18,7 @@
 #
 
 configure_file(mpiio.py mpiio.py COPYONLY)
+configure_file(vtk.py vtk.py COPYONLY)
 add_subdirectory(writer)
 set(cython_AUX ${cython_AUX}
                "${CMAKE_SOURCE_DIR}/src/python/espressomd/io/__init__.py"
diff --git a/src/python/espressomd/io/vtk.py b/src/python/espressomd/io/vtk.py
new file mode 100644
index 00000000000..2572aaa9430
--- /dev/null
+++ b/src/python/espressomd/io/vtk.py
@@ -0,0 +1,135 @@
+#
+# Copyright (C) 2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import numpy as np
+import vtk
+import vtk.util.numpy_support
+
+
+class VTKReader:
+    """
+    Reader for VTK multi-piece uniform grids written in XML format.
+    """
+    error_tolerance = 1e-5  # VTK data is written with 1e-7 precision
+
+    @classmethod
+    def get_array_names(cls, reader):
+        array_names = set()
+        n_ghost_layers = reader.GetUpdateGhostLevel()
+        n_pieces = reader.GetNumberOfPieces()
+        for piece_index in range(n_pieces):
+            reader.UpdatePiece(piece_index, n_pieces, n_ghost_layers)
+            piece = reader.GetOutput()
+            cell = piece.GetCellData()
+            for i in range(cell.GetNumberOfArrays()):
+                array_names.add(cell.GetArrayName(i))
+        return array_names
+
+    @classmethod
+    def get_piece_topology(
+            cls, piece, array, bounding_box_lower, bounding_box_upper):
+        bounds = np.array(piece.GetBounds())
+        box_l = bounds[1::2] - bounds[0:-1:2]
+        n_grid_points = array.GetNumberOfTuples()
+        shape_float = box_l / np.min(box_l)
+        shape_float *= np.cbrt(n_grid_points / np.prod(shape_float))
+        shape_int = np.around(shape_float).astype(int)
+        assert np.linalg.norm(shape_int - shape_float) < cls.error_tolerance and np.prod(
+            shape_int) == n_grid_points, "only cubic grids are supported"
+        agrid = np.mean(box_l / shape_float)
+        shape = tuple(shape_int.tolist())
+        lower_corner = []
+        for i in range(3):
+            start = int(np.around(bounds[i * 2]))
+            stop = start + shape[i]
+            bounding_box_lower[i] = min(bounding_box_lower[i], start)
+            bounding_box_upper[i] = max(bounding_box_upper[i], stop)
+            lower_corner.append(start)
+        return agrid, shape, lower_corner
+
+    @classmethod
+    def reconstruct_array(cls, reader, array_name):
+        n_pieces = reader.GetNumberOfPieces()
+        n_ghost_layers = reader.GetUpdateGhostLevel()
+        # get bounding box
+        info = []
+        agrids = []
+        bounding_box_lower = 3 * [float("inf")]
+        bounding_box_upper = 3 * [-float("inf")]
+        for piece_index in range(n_pieces):
+            reader.UpdatePiece(piece_index, n_pieces, n_ghost_layers)
+            piece = reader.GetOutput()
+            cell = piece.GetCellData()
+            array = cell.GetArray(array_name)
+            if array is not None:
+                agrid, shape, lower_corner = cls.get_piece_topology(
+                    piece, array, bounding_box_lower, bounding_box_upper)
+                agrids.append(agrid)
+                info.append([piece_index, shape, lower_corner])
+
+        if not info:
+            return None
+
+        # get array type and size
+        assert float("inf") not in bounding_box_lower
+        assert -float("inf") not in bounding_box_upper
+        if np.std(agrids) / np.mean(agrids) > cls.error_tolerance:
+            raise NotImplementedError(
+                f"VTK non-uniform grids are not supported (got agrid = {agrids} when parsing array '{array_name}')")
+        data_dims = np.array(bounding_box_upper) - np.array(bounding_box_lower)
+        piece_index = info[0][0]
+        reader.UpdatePiece(piece_index, n_pieces, n_ghost_layers)
+        array = reader.GetOutput().GetCellData().GetArray(array_name)
+        vector_length = array.GetNumberOfComponents()
+        val_dims = [] if vector_length == 1 else [vector_length]
+        data_type = array.GetDataTypeAsString()
+        if data_type == "float":
+            dtype = float
+        elif data_type == "int":
+            dtype = int
+        else:
+            raise NotImplementedError(
+                f"Unknown VTK data type '{data_type}' (when parsing array '{array_name}')")
+
+        # get data
+        data = np.empty(data_dims.tolist() + val_dims, dtype=dtype)
+        for piece_index, shape, lower_corner in info:
+            reader.UpdatePiece(piece_index, n_pieces, n_ghost_layers)
+            array = reader.GetOutput().GetCellData().GetArray(array_name)
+            subset = []
+            for i in range(3):
+                start = lower_corner[i] - bounding_box_lower[i]
+                stop = start + shape[i]
+                subset.append(slice(start, stop))
+            data[tuple(subset)] = vtk.util.numpy_support.vtk_to_numpy(
+                array).reshape(list(shape) + val_dims, order='F')
+
+        return data
+
+    def parse(self, filepath):
+        reader = vtk.vtkXMLUnstructuredGridReader()
+        reader.SetFileName(str(filepath))
+        reader.Update()
+
+        arrays = {}
+        array_names = self.get_array_names(reader)
+        for array_name in sorted(array_names):
+            arrays[array_name] = self.reconstruct_array(reader, array_name)
+
+        return arrays
diff --git a/src/python/espressomd/lb.pxd b/src/python/espressomd/lb.pxd
deleted file mode 100644
index 3ac79be7c11..00000000000
--- a/src/python/espressomd/lb.pxd
+++ /dev/null
@@ -1,217 +0,0 @@
-#
-# Copyright (C) 2013-2022 The ESPResSo project
-#
-# This file is part of ESPResSo.
-#
-# ESPResSo is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# ESPResSo is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-include "myconfig.pxi"
-
-from libcpp cimport bool
-from libcpp.vector cimport vector
-from libcpp.string cimport string
-from libc cimport stdint
-
-from .utils cimport Vector3d
-from .utils cimport Vector3i
-from .utils cimport Vector6d
-from .utils cimport Vector19d
-from .utils cimport make_array_locked
-
-cdef class FluidActor:
-    cdef public _isactive
-    cdef public _params
-    cdef public system
-
-cdef class HydrodynamicInteraction(FluidActor):
-    pass
-
-cdef class LBFluidRoutines:
-    cdef Vector3i node
-
-##############################################
-#
-# extern functions and structs
-#
-##############################################
-
-cdef extern from "grid_based_algorithms/lb_interface.hpp" namespace "ActiveLB":
-    cdef ActiveLB NONE
-    cdef ActiveLB CPU
-    cdef ActiveLB GPU
-
-cdef extern from "grid_based_algorithms/lb_interface.hpp":
-
-    cdef enum ActiveLB:
-        pass
-    void lb_lbfluid_set_tau(double c_tau) except +
-    double lb_lbfluid_get_tau() except +
-    void lb_lbfluid_set_density(double c_dens) except +
-    double lb_lbfluid_get_density() except +
-    void lb_lbfluid_set_viscosity(double c_visc) except +
-    double lb_lbfluid_get_viscosity() except +
-    void lb_lbfluid_set_agrid(double c_agrid) except +
-    double lb_lbfluid_get_agrid() except +
-    void lb_lbfluid_set_gamma_odd(double c_gamma_odd) except +
-    double lb_lbfluid_get_gamma_odd() except +
-    void lb_lbfluid_set_gamma_even(double c_gamma_even) except +
-    double lb_lbfluid_get_gamma_even() except +
-    void lb_lbfluid_set_ext_force_density(const Vector3d forcedensity) except +
-    const Vector3d lb_lbfluid_get_ext_force_density() except +
-    void lb_lbfluid_set_bulk_viscosity(double c_bulk_visc) except +
-    double lb_lbfluid_get_bulk_viscosity() except +
-    void lb_lbfluid_print_vtk_velocity(string filename) except +
-    void lb_lbfluid_print_vtk_velocity(string filename, vector[int] bb1, vector[int] bb2) except +
-    void lb_lbfluid_print_vtk_boundary(string filename) except +
-    void lb_lbfluid_print_velocity(string filename) except +
-    void lb_lbfluid_print_boundary(string filename) except +
-    void lb_lbfluid_save_checkpoint(string filename, bool binary) except +
-    void lb_lbfluid_load_checkpoint(string filename, bool binary) except +
-    void lb_lbfluid_set_lattice_switch(ActiveLB local_lattice_switch) except +
-    Vector6d lb_lbfluid_get_pressure_tensor() except +
-    bool lb_lbnode_is_index_valid(const Vector3i & ind) except +
-    Vector3i lb_lbfluid_get_shape() except +
-    const Vector3d lb_lbnode_get_velocity(const Vector3i & ind) except +
-    void lb_lbnode_set_velocity(const Vector3i & ind, const Vector3d & u) except +
-    double lb_lbnode_get_density(const Vector3i & ind) except +
-    void lb_lbnode_set_density(const Vector3i & ind, double density) except +
-    const Vector6d lb_lbnode_get_pressure_tensor(const Vector3i & ind) except +
-    const Vector6d lb_lbnode_get_pressure_tensor_neq(const Vector3i & ind) except +
-    const Vector19d lb_lbnode_get_pop(const Vector3i & ind) except +
-    void lb_lbnode_set_pop(const Vector3i & ind, const Vector19d & populations) except +
-    int lb_lbnode_get_boundary(const Vector3i & ind) except +
-    stdint.uint64_t lb_lbfluid_get_rng_state() except +
-    void lb_lbfluid_set_rng_state(stdint.uint64_t) except +
-    void lb_lbfluid_set_kT(double) except +
-    double lb_lbfluid_get_kT() except +
-    double lb_lbfluid_get_lattice_speed() except +
-    void check_tau_time_step_consistency(double tau, double time_s) except +
-    const Vector3d lb_lbfluid_get_interpolated_velocity(const Vector3d & p) except +
-
-cdef extern from "grid_based_algorithms/lb_particle_coupling.hpp":
-    void lb_lbcoupling_set_rng_state(stdint.uint64_t)
-    stdint.uint64_t lb_lbcoupling_get_rng_state() except +
-    void lb_lbcoupling_set_gamma(double)
-    double lb_lbcoupling_get_gamma() except +
-    bool lb_lbcoupling_is_seed_required()
-
-cdef extern from "grid_based_algorithms/lbgpu.hpp":
-    void linear_velocity_interpolation(double * positions, double * velocities, int length)
-    void quadratic_velocity_interpolation(double * positions, double * velocities, int length)
-
-cdef extern from "grid_based_algorithms/lb_interpolation.hpp":
-    cdef cppclass InterpolationOrder:
-        pass
-    void lb_lbinterpolation_set_interpolation_order(InterpolationOrder & order)
-
-cdef extern from "grid_based_algorithms/lb_interpolation.hpp" namespace "InterpolationOrder":
-    cdef InterpolationOrder linear
-    cdef InterpolationOrder quadratic
-
-cdef extern from "integrate.hpp":
-    double get_time_step()
-
-##############################################
-#
-# Wrapper-functions to handle unit conversions
-#
-##############################################
-
-cdef inline python_lbfluid_set_density(double dens, double agrid) except +:
-    lb_lbfluid_set_density(dens * agrid**3)
-
-cdef inline python_lbfluid_set_viscosity(double visc, double agrid, double tau) except +:
-    lb_lbfluid_set_viscosity(visc * tau / agrid**2)
-
-cdef inline python_lbfluid_set_agrid(double agrid) except +:
-    lb_lbfluid_set_agrid(agrid)
-
-cdef inline python_lbfluid_set_bulk_viscosity(double bvisc, double agrid, double tau) except +:
-    lb_lbfluid_set_bulk_viscosity(bvisc * tau / agrid**2)
-
-cdef inline python_lbfluid_set_gamma(double gamma) except +:
-    lb_lbcoupling_set_gamma(gamma)
-
-cdef inline python_lbfluid_set_gamma_odd(double gamma_odd) except +:
-    lb_lbfluid_set_gamma_odd(gamma_odd)
-
-cdef inline python_lbfluid_set_gamma_even(double gamma_even) except +:
-    lb_lbfluid_set_gamma_even(gamma_even)
-
-cdef inline python_lbfluid_set_ext_force_density(Vector3d ext_force_density, double agrid, double tau) except +:
-    lb_lbfluid_set_ext_force_density(ext_force_density * agrid**2 * tau**2)
-
-cdef inline python_lbfluid_get_density(double agrid) except +:
-    return lb_lbfluid_get_density() / agrid**3
-
-cdef inline python_lbfluid_get_viscosity(double agrid, double tau) except +:
-    return lb_lbfluid_get_viscosity() / tau * agrid**2
-
-cdef inline python_lbfluid_get_bulk_viscosity(double agrid, double tau) except +:
-    return lb_lbfluid_get_bulk_viscosity() / tau * agrid**2
-
-cdef inline python_lbfluid_get_gamma() except +:
-    return lb_lbcoupling_get_gamma()
-
-cdef inline python_lbfluid_get_ext_force_density(double agrid, double tau) except +:
-    cdef Vector3d ext_force_density = lb_lbfluid_get_ext_force_density()
-    return make_array_locked(ext_force_density / (agrid**2 * tau**2))
-
-cdef inline python_lbfluid_get_pressure_tensor(double agrid, double tau) except +:
-    cdef Vector6d c_tensor = lb_lbfluid_get_pressure_tensor()
-    cdef double unit_conversion = 1.0 / (agrid * tau**2)
-    cdef Vector6d p_tensor = c_tensor * unit_conversion
-    return [[p_tensor[0], p_tensor[1], p_tensor[3]],
-            [p_tensor[1], p_tensor[2], p_tensor[4]],
-            [p_tensor[3], p_tensor[4], p_tensor[5]]]
-
-cdef inline python_lbnode_set_velocity(Vector3i node, Vector3d velocity) except +:
-    lb_lbnode_set_velocity(node, velocity / lb_lbfluid_get_lattice_speed())
-
-cdef inline python_lbnode_get_velocity(Vector3i node) except +:
-    cdef Vector3d c_velocity = lb_lbnode_get_velocity(node)
-    return make_array_locked(c_velocity * lb_lbfluid_get_lattice_speed())
-
-cdef inline python_lbnode_get_interpolated_velocity(Vector3d pos) except +:
-    cdef Vector3d c_velocity = lb_lbfluid_get_interpolated_velocity(pos)
-    return make_array_locked(c_velocity * lb_lbfluid_get_lattice_speed())
-
-cdef inline python_lbnode_set_density(Vector3i node, double density) except +:
-    cdef double agrid = lb_lbfluid_get_agrid()
-    lb_lbnode_set_density(node, density * agrid**3)
-
-cdef inline python_lbnode_get_density(Vector3i node) except +:
-    cdef double c_density = lb_lbnode_get_density(node)
-    cdef double agrid = lb_lbfluid_get_agrid()
-    return c_density / agrid**3
-
-cdef inline python_lbnode_get_pressure_tensor(Vector3i node) except +:
-    cdef Vector6d c_tensor = lb_lbnode_get_pressure_tensor(node)
-    cdef double tau = lb_lbfluid_get_tau()
-    cdef double agrid = lb_lbfluid_get_agrid()
-    cdef double unit_conversion = 1.0 / (tau**2 * agrid)
-    cdef Vector6d p_tensor = c_tensor * unit_conversion
-    return [[p_tensor[0], p_tensor[1], p_tensor[3]],
-            [p_tensor[1], p_tensor[2], p_tensor[4]],
-            [p_tensor[3], p_tensor[4], p_tensor[5]]]
-
-cdef inline python_lbnode_get_pressure_tensor_neq(Vector3i node) except +:
-    cdef Vector6d c_tensor = lb_lbnode_get_pressure_tensor_neq(node)
-    cdef double tau = lb_lbfluid_get_tau()
-    cdef double agrid = lb_lbfluid_get_agrid()
-    cdef double unit_conversion = 1.0 / (tau**2 * agrid)
-    cdef Vector6d p_tensor = c_tensor * unit_conversion
-    return [[p_tensor[0], p_tensor[1], p_tensor[3]],
-            [p_tensor[1], p_tensor[2], p_tensor[4]],
-            [p_tensor[3], p_tensor[4], p_tensor[5]]]
diff --git a/src/python/espressomd/lb.py b/src/python/espressomd/lb.py
new file mode 100644
index 00000000000..2d155e01902
--- /dev/null
+++ b/src/python/espressomd/lb.py
@@ -0,0 +1,765 @@
+#
+# Copyright (C) 2013-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import itertools
+import numpy as np
+
+from . import utils
+from .detail.walberla import VTKOutputBase, LatticeWalberla
+from .script_interface import ScriptInterfaceHelper, script_interface_register, array_variant
+import espressomd.detail.walberla
+import espressomd.shapes
+import espressomd.code_features
+
+
+class VelocityBounceBack:
+    """
+    Hold velocity information for the velocity bounce back boundary
+    condition at a single node.
+
+    """
+
+    def __init__(self, velocity):
+        utils.check_type_or_throw_except(
+            velocity, 3, float, "VelocityBounceBack velocity must be three floats")
+        self.velocity = velocity
+
+
+class HydrodynamicInteraction(ScriptInterfaceHelper):
+    """
+    Base class for LB implementations.
+
+    """
+
+    def __getitem__(self, key):
+        raise NotImplementedError("Derived classes must implement this method")
+
+    def __str__(self):
+        return f"{self.__class__.__name__}({self.get_params()})"
+
+    def _activate(self):
+        self._activate_method()
+
+    def _deactivate(self):
+        self._deactivate_method()
+
+    def _activate_method(self):
+        self.call_method("activate")
+        utils.handle_errors("HydrodynamicInteraction activation failed")
+
+    def _deactivate_method(self):
+        self.call_method("deactivate")
+        utils.handle_errors("HydrodynamicInteraction deactivation failed")
+
+    def validate_params(self, params):
+        pass
+
+    def valid_keys(self):
+        return {"agrid", "tau", "density", "ext_force_density",
+                "kinematic_viscosity", "lattice", "kT", "seed"}
+
+    def required_keys(self):
+        return {"lattice", "density", "kinematic_viscosity", "tau"}
+
+    def default_params(self):
+        return {"lattice": None, "seed": 0, "kT": 0.,
+                "ext_force_density": [0.0, 0.0, 0.0]}
+
+    def mach_limit(self):
+        """
+        The fluid velocity is limited to :math:`v_{\\mathrm{max}} = 0.20`
+        (see *quasi-incompressible limit* in :cite:`kruger17a`,
+        chapter 7, page 272), which corresponds to Mach 0.35.
+
+        The relative error in the fluid density between a compressible fluid
+        and an incompressible fluid at Mach 0.30 is less than 5% (see
+        *constant density assumption* in :cite:`kundu01a` chapter 16, page
+        663). Since the speed of sound is :math:`c_s = 1 / \\sqrt{3}` in LB
+        velocity units in a D3Q19 lattice, the velocity limit at Mach 0.30
+        is :math:`v_{\\mathrm{max}} = 0.30 / \\sqrt{3} \\approx 0.17`.
+        At Mach 0.35 the relative error is around 6% and
+        :math:`v_{\\mathrm{max}} = 0.35 / \\sqrt{3} \\approx 0.20`.
+
+        Returns
+        -------
+        v_max : :obj:`float`
+            The Mach limit expressed in LB velocity units.
+
+        """
+        return 0.20
+
+    @classmethod
+    def _check_mach_limit(cls, velocities):
+        vel_max = cls.mach_limit(cls)
+        velocities = np.reshape(velocities, (-1, 3))
+        if np.any(np.linalg.norm(velocities, axis=1) > vel_max):
+            speed_of_sound = 1. / np.sqrt(3.)
+            mach_number = vel_max / speed_of_sound
+            raise ValueError(f"Slip velocity exceeds Mach {mach_number:.2f}")
+
+    @property
+    def pressure_tensor(self):
+        tensor = self.call_method("get_pressure_tensor")
+        return utils.array_locked(tensor)
+
+    @pressure_tensor.setter
+    def pressure_tensor(self, value):
+        raise RuntimeError(f"Property 'pressure_tensor' is read-only")
+
+
+@script_interface_register
+class LBFluidWalberla(HydrodynamicInteraction,
+                      espressomd.detail.walberla.LatticeModel):
+    """
+    The lattice-Boltzmann method for hydrodynamics using waLBerla.
+    If argument ``lattice`` is not provided, one will be default
+    constructed if an argument ``agrid`` is provided.
+
+    Parameters
+    ----------
+    lattice : :obj:`espressomd.lb.LatticeWalberla <espressomd.detail.walberla.LatticeWalberla>`
+        Lattice object. If not provided, a default one will be constructed
+        using the ``agrid`` parameter.
+    agrid : :obj:`float`
+        Lattice constant. The box size in every direction must be an integer
+        multiple of ``agrid``. Cannot be provided together with ``lattice``.
+    tau : :obj:`float`
+        LB time step, must be an integer multiple of the MD time step.
+    density : :obj:`float`
+        Fluid density.
+    kinematic_viscosity : :obj:`float`
+        Fluid kinematic viscosity.
+    ext_force_density : (3,) array_like of :obj:`float`, optional
+        Force density applied on the fluid.
+    kT : :obj:`float`, optional
+        Thermal energy of the simulated heat bath (for thermalized fluids).
+        Set it to 0 for an unthermalized fluid.
+    seed : :obj:`int`, optional
+        Initial counter value (or seed) of the philox RNG.
+        Required for a thermalized fluid. Must be positive.
+    single_precision : :obj:`bool`, optional
+        Use single-precision floating-point arithmetic.
+
+    Methods
+    -------
+    get_interpolated_velocity()
+        Get LB fluid velocity at specified position.
+
+        Parameters
+        ----------
+        pos : (3,) array_like of :obj:`float`
+            The position at which velocity is requested.
+
+        Returns
+        -------
+        v : (3,) array_like :obj:`float`
+            The LB fluid velocity at ``pos``.
+
+    add_force_at_pos():
+        Adds a force to the fluid at given position.
+
+        Parameters
+        ----------
+        pos : (3,) array_like of :obj:`float`
+            The position at which the force will be added.
+        force : (3,) array_like of :obj:`float`
+            The force vector which will be distributed at the position.
+
+    clear_boundaries()
+        Remove velocity bounce-back boundary conditions.
+
+    save_checkpoint()
+        Write LB node populations and boundary conditions to a file.
+
+        Parameters
+        ----------
+        path : :obj:`str`
+            Destination file path.
+        binary : :obj:`bool`
+            Whether to write in binary or ASCII mode.
+
+    load_checkpoint()
+        Load LB node populations and boundary conditions from a file.
+
+        Parameters
+        ----------
+        path : :obj:`str`
+            File path to read from.
+        binary : :obj:`bool`
+            Whether to read in binary or ASCII mode.
+
+    add_vtk_writer()
+        Attach a VTK writer.
+
+        Parameters
+        ----------
+        vtk : :class:`espressomd.lb.VTKOutput`
+            VTK writer.
+
+    remove_vtk_writer()
+        Detach a VTK writer.
+
+        Parameters
+        ----------
+        vtk : :class:`espressomd.lb.VTKOutput`
+            VTK writer.
+
+    clear_vtk_writers()
+        Detach all VTK writers.
+
+    """
+
+    _so_name = "walberla::LBFluid"
+    _so_creation_policy = "GLOBAL"
+    _so_bind_methods = (
+        "add_force_at_pos",
+        "clear_boundaries",
+        "get_interpolated_velocity",
+        "add_vtk_writer",
+        "remove_vtk_writer",
+        "clear_vtk_writers",
+    )
+
+    def __init__(self, *args, **kwargs):
+        if not espressomd.code_features.has_features("WALBERLA"):
+            raise NotImplementedError("Feature WALBERLA not compiled in")
+
+        if "sip" not in kwargs:
+            params = self.default_params()
+            params.update(kwargs)
+            self.validate_params(params)
+            super().__init__(*args, **params)
+        else:
+            super().__init__(**kwargs)
+
+    def validate_params(self, params):
+        super().validate_params(params)
+
+        # construct default lattice if necessary
+        if params.get("lattice") is None:
+            if "agrid" not in params:
+                raise ValueError("missing argument 'lattice' or 'agrid'")
+            params["lattice"] = LatticeWalberla(
+                agrid=params.pop("agrid"), n_ghost_layers=1)
+        elif "agrid" in params:
+            raise ValueError("cannot provide both 'lattice' and 'agrid'")
+
+        utils.check_required_keys(self.required_keys(), params.keys())
+        utils.check_valid_keys(self.valid_keys(), params.keys())
+
+    def default_params(self):
+        return {"single_precision": False, **super().default_params()}
+
+    def valid_keys(self):
+        return {"single_precision", *super().valid_keys()}
+
+    def __getitem__(self, key):
+        if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3:
+            if any(isinstance(item, slice) for item in key):
+                return LBFluidSliceWalberla(parent_sip=self, slice_range=key)
+            else:
+                return LBFluidNodeWalberla(
+                    parent_sip=self, index=np.array(key))
+
+        raise TypeError(
+            f"{key} is not a valid index. Should be a point on the "
+            "nodegrid e.g. lbf[0,0,0], or a slice e.g. lbf[:,0,0]")
+
+    def add_boundary_from_shape(self, shape,
+                                velocity=np.zeros(3, dtype=float),
+                                boundary_type=VelocityBounceBack):
+        """
+        Set velocity bounce-back boundary conditions from a shape.
+
+        Parameters
+        ----------
+        shape : :obj:`espressomd.shapes.Shape`
+            Shape to rasterize.
+        velocity : (3,) or (L, M, N, 3) array_like of :obj:`float`, optional
+            Slip velocity. By default no-slip boundary conditions are used.
+            If a vector of 3 values, a uniform slip velocity is used,
+            otherwise ``L, M, N`` must be equal to the LB grid dimensions.
+        boundary_type : Union[:class:`~espressomd.lb.VelocityBounceBack`] (optional)
+            Type of the boundary condition.
+
+        """
+        if not issubclass(boundary_type, VelocityBounceBack):
+            raise TypeError(
+                "Parameter 'boundary_type' must be a subclass of VelocityBounceBack")
+
+        utils.check_type_or_throw_except(
+            shape, 1, espressomd.shapes.Shape, "expected an espressomd.shapes.Shape")
+        if np.shape(velocity) not in [(3,), tuple(self.shape) + (3,)]:
+            raise ValueError(
+                f'Cannot process velocity value grid of shape {np.shape(velocity)}')
+
+        # range checks
+        lattice_speed = self.call_method("get_lattice_speed")
+        velocity = np.array(velocity, dtype=float).reshape((-1, 3))
+        velocity *= 1. / lattice_speed
+        self._check_mach_limit(velocity)
+
+        mask = self.get_shape_bitmask(shape=shape).astype(int)
+        self.call_method(
+            "add_boundary_from_shape",
+            raster=array_variant(mask.flatten()),
+            values=array_variant(velocity.flatten()))
+
+
+class LBFluidWalberlaGPU(HydrodynamicInteraction):
+    """
+    Initialize the lattice-Boltzmann method for hydrodynamic flow using
+    waLBerla for the GPU. See :class:`HydrodynamicInteraction` for the
+    list of parameters.
+
+    """
+
+    # pylint: disable=unused-argument
+    def __init__(self, *args, **kwargs):
+        if not espressomd.code_features.has_features("CUDA"):
+            raise NotImplementedError("Feature CUDA not compiled in")
+        if not espressomd.code_features.has_features("WALBERLA"):
+            raise NotImplementedError("Feature WALBERLA not compiled in")
+        raise NotImplementedError("Not implemented yet")
+
+
+@script_interface_register
+class LBFluidNodeWalberla(ScriptInterfaceHelper):
+    _so_name = "walberla::LBFluidNode"
+    _so_creation_policy = "GLOBAL"
+
+    def required_keys(self):
+        return {"parent_sip", "index"}
+
+    def __init__(self, *args, **kwargs):
+        if "sip" not in kwargs:
+            super().__init__(*args, **kwargs)
+            utils.handle_errors("LBFluidNode instantiation failed")
+        else:
+            super().__init__(**kwargs)
+
+    def __reduce__(self):
+        raise NotImplementedError("Cannot serialize LB fluid node objects")
+
+    def __eq__(self, obj):
+        return isinstance(obj, LBFluidNodeWalberla) and self.index == obj.index
+
+    def __hash__(self):
+        return hash(self.index)
+
+    @property
+    def index(self):
+        return tuple(self._index)
+
+    @index.setter
+    def index(self, value):
+        raise RuntimeError("Parameter 'index' is read-only.")
+
+    @property
+    def density(self):
+        return self.call_method("get_density")
+
+    @density.setter
+    def density(self, value):
+        self.call_method("set_density", value=value)
+
+    @property
+    def population(self):
+        return utils.array_locked(self.call_method("get_population"))
+
+    @population.setter
+    def population(self, value):
+        self.call_method("set_population", value=value)
+
+    @property
+    def pressure_tensor(self):
+        tensor = self.call_method("get_pressure_tensor")
+        return utils.array_locked(tensor)
+
+    @pressure_tensor.setter
+    def pressure_tensor(self, value):
+        raise RuntimeError("Property 'pressure_tensor' is read-only.")
+
+    @property
+    def is_boundary(self):
+        return self.call_method("get_is_boundary")
+
+    @is_boundary.setter
+    def is_boundary(self, value):
+        raise RuntimeError("Property 'is_boundary' is read-only.")
+
+    @property
+    def boundary(self):
+        """
+        Returns
+        -------
+        :class:`~espressomd.lb.VelocityBounceBack`
+            If the node is a boundary node
+        None
+            If the node is not a boundary node
+
+        """
+
+        velocity = self.call_method("get_velocity_at_boundary")
+        if velocity is not None:
+            return VelocityBounceBack(velocity)
+        return None
+
+    @boundary.setter
+    def boundary(self, value):
+        """
+        Parameters
+        ----------
+        value : :class:`~espressomd.lb.VelocityBounceBack` or ``None``
+            If value is :class:`~espressomd.lb.VelocityBounceBack`,
+            set the node to be a boundary node with the specified velocity.
+            If value is ``None``, the node will become a fluid node.
+
+        """
+
+        if isinstance(value, VelocityBounceBack):
+            value = value.velocity
+            lattice_speed = self.call_method("get_lattice_speed")
+            HydrodynamicInteraction._check_mach_limit(
+                np.array(value) / lattice_speed)
+        elif value is not None:
+            raise TypeError(
+                "Parameter 'value' must be an instance of VelocityBounceBack or None")
+        self.call_method("set_velocity_at_boundary", value=value)
+
+    @property
+    def boundary_force(self):
+        return self.call_method("get_boundary_force")
+
+    @boundary_force.setter
+    def boundary_force(self, value):
+        raise RuntimeError("Property 'boundary_force' is read-only.")
+
+    @property
+    def velocity(self):
+        return self.call_method("get_velocity")
+
+    @velocity.setter
+    def velocity(self, value):
+        self.call_method("set_velocity", value=value)
+
+    @property
+    def last_applied_force(self):
+        return self.call_method("get_last_applied_force")
+
+    @last_applied_force.setter
+    def last_applied_force(self, value):
+        self.call_method("set_last_applied_force", value=value)
+
+
+@script_interface_register
+class LBFluidSliceWalberla(ScriptInterfaceHelper):
+    _so_name = "walberla::LBFluidSlice"
+    _so_creation_policy = "GLOBAL"
+
+    def required_keys(self):
+        return {"parent_sip", "slice_range"}
+
+    def validate_params(self, params):
+        utils.check_required_keys(self.required_keys(), params.keys())
+
+    def __init__(self, *args, **kwargs):
+        if "sip" in kwargs:
+            super().__init__(**kwargs)
+        else:
+            self.validate_params(kwargs)
+            slice_range = kwargs.pop("slice_range")
+            grid_size = kwargs["parent_sip"].shape
+            extra_kwargs = espressomd.detail.walberla.get_slice_bounding_box(
+                slice_range, grid_size)
+            node = LBFluidNodeWalberla(index=np.array([0, 0, 0]), **kwargs)
+            super().__init__(*args, node_sip=node, **kwargs, **extra_kwargs)
+            utils.handle_errors("LBFluidSliceWalberla instantiation failed")
+
+    def __iter__(self):
+        lower, upper = self.call_method("get_slice_ranges")
+        indices = [list(range(lower[i], upper[i])) for i in range(3)]
+        lb_sip = self.call_method("get_lb_sip")
+        for index in itertools.product(*indices):
+            yield LBFluidNodeWalberla(parent_sip=lb_sip, index=np.array(index))
+
+    def __reduce__(self):
+        raise NotImplementedError("Cannot serialize LB fluid slice objects")
+
+    def _getter(self, attr):
+        value_grid, shape = self.call_method(f"get_{attr}")
+        if attr == "velocity_at_boundary":
+            value_grid = [
+                None if x is None else VelocityBounceBack(x) for x in value_grid]
+        return utils.array_locked(np.reshape(value_grid, shape))
+
+    def _setter(self, attr, values):
+        dimensions = self.call_method("get_slice_size")
+        if 0 in dimensions:
+            raise AttributeError(
+                f"Cannot set properties of an empty '{self.__class__.__name__}' object")
+
+        values = np.copy(values)
+        value_shape = tuple(self.call_method("get_value_shape", name=attr))
+        target_shape = (*dimensions, *value_shape)
+
+        # broadcast if only one element was provided
+        if values.shape == value_shape or values.shape == () and value_shape == (1,):
+            values = np.full(target_shape, values)
+
+        def shape_squeeze(shape):
+            return tuple(x for x in shape if x != 1)
+
+        if shape_squeeze(values.shape) != shape_squeeze(target_shape):
+            raise ValueError(
+                f"Input-dimensions of '{attr}' array {values.shape} does not match slice dimensions {target_shape}")
+
+        self.call_method(f"set_{attr}", values=values.flatten())
+
+    @property
+    def density(self):
+        return self._getter("density",)
+
+    @density.setter
+    def density(self, value):
+        self._setter("density", value)
+
+    @property
+    def population(self):
+        return self._getter("population")
+
+    @population.setter
+    def population(self, value):
+        self._setter("population", value)
+
+    @property
+    def pressure_tensor(self):
+        return self._getter("pressure_tensor")
+
+    @pressure_tensor.setter
+    def pressure_tensor(self, value):
+        raise RuntimeError("Property 'pressure_tensor' is read-only.")
+
+    @property
+    def is_boundary(self):
+        return self._getter("is_boundary")
+
+    @is_boundary.setter
+    def is_boundary(self, value):
+        raise RuntimeError("Property 'is_boundary' is read-only.")
+
+    @property
+    def boundary(self):
+        """
+        Returns
+        -------
+        (N, M, L) array_like of :class:`~espressomd.lb.VelocityBounceBack`
+            If the nodes are boundary nodes
+        (N, M, L) array_like of ``None``
+            If the nodes are not boundary nodes
+
+        """
+
+        return self._getter("velocity_at_boundary")
+
+    @boundary.setter
+    def boundary(self, values):
+        """
+        Parameters
+        ----------
+        values : (N, M, L) array_like of :class:`~espressomd.lb.VelocityBounceBack` or ``None``
+            If values are :class:`~espressomd.lb.VelocityBounceBack`,
+            set the nodes to be boundary nodes with the specified velocity.
+            If values are ``None``, the nodes will become fluid nodes.
+
+        """
+
+        type_error_msg = "Parameter 'values' must be an array_like of VelocityBounceBack or None"
+        values = np.copy(values)
+        lattice_speed = self.call_method("get_lattice_speed")
+        if values.dtype != np.dtype("O"):
+            raise TypeError(type_error_msg)
+        for index in np.ndindex(*values.shape):
+            if values[index] is not None:
+                if not isinstance(values[index], VelocityBounceBack):
+                    raise TypeError(type_error_msg)
+                HydrodynamicInteraction._check_mach_limit(
+                    np.array(values[index].velocity) / lattice_speed)
+                values[index] = np.array(values[index].velocity)
+        self._setter("velocity_at_boundary", values=values)
+
+    @property
+    def boundary_force(self):
+        return self._getter("boundary_force")
+
+    @boundary_force.setter
+    def boundary_force(self, value):
+        raise RuntimeError("Property 'boundary_force' is read-only.")
+
+    @property
+    def velocity(self):
+        return self._getter("velocity")
+
+    @velocity.setter
+    def velocity(self, value):
+        self._setter("velocity", value)
+
+    @property
+    def last_applied_force(self):
+        return self._getter("last_applied_force")
+
+    @last_applied_force.setter
+    def last_applied_force(self, value):
+        self._setter("last_applied_force", value)
+
+
+@script_interface_register
+class VTKOutput(VTKOutputBase):
+    """
+    Create a VTK writer.
+
+    Files are written to ``<base_folder>/<identifier>/<prefix>_*.vtu``.
+    Summary is written to ``<base_folder>/<identifier>.pvd``.
+
+    Manual VTK callbacks can be called at any time to take a snapshot
+    of the current state of the LB fluid.
+
+    Automatic VTK callbacks can be disabled at any time and re-enabled later.
+    Please note that the internal VTK counter is no longer incremented when
+    an automatic callback is disabled, which means the number of LB steps
+    between two frames will not always be an integer multiple of ``delta_N``.
+
+    Parameters
+    ----------
+    identifier : :obj:`str`
+        Name of the VTK writer.
+    observables : :obj:`list`, {'density', 'velocity_vector', 'pressure_tensor'}
+        List of observables to write to the VTK files.
+    delta_N : :obj:`int`
+        Write frequency. If this value is 0 (default), the object is a
+        manual VTK callback that must be triggered manually. Otherwise,
+        it is an automatic callback that is added to the time loop and
+        writes every ``delta_N`` LB steps.
+    base_folder : :obj:`str` (optional), default is 'vtk_out'
+        Path to the output VTK folder.
+    prefix : :obj:`str` (optional), default is 'simulation_step'
+        Prefix for VTK files.
+
+    """
+    _so_name = "walberla::LBVTKHandle"
+    _so_creation_policy = "GLOBAL"
+    _so_bind_methods = ("enable", "disable", "write")
+
+    def required_keys(self):
+        return self.valid_keys() - self.default_params().keys()
+
+    def __repr__(self):
+        class_id = f"{self.__class__.__module__}.{self.__class__.__name__}"
+        if self.delta_N:
+            write_when = f"every {self.delta_N} LB steps"
+            if not self.enabled:
+                write_when += " (disabled)"
+        else:
+            write_when = "on demand"
+        return f"<{class_id}: write to '{self.vtk_uid}' {write_when}>"
+
+
+def edge_detection(boundary_mask, periodicity):
+    """
+    Find boundary nodes in contact with the fluid. Relies on a convolution
+    kernel constructed from the D3Q19 stencil.
+
+    Parameters
+    ----------
+    boundary_mask : (N, M, L) array_like of :obj:`bool`
+        Bitmask for the rasterized boundary geometry.
+    periodicity : (3,) array_like of :obj:`bool`
+        Bitmask for the box periodicity.
+
+    Returns
+    -------
+    (N, 3) array_like of :obj:`int`
+        The indices of the boundary nodes at the interface with the fluid.
+
+    """
+    import scipy.signal
+    import itertools
+
+    fluid_mask = np.logical_not(boundary_mask)
+
+    # edge kernel
+    edge = -np.ones((3, 3, 3))
+    for i, j, k in itertools.product((0, 2), (0, 2), (0, 2)):
+        edge[i, j, k] = 0
+    edge[1, 1, 1] = -np.sum(edge)
+
+    # periodic convolution
+    wrapped_mask = np.pad(fluid_mask.astype(int), 3 * [(2, 2)], mode="wrap")
+    if not periodicity[0]:
+        wrapped_mask[:2, :, :] = 0
+        wrapped_mask[-2:, :, :] = 0
+    if not periodicity[1]:
+        wrapped_mask[:, :2, :] = 0
+        wrapped_mask[:, -2:, :] = 0
+    if not periodicity[2]:
+        wrapped_mask[:, :, :2] = 0
+        wrapped_mask[:, :, -2:] = 0
+    convolution = scipy.signal.convolve(
+        wrapped_mask, edge, mode="same", method="direct")[2:-2, 2:-2, 2:-2]
+    convolution = np.multiply(convolution, boundary_mask)
+
+    return np.array(np.nonzero(convolution < 0)).T
+
+
+def calc_cylinder_tangential_vectors(center, agrid, offset, node_indices):
+    """
+    Utility function to calculate a constant slip velocity tangential to the
+    surface of a cylinder.
+
+    Parameters
+    ----------
+    center : (3,) array_like of :obj:`float`
+        Center of the cylinder.
+    agrid : :obj:`float`
+        LB agrid.
+    offset : :obj:`float`
+        LB offset.
+    node_indices : (N, 3) array_like of :obj:`int`
+        Indices of the boundary surface nodes.
+
+    Returns
+    -------
+    (N, 3) array_like of :obj:`float`
+        The unit vectors tangential to the surface of a cylinder.
+
+    """
+    velocities = []
+    for ijk in node_indices:
+        p = (ijk + offset) * agrid
+        r = center - p
+        norm = np.linalg.norm(r[:2])
+        if norm < 1e-10:
+            velocities.append(np.zeros(3))
+            continue
+        angle_r = np.arccos(np.dot(r[:2] / norm, [1, 0]))
+        angle_v = angle_r - np.pi / 2
+        flip = np.sign(r[1])
+        slip_velocity = np.array([flip * np.cos(angle_v), np.sin(angle_v), 0.])
+        velocities.append(slip_velocity)
+    return np.array(velocities)
diff --git a/src/python/espressomd/lb.pyx b/src/python/espressomd/lb.pyx
deleted file mode 100644
index e2875002e33..00000000000
--- a/src/python/espressomd/lb.pyx
+++ /dev/null
@@ -1,809 +0,0 @@
-#
-# Copyright (C) 2013-2022 The ESPResSo project
-#
-# This file is part of ESPResSo.
-#
-# ESPResSo is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# ESPResSo is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-include "myconfig.pxi"
-import os
-import cython
-import itertools
-import functools
-import numpy as np
-cimport numpy as np
-from libc cimport stdint
-from . import highlander
-from . import utils
-from . cimport utils
-from .utils cimport Vector3i, Vector3d, Vector6d, Vector19d
-from .lb cimport get_time_step
-
-
-cdef class FluidActor:
-
-    """
-    Abstract base class for interactions affecting particles in the system,
-    such as LB fluids. Derived classes must implement the interface to the
-    relevant core objects and global variables.
-    """
-
-    # Keys in active_list have to match the method name.
-    active_list = dict(HydrodynamicInteraction=False)
-
-    # __getstate__ and __setstate__ define the pickle interaction
-    def __getstate__(self):
-        odict = self._params.copy()
-        return odict
-
-    def __setstate__(self, params):
-        self._params = params
-        self._set_params_in_es_core()
-
-    def __init__(self, *args, **kwargs):
-        self._isactive = False
-        utils.check_valid_keys(self.valid_keys(), kwargs.keys())
-        utils.check_required_keys(self.required_keys(), kwargs.keys())
-        self._params = self.default_params()
-        self._params.update(kwargs)
-
-    def _activate(self):
-        inter = self._get_interaction_type()
-        if inter in FluidActor.active_list:
-            if FluidActor.active_list[inter]:
-                raise highlander.ThereCanOnlyBeOne(self.__class__.__bases__[0])
-            FluidActor.active_list[inter] = True
-
-        self.validate_params()
-        self._activate_method()
-        utils.handle_errors("Activation of an actor")
-        self._isactive = True
-
-    def _deactivate(self):
-        self._deactivate_method()
-        utils.handle_errors("Deactivation of an actor")
-        self._isactive = False
-        inter = self._get_interaction_type()
-        if inter in FluidActor.active_list:
-            if not FluidActor.active_list[inter]:
-                raise Exception(
-                    f"Class not registered in Actor.active_list: {self.__class__.__bases__[0].__name__}")
-            FluidActor.active_list[inter] = False
-
-    def is_valid(self):
-        """
-        Check if the data stored in this instance still matches the
-        corresponding data in the core.
-        """
-        return self._params == self._get_params_from_es_core()
-
-    def get_params(self):
-        """Get interaction parameters"""
-        # If this instance refers to an actual interaction defined in the es
-        # core, load current parameters from there
-        if self.is_active():
-            update = self._get_params_from_es_core()
-            self._params.update(update)
-        return self._params
-
-    def set_params(self, **p):
-        """Update the given parameters."""
-        # Check if keys are valid
-        utils.check_valid_keys(self.valid_keys(), p.keys())
-
-        # When an interaction is newly activated, all required keys must be
-        # given
-        if not self.is_active():
-            utils.check_required_keys(self.required_keys(), p.keys())
-
-        self._params.update(p)
-        # validate updated parameters
-        self.validate_params()
-        # Put in values given by the user
-        if self.is_active():
-            self._set_params_in_es_core()
-
-    def __str__(self):
-        return f"{self.__class__.__name__}({self.get_params()})"
-
-    def _get_interaction_type(self):
-        bases = self.class_lookup(self.__class__)
-        for i in range(len(bases)):
-            if bases[i].__name__ in FluidActor.active_list:
-                return bases[i].__name__
-
-    def class_lookup(self, cls):
-        c = list(cls.__bases__)
-        for base in c:
-            c.extend(self.class_lookup(base))
-        return c
-
-    def is_active(self):
-        return self._isactive
-
-    def valid_keys(self):
-        """Virtual method."""
-        raise Exception(
-            f"Subclasses of {self._get_interaction_type()} must define the valid_keys() method.")
-
-    def required_keys(self):
-        """Virtual method."""
-        raise Exception(
-            "Subclasses of {self._get_interaction_type()} must define the required_keys() method.")
-
-    def validate_params(self):
-        """Virtual method."""
-        raise Exception(
-            "Subclasses of {self._get_interaction_type()} must define the validate_params() method.")
-
-    def _get_params_from_es_core(self):
-        """Virtual method."""
-        raise Exception(
-            "Subclasses of {self._get_interaction_type()} must define the _get_params_from_es_core() method.")
-
-    def _set_params_in_es_core(self):
-        """Virtual method."""
-        raise Exception(
-            "Subclasses of {self._get_interaction_type()} must define the _set_params_in_es_core() method.")
-
-    def default_params(self):
-        """Virtual method."""
-        raise Exception(
-            "Subclasses of {self._get_interaction_type()} must define the default_params() method.")
-
-    def _activate_method(self):
-        """Virtual method."""
-        raise Exception(
-            "Subclasses of {self._get_interaction_type()} must define the _activate_method() method.")
-
-    def _deactivate_method(self):
-        """Virtual method."""
-        raise Exception(
-            "Subclasses of {self._get_interaction_type()} must define the _deactivate_method() method.")
-
-
-cdef class HydrodynamicInteraction(FluidActor):
-    """
-    Base class for LB implementations.
-
-    Parameters
-    ----------
-    agrid : :obj:`float`
-        Lattice constant. The box size in every direction must be an integer
-        multiple of ``agrid``.
-    tau : :obj:`float`
-        LB time step, must be an integer multiple of the MD time step.
-    dens : :obj:`float`
-        Fluid density.
-    visc : :obj:`float`
-        Fluid kinematic viscosity.
-    bulk_visc : :obj:`float`, optional
-        Fluid bulk viscosity.
-    gamma_odd : :obj:`int`, optional
-        Relaxation parameter :math:`\\gamma_{\\textrm{odd}}` for kinetic modes.
-    gamma_even : :obj:`int`, optional
-        Relaxation parameter :math:`\\gamma_{\\textrm{even}}` for kinetic modes.
-    ext_force_density : (3,) array_like of :obj:`float`, optional
-        Force density applied on the fluid.
-    kT : :obj:`float`, optional
-        Thermal energy of the simulated heat bath (for thermalized fluids).
-        Set it to 0 for an unthermalized fluid.
-    seed : :obj:`int`, optional
-        Initial counter value (or seed) of the philox RNG.
-        Required for a thermalized fluid. Must be positive.
-    """
-
-    def _assert_agrid_tau_set(self):
-        unset = self.default_params()
-        assert self.agrid != unset['agrid'] and self.tau != unset['tau'], \
-            "tau and agrid have to be set first!"
-
-    def _lb_init(self):
-        raise Exception(
-            "Subclasses of HydrodynamicInteraction must define the _lb_init() method.")
-
-    @classmethod
-    def _restore_object(cls, derived_cls, params):
-        obj = derived_cls(**params)
-        obj._params = params
-        return obj
-
-    def __reduce__(self):
-        return (HydrodynamicInteraction._restore_object,
-                (self.__class__, self._params))
-
-    def __getitem__(self, key):
-        cdef Vector3i shape
-        if isinstance(key, (tuple, list, np.ndarray)):
-            if len(key) == 3:
-                if any(isinstance(typ, slice) for typ in key):
-                    shape = lb_lbfluid_get_shape()
-                    return LBSlice(key, (shape[0], shape[1], shape[2]))
-                else:
-                    return LBFluidRoutines(np.array(key))
-        else:
-            raise Exception(
-                "%s is not a valid key. Should be a point on the nodegrid e.g. lbf[0,0,0], or a slice" % key)
-    # validate the given parameters on actor initialization
-    ####################################################
-
-    def validate_params(self):
-        default_params = self.default_params()
-
-        utils.check_type_or_throw_except(
-            self._params["kT"], 1, float, "kT must be a number")
-        if self._params["kT"] > 0. and not self._params["seed"]:
-            raise ValueError(
-                "seed has to be given if temperature is not 0.")
-
-        if self._params["dens"] == default_params["dens"]:
-            raise Exception("LB_FLUID density not set")
-        elif not (self._params["dens"] > 0.0 and (utils.is_valid_type(self._params["dens"], float) or utils.is_valid_type(self._params["dens"], int))):
-            raise ValueError("Density must be a positive double")
-
-        if self._params["tau"] <= 0.:
-            raise ValueError("tau has to be a positive double")
-
-    def valid_keys(self):
-        return {"agrid", "dens", "ext_force_density", "visc", "tau",
-                "bulk_visc", "gamma_odd", "gamma_even", "kT", "seed"}
-
-    def required_keys(self):
-        return {"dens", "agrid", "visc", "tau"}
-
-    def default_params(self):
-        return {"agrid": -1.0,
-                "dens": -1.0,
-                "ext_force_density": [0.0, 0.0, 0.0],
-                "visc": -1.0,
-                "bulk_visc": -1.0,
-                "tau": -1.0,
-                "seed": None,
-                "kT": 0.}
-
-    def _set_lattice_switch(self):
-        raise Exception(
-            "Subclasses of HydrodynamicInteraction must define the _set_lattice_switch() method.")
-
-    def _set_params_in_es_core(self):
-        default_params = self.default_params()
-        self.agrid = self._params['agrid']
-        self.tau = self._params['tau']
-        self.density = self._params['dens']
-
-        if self._params['kT'] > 0.:
-            self.seed = self._params['seed']
-        self.kT = self._params['kT']
-
-        self.viscosity = self._params['visc']
-        if self._params['bulk_visc'] != default_params['bulk_visc']:
-            self.bulk_viscosity = self._params['bulk_visc']
-
-        self.ext_force_density = self._params["ext_force_density"]
-
-        if "gamma_odd" in self._params:
-            python_lbfluid_set_gamma_odd(self._params["gamma_odd"])
-
-        if "gamma_even" in self._params:
-            python_lbfluid_set_gamma_even(self._params["gamma_even"])
-
-        utils.handle_errors("LB fluid activation")
-
-    def _get_params_from_es_core(self):
-        default_params = self.default_params()
-        self._params['agrid'] = self.agrid
-        self._params["tau"] = self.tau
-        self._params['dens'] = self.density
-        self._params["kT"] = self.kT
-        if self._params['kT'] > 0.0:
-            self._params['seed'] = self.seed
-        self._params['visc'] = self.viscosity
-        if not self._params["bulk_visc"] == default_params["bulk_visc"]:
-            self._params['bulk_visc'] = self.bulk_viscosity
-        self._params['ext_force_density'] = self.ext_force_density
-        if 'gamma_odd' in self._params:
-            self._params['gamma_odd'] = lb_lbfluid_get_gamma_odd()
-        if 'gamma_even' in self._params:
-            self._params['gamma_even'] = lb_lbfluid_get_gamma_even()
-
-        return self._params
-
-    def set_interpolation_order(self, interpolation_order):
-        """ Set the order for the fluid interpolation scheme.
-
-        Parameters
-        ----------
-        interpolation_order : :obj:`str`, {"linear", "quadratic"}
-            ``"linear"`` for trilinear interpolation, ``"quadratic"`` for
-            quadratic interpolation. For the CPU implementation of LB, only
-            ``"linear"`` is available.
-
-        """
-        if interpolation_order == "linear":
-            lb_lbinterpolation_set_interpolation_order(linear)
-        elif interpolation_order == "quadratic":
-            lb_lbinterpolation_set_interpolation_order(quadratic)
-        else:
-            raise ValueError("Invalid parameter")
-
-    def get_interpolated_velocity(self, pos):
-        """Get LB fluid velocity at specified position.
-
-        Parameters
-        ----------
-        pos : (3,) array_like of :obj:`float`
-            The position at which velocity is requested.
-
-        Returns
-        -------
-        v : (3,) array_like :obj:`float`
-            The LB fluid velocity at ``pos``.
-
-        """
-        return python_lbnode_get_interpolated_velocity(
-            utils.make_Vector3d(pos))
-
-    def write_vtk_velocity(self, path, bb1=None, bb2=None):
-        """Write the LB fluid velocity to a VTK file.
-        If both ``bb1`` and ``bb2`` are specified, return a subset of the grid.
-
-        Parameters
-        ----------
-        path : :obj:`str`
-            Path to the output ASCII file.
-        bb1 : (3,) array_like of :obj:`int`, optional
-            Node indices of the lower corner of the bounding box.
-        bb2 : (3,) array_like of :obj:`int`, optional
-            Node indices of the upper corner of the bounding box.
-
-        """
-        cdef vector[int] bb1_vec
-        cdef vector[int] bb2_vec
-        if bb1 is None and bb2 is None:
-            lb_lbfluid_print_vtk_velocity(utils.to_char_pointer(path))
-        elif bb1 is None or bb2 is None:
-            raise ValueError(
-                "Invalid parameter: must provide either both bb1 and bb2, or none of them")
-        else:
-            utils.check_type_or_throw_except(
-                bb1, 3, int, "bb1 has to be an integer list of length 3")
-            utils.check_type_or_throw_except(
-                bb2, 3, int, "bb2 has to be an integer list of length 3")
-            bb1_vec = bb1
-            bb2_vec = bb2
-            lb_lbfluid_print_vtk_velocity(
-                utils.to_char_pointer(path), bb1_vec, bb2_vec)
-
-    def write_vtk_boundary(self, path):
-        """Write the LB boundaries to a VTK file.
-
-        Parameters
-        ----------
-        path : :obj:`str`
-            Path to the output ASCII file.
-
-        """
-        lb_lbfluid_print_vtk_boundary(utils.to_char_pointer(path))
-
-    def write_velocity(self, path):
-        """Write the LB fluid velocity to a data file that can be loaded by
-        numpy, with format "x y z vx vy vz".
-
-        Parameters
-        ----------
-        path : :obj:`str`
-            Path to the output data file.
-
-        """
-        lb_lbfluid_print_velocity(utils.to_char_pointer(path))
-
-    def write_boundary(self, path):
-        """Write the LB boundaries to a data file that can be loaded by numpy,
-        with format "x y z u".
-
-        Parameters
-        ----------
-        path : :obj:`str`
-            Path to the output data file.
-
-        """
-        lb_lbfluid_print_boundary(utils.to_char_pointer(path))
-
-    def save_checkpoint(self, path, binary):
-        '''
-        Write LB node populations to a file.
-        :class:`~espressomd.lbboundaries.LBBoundaries`
-        information is not written to the file.
-        '''
-        tmp_path = path + ".__tmp__"
-        lb_lbfluid_save_checkpoint(utils.to_char_pointer(tmp_path), binary)
-        os.rename(tmp_path, path)
-
-    def load_checkpoint(self, path, binary):
-        '''
-        Load LB node populations from a file.
-        :class:`~espressomd.lbboundaries.LBBoundaries`
-        information is not available in the file. The boundary
-        information of the grid will be set to zero,
-        even if :class:`~espressomd.lbboundaries.LBBoundaries`
-        contains :class:`~espressomd.lbboundaries.LBBoundary`
-        objects (they are ignored).
-        '''
-        lb_lbfluid_load_checkpoint(utils.to_char_pointer(path), binary)
-
-    def _activate_method(self):
-        raise Exception(
-            "Subclasses of HydrodynamicInteraction have to implement _activate_method.")
-
-    def _deactivate_method(self):
-        lb_lbfluid_set_lattice_switch(NONE)
-
-    property shape:
-        def __get__(self):
-            cdef Vector3i shape = lb_lbfluid_get_shape()
-            return (shape[0], shape[1], shape[2])
-
-    property kT:
-        def __get__(self):
-            return lb_lbfluid_get_kT()
-
-        def __set__(self, kT):
-            cdef double _kT = kT
-            lb_lbfluid_set_kT(_kT)
-
-    property seed:
-        def __get__(self):
-            return lb_lbfluid_get_rng_state()
-
-        def __set__(self, seed):
-            cdef stdint.uint64_t _seed = seed
-            lb_lbfluid_set_rng_state(seed)
-
-    property pressure_tensor:
-        def __get__(self):
-            tensor = python_lbfluid_get_pressure_tensor(self.agrid, self.tau)
-            return utils.array_locked(tensor)
-
-        def __set__(self, value):
-            raise NotImplementedError
-
-    property ext_force_density:
-        def __get__(self):
-            self._assert_agrid_tau_set()
-            return python_lbfluid_get_ext_force_density(self.agrid, self.tau)
-
-        def __set__(self, ext_force_density):
-            self._assert_agrid_tau_set()
-            python_lbfluid_set_ext_force_density(
-                utils.make_Vector3d(ext_force_density), self.agrid, self.tau)
-
-    property density:
-        def __get__(self):
-            self._assert_agrid_tau_set()
-            return python_lbfluid_get_density(self.agrid)
-
-        def __set__(self, density):
-            self._assert_agrid_tau_set()
-            python_lbfluid_set_density(density, self.agrid)
-
-    property viscosity:
-        def __get__(self):
-            self._assert_agrid_tau_set()
-            return python_lbfluid_get_viscosity(self.agrid, self.tau)
-
-        def __set__(self, viscosity):
-            self._assert_agrid_tau_set()
-            python_lbfluid_set_viscosity(viscosity, self.agrid, self.tau)
-
-    property bulk_viscosity:
-        def __get__(self):
-            self._assert_agrid_tau_set()
-            return python_lbfluid_get_bulk_viscosity(self.agrid, self.tau)
-
-        def __set__(self, viscosity):
-            self._assert_agrid_tau_set()
-            python_lbfluid_set_bulk_viscosity(viscosity, self.agrid, self.tau)
-
-    property tau:
-        def __get__(self):
-            return lb_lbfluid_get_tau()
-
-        def __set__(self, tau):
-            lb_lbfluid_set_tau(tau)
-            if get_time_step() > 0.0:
-                check_tau_time_step_consistency(tau, get_time_step())
-
-    property agrid:
-        def __get__(self):
-            return lb_lbfluid_get_agrid()
-
-        def __set__(self, agrid):
-            lb_lbfluid_set_agrid(agrid)
-
-    def nodes(self):
-        """Provides a generator for iterating over all lb nodes"""
-
-        shape = self.shape
-        for i, j, k in itertools.product(
-                range(shape[0]), range(shape[1]), range(shape[2])):
-            yield self[i, j, k]
-
-
-cdef class LBFluid(HydrodynamicInteraction):
-    """
-    Initialize the lattice-Boltzmann method for hydrodynamic flow using the CPU.
-    See :class:`HydrodynamicInteraction` for the list of parameters.
-
-    """
-
-    def _set_lattice_switch(self):
-        lb_lbfluid_set_lattice_switch(CPU)
-
-    def _activate_method(self):
-        self.validate_params()
-        self._set_lattice_switch()
-        self._set_params_in_es_core()
-
-IF CUDA:
-    cdef class LBFluidGPU(HydrodynamicInteraction):
-        """
-        Initialize the lattice-Boltzmann method for hydrodynamic flow using the GPU.
-        See :class:`HydrodynamicInteraction` for the list of parameters.
-
-        """
-
-        def _set_lattice_switch(self):
-            lb_lbfluid_set_lattice_switch(GPU)
-
-        def _activate_method(self):
-            self.validate_params()
-            self._set_lattice_switch()
-            self._set_params_in_es_core()
-
-        @cython.boundscheck(False)
-        @cython.wraparound(False)
-        def get_interpolated_fluid_velocity_at_positions(self, np.ndarray[double, ndim=2, mode="c"] positions not None, three_point=False):
-            """Calculate the fluid velocity at given positions.
-
-            Parameters
-            ----------
-            positions : (N,3) numpy-array of type :obj:`float`
-                The 3-dimensional positions.
-
-            Returns
-            -------
-            velocities : (N,3) numpy-array of type :obj:`float`
-                The 3-dimensional LB fluid velocities.
-
-            Raises
-            ------
-            AssertionError
-                If shape of ``positions`` not (N,3).
-
-            """
-            assert positions.shape[1] == 3, \
-                "The input array must have shape (N,3)"
-            cdef int length
-            length = positions.shape[0]
-            velocities = np.empty_like(positions)
-            if three_point:
-                quadratic_velocity_interpolation(< double * >np.PyArray_GETPTR2(positions, 0, 0), < double * >np.PyArray_GETPTR2(velocities, 0, 0), length)
-            else:
-                linear_velocity_interpolation(< double * >np.PyArray_GETPTR2(positions, 0, 0), < double * >np.PyArray_GETPTR2(velocities, 0, 0), length)
-            return velocities * lb_lbfluid_get_lattice_speed()
-
-ELSE:
-    cdef class LBFluidGPU(HydrodynamicInteraction):
-        def __init__(self, *args, **kwargs):
-            raise Exception("LBFluidGPU not compiled in.")
-
-
-cdef class LBFluidRoutines:
-
-    def __init__(self, key):
-        utils.check_type_or_throw_except(
-            key, 3, int, "The index of an lb fluid node consists of three integers.")
-        self.node = utils.make_Vector3i(key)
-        if not lb_lbnode_is_index_valid(self.node):
-            raise ValueError("LB node index out of bounds")
-
-    property index:
-        def __get__(self):
-            return (self.node[0], self.node[1], self.node[2])
-
-    property velocity:
-        def __get__(self):
-            return python_lbnode_get_velocity(self.node)
-
-        def __set__(self, value):
-            utils.check_type_or_throw_except(
-                value, 3, float, "velocity has to be 3 floats")
-            python_lbnode_set_velocity(self.node, utils.make_Vector3d(value))
-
-    property density:
-        def __get__(self):
-            return python_lbnode_get_density(self.node)
-
-        def __set__(self, value):
-            python_lbnode_set_density(self.node, value)
-
-    property pressure_tensor:
-        def __get__(self):
-            tensor = python_lbnode_get_pressure_tensor(self.node)
-            return utils.array_locked(tensor)
-
-        def __set__(self, value):
-            raise NotImplementedError
-
-    property pressure_tensor_neq:
-        def __get__(self):
-            tensor = python_lbnode_get_pressure_tensor_neq(self.node)
-            return utils.array_locked(tensor)
-
-        def __set__(self, value):
-            raise NotImplementedError
-
-    property population:
-        def __get__(self):
-            cdef Vector19d double_return
-            double_return = lb_lbnode_get_pop(self.node)
-            return utils.array_locked(np.array([double_return[0],
-                                                double_return[1],
-                                                double_return[2],
-                                                double_return[3],
-                                                double_return[4],
-                                                double_return[5],
-                                                double_return[6],
-                                                double_return[7],
-                                                double_return[8],
-                                                double_return[9],
-                                                double_return[10],
-                                                double_return[11],
-                                                double_return[12],
-                                                double_return[13],
-                                                double_return[14],
-                                                double_return[15],
-                                                double_return[16],
-                                                double_return[17],
-                                                double_return[18]]
-                                               ))
-
-        def __set__(self, population):
-            cdef Vector19d _population
-            for i in range(19):
-                _population[i] = population[i]
-            lb_lbnode_set_pop(self.node, _population)
-
-    property boundary:
-        def __get__(self):
-            return lb_lbnode_get_boundary(self.node)
-
-        def __set__(self, value):
-            raise NotImplementedError
-
-    def __eq__(self, obj1):
-        index_1 = np.array(self.index)
-        index_2 = np.array(obj1.index)
-        return all(index_1 == index_2)
-
-    def __hash__(self):
-        return hash(self.index)
-
-
-class LBSlice:
-
-    def __init__(self, key, shape):
-        self.x_indices, self.y_indices, self.z_indices = self.get_indices(
-            key, shape[0], shape[1], shape[2])
-
-    def get_indices(self, key, shape_x, shape_y, shape_z):
-        x_indices = np.atleast_1d(np.arange(shape_x)[key[0]])
-        y_indices = np.atleast_1d(np.arange(shape_y)[key[1]])
-        z_indices = np.atleast_1d(np.arange(shape_z)[key[2]])
-        return x_indices, y_indices, z_indices
-
-    def get_values(self, x_indices, y_indices, z_indices, prop_name):
-        shape_res = np.shape(
-            getattr(LBFluidRoutines(np.array([0, 0, 0])), prop_name))
-        res = np.zeros(
-            (x_indices.size,
-             y_indices.size,
-             z_indices.size,
-             *shape_res))
-        for i, x in enumerate(x_indices):
-            for j, y in enumerate(y_indices):
-                for k, z in enumerate(z_indices):
-                    res[i, j, k] = getattr(LBFluidRoutines(
-                        np.array([x, y, z])), prop_name)
-        if shape_res == (1,):
-            res = np.squeeze(res, axis=-1)
-        return utils.array_locked(res)
-
-    def set_values(self, x_indices, y_indices, z_indices, prop_name, value):
-        for i, x in enumerate(x_indices):
-            for j, y in enumerate(y_indices):
-                for k, z in enumerate(z_indices):
-                    setattr(LBFluidRoutines(
-                        np.array([x, y, z])), prop_name, value[i, j, k])
-
-    def __iter__(self):
-        indices = [(x, y, z) for (x, y, z) in itertools.product(
-            self.x_indices, self.y_indices, self.z_indices)]
-        return (LBFluidRoutines(np.array(index)) for index in indices)
-
-
-def _add_lb_slice_properties():
-    """
-    Automatically add all of LBFluidRoutines's properties to LBSlice.
-
-    """
-
-    def set_attribute(lb_slice, value, attribute):
-        """
-        Setter function that sets attribute on every member of lb_slice.
-        If values contains only one element, all members are set to it.
-
-        """
-
-        indices = [lb_slice.x_indices, lb_slice.y_indices, lb_slice.z_indices]
-        N = [len(x) for x in indices]
-
-        if N[0] * N[1] * N[2] == 0:
-            raise AttributeError("Cannot set properties of an empty LBSlice")
-
-        value = np.copy(value)
-        attribute_shape = lb_slice.get_values(
-            *np.zeros((3, 1), dtype=int), attribute).shape[3:]
-        target_shape = (*N, *attribute_shape)
-
-        # broadcast if only one element was provided
-        if value.shape == attribute_shape:
-            value = np.ones(target_shape) * value
-
-        if value.shape != target_shape:
-            raise ValueError(
-                f"Input-dimensions of {attribute} array {value.shape} does not match slice dimensions {target_shape}.")
-
-        lb_slice.set_values(*indices, attribute, value)
-
-    def get_attribute(lb_slice, attribute):
-        """
-        Getter function that copies attribute from every member of
-        lb_slice into an array (if possible).
-
-        """
-
-        indices = [lb_slice.x_indices, lb_slice.y_indices, lb_slice.z_indices]
-        N = [len(x) for x in indices]
-
-        if N[0] * N[1] * N[2] == 0:
-            return np.empty(0, dtype=type(None))
-
-        return lb_slice.get_values(*indices, attribute)
-
-    for attribute_name in dir(LBFluidRoutines):
-        if attribute_name in dir(LBSlice) or not isinstance(
-                getattr(LBFluidRoutines, attribute_name), type(LBFluidRoutines.density)):
-            continue
-
-        # synthesize a new property
-        new_property = property(
-            functools.partial(get_attribute, attribute=attribute_name),
-            functools.partial(set_attribute, attribute=attribute_name),
-            doc=getattr(LBFluidRoutines, attribute_name).__doc__ or f'{attribute_name} for a slice')
-        # attach the property to LBSlice
-        setattr(LBSlice, attribute_name, new_property)
-
-
-_add_lb_slice_properties()
diff --git a/src/python/espressomd/lbboundaries.py b/src/python/espressomd/lbboundaries.py
deleted file mode 100644
index 5ddea5dcc0a..00000000000
--- a/src/python/espressomd/lbboundaries.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright (C) 2010-2022 The ESPResSo project
-#
-# This file is part of ESPResSo.
-#
-# ESPResSo is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# ESPResSo is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-from .script_interface import ScriptObjectList, ScriptInterfaceHelper, script_interface_register
-from .code_features import has_features
-
-
-if any(has_features(i) for i in ["LB_BOUNDARIES", "LB_BOUNDARIES_GPU"]):
-    @script_interface_register
-    class LBBoundaries(ScriptObjectList):
-
-        """
-        Creates a set of lattice-Boltzmann boundaries.
-
-        Methods
-        -------
-        size()
-            Get the number of active boundaries.
-        empty()
-            Return ``True`` if there are not active boundaries.
-        clear()
-            Clear the list of boundaries.
-
-        """
-
-        _so_name = "LBBoundaries::LBBoundaries"
-        _so_bind_methods = ("size", "empty", "clear")
-
-        def add(self, *args, **kwargs):
-            """
-            Adds a boundary to the set of boundaries.
-            Either pass a valid boundary as argument,
-            or a valid set of parameters to create a boundary.
-
-            """
-
-            if len(args) == 1:
-                if isinstance(args[0], LBBoundary):
-                    lbboundary = args[0]
-                else:
-                    raise TypeError(
-                        "Either a LBBoundary object or key-value pairs for the parameters of a LBBoundary object need to be passed.")
-            else:
-                lbboundary = LBBoundary(**kwargs)
-            self.call_method("add", object=lbboundary)
-            return lbboundary
-
-        def remove(self, lbboundary):
-            """
-            Removes a boundary from the set.
-
-            Parameters
-            ----------
-            lbboundary : :obj:`LBBoundary`
-                The boundary to be removed from the set.
-
-            """
-
-            self.call_method("remove", object=lbboundary)
-
-    @script_interface_register
-    class LBBoundary(ScriptInterfaceHelper):
-
-        """
-        Creates a LB boundary from a shape.
-
-        The fluid velocity is limited to :math:`v_{\\mathrm{max}} = 0.20`
-        (see *quasi-incompressible limit* in :cite:`kruger17a`,
-        chapter 7, page 272), which corresponds to Mach 0.35.
-
-        The relative error in the fluid density between a compressible fluid
-        and an incompressible fluid at Mach 0.30 is less than 5% (see
-        *constant density assumption* in :cite:`kundu01a` chapter 16, page
-        663). Since the speed of sound is :math:`c_s = 1 / \\sqrt{3}` in LB
-        velocity units in a D3Q19 lattice, the velocity limit at Mach 0.30
-        is :math:`v_{\\mathrm{max}} = 0.30 / \\sqrt{3} \\approx 0.17`.
-        At Mach 0.35 the relative error is around 6% and
-        :math:`v_{\\mathrm{max}} = 0.35 / \\sqrt{3} \\approx 0.20`.
-
-        Parameters
-        ----------
-        shape : :obj:`espressomd.shapes.Shape`
-            The shape from which to build the boundary.
-        velocity : (3,) array_like of :obj:`float`, optional
-            The boundary slip velocity. By default, a velocity of zero is used
-            (no-slip boundary condition).
-
-        """
-
-        _so_name = "LBBoundaries::LBBoundary"
-        _so_bind_methods = ("get_force",)
diff --git a/src/python/espressomd/script_interface.pyx b/src/python/espressomd/script_interface.pyx
index 39a2121c845..5a81316d83b 100644
--- a/src/python/espressomd/script_interface.pyx
+++ b/src/python/espressomd/script_interface.pyx
@@ -207,6 +207,19 @@ cdef class PScriptInterface:
 
         return odict
 
+
+class array_variant(np.ndarray):
+
+    """
+    Returns a numpy.ndarray that will be serialized as a ``std::vector``.
+
+    """
+
+    def __new__(cls, input_array):
+        obj = np.asarray(input_array).view(cls)
+        return obj
+
+
 cdef Variant python_object_to_variant(value) except *:
     """Convert Python objects to C++ Variant objects."""
 
@@ -216,10 +229,17 @@ cdef Variant python_object_to_variant(value) except *:
     cdef unordered_map[int, Variant] map_int2var
     cdef unordered_map[string, Variant] map_str2var
     cdef PObjectRef oref
+    cdef int[::1] view_int
+    cdef int * data_int
+    cdef double[::1] view_double
+    cdef double * data_double
 
     if value is None:
         return Variant()
 
+    if isinstance(value, np.ndarray) and value.ndim == 0:
+        value = value.item()
+
     # The order is important, the object character should
     # be preserved even if the PScriptInterface derived class
     # is iterable.
@@ -245,6 +265,16 @@ cdef Variant python_object_to_variant(value) except *:
                     f" to 'Variant[std::unordered_map<std::string, Variant>]'")
     elif type(value) in (str, np.str_):
         return make_variant[string](utils.to_char_pointer(str(value)))
+    elif isinstance(value, array_variant) and np.issubdtype(value.dtype, np.signedinteger):
+        view_int = np.ascontiguousarray(value, dtype=np.int32)
+        data_int = &view_int[0]
+        vec_int.assign(data_int, data_int + len(view_int))
+        return make_variant[vector[int]](vec_int)
+    elif isinstance(value, array_variant) and np.issubdtype(value.dtype, np.floating):
+        view_double = np.ascontiguousarray(value, dtype=np.float64)
+        data_double = &view_double[0]
+        vec_double.assign(data_double, data_double + len(view_double))
+        return make_variant[vector[double]](vec_double)
     elif hasattr(value, '__iter__'):
         if len(value) == 0:
             return make_variant[vector[Variant]](vec_variant)
@@ -448,8 +478,7 @@ class ScriptInterfaceHelper(PScriptInterface):
 class ScriptObjectList(ScriptInterfaceHelper):
     """
     Base class for container-like classes such as
-    :class:`~espressomd.constraints.Constraints` and
-    :class:`~espressomd.lbboundaries.LBBoundaries`. Derived classes must
+    :class:`~espressomd.constraints.Constraints`. Derived classes must
     implement an ``add()`` method which adds a single item to the container.
 
     The core objects must be managed by a container derived from
diff --git a/src/python/espressomd/shapes.py b/src/python/espressomd/shapes.py
index 241e90a303a..e92dc3fc0df 100644
--- a/src/python/espressomd/shapes.py
+++ b/src/python/espressomd/shapes.py
@@ -21,7 +21,7 @@
 
 
 class Shape:
-    _so_bind_methods = ("calc_distance",)
+    _so_bind_methods = ("calc_distance", "is_inside")
 
 
 @script_interface_register
diff --git a/src/python/espressomd/system.py b/src/python/espressomd/system.py
index 0d39573cc6d..2964f81df03 100644
--- a/src/python/espressomd/system.py
+++ b/src/python/espressomd/system.py
@@ -29,11 +29,10 @@
 from . import collision_detection
 from . import comfixed
 from . import constraints
-from . import ekboundaries
 from . import galilei
 from . import interactions
 from . import integrate
-from . import lbboundaries
+from . import electrokinetics
 from . import lees_edwards
 from . import particle_data
 from . import thermostat
@@ -91,11 +90,11 @@ class System(ScriptInterfaceHelper):
     collision_detection: :class:`espressomd.collision_detection.CollisionDetection`
     comfixed: :class:`espressomd.comfixed.ComFixed`
     constraints: :class:`espressomd.constraints.Constraints`
+    ekcontainer: :class:`espressomd.electrokinetics.EKContainer`
+    ekreactions: :class:`espressomd.electrokinetics.EKReactions`
     cuda_init_handle: :class:`espressomd.cuda_init.CudaInitHandle`
-    ekboundaries: :class:`espressomd.ekboundaries.EKBoundaries`
     galilei: :class:`espressomd.galilei.GalileiTransform`
     integrator: :class:`espressomd.integrate.IntegratorHandle`
-    lbboundaries: :class:`espressomd.lbboundaries.LBBoundaries`
     lees_edwards: :class:`espressomd.lees_edwards.LeesEdwards`
     non_bonded_inter: :class:`espressomd.interactions.NonBondedInteractions`
     part: :class:`espressomd.particle_data.ParticleList`
@@ -210,10 +209,10 @@ def __init__(self, **kwargs):
         self.constraints = constraints.Constraints()
         if has_features("CUDA"):
             self.cuda_init_handle = cuda_init.CudaInitHandle()
+        if has_features("WALBERLA"):
+            self.ekcontainer = electrokinetics.EKContainer()
+            self.ekreactions = electrokinetics.EKReactions()
         self.galilei = galilei.GalileiTransform()
-        if has_features("LB_BOUNDARIES") or has_features("LB_BOUNDARIES_GPU"):
-            self.lbboundaries = lbboundaries.LBBoundaries()
-            self.ekboundaries = ekboundaries.EKBoundaries()
         self.lees_edwards = lees_edwards.LeesEdwards()
         self.non_bonded_inter = interactions.NonBondedInteractions()
         self.part = particle_data.ParticleList()
@@ -243,14 +242,14 @@ def __getstate__(self):
             checkpointable_properties.append("_active_virtual_sites_handle")
         checkpointable_properties += [
             "non_bonded_inter", "bonded_inter", "cell_system", "lees_edwards",
-            "part", "actors", "analysis", "auto_update_accumulators",
-            "comfixed", "constraints", "galilei", "thermostat",
-            "bond_breakage"
+            "part", "analysis", "auto_update_accumulators",
+            "comfixed", "constraints", "galilei", "bond_breakage"
         ]
-        if has_features("LB_BOUNDARIES") or has_features("LB_BOUNDARIES_GPU"):
-            checkpointable_properties.append("lbboundaries")
         if has_features("COLLISION_DETECTION"):
             checkpointable_properties.append("collision_detection")
+        checkpointable_properties += ["actors", "thermostat"]
+        if has_features("WALBERLA"):
+            checkpointable_properties += ["ekcontainer", "ekreactions"]
 
         odict = collections.OrderedDict()
         for property_name in checkpointable_properties:
diff --git a/src/python/espressomd/thermostat.pxd b/src/python/espressomd/thermostat.pxd
index 7ad36753057..4b0394d12a1 100644
--- a/src/python/espressomd/thermostat.pxd
+++ b/src/python/espressomd/thermostat.pxd
@@ -114,3 +114,14 @@ cdef extern from "stokesian_dynamics/sd_interface.hpp":
     IF STOKESIAN_DYNAMICS:
         void set_sd_kT(double kT) except +
         double get_sd_kT()
+
+cdef extern from "grid_based_algorithms/lb_interface.hpp":
+    double lb_lbfluid_get_kT "LB::get_kT"() except +
+
+cdef extern from "grid_based_algorithms/lb_particle_coupling.hpp":
+    void lb_lbcoupling_set_rng_state(stdint.uint64_t) except +
+    stdint.uint64_t lb_lbcoupling_get_rng_state() except +
+    void lb_lbcoupling_set_gamma(double) except +
+    double lb_lbcoupling_get_gamma() except +
+    cbool lb_lbcoupling_is_seed_required() except +
+    void mpi_bcast_lb_particle_coupling()
diff --git a/src/python/espressomd/thermostat.pyx b/src/python/espressomd/thermostat.pyx
index 1b8e63c1d68..f9429703875 100644
--- a/src/python/espressomd/thermostat.pyx
+++ b/src/python/espressomd/thermostat.pyx
@@ -20,12 +20,6 @@ import functools
 include "myconfig.pxi"
 from . cimport utils
 from .lb import HydrodynamicInteraction
-from .lb cimport lb_lbcoupling_set_gamma
-from .lb cimport lb_lbcoupling_get_gamma
-from .lb cimport lb_lbcoupling_set_rng_state
-from .lb cimport lb_lbcoupling_get_rng_state
-from .lb cimport lb_lbcoupling_is_seed_required
-from .lb cimport lb_lbfluid_get_kT
 
 
 def AssertThermostatType(*allowedthermostats):
@@ -234,13 +228,17 @@ cdef class Thermostat:
                 thermo_list.append(sd_dict)
         return thermo_list
 
+    def _set_temperature(self, kT):
+        mpi_set_temperature(kT)
+        utils.handle_errors("Temperature change failed")
+
     def turn_off(self):
         """
         Turns off all the thermostat and sets all the thermostat variables to zero.
 
         """
 
-        mpi_set_temperature(0.)
+        self._set_temperature(0.)
         mpi_set_thermo_virtual(True)
         IF PARTICLE_ANISOTROPY:
             mpi_set_langevin_gamma(utils.make_Vector3d((0., 0., 0.)))
@@ -257,6 +255,7 @@ cdef class Thermostat:
 
         mpi_set_thermo_switch(THERMO_OFF)
         lb_lbcoupling_set_gamma(0.0)
+        mpi_bcast_lb_particle_coupling()
 
     @AssertThermostatType(THERMO_LANGEVIN, THERMO_DPD)
     def set_langevin(self, kT, gamma, gamma_rotation=None,
@@ -348,7 +347,7 @@ cdef class Thermostat:
                 raise ValueError("seed must be a positive integer")
             langevin_set_rng_seed(seed)
 
-        mpi_set_temperature(kT)
+        self._set_temperature(kT)
         IF PARTICLE_ANISOTROPY:
             cdef utils.Vector3d gamma_vec
             if scalar_gamma_def:
@@ -475,7 +474,7 @@ cdef class Thermostat:
                 raise ValueError("seed must be a positive integer")
             brownian_set_rng_seed(seed)
 
-        mpi_set_temperature(kT)
+        self._set_temperature(kT)
         IF PARTICLE_ANISOTROPY:
             cdef utils.Vector3d gamma_vec
             if scalar_gamma_def:
@@ -524,11 +523,11 @@ cdef class Thermostat:
         """
         Sets the LB thermostat.
 
-        This thermostat requires the feature ``LBFluid`` or ``LBFluidGPU``.
+        This thermostat requires the feature ``WALBERLA``.
 
         Parameters
         ----------
-        LB_fluid : :class:`~espressomd.lb.LBFluid` or :class:`~espressomd.lb.LBFluidGPU`
+        LB_fluid : :class:`~espressomd.lb.LBFluidWalberla`
         seed : :obj:`int`
             Seed for the random number generator, required if kT > 0.
             Must be positive.
@@ -553,14 +552,17 @@ cdef class Thermostat:
                 if seed < 0:
                     raise ValueError("seed must be a positive integer")
                 lb_lbcoupling_set_rng_state(seed)
+                mpi_bcast_lb_particle_coupling()
         else:
             lb_lbcoupling_set_rng_state(0)
+            mpi_bcast_lb_particle_coupling()
 
         global thermo_switch
         mpi_set_thermo_switch(thermo_switch | THERMO_LB)
 
         mpi_set_thermo_virtual(act_on_virtual)
         lb_lbcoupling_set_gamma(gamma)
+        mpi_bcast_lb_particle_coupling()
 
     IF NPT:
         @AssertThermostatType(THERMO_NPT_ISO)
@@ -598,7 +600,7 @@ cdef class Thermostat:
                     raise ValueError("seed must be a positive integer")
                 npt_iso_set_rng_seed(seed)
 
-            mpi_set_temperature(kT)
+            self._set_temperature(kT)
             global thermo_switch
             mpi_set_thermo_switch(thermo_switch | THERMO_NPT_ISO)
             mpi_set_nptiso_gammas(gamma0, gammav)
@@ -635,7 +637,7 @@ cdef class Thermostat:
                     raise ValueError("seed must be a positive integer")
                 dpd_set_rng_seed(seed)
 
-            mpi_set_temperature(kT)
+            self._set_temperature(kT)
             global thermo_switch
             mpi_set_thermo_switch(thermo_switch | THERMO_DPD)
 
diff --git a/src/python/espressomd/visualization.py b/src/python/espressomd/visualization.py
index f06e30601c0..1379f9fe81a 100644
--- a/src/python/espressomd/visualization.py
+++ b/src/python/espressomd/visualization.py
@@ -193,8 +193,10 @@ class openGLLive():
         Rescale LB node velocity arrow length.
     LB_vel_radius_scale : :obj:`float`, optional
         Rescale LB node velocity arrow radii.
-    LB_arrow_color : (3,) array_like of :obj:`float`, optional
-        RGB of the LB velocity arrows.
+    LB_arrow_color_fluid : (3,) array_like of :obj:`float`, optional
+        RGB of the LB velocity arrows inside the fluid.
+    LB_arrow_color_boundary : (3,) array_like of :obj:`float`, optional
+        RGB of the LB velocity arrows inside boundaries.
     LB_arrow_material : :obj:`str`, optional
         Material of LB arrows.
     quality_constraints : :obj:`int`, optional
@@ -338,7 +340,8 @@ def __init__(self, system, **kwargs):
             'LB_plane_ngrid': 5,
             'LB_vel_scale': 1.0,
             'LB_vel_radius_scale': 0.005,
-            'LB_arrow_color': [1.0, 1.0, 1.0],
+            'LB_arrow_color_fluid': [1.0, 1.0, 1.0],
+            'LB_arrow_color_boundary': [1.0, 0.25, 0.25],
             'LB_arrow_material': 'transparent1',
             'LB_arrow_quality': 16,
 
@@ -374,11 +377,6 @@ def __init__(self, system, **kwargs):
         if not espressomd.has_features('ROTATION'):
             self.specs['director_arrows'] = False
 
-        if not espressomd.has_features('LB_BOUNDARIES') and \
-                not espressomd.has_features('LB_BOUNDARIES_GPU'):
-            self.specs['LB_draw_boundaries'] = False
-            self.specs['LB_draw_node_boundaries'] = False
-
         # ESPResSo-related inits that are known only when running the
         # integration loop are called once in the update loop
         # (constraints, node boxes, cell boxes, charge range, bonds)
@@ -763,7 +761,8 @@ def _update_lb_velocity_plane_cpu(self):
                               xj * 1.0 / ng * self.lb_plane_b2) % self.system.box_l)
                 i, j, k = (int(ppp / agrid) for ppp in pp)
                 lb_vel = np.copy(self.lb[i, j, k].velocity)
-                self.lb_plane_vel.append([pp, lb_vel])
+                lb_boundary = self.lb[i, j, k].is_boundary
+                self.lb_plane_vel.append([pp, lb_vel, lb_boundary])
 
     def _update_lb_velocity_plane_gpu(self):
         ng = self.specs['LB_plane_ngrid']
@@ -777,8 +776,9 @@ def _update_lb_velocity_plane_gpu(self):
         lb_vels = self.lb.get_interpolated_fluid_velocity_at_positions(
             np.array(col_pos))
         self.lb_plane_vel = []
+        lb_boundary = False  # TODO WALBERLA
         for p, v in zip(col_pos, lb_vels):
-            self.lb_plane_vel.append([p, v])
+            self.lb_plane_vel.append([p, v, lb_boundary])
 
     def _update_cells(self):
         self.cell_box_origins = []
@@ -870,21 +870,6 @@ def shape_arguments(shape, part_type):
                     except KeyError:
                         self.shapes.append(Shape(*arguments))
 
-        if self.specs['LB_draw_boundaries']:
-            ni = 0
-            for constraint in self.system.lbboundaries:
-                if isinstance(constraint, espressomd.lbboundaries.LBBoundary):
-                    part_type = ni
-                    ni += 1
-                    shape = constraint.get_parameter('shape')
-                    for sub_shape in unpack_shapes(shape):
-                        arguments = shape_arguments(sub_shape, part_type)
-                        try:
-                            self.shapes.append(
-                                shape_mapping[sub_shape.name()](*arguments))
-                        except KeyError:
-                            self.shapes.append(Shape(*arguments))
-
     def _update_bonds(self, particle_data):
         """Update bond data used for drawing bonds.
         Do not call directly but use
@@ -947,6 +932,8 @@ def _draw_system(self):
             self._draw_cells()
         if self.specs['LB_draw_nodes'] or self.specs['LB_draw_node_boundaries']:
             self._draw_lb_grid()
+        if self.specs['LB_draw_boundaries']:
+            self._draw_lb_boundaries()
 
     def _draw_system_box(self):
         draw_box([0, 0, 0], self.system.box_l, self.inverse_bg_color,
@@ -973,14 +960,29 @@ def _draw_lb_grid(self):
                 for k in range(int(dims[2])):
                     n = np.array([i, j, k]) * cell_size
                     if self.specs['LB_draw_node_boundaries'] \
-                            and self.lb[i, j, k].boundary:
+                            and self.lb[i, j, k].is_boundary:
                         draw_box(n, cell_size, self.lb_box_color_boundary,
                                  self.materials['transparent2'], 5.0)
                     if self.specs['LB_draw_nodes'] \
-                            and not self.lb[i, j, k].boundary:
+                            and not self.lb[i, j, k].is_boundary:
                         draw_box(n, cell_size, self.lb_box_color,
                                  self.materials['transparent2'], 1.5)
 
+    def _draw_lb_boundaries(self):
+        a = self.lb_params['agrid']
+        dims = np.rint(np.array(self.system.box_l) / a)
+
+        set_solid_material(self.inverse_bg_color)
+        OpenGL.GL.glPointSize(self.specs['rasterize_pointsize'])
+        OpenGL.GL.glBegin(OpenGL.GL.GL_POINTS)
+        for i in range(int(dims[0])):
+            for j in range(int(dims[1])):
+                for k in range(int(dims[2])):
+                    if self.lb[i, j, k].is_boundary:
+                        OpenGL.GL.glVertex3f(
+                            i * a + 0.5, j * a + 0.5, k * a + 0.5)
+        OpenGL.GL.glEnd()
+
     def _draw_constraints(self):
 
         # clip borders of simulation box
@@ -1242,12 +1244,12 @@ def _cut_bond(self, x_a, dx):
     # arrows in a plane for LB velocities
     def _draw_lb_vel(self):
 
-        for lb_pos, lb_vel in self.lb_plane_vel:
+        for lb_pos, lb_vel, lb_boundary in self.lb_plane_vel:
             draw_arrow(
                 lb_pos,
                 lb_vel * self.specs['LB_vel_scale'],
                 self.lb_arrow_radius,
-                self.specs['LB_arrow_color'],
+                self.specs['LB_arrow_color_boundary'] if lb_boundary else self.specs['LB_arrow_color_fluid'],
                 self.materials[self.specs['LB_arrow_material']],
                 self.specs['LB_arrow_quality'])
 
@@ -1592,14 +1594,12 @@ def _init_espresso_visualization(self):
         self.depth = 0
 
         # LOOK FOR LB ACTOR
-        lb_types = [espressomd.lb.LBFluid]
-        if espressomd.has_features('CUDA'):
-            lb_types.append(espressomd.lb.LBFluidGPU)
+        lb_types = [espressomd.lb.LBFluidWalberla]
         for actor in self.system.actors:
             if isinstance(actor, tuple(lb_types)):
                 self.lb_params = actor.get_params()
                 self.lb = actor
-                self.lb_is_cpu = isinstance(actor, espressomd.lb.LBFluid)
+                self.lb_is_cpu = True
                 break
 
         if self.specs['LB_draw_velocity_plane']:
diff --git a/src/script_interface/CMakeLists.txt b/src/script_interface/CMakeLists.txt
index 13736ca4090..430cd7a8662 100644
--- a/src/script_interface/CMakeLists.txt
+++ b/src/script_interface/CMakeLists.txt
@@ -38,7 +38,6 @@ add_subdirectory(galilei)
 add_subdirectory(h5md)
 add_subdirectory(integrators)
 add_subdirectory(interactions)
-add_subdirectory(lbboundaries)
 add_subdirectory(lees_edwards)
 add_subdirectory(magnetostatics)
 add_subdirectory(math)
@@ -51,6 +50,7 @@ add_subdirectory(scafacos)
 add_subdirectory(shapes)
 add_subdirectory(system)
 add_subdirectory(virtual_sites)
+add_subdirectory(walberla)
 
 install(TARGETS espresso_script_interface
         LIBRARY DESTINATION ${ESPRESSO_INSTALL_PYTHON}/espressomd)
diff --git a/src/script_interface/initialize.cpp b/src/script_interface/initialize.cpp
index b9625598722..29b7d11d164 100644
--- a/src/script_interface/initialize.cpp
+++ b/src/script_interface/initialize.cpp
@@ -34,7 +34,6 @@
 #include "h5md/initialize.hpp"
 #include "integrators/initialize.hpp"
 #include "interactions/initialize.hpp"
-#include "lbboundaries/initialize.hpp"
 #include "lees_edwards/initialize.hpp"
 #include "magnetostatics/initialize.hpp"
 #include "math/initialize.hpp"
@@ -46,6 +45,7 @@
 #include "shapes/initialize.hpp"
 #include "system/initialize.hpp"
 #include "virtual_sites/initialize.hpp"
+#include "walberla/initialize.hpp"
 
 namespace ScriptInterface {
 void initialize(Utils::Factory<ObjectHandle> *f) {
@@ -62,7 +62,6 @@ void initialize(Utils::Factory<ObjectHandle> *f) {
   Galilei::initialize(f);
   Integrators::initialize(f);
   Interactions::initialize(f);
-  LBBoundaries::initialize(f);
   LeesEdwards::initialize(f);
   Math::initialize(f);
   MPIIO::initialize(f);
@@ -76,6 +75,9 @@ void initialize(Utils::Factory<ObjectHandle> *f) {
 #ifdef H5MD
   Writer::initialize(f);
 #endif
+#ifdef WALBERLA
+  walberla::initialize(f);
+#endif
 }
 
 } // namespace ScriptInterface
diff --git a/src/script_interface/lbboundaries/LBBoundaries.hpp b/src/script_interface/lbboundaries/LBBoundaries.hpp
deleted file mode 100644
index c08d4df304c..00000000000
--- a/src/script_interface/lbboundaries/LBBoundaries.hpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef SCRIPT_INTERFACE_LBBOUNDARIES_LBBOUNDARIES_HPP
-#define SCRIPT_INTERFACE_LBBOUNDARIES_LBBOUNDARIES_HPP
-
-#include "config/config.hpp"
-
-#include "LBBoundary.hpp"
-
-#include "core/grid_based_algorithms/lb_boundaries.hpp"
-#include "script_interface/ObjectList.hpp"
-#include "script_interface/ScriptInterface.hpp"
-
-#include <memory>
-
-namespace ScriptInterface {
-namespace LBBoundaries {
-class LBBoundaries : public ObjectList<LBBoundary> {
-  void add_in_core(std::shared_ptr<LBBoundary> const &obj_ptr) override {
-#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU)
-    ::LBBoundaries::add(obj_ptr->lbboundary());
-#endif
-  }
-
-  void remove_in_core(std::shared_ptr<LBBoundary> const &obj_ptr) override {
-#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU)
-    ::LBBoundaries::remove(obj_ptr->lbboundary());
-#endif
-  }
-
-private:
-  // disable serialization: pickling done by the python interface
-  std::string get_internal_state() const override { return {}; }
-  void set_internal_state(std::string const &state) override {}
-};
-} /* namespace LBBoundaries */
-} /* namespace ScriptInterface */
-#endif
diff --git a/src/script_interface/lbboundaries/LBBoundary.hpp b/src/script_interface/lbboundaries/LBBoundary.hpp
deleted file mode 100644
index 7dd3e090df8..00000000000
--- a/src/script_interface/lbboundaries/LBBoundary.hpp
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (C) 2010-2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef SCRIPT_INTERFACE_LBBOUNDARIES_LBBOUNDARY_HPP
-#define SCRIPT_INTERFACE_LBBOUNDARIES_LBBOUNDARY_HPP
-
-#include "config/config.hpp"
-
-#include "script_interface/ScriptInterface.hpp"
-#include "script_interface/auto_parameters/AutoParameters.hpp"
-#include "script_interface/shapes/Shape.hpp"
-
-#include "core/grid_based_algorithms/lb_interface.hpp"
-#include "core/grid_based_algorithms/lbboundaries/LBBoundary.hpp"
-
-#include <memory>
-#include <string>
-
-namespace ScriptInterface {
-namespace LBBoundaries {
-class LBBoundary : public AutoParameters<LBBoundary> {
-public:
-  LBBoundary() : m_lbboundary(std::make_shared<::LBBoundaries::LBBoundary>()) {
-    add_parameters(
-        {{"velocity",
-          [this](Variant const &value) {
-            m_lbboundary->set_velocity(get_value<Utils::Vector3d>(value));
-          },
-          [this]() { return m_lbboundary->velocity(); }},
-         {"shape",
-          [this](Variant const &value) {
-            m_shape = get_value<std::shared_ptr<Shapes::Shape>>(value);
-
-            if (m_shape) {
-              m_lbboundary->set_shape(m_shape->shape());
-            }
-          },
-          [this]() { return m_shape; }}});
-#ifdef EK_BOUNDARIES
-    add_parameters({{"charge_density",
-                     [this](Variant const &value) {
-                       m_lbboundary->set_charge_density(
-                           get_value<double>(value));
-                     },
-                     [this]() { return m_lbboundary->charge_density(); }},
-                    {"net_charge",
-                     [this](Variant const &value) {
-                       m_lbboundary->set_net_charge(get_value<double>(value));
-                     },
-                     [this]() { return m_lbboundary->net_charge(); }}});
-#endif
-  }
-
-  Variant do_call_method(const std::string &name, const VariantMap &) override {
-    if (name == "get_force") {
-      // The get force method uses mpi callbacks on lb cpu
-      if (context()->is_head_node()) {
-        const auto agrid = lb_lbfluid_get_agrid();
-        const auto tau = lb_lbfluid_get_tau();
-        const double unit_conversion = agrid / tau / tau;
-        return m_lbboundary->get_force() * unit_conversion;
-      }
-      return none;
-    }
-    return none;
-  }
-
-  std::shared_ptr<::LBBoundaries::LBBoundary> lbboundary() {
-    return m_lbboundary;
-  }
-
-private:
-  /* The actual constraint */
-  std::shared_ptr<::LBBoundaries::LBBoundary> m_lbboundary;
-
-  /* Keep a reference to the shape */
-  std::shared_ptr<Shapes::Shape> m_shape;
-};
-
-} // namespace LBBoundaries
-} /* namespace ScriptInterface */
-#endif
diff --git a/src/script_interface/lees_edwards/LeesEdwards.hpp b/src/script_interface/lees_edwards/LeesEdwards.hpp
index 01bf9b3f96f..029931309b5 100644
--- a/src/script_interface/lees_edwards/LeesEdwards.hpp
+++ b/src/script_interface/lees_edwards/LeesEdwards.hpp
@@ -22,6 +22,7 @@
 #include "Protocol.hpp"
 
 #include "core/grid.hpp"
+#include "core/grid_based_algorithms/lb_interface.hpp"
 #include "core/lees_edwards/LeesEdwardsBC.hpp"
 #include "core/lees_edwards/lees_edwards.hpp"
 
@@ -44,6 +45,10 @@ class LeesEdwards : public AutoParameters<LeesEdwards> {
         {{"protocol",
           [this](Variant const &value) {
             if (is_none(value)) {
+              context()->parallel_try_catch([]() {
+                auto constexpr invalid_dir = LeesEdwardsBC::invalid_dir;
+                LB::lebc_sanity_checks(invalid_dir, invalid_dir);
+              });
               m_protocol = nullptr;
               ::box_geo.set_lees_edwards_bc(LeesEdwardsBC{});
               ::LeesEdwards::unset_protocol();
@@ -95,6 +100,7 @@ class LeesEdwards : public AutoParameters<LeesEdwards> {
           throw std::invalid_argument("Parameters 'shear_direction' and "
                                       "'shear_plane_normal' must differ");
         }
+        LB::lebc_sanity_checks(shear_direction, shear_plane_normal);
         // update box geometry and cell structure
         ::box_geo.set_lees_edwards_bc(
             LeesEdwardsBC{0., 0., shear_direction, shear_plane_normal});
diff --git a/src/script_interface/shapes/Shape.hpp b/src/script_interface/shapes/Shape.hpp
index bf7820e2e00..6174308ac34 100644
--- a/src/script_interface/shapes/Shape.hpp
+++ b/src/script_interface/shapes/Shape.hpp
@@ -51,6 +51,20 @@ class Shape : public AutoParameters<Shape> {
       return std::vector<Variant>{dist, vec};
     }
 
+    if (name == "is_inside") {
+      auto const pos = get_value<Utils::Vector3d>(params.at("position"));
+      auto is_in = shape()->is_inside(pos);
+      return {is_in};
+    }
+
+    if (name == "rasterize") {
+      auto const grid_size = get_value<Utils::Vector3i>(params.at("grid_size"));
+      auto const grid_spacing = get_value<double>(params.at("grid_spacing"));
+      auto const grid_offset = get_value<double>(params.at("grid_offset"));
+      auto raster = shape()->rasterize(grid_size, grid_spacing, grid_offset);
+      return {raster};
+    }
+
     return {};
   }
 };
diff --git a/src/script_interface/walberla/CMakeLists.txt b/src/script_interface/walberla/CMakeLists.txt
new file mode 100644
index 00000000000..eafd59aef4c
--- /dev/null
+++ b/src/script_interface/walberla/CMakeLists.txt
@@ -0,0 +1,31 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+if(ESPRESSO_BUILD_WITH_WALBERLA)
+  target_sources(
+    espresso_script_interface
+    PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/initialize.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/LBFluidNode.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/LBFluidSlice.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/LBFluid.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/EKSpecies.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/EKSpeciesNode.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/EKSpeciesSlice.cpp)
+  target_link_libraries(espresso_script_interface PRIVATE espresso::walberla)
+endif(ESPRESSO_BUILD_WITH_WALBERLA)
diff --git a/src/script_interface/walberla/EKContainer.hpp b/src/script_interface/walberla/EKContainer.hpp
new file mode 100644
index 00000000000..fbae513e740
--- /dev/null
+++ b/src/script_interface/walberla/EKContainer.hpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "EKFFT.hpp"
+#include "EKNone.hpp"
+#include "EKSpecies.hpp"
+
+#include "core/grid_based_algorithms/ek_container.hpp"
+
+#include <script_interface/ObjectList.hpp>
+#include <script_interface/ScriptInterface.hpp>
+
+#include <memory>
+#include <string>
+
+namespace ScriptInterface::walberla {
+
+class EKContainer : public ObjectList<EKSpecies> {
+  using Base = ObjectList<EKSpecies>;
+
+  boost::variant<
+#ifdef WALBERLA_FFT
+      std::shared_ptr<EKFFT>,
+#endif
+      std::shared_ptr<EKNone>>
+      m_poisson_solver{std::shared_ptr<EKNone>()};
+
+  void add_in_core(std::shared_ptr<EKSpecies> const &obj_ptr) override {
+    context()->parallel_try_catch(
+        [&obj_ptr]() { EK::ek_container.add(obj_ptr->get_ekinstance()); });
+  }
+  void remove_in_core(std::shared_ptr<EKSpecies> const &obj_ptr) override {
+    EK::ek_container.remove(obj_ptr->get_ekinstance());
+  }
+
+public:
+  EKContainer() : Base::ObjectList() {
+    add_parameters({
+        {"tau",
+         [this](Variant const &v) {
+           if (is_none(v)) {
+             if (get_value<int>(do_call_method("size", {})) == 0) {
+               EK::ek_container.set_tau(0.);
+               return;
+             }
+             context()->parallel_try_catch([]() {
+               throw std::domain_error(
+                   "Parameter 'tau' is required when container isn't empty");
+             });
+           }
+           auto const tau = get_value<double>(v);
+           context()->parallel_try_catch([tau]() {
+             if (tau <= 0.) {
+               throw std::domain_error("Parameter 'tau' must be > 0");
+             }
+           });
+           EK::ek_container.set_tau(get_value<double>(v));
+         },
+         []() {
+           auto const tau = EK::ek_container.get_tau();
+           return (tau == 0.) ? Variant{none} : Variant{tau};
+         }},
+        {"solver", [this](Variant const &v) { set_solver(v); },
+         [this]() { return get_solver(); }},
+    });
+  }
+
+  void do_construct(VariantMap const &params) override {
+    if (params.count("solver")) {
+      set_solver(params.at("solver"));
+    }
+    if (params.count("tau")) {
+      do_set_parameter("tau", params.at("tau"));
+    }
+    // EK species must be added after tau
+    Base::do_construct(params);
+  }
+
+protected:
+  Variant do_call_method(std::string const &method,
+                         VariantMap const &parameters) override {
+    if (method == "is_poisson_solver_set") {
+      return EK::ek_container.is_poisson_solver_set();
+    }
+
+    return Base::do_call_method(method, parameters);
+  }
+
+private:
+  struct GetPoissonSolverVariant : public boost::static_visitor<Variant> {
+    template <typename T>
+    auto operator()(std::shared_ptr<T> const &solver) const {
+      return (solver) ? Variant{solver} : Variant{none};
+    }
+  };
+
+  struct GetPoissonSolverInstance
+      : public boost::static_visitor<
+            std::shared_ptr<::walberla::PoissonSolver>> {
+    template <typename T>
+    auto operator()(std::shared_ptr<T> const &solver) const {
+      return (solver) ? solver->get_instance()
+                      : std::shared_ptr<::walberla::PoissonSolver>();
+    }
+  };
+
+  Variant get_solver() const {
+    auto const visitor = GetPoissonSolverVariant();
+    return boost::apply_visitor(visitor, m_poisson_solver);
+  }
+
+  void set_solver(Variant const &solver_variant) {
+    boost::optional<decltype(m_poisson_solver)> solver;
+    if (is_none(solver_variant)) {
+      solver = std::shared_ptr<EKNone>();
+    } else {
+#ifdef WALBERLA_FFT
+      try {
+        solver = get_value<std::shared_ptr<EKFFT>>(solver_variant);
+      } catch (...) {
+      }
+#endif
+      if (not solver) {
+        solver = get_value<std::shared_ptr<EKNone>>(solver_variant);
+      }
+    }
+    m_poisson_solver = *solver;
+    auto const visitor = GetPoissonSolverInstance();
+    auto const instance = boost::apply_visitor(visitor, m_poisson_solver);
+    context()->parallel_try_catch(
+        [&instance]() { EK::ek_container.set_poisson_solver(instance); });
+  }
+};
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKFFT.hpp b/src/script_interface/walberla/EKFFT.hpp
new file mode 100644
index 00000000000..d091b001469
--- /dev/null
+++ b/src/script_interface/walberla/EKFFT.hpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+#ifdef WALBERLA_FFT
+
+#include "EKPoissonSolver.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp>
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+
+#include <utils/math/int_pow.hpp>
+
+#include <memory>
+
+namespace ScriptInterface::walberla {
+
+class EKFFT : public EKPoissonSolver {
+  std::shared_ptr<::walberla::PoissonSolver> m_instance;
+  std::shared_ptr<LatticeWalberla> m_lattice;
+  double m_conv_permittivity;
+  bool m_single_precision;
+
+public:
+  void do_construct(VariantMap const &args) override {
+    m_single_precision = get_value_or<bool>(args, "single_precision", false);
+    m_lattice = get_value<std::shared_ptr<LatticeWalberla>>(args, "lattice");
+
+    // unit conversions
+    auto const agrid = get_value<double>(m_lattice->get_parameter("agrid"));
+    m_conv_permittivity = Utils::int_pow<2>(agrid);
+    auto const permittivity =
+        get_value<double>(args, "permittivity") * m_conv_permittivity;
+
+    m_instance = new_ek_poisson_fft(m_lattice->lattice(), permittivity,
+                                    m_single_precision);
+
+    add_parameters({
+        {"permittivity",
+         [this](Variant const &v) {
+           m_instance->set_permittivity(get_value<double>(v) *
+                                        m_conv_permittivity);
+         },
+         [this]() {
+           return m_instance->get_permittivity() / m_conv_permittivity;
+         }},
+        {"single_precision", AutoParameter::read_only,
+         [this]() { return m_single_precision; }},
+        {"lattice", AutoParameter::read_only, [this]() { return m_lattice; }},
+    });
+  }
+
+  [[nodiscard]] std::shared_ptr<::walberla::PoissonSolver>
+  get_instance() const noexcept override {
+    return m_instance;
+  }
+};
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA_FFT
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKNone.hpp b/src/script_interface/walberla/EKNone.hpp
new file mode 100644
index 00000000000..5aa1eb4e709
--- /dev/null
+++ b/src/script_interface/walberla/EKNone.hpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "EKPoissonSolver.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/ek_poisson_none_init.hpp>
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+
+#include <memory>
+
+namespace ScriptInterface::walberla {
+
+class EKNone : public EKPoissonSolver {
+  std::shared_ptr<::walberla::PoissonSolver> m_instance;
+  std::shared_ptr<LatticeWalberla> m_lattice;
+  bool m_single_precision;
+
+public:
+  void do_construct(VariantMap const &args) override {
+    m_single_precision = get_value_or<bool>(args, "single_precision", false);
+    m_lattice = get_value<std::shared_ptr<LatticeWalberla>>(args, "lattice");
+
+    m_instance = new_ek_poisson_none(m_lattice->lattice(), m_single_precision);
+
+    add_parameters({
+        {"single_precision", AutoParameter::read_only,
+         [this]() { return m_single_precision; }},
+        {"lattice", AutoParameter::read_only, [this]() { return m_lattice; }},
+    });
+  }
+
+  [[nodiscard]] std::shared_ptr<::walberla::PoissonSolver>
+  get_instance() const noexcept override {
+    return m_instance;
+  }
+};
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKPoissonSolver.hpp b/src/script_interface/walberla/EKPoissonSolver.hpp
new file mode 100644
index 00000000000..71d93dd2ecd
--- /dev/null
+++ b/src/script_interface/walberla/EKPoissonSolver.hpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+
+#include <walberla_bridge/electrokinetics/PoissonSolver/PoissonSolver.hpp>
+
+#include <memory>
+
+namespace ScriptInterface::walberla {
+
+class EKPoissonSolver : public AutoParameters<EKPoissonSolver> {
+public:
+  virtual std::shared_ptr<::walberla::PoissonSolver>
+  get_instance() const noexcept = 0;
+};
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKReactant.hpp b/src/script_interface/walberla/EKReactant.hpp
new file mode 100644
index 00000000000..61b39388a72
--- /dev/null
+++ b/src/script_interface/walberla/EKReactant.hpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "EKSpecies.hpp"
+
+#include <walberla_bridge/electrokinetics/reactions/EKReactant.hpp>
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+
+#include <memory>
+
+namespace ScriptInterface::walberla {
+class EKReactant : public AutoParameters<::walberla::EKReactant> {
+public:
+  void do_construct(VariantMap const &args) override {
+    m_ekreactant = std::make_shared<::walberla::EKReactant>(
+        get_value<std::shared_ptr<EKSpecies>>(args, "ekspecies")
+            ->get_ekinstance(),
+        get_value<double>(args, "stoech_coeff"),
+        get_value<double>(args, "order"));
+
+    add_parameters({{"order", AutoParameter::read_only,
+                     [this]() { return m_ekreactant->get_order(); }},
+                    {"stoech_coeff",
+                     [this](Variant const &v) {
+                       m_ekreactant->set_stoech_coefficient(
+                           get_value<double>(v));
+                     },
+                     [this]() { return m_ekreactant->get_stoech_coeff(); }}});
+  }
+
+  [[nodiscard]] std::shared_ptr<::walberla::EKReactant> get_instance() {
+    return m_ekreactant;
+  }
+
+private:
+  /* The actual instance */
+  std::shared_ptr<::walberla::EKReactant> m_ekreactant;
+};
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKReaction.hpp b/src/script_interface/walberla/EKReaction.hpp
new file mode 100644
index 00000000000..72ba11569ba
--- /dev/null
+++ b/src/script_interface/walberla/EKReaction.hpp
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "EKReactant.hpp"
+#include "LatticeIndices.hpp"
+#include "LatticeWalberla.hpp"
+
+#include <walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp>
+#include <walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp>
+#include <walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp>
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+#include <script_interface/communication.hpp>
+
+#include <algorithm>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+class EKReaction : public AutoParameters<EKReaction, LatticeIndices> {
+public:
+  [[nodiscard]] std::shared_ptr<::walberla::EKReactionBase>
+  get_instance() const {
+    return m_ekreaction;
+  }
+
+  [[nodiscard]] auto get_conversion_coefficient() const noexcept {
+    return m_conv_coefficient;
+  }
+
+protected:
+  auto get_agrid(VariantMap const &args) const {
+    auto lattice = get_value<std::shared_ptr<LatticeWalberla>>(args, "lattice");
+    return get_value<double>(lattice->get_parameter("agrid"));
+  }
+
+  auto calculate_bulk_conversion_factor(VariantMap const &args) const {
+    auto const tau = get_value<double>(args, "tau");
+    auto const agrid = get_agrid(args);
+    auto reactant = get_value<std::vector<Variant>>(args, "reactants");
+
+    auto get_order = [](Variant const &v) {
+      return get_value<double>(
+          get_value<std::shared_ptr<EKReactant>>(v)->get_parameter("order"));
+    };
+    auto const sum_alphas =
+        std::accumulate(reactant.begin(), reactant.end(), 0.,
+                        [get_order](double sum, auto &element) {
+                          return sum + get_order(element);
+                        });
+
+    return tau / std::pow(Utils::int_pow<3>(agrid), sum_alphas - 1.);
+  }
+
+  template <typename T>
+  std::shared_ptr<T> make_instance(VariantMap const &args) const {
+    auto lattice = get_value<std::shared_ptr<LatticeWalberla>>(args, "lattice");
+    auto reactant = get_value<std::vector<Variant>>(args, "reactants");
+    auto output =
+        std::vector<std::shared_ptr<::walberla::EKReactant>>(reactant.size());
+    auto get_instance = [](Variant const &v) {
+      return get_value<std::shared_ptr<EKReactant>>(v)->get_instance();
+    };
+    std::transform(reactant.begin(), reactant.end(), output.begin(),
+                   get_instance);
+
+    auto const coefficient =
+        get_value<double>(args, "coefficient") * get_conversion_coefficient();
+
+    return std::make_shared<T>(lattice->lattice(), output, coefficient);
+  }
+
+  std::shared_ptr<::walberla::EKReactionBase> m_ekreaction;
+  double m_conv_coefficient;
+};
+
+class EKBulkReaction : public EKReaction {
+public:
+  EKBulkReaction() {
+    add_parameters({{"coefficient",
+                     [this](Variant const &v) {
+                       get_instance()->set_coefficient(
+                           get_value<double>(v) * get_conversion_coefficient());
+                     },
+                     [this]() {
+                       return get_instance()->get_coefficient() /
+                              get_conversion_coefficient();
+                     }}});
+  }
+
+  void do_construct(VariantMap const &args) override {
+    m_conv_coefficient = calculate_bulk_conversion_factor(args);
+    m_ekreaction = make_instance<::walberla::EKReactionImplBulk>(args);
+  }
+};
+
+class EKIndexedReaction : public EKReaction {
+public:
+  EKIndexedReaction() {
+    add_parameters(
+        {{"coefficient",
+          [this](Variant const &v) {
+            get_instance()->set_coefficient(get_value<double>(v) *
+                                            get_conversion_coefficient());
+          },
+          [this]() {
+            return get_instance()->get_coefficient() /
+                   get_conversion_coefficient();
+          }},
+         {"shape", AutoParameter::read_only, [this]() {
+            return get_instance()->get_lattice()->get_grid_dimensions();
+          }}});
+  }
+
+  void do_construct(VariantMap const &args) override {
+    auto const agrid = get_agrid(args);
+    m_conv_coefficient = calculate_bulk_conversion_factor(args) / agrid;
+    m_ekreaction = make_instance<::walberla::EKReactionImplIndexed>(args);
+    m_ekreaction_impl =
+        std::dynamic_pointer_cast<::walberla::EKReactionImplIndexed>(
+            get_instance());
+  }
+
+  [[nodiscard]] Variant do_call_method(std::string const &method,
+                                       VariantMap const &parameters) override {
+    if (method == "set_node_is_boundary") {
+      auto const index = get_mapped_index(
+          get_value<Utils::Vector3i>(parameters, "node"),
+          get_instance()->get_lattice()->get_grid_dimensions());
+      m_ekreaction_impl->set_node_is_boundary(
+          index, get_value<bool>(parameters, "is_boundary"));
+      return none;
+    }
+    if (method == "get_node_is_boundary") {
+      auto const index = get_mapped_index(
+          get_value<Utils::Vector3i>(parameters, "node"),
+          get_instance()->get_lattice()->get_grid_dimensions());
+      auto const result = m_ekreaction_impl->get_node_is_boundary(index);
+      return mpi_reduce_optional(context()->get_comm(), result);
+    }
+    return {};
+  }
+
+private:
+  std::shared_ptr<::walberla::EKReactionImplIndexed> m_ekreaction_impl;
+};
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKReactions.hpp b/src/script_interface/walberla/EKReactions.hpp
new file mode 100644
index 00000000000..cb15a361109
--- /dev/null
+++ b/src/script_interface/walberla/EKReactions.hpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "EKReaction.hpp"
+
+#include "core/grid_based_algorithms/ek_reactions.hpp"
+
+#include <script_interface/ObjectList.hpp>
+#include <script_interface/ScriptInterface.hpp>
+
+#include <memory>
+
+namespace ScriptInterface::walberla {
+
+class EKReactions : public ObjectList<EKReaction> {
+  void add_in_core(std::shared_ptr<EKReaction> const &obj_ptr) override {
+    EK::ek_reactions.add(obj_ptr->get_instance());
+  }
+  void remove_in_core(std::shared_ptr<EKReaction> const &obj_ptr) override {
+    EK::ek_reactions.remove(obj_ptr->get_instance());
+  }
+};
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKSpecies.cpp b/src/script_interface/walberla/EKSpecies.cpp
new file mode 100644
index 00000000000..172f3c3bce2
--- /dev/null
+++ b/src/script_interface/walberla/EKSpecies.cpp
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "EKSpecies.hpp"
+#include "WalberlaCheckpoint.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/EKWalberlaNodeState.hpp>
+#include <walberla_bridge/electrokinetics/ek_walberla_init.hpp>
+
+#include <boost/mpi.hpp>
+#include <boost/mpi/collectives/all_reduce.hpp>
+#include <boost/mpi/collectives/broadcast.hpp>
+#include <boost/optional.hpp>
+
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+std::unordered_map<std::string, int> const EKVTKHandle::obs_map = {
+    {"density", static_cast<int>(EKOutputVTK::density)},
+};
+
+Variant EKSpecies::do_call_method(std::string const &method,
+                                  VariantMap const &parameters) {
+  if (method == "update_flux_boundary_from_shape") {
+    auto values = get_value<std::vector<double>>(parameters, "values");
+    std::transform(values.begin(), values.end(), values.begin(),
+                   [this](double v) { return v * m_conv_flux; });
+
+    m_instance->update_flux_boundary_from_shape(
+        get_value<std::vector<int>>(parameters, "raster"), values);
+    return {};
+  }
+  if (method == "update_density_boundary_from_shape") {
+    auto values = get_value<std::vector<double>>(parameters, "values");
+    std::transform(values.begin(), values.end(), values.begin(),
+                   [this](double v) { return v * m_conv_density; });
+    m_instance->update_density_boundary_from_shape(
+        get_value<std::vector<int>>(parameters, "raster"), values);
+    return {};
+  }
+  if (method == "clear_flux_boundaries") {
+    m_instance->clear_flux_boundaries();
+    return {};
+  }
+  if (method == "clear_density_boundaries") {
+    m_instance->clear_density_boundaries();
+    return {};
+  }
+  if (method == "save_checkpoint") {
+    auto const path = get_value<std::string>(parameters, "path");
+    auto const mode = get_value<int>(parameters, "mode");
+    save_checkpoint(path, mode);
+    return {};
+  }
+  if (method == "load_checkpoint") {
+    auto const path = get_value<std::string>(parameters, "path");
+    auto const mode = get_value<int>(parameters, "mode");
+    load_checkpoint(path, mode);
+    return {};
+  }
+  return Base::do_call_method(method, parameters);
+}
+
+void EKSpecies::do_construct(VariantMap const &args) {
+  m_lattice = get_value<std::shared_ptr<LatticeWalberla>>(args, "lattice");
+  m_vtk_writers =
+      get_value_or<decltype(m_vtk_writers)>(args, "vtk_writers", {});
+  auto const single_precision = get_value<bool>(args, "single_precision");
+  auto const agrid = get_value<double>(m_lattice->get_parameter("agrid"));
+  auto const diffusion = get_value<double>(args, "diffusion");
+  auto const ext_efield = get_value<Utils::Vector3d>(args, "ext_efield");
+  auto const density = get_value<double>(args, "density");
+  auto const kT = get_value<double>(args, "kT");
+  auto const tau = m_tau = get_value<double>(args, "tau");
+  context()->parallel_try_catch([&]() {
+    if (tau <= 0.) {
+      throw std::domain_error("Parameter 'tau' must be > 0");
+    }
+    if (kT < 0.) {
+      throw std::domain_error("Parameter 'kT' must be >= 0");
+    }
+    if (density < 0.) {
+      throw std::domain_error("Parameter 'density' must be >= 0");
+    }
+    m_conv_energy = Utils::int_pow<2>(tau) / Utils::int_pow<2>(agrid);
+    m_conv_diffusion = tau / Utils::int_pow<2>(agrid);
+    m_conv_ext_efield = Utils::int_pow<2>(tau) / agrid;
+    m_conv_density = Utils::int_pow<3>(agrid);
+    m_conv_flux = tau * Utils::int_pow<2>(agrid);
+    auto const ek_diffusion = diffusion * m_conv_diffusion;
+    auto const ek_ext_efield = ext_efield * m_conv_ext_efield;
+    auto const ek_density = m_density = density * m_conv_density;
+    auto const ek_kT = kT * m_conv_energy;
+    m_instance = new_ek_walberla(
+        m_lattice->lattice(), ek_diffusion, ek_kT,
+        get_value<double>(args, "valency"), ek_ext_efield, ek_density,
+        get_value<bool>(args, "advection"),
+        get_value<bool>(args, "friction_coupling"), single_precision);
+    for (auto &vtk : m_vtk_writers) {
+      vtk->attach_to_lattice(m_instance, get_latice_to_md_units_conversion());
+    }
+  });
+}
+
+void EKSpecies::load_checkpoint(std::string const &filename, int mode) {
+  auto &ek_obj = *m_instance;
+
+  auto const read_metadata = [&ek_obj](CheckpointFile &cpfile) {
+    auto const expected_grid_size = ek_obj.get_lattice().get_grid_dimensions();
+    Utils::Vector3i read_grid_size;
+    cpfile.read(read_grid_size);
+    if (read_grid_size != expected_grid_size) {
+      std::stringstream message;
+      message << "grid dimensions mismatch, "
+              << "read [" << read_grid_size << "], "
+              << "expected [" << expected_grid_size << "].";
+      throw std::runtime_error(message.str());
+    }
+  };
+
+  auto const read_data = [&ek_obj](CheckpointFile &cpfile) {
+    auto const grid_size = ek_obj.get_lattice().get_grid_dimensions();
+    auto const i_max = grid_size[0];
+    auto const j_max = grid_size[1];
+    auto const k_max = grid_size[2];
+    EKWalberlaNodeState cpnode;
+    for (int i = 0; i < i_max; i++) {
+      for (int j = 0; j < j_max; j++) {
+        for (int k = 0; k < k_max; k++) {
+          auto const ind = Utils::Vector3i{{i, j, k}};
+          cpfile.read(cpnode.density);
+          cpfile.read(cpnode.is_boundary_density);
+          if (cpnode.is_boundary_density) {
+            cpfile.read(cpnode.density_boundary);
+          }
+          cpfile.read(cpnode.is_boundary_flux);
+          if (cpnode.is_boundary_flux) {
+            cpfile.read(cpnode.flux_boundary);
+          }
+          ek_obj.set_node_density(ind, cpnode.density);
+          if (cpnode.is_boundary_density) {
+            ek_obj.set_node_density_boundary(ind, cpnode.density_boundary);
+          }
+          if (cpnode.is_boundary_flux) {
+            ek_obj.set_node_flux_boundary(ind, cpnode.flux_boundary);
+          }
+        }
+      }
+    }
+  };
+
+  auto const on_success = [&ek_obj]() { ek_obj.ghost_communication(); };
+
+  load_checkpoint_common(*context(), "EK", filename, mode, read_metadata,
+                         read_data, on_success);
+}
+
+void EKSpecies::save_checkpoint(std::string const &filename, int mode) {
+  auto &ek_obj = *m_instance;
+
+  auto const write_metadata = [&ek_obj,
+                               mode](std::shared_ptr<CheckpointFile> cpfile_ptr,
+                                     Context const &context) {
+    auto const grid_size = ek_obj.get_lattice().get_grid_dimensions();
+    if (context.is_head_node()) {
+      cpfile_ptr->write(grid_size);
+      unit_test_handle(mode);
+    }
+  };
+
+  auto const on_failure = [](std::shared_ptr<CheckpointFile> const &,
+                             Context const &context) {
+    if (context.is_head_node()) {
+      auto failure = true;
+      boost::mpi::broadcast(context.get_comm(), failure, 0);
+    }
+  };
+
+  auto const write_data = [&ek_obj,
+                           mode](std::shared_ptr<CheckpointFile> cpfile_ptr,
+                                 Context const &context) {
+    auto const get_node_checkpoint = [&](Utils::Vector3i const &ind)
+        -> boost::optional<EKWalberlaNodeState> {
+      auto const density = ek_obj.get_node_density(ind);
+      auto const is_b_d = ek_obj.get_node_is_density_boundary(ind);
+      auto const dens_b = ek_obj.get_node_density_at_boundary(ind);
+      auto const is_b_f = ek_obj.get_node_is_flux_boundary(ind);
+      auto const flux_b = ek_obj.get_node_flux_at_boundary(ind);
+      if (density and is_b_d and is_b_f and
+          ((*is_b_d) ? dens_b.has_value() : true) and
+          ((*is_b_f) ? flux_b.has_value() : true)) {
+        EKWalberlaNodeState cpnode;
+        cpnode.density = *density;
+        cpnode.is_boundary_density = *is_b_d;
+        if (*is_b_d) {
+          cpnode.density_boundary = *dens_b;
+        }
+        cpnode.is_boundary_flux = *is_b_f;
+        if (*is_b_f) {
+          cpnode.flux_boundary = *flux_b;
+        }
+        return cpnode;
+      }
+      return {boost::none};
+    };
+
+    auto failure = false;
+    auto const &comm = context.get_comm();
+    auto const is_head_node = context.is_head_node();
+    auto const unit_test_mode = (mode != static_cast<int>(CptMode::ascii)) and
+                                (mode != static_cast<int>(CptMode::binary));
+    auto const grid_size = ek_obj.get_lattice().get_grid_dimensions();
+    auto const i_max = grid_size[0];
+    auto const j_max = grid_size[1];
+    auto const k_max = grid_size[2];
+    EKWalberlaNodeState cpnode;
+    for (int i = 0; i < i_max; i++) {
+      for (int j = 0; j < j_max; j++) {
+        for (int k = 0; k < k_max; k++) {
+          auto const ind = Utils::Vector3i{{i, j, k}};
+          auto const result = get_node_checkpoint(ind);
+          if (!unit_test_mode) {
+            assert(1 == boost::mpi::all_reduce(comm, static_cast<int>(!!result),
+                                               std::plus<>()) &&
+                   "Incorrect number of return values");
+          }
+          if (is_head_node) {
+            if (result) {
+              cpnode = *result;
+            } else {
+              comm.recv(boost::mpi::any_source, 42, cpnode);
+            }
+            auto &cpfile = *cpfile_ptr;
+            cpfile.write(cpnode.density);
+            cpfile.write(cpnode.is_boundary_density);
+            if (cpnode.is_boundary_density) {
+              cpfile.write(cpnode.density_boundary);
+            }
+            cpfile.write(cpnode.is_boundary_flux);
+            if (cpnode.is_boundary_flux) {
+              cpfile.write(cpnode.flux_boundary);
+            }
+            boost::mpi::broadcast(comm, failure, 0);
+          } else {
+            if (result) {
+              comm.send(0, 42, *result);
+            }
+            boost::mpi::broadcast(comm, failure, 0);
+            if (failure) {
+              return;
+            }
+          }
+        }
+      }
+    }
+  };
+
+  save_checkpoint_common(*context(), "EK", filename, mode, write_metadata,
+                         write_data, on_failure);
+}
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKSpecies.hpp b/src/script_interface/walberla/EKSpecies.hpp
new file mode 100644
index 00000000000..4f304e2f970
--- /dev/null
+++ b/src/script_interface/walberla/EKSpecies.hpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "LatticeModel.hpp"
+#include "LatticeWalberla.hpp"
+#include "VTKHandle.hpp"
+
+#include <walberla_bridge/LatticeModel.hpp>
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/EKinWalberlaBase.hpp>
+
+#include <script_interface/ScriptInterface.hpp>
+
+#include <utils/math/int_pow.hpp>
+
+#include <memory>
+#include <string>
+
+namespace ScriptInterface::walberla {
+
+class EKVTKHandle;
+
+class EKSpecies : public LatticeModel<::EKinWalberlaBase, EKVTKHandle> {
+  using Base = LatticeModel<::EKinWalberlaBase, EKVTKHandle>;
+  double m_conv_diffusion;
+  double m_conv_ext_efield;
+  double m_conv_energy;
+  double m_conv_density;
+  double m_conv_flux;
+  double m_tau;
+  double m_density;
+
+public:
+  EKSpecies() {
+    add_parameters({
+        {"lattice", AutoParameter::read_only, [this]() { return m_lattice; }},
+        {"diffusion",
+         [this](Variant const &v) {
+           m_instance->set_diffusion(get_value<double>(v) * m_conv_diffusion);
+         },
+         [this]() { return m_instance->get_diffusion() / m_conv_diffusion; }},
+        {"kT",
+         [this](Variant const &v) {
+           m_instance->set_kT(get_value<double>(v) * m_conv_energy);
+         },
+         [this]() { return m_instance->get_kT() / m_conv_energy; }},
+        {"valency",
+         [this](Variant const &v) {
+           m_instance->set_valency(get_value<double>(v));
+         },
+         [this]() { return m_instance->get_valency(); }},
+        {"ext_efield",
+         [this](Variant const &v) {
+           m_instance->set_ext_efield(get_value<Utils::Vector3d>(v) *
+                                      m_conv_ext_efield);
+         },
+         [this]() { return m_instance->get_ext_efield() / m_conv_ext_efield; }},
+        {"advection",
+         [this](Variant const &v) {
+           m_instance->set_advection(get_value<bool>(v));
+         },
+         [this]() { return m_instance->get_advection(); }},
+        {"friction_coupling",
+         [this](Variant const &v) {
+           m_instance->set_friction_coupling(get_value<bool>(v));
+         },
+         [this]() { return m_instance->get_friction_coupling(); }},
+        {"single_precision", AutoParameter::read_only,
+         [this]() { return not m_instance->is_double_precision(); }},
+        {"tau", AutoParameter::read_only, [this]() { return m_tau; }},
+        {"density", AutoParameter::read_only,
+         [this]() { return m_density / m_conv_density; }},
+        {"shape", AutoParameter::read_only,
+         [this]() { return m_instance->get_lattice().get_grid_dimensions(); }},
+        {"vtk_writers", AutoParameter::read_only,
+         [this]() { return serialize_vtk_writers(); }},
+    });
+  }
+
+  void do_construct(VariantMap const &args) override;
+
+  [[nodiscard]] auto get_ekinstance() const { return m_instance; }
+  [[nodiscard]] auto get_lattice() const { return m_lattice; }
+
+  Variant do_call_method(std::string const &method,
+                         VariantMap const &parameters) override;
+
+  [[nodiscard]] auto get_conversion_factor_density() const noexcept {
+    return m_conv_density;
+  }
+  [[nodiscard]] auto get_conversion_factor_flux() const noexcept {
+    return m_conv_flux;
+  }
+
+  ::LatticeModel::units_map get_latice_to_md_units_conversion() const override {
+    return {
+        {"density", 1. / m_conv_density},
+        {"flux", 1. / m_conv_flux},
+    };
+  }
+
+private:
+  void load_checkpoint(std::string const &filename, int mode);
+  void save_checkpoint(std::string const &filename, int mode);
+};
+
+class EKVTKHandle : public VTKHandleBase<::EKinWalberlaBase> {
+  static std::unordered_map<std::string, int> const obs_map;
+
+  std::unordered_map<std::string, int> const &get_obs_map() const override {
+    return obs_map;
+  }
+};
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKSpeciesNode.cpp b/src/script_interface/walberla/EKSpeciesNode.cpp
new file mode 100644
index 00000000000..594f1a73cd0
--- /dev/null
+++ b/src/script_interface/walberla/EKSpeciesNode.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "EKSpeciesNode.hpp"
+
+#include "LatticeIndices.hpp"
+
+#include <script_interface/communication.hpp>
+
+#include <walberla_bridge/electrokinetics/EKinWalberlaBase.hpp>
+
+#include <utils/Vector.hpp>
+#include <utils/constants.hpp>
+
+#include <boost/mpi/collectives/all_reduce.hpp>
+#include <boost/optional.hpp>
+#include <boost/serialization/vector.hpp>
+
+#include <cassert>
+#include <memory>
+#include <stdexcept>
+#include <string>
+
+namespace ScriptInterface::walberla {
+
+static bool is_boundary_all_reduce(boost::mpi::communicator const &comm,
+                                   boost::optional<bool> const &is_boundary) {
+  return boost::mpi::all_reduce(comm, is_boundary ? *is_boundary : false,
+                                std::logical_or<>());
+}
+
+Variant EKSpeciesNode::do_call_method(std::string const &name,
+                                      VariantMap const &params) {
+  if (name == "override_index") {
+    // this hidden feature is used to iterate an EK slice without
+    // rebuilding a EKSpeciesNode for each node in the slice
+    auto const index = get_value<Utils::Vector3i>(params, "index");
+    if (not is_index_valid(index, m_grid_size)) {
+      return ES_ERROR;
+    }
+    m_index = index;
+    return ES_OK;
+  }
+  if (name == "set_density") {
+    auto const dens = get_value<double>(params, "value");
+    m_ek_species->set_node_density(m_index, dens * m_conv_dens);
+    m_ek_species->ghost_communication();
+    return {};
+  }
+  if (name == "get_density") {
+    auto const result = m_ek_species->get_node_density(m_index);
+    return mpi_reduce_optional(context()->get_comm(), result) / m_conv_dens;
+  }
+  if (name == "get_is_boundary") {
+    auto const result = m_ek_species->get_node_is_boundary(m_index);
+    return mpi_reduce_optional(context()->get_comm(), result);
+  }
+  if (name == "get_node_density_at_boundary") {
+    auto const boundary_opt =
+        m_ek_species->get_node_is_density_boundary(m_index);
+    if (is_boundary_all_reduce(context()->get_comm(), boundary_opt)) {
+      auto const result = m_ek_species->get_node_density_at_boundary(m_index);
+      return mpi_reduce_optional(context()->get_comm(), result) / m_conv_dens;
+    }
+    return Variant{None{}};
+  }
+  if (name == "set_node_density_at_boundary") {
+    if (is_none(params.at("value"))) {
+      m_ek_species->remove_node_from_density_boundary(m_index);
+    } else {
+      auto const dens = get_value<double>(params, "value") * m_conv_dens;
+      m_ek_species->set_node_density_boundary(m_index, dens);
+    }
+    return {};
+  }
+  if (name == "get_node_flux_at_boundary") {
+    auto const boundary_opt = m_ek_species->get_node_is_flux_boundary(m_index);
+    if (is_boundary_all_reduce(context()->get_comm(), boundary_opt)) {
+      auto const result = m_ek_species->get_node_flux_at_boundary(m_index);
+      return mpi_reduce_optional(context()->get_comm(), result) / m_conv_flux;
+    }
+    return Variant{None{}};
+  }
+  if (name == "set_node_flux_at_boundary") {
+    if (is_none(params.at("value"))) {
+      m_ek_species->remove_node_from_flux_boundary(m_index);
+    } else {
+      auto const flux =
+          get_value<Utils::Vector3d>(params, "value") * m_conv_flux;
+      m_ek_species->set_node_flux_boundary(m_index, flux);
+    }
+    return {};
+  }
+
+  return {};
+}
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKSpeciesNode.hpp b/src/script_interface/walberla/EKSpeciesNode.hpp
new file mode 100644
index 00000000000..cdcd0aa0d50
--- /dev/null
+++ b/src/script_interface/walberla/EKSpeciesNode.hpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "EKSpecies.hpp"
+#include "LatticeIndices.hpp"
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+
+#include <walberla_bridge/electrokinetics/EKinWalberlaBase.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <cassert>
+#include <memory>
+#include <stdexcept>
+#include <string>
+
+namespace ScriptInterface::walberla {
+
+class EKSpeciesNode : public AutoParameters<EKSpeciesNode, LatticeIndices> {
+  std::shared_ptr<::EKinWalberlaBase> m_ek_species;
+  Utils::Vector3i m_index;
+  Utils::Vector3i m_grid_size;
+  double m_conv_dens;
+  double m_conv_flux;
+
+public:
+  EKSpeciesNode() {
+    add_parameters(
+        {{"_index", AutoParameter::read_only, [this]() { return m_index; }}});
+  }
+
+  void do_construct(VariantMap const &params) override {
+    auto const ek_sip =
+        get_value<std::shared_ptr<EKSpecies>>(params, "parent_sip");
+    m_ek_species = ek_sip->get_ekinstance();
+    assert(m_ek_species);
+    m_conv_dens = ek_sip->get_conversion_factor_density();
+    m_conv_flux = ek_sip->get_conversion_factor_flux();
+    m_grid_size = m_ek_species->get_lattice().get_grid_dimensions();
+    m_index = get_mapped_index(get_value<Utils::Vector3i>(params, "index"),
+                               m_grid_size);
+  }
+
+  Variant do_call_method(std::string const &name,
+                         VariantMap const &params) override;
+};
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKSpeciesSlice.cpp b/src/script_interface/walberla/EKSpeciesSlice.cpp
new file mode 100644
index 00000000000..3018fdf57f0
--- /dev/null
+++ b/src/script_interface/walberla/EKSpeciesSlice.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "EKSpeciesSlice.hpp"
+
+#include "LatticeSlice.impl.hpp"
+
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+Variant EKSpeciesSlice::do_call_method(std::string const &name,
+                                       VariantMap const &params) {
+  if (name == "get_slice_size") {
+    return {m_slice_upper_corner - m_slice_lower_corner};
+  }
+  if (name == "get_slice_ranges") {
+    return {std::vector<Variant>{m_slice_lower_corner, m_slice_upper_corner}};
+  }
+  if (name == "get_ek_sip") {
+    return {m_ek_sip};
+  }
+  if (name == "get_value_shape") {
+    auto const name = get_value<std::string>(params, "name");
+    if (m_shape_val.count(name) == 0) {
+      context()->parallel_try_catch([&]() {
+        throw std::runtime_error("Unknown EK property '" + name + "'");
+      });
+    }
+    return m_shape_val.at(name);
+  }
+
+  // slice getter/setter callback
+  auto const call = [this, params](auto method_ptr,
+                                   std::vector<int> const &data_dims,
+                                   double units = 1.) -> Variant {
+    auto &obj = *m_ek_species;
+    if constexpr (std::is_invocable_v<decltype(method_ptr), LatticeModel *,
+                                      Utils::Vector3i const &,
+                                      Utils::Vector3i const &>) {
+      return gather_3d(params, data_dims, obj, method_ptr, units);
+    } else {
+      scatter_3d(params, data_dims, obj, method_ptr, units);
+      return {};
+    }
+  };
+
+  if (name == "get_density") {
+    return call(&LatticeModel::get_slice_density, {1}, 1. / m_conv_dens);
+  }
+  if (name == "set_density") {
+    return call(&LatticeModel::set_slice_density, {1}, m_conv_dens);
+  }
+  if (name == "get_is_boundary") {
+    return call(&LatticeModel::get_slice_is_boundary, {1});
+  }
+  if (name == "get_flux_at_boundary") {
+    return call(&LatticeModel::get_slice_flux_at_boundary, {1},
+                1. / m_conv_flux);
+  }
+  if (name == "set_flux_at_boundary") {
+    return call(&LatticeModel::set_slice_flux_boundary, {1}, m_conv_flux);
+  }
+  if (name == "get_density_at_boundary") {
+    return call(&LatticeModel::get_slice_density_at_boundary, {1},
+                1. / m_conv_dens);
+  }
+  if (name == "set_density_at_boundary") {
+    return call(&LatticeModel::set_slice_density_boundary, {1}, m_conv_dens);
+  }
+
+  return {};
+}
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/EKSpeciesSlice.hpp b/src/script_interface/walberla/EKSpeciesSlice.hpp
new file mode 100644
index 00000000000..ce67ca31ee4
--- /dev/null
+++ b/src/script_interface/walberla/EKSpeciesSlice.hpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "EKSpecies.hpp"
+
+#include "LatticeSlice.hpp"
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/EKinWalberlaBase.hpp>
+
+#include <utils/Vector.hpp>
+#include <utils/math/int_pow.hpp>
+
+#include <cassert>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+using DensityBoundaryType = boost::optional<double>;
+using FluxBoundaryType = boost::optional<Utils::Vector3d>;
+
+struct EKFieldSerializer {
+
+  template <typename T> static Variant serialize(std::vector<T> const &values) {
+    if constexpr (std::is_same_v<T, FluxBoundaryType> or
+                  std::is_same_v<T, DensityBoundaryType>) {
+      std::vector<Variant> vec;
+      vec.reserve(values.size());
+      for (auto const &opt : values) {
+        if (opt) {
+          vec.emplace_back(Variant{*opt});
+        } else {
+          vec.emplace_back(Variant{None{}});
+        }
+      }
+      return {vec};
+    } else if constexpr (std::is_same_v<T, int> or std::is_same_v<T, double>) {
+      return {values};
+    } else {
+      return make_vector_of_variants(values);
+    }
+  }
+
+  template <typename T>
+  static std::vector<T> deserialize(Variant const &variant) {
+    std::vector<T> values;
+    if constexpr (std::is_same_v<T, FluxBoundaryType>) {
+      auto const vector_variants = get_value<std::vector<Variant>>(variant);
+      for (auto const &value : vector_variants) {
+        if (is_none(value)) {
+          values.emplace_back(boost::none);
+        } else {
+          values.emplace_back(get_value<Utils::Vector3d>(value));
+        }
+      }
+    } else if constexpr (std::is_same_v<T, DensityBoundaryType>) {
+      auto const vector_variants = get_value<std::vector<Variant>>(variant);
+      for (auto const &value : vector_variants) {
+        if (is_none(value)) {
+          values.emplace_back(boost::none);
+        } else {
+          values.emplace_back(get_value<double>(value));
+        }
+      }
+    } else if constexpr (std::is_same_v<T, double>) {
+      if (is_type<std::vector<int>>(variant)) {
+        auto const values_int = get_value<std::vector<int>>(variant);
+        values.reserve(values_int.size());
+        for (auto const val : values_int) {
+          values.emplace_back(static_cast<double>(val));
+        }
+      } else {
+        values = get_value<std::vector<T>>(variant);
+      }
+    } else {
+      values = get_value<std::vector<T>>(variant);
+    }
+    return values;
+  }
+};
+
+class EKSpeciesSlice : public LatticeSlice<EKFieldSerializer> {
+  using LatticeModel = ::EKinWalberlaBase;
+  std::shared_ptr<LatticeModel> m_ek_species;
+  std::shared_ptr<EKSpecies> m_ek_sip;
+  double m_conv_dens;
+  double m_conv_flux;
+  std::unordered_map<std::string, std::vector<int>> m_shape_val;
+
+public:
+  void do_construct(VariantMap const &params) override {
+    m_ek_sip = get_value<std::shared_ptr<EKSpecies>>(params, "parent_sip");
+    m_ek_species = m_ek_sip->get_ekinstance();
+    assert(m_ek_species);
+    m_conv_dens = m_ek_sip->get_conversion_factor_density();
+    m_conv_flux = m_ek_sip->get_conversion_factor_flux();
+    m_shape = get_value<std::vector<int>>(params, "shape");
+    m_slice_lower_corner =
+        get_value<Utils::Vector3i>(params, "slice_lower_corner");
+    m_slice_upper_corner =
+        get_value<Utils::Vector3i>(params, "slice_upper_corner");
+    m_shape_val["density"] = {1};
+    m_shape_val["flux_at_boundary"] = {1};
+    m_shape_val["density_at_boundary"] = {1};
+    m_shape_val["is_boundary"] = {1};
+  }
+
+  Variant do_call_method(std::string const &name,
+                         VariantMap const &params) override;
+
+  ::LatticeWalberla const &get_lattice() const override {
+    return m_ek_species->get_lattice();
+  }
+};
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/LBFluid.cpp b/src/script_interface/walberla/LBFluid.cpp
new file mode 100644
index 00000000000..42e6ecbb4c5
--- /dev/null
+++ b/src/script_interface/walberla/LBFluid.cpp
@@ -0,0 +1,373 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "LBFluid.hpp"
+#include "WalberlaCheckpoint.hpp"
+
+#include "core/BoxGeometry.hpp"
+#include "core/event.hpp"
+#include "core/grid.hpp"
+#include "core/grid_based_algorithms/lb_walberla_instance.hpp"
+#include "core/integrate.hpp"
+#include "core/lees_edwards/lees_edwards.hpp"
+#include "core/lees_edwards/protocols.hpp"
+
+#include <script_interface/communication.hpp>
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaNodeState.hpp>
+#include <walberla_bridge/lattice_boltzmann/LeesEdwardsPack.hpp>
+#include <walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+#include <utils/matrix.hpp>
+
+#include <boost/mpi.hpp>
+#include <boost/mpi/collectives/all_reduce.hpp>
+#include <boost/mpi/collectives/broadcast.hpp>
+#include <boost/optional.hpp>
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <functional>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+std::unordered_map<std::string, int> const LBVTKHandle::obs_map = {
+    {"density", static_cast<int>(OutputVTK::density)},
+    {"velocity_vector", static_cast<int>(OutputVTK::velocity_vector)},
+    {"pressure_tensor", static_cast<int>(OutputVTK::pressure_tensor)},
+};
+
+Variant LBFluid::do_call_method(std::string const &name,
+                                VariantMap const &params) {
+  if (name == "activate") {
+    context()->parallel_try_catch(
+        [&]() { ::activate_lb_walberla(m_instance, m_lb_params); });
+    m_is_active = true;
+    return {};
+  }
+  if (name == "deactivate") {
+    ::deactivate_lb_walberla();
+    m_is_active = false;
+    return {};
+  }
+  if (name == "add_force_at_pos") {
+    auto const pos = get_value<Utils::Vector3d>(params, "pos");
+    auto const f = get_value<Utils::Vector3d>(params, "force");
+    auto const folded_pos = folded_position(pos, box_geo);
+    m_instance->add_force_at_pos(folded_pos * m_conv_dist, f * m_conv_force);
+    return {};
+  }
+  if (name == "get_interpolated_velocity") {
+    auto const pos = get_value<Utils::Vector3d>(params, "pos");
+    return get_interpolated_velocity(pos);
+  }
+  if (name == "get_pressure_tensor") {
+    return get_average_pressure_tensor();
+  }
+  if (name == "load_checkpoint") {
+    auto const path = get_value<std::string>(params, "path");
+    auto const mode = get_value<int>(params, "mode");
+    load_checkpoint(path, mode);
+    return {};
+  }
+  if (name == "save_checkpoint") {
+    auto const path = get_value<std::string>(params, "path");
+    auto const mode = get_value<int>(params, "mode");
+    save_checkpoint(path, mode);
+    return {};
+  }
+  if (name == "clear_boundaries") {
+    m_instance->clear_boundaries();
+    m_instance->ghost_communication();
+    on_lb_boundary_conditions_change();
+    return {};
+  }
+  if (name == "add_boundary_from_shape") {
+    m_instance->update_boundary_from_shape(
+        get_value<std::vector<int>>(params, "raster"),
+        get_value<std::vector<double>>(params, "values"));
+    return {};
+  }
+  if (name == "get_lattice_speed") {
+    return 1. / m_conv_speed;
+  }
+
+  return Base::do_call_method(name, params);
+}
+
+void LBFluid::do_construct(VariantMap const &params) {
+  m_lattice = get_value<std::shared_ptr<LatticeWalberla>>(params, "lattice");
+  m_vtk_writers =
+      get_value_or<decltype(m_vtk_writers)>(params, "vtk_writers", {});
+  auto const tau = get_value<double>(params, "tau");
+  auto const agrid = get_value<double>(m_lattice->get_parameter("agrid"));
+  auto const visc = get_value<double>(params, "kinematic_viscosity");
+  auto const dens = get_value<double>(params, "density");
+  auto const kT = get_value<double>(params, "kT");
+  auto const ext_f = get_value<Utils::Vector3d>(params, "ext_force_density");
+  auto const single_precision = get_value<bool>(params, "single_precision");
+  m_lb_params = std::make_shared<::LBWalberlaParams>(agrid, tau);
+  m_is_active = false;
+  m_seed = get_value<int>(params, "seed");
+  context()->parallel_try_catch([&]() {
+    if (tau <= 0.) {
+      throw std::domain_error("Parameter 'tau' must be > 0");
+    }
+    m_conv_dist = 1. / agrid;
+    m_conv_visc = Utils::int_pow<1>(tau) / Utils::int_pow<2>(agrid);
+    m_conv_energy = Utils::int_pow<2>(tau) / Utils::int_pow<2>(agrid);
+    m_conv_dens = Utils::int_pow<3>(agrid);
+    m_conv_speed = Utils::int_pow<1>(tau) / Utils::int_pow<1>(agrid);
+    m_conv_press = Utils::int_pow<2>(tau) * Utils::int_pow<1>(agrid);
+    m_conv_force = Utils::int_pow<2>(tau) / Utils::int_pow<1>(agrid);
+    m_conv_force_dens = Utils::int_pow<2>(tau) * Utils::int_pow<2>(agrid);
+    auto const lb_lattice = m_lattice->lattice();
+    auto const lb_visc = m_conv_visc * visc;
+    auto const lb_dens = m_conv_dens * dens;
+    auto const lb_kT = m_conv_energy * kT;
+    auto const lb_ext_f = m_conv_force_dens * ext_f;
+    if (m_seed < 0) {
+      throw std::domain_error("Parameter 'seed' must be >= 0");
+    }
+    if (lb_kT < 0.) {
+      throw std::domain_error("Parameter 'kT' must be >= 0");
+    }
+    if (lb_dens <= 0.) {
+      throw std::domain_error("Parameter 'density' must be > 0");
+    }
+    if (lb_visc < 0.) {
+      throw std::domain_error("Parameter 'kinematic_viscosity' must be >= 0");
+    }
+    m_instance =
+        new_lb_walberla(lb_lattice, lb_visc, lb_dens, single_precision);
+    if (auto le_protocol = LeesEdwards::get_protocol().lock()) {
+      if (lb_kT != 0.) {
+        throw std::runtime_error(
+            "Lees-Edwards LB doesn't support thermalization");
+      }
+      auto const &le_bc = ::box_geo.lees_edwards_bc();
+      auto lees_edwards_object = std::make_unique<LeesEdwardsPack>(
+          le_bc.shear_direction, le_bc.shear_plane_normal,
+          [this, le_protocol]() {
+            return get_pos_offset(get_sim_time(), *le_protocol) /
+                   m_lb_params->get_agrid();
+          },
+          [this, le_protocol]() {
+            return get_shear_velocity(get_sim_time(), *le_protocol) *
+                   (m_lb_params->get_tau() / m_lb_params->get_agrid());
+          });
+      m_instance->set_collision_model(std::move(lees_edwards_object));
+    } else {
+      m_instance->set_collision_model(lb_kT, m_seed);
+    }
+    m_instance->ghost_communication(); // synchronize ghost layers
+    m_instance->set_external_force(lb_ext_f);
+    for (auto &vtk : m_vtk_writers) {
+      vtk->attach_to_lattice(m_instance, get_latice_to_md_units_conversion());
+    }
+  });
+}
+
+std::vector<Variant> LBFluid::get_average_pressure_tensor() const {
+  auto const local = m_instance->get_pressure_tensor() / m_conv_press;
+  auto const tensor_flat = mpi_reduce_sum(context()->get_comm(), local);
+  auto tensor = Utils::Matrix<double, 3, 3>{};
+  std::copy(tensor_flat.begin(), tensor_flat.end(), tensor.m_data.begin());
+  return std::vector<Variant>{tensor.row<0>().as_vector(),
+                              tensor.row<1>().as_vector(),
+                              tensor.row<2>().as_vector()};
+}
+
+Variant LBFluid::get_interpolated_velocity(Utils::Vector3d const &pos) const {
+  auto const lb_pos = folded_position(pos, box_geo) * m_conv_dist;
+  auto const result = m_instance->get_velocity_at_pos(lb_pos);
+  return mpi_reduce_optional(context()->get_comm(), result) / m_conv_speed;
+}
+
+void LBFluid::load_checkpoint(std::string const &filename, int mode) {
+  auto &lb_obj = *m_instance;
+
+  auto const read_metadata = [&lb_obj](CheckpointFile &cpfile) {
+    auto const expected_grid_size = lb_obj.get_lattice().get_grid_dimensions();
+    auto const expected_pop_size = lb_obj.stencil_size();
+    Utils::Vector3i read_grid_size;
+    std::size_t read_pop_size;
+    cpfile.read(read_grid_size);
+    cpfile.read(read_pop_size);
+    if (read_grid_size != expected_grid_size) {
+      std::stringstream message;
+      message << "grid dimensions mismatch, "
+              << "read [" << read_grid_size << "], "
+              << "expected [" << expected_grid_size << "].";
+      throw std::runtime_error(message.str());
+    }
+    if (read_pop_size != expected_pop_size) {
+      throw std::runtime_error("population size mismatch, read " +
+                               std::to_string(read_pop_size) + ", expected " +
+                               std::to_string(expected_pop_size) + ".");
+    }
+  };
+
+  auto const read_data = [&lb_obj](CheckpointFile &cpfile) {
+    auto const grid_size = lb_obj.get_lattice().get_grid_dimensions();
+    auto const i_max = grid_size[0];
+    auto const j_max = grid_size[1];
+    auto const k_max = grid_size[2];
+    LBWalberlaNodeState cpnode;
+    cpnode.populations.resize(lb_obj.stencil_size());
+    for (int i = 0; i < i_max; i++) {
+      for (int j = 0; j < j_max; j++) {
+        for (int k = 0; k < k_max; k++) {
+          auto const ind = Utils::Vector3i{{i, j, k}};
+          cpfile.read(cpnode.populations);
+          cpfile.read(cpnode.last_applied_force);
+          cpfile.read(cpnode.is_boundary);
+          if (cpnode.is_boundary) {
+            cpfile.read(cpnode.slip_velocity);
+          }
+          lb_obj.set_node_population(ind, cpnode.populations);
+          lb_obj.set_node_last_applied_force(ind, cpnode.last_applied_force);
+          if (cpnode.is_boundary) {
+            lb_obj.set_node_velocity_at_boundary(ind, cpnode.slip_velocity);
+          }
+        }
+      }
+    }
+  };
+
+  auto const on_success = [&lb_obj]() {
+    lb_obj.reallocate_ubb_field();
+    lb_obj.ghost_communication();
+  };
+
+  load_checkpoint_common(*context(), "LB", filename, mode, read_metadata,
+                         read_data, on_success);
+}
+
+void LBFluid::save_checkpoint(std::string const &filename, int mode) {
+  auto &lb_obj = *m_instance;
+
+  auto const write_metadata = [&lb_obj,
+                               mode](std::shared_ptr<CheckpointFile> cpfile_ptr,
+                                     Context const &context) {
+    auto const grid_size = lb_obj.get_lattice().get_grid_dimensions();
+    auto const pop_size = lb_obj.stencil_size();
+    if (context.is_head_node()) {
+      cpfile_ptr->write(grid_size);
+      cpfile_ptr->write(pop_size);
+      unit_test_handle(mode);
+    }
+  };
+
+  auto const on_failure = [](std::shared_ptr<CheckpointFile> const &,
+                             Context const &context) {
+    if (context.is_head_node()) {
+      auto failure = true;
+      boost::mpi::broadcast(context.get_comm(), failure, 0);
+    }
+  };
+
+  auto const write_data = [&lb_obj,
+                           mode](std::shared_ptr<CheckpointFile> cpfile_ptr,
+                                 Context const &context) {
+    auto const get_node_checkpoint = [&](Utils::Vector3i const &ind)
+        -> boost::optional<LBWalberlaNodeState> {
+      auto const pop = lb_obj.get_node_population(ind);
+      auto const laf = lb_obj.get_node_last_applied_force(ind);
+      auto const lbb = lb_obj.get_node_is_boundary(ind);
+      auto const vbb = lb_obj.get_node_velocity_at_boundary(ind);
+      if (pop and laf and lbb and ((*lbb) ? vbb.has_value() : true)) {
+        LBWalberlaNodeState cpnode;
+        cpnode.populations = *pop;
+        cpnode.last_applied_force = *laf;
+        cpnode.is_boundary = *lbb;
+        if (*lbb) {
+          cpnode.slip_velocity = *vbb;
+        }
+        return cpnode;
+      }
+      return {boost::none};
+    };
+
+    auto failure = false;
+    auto const &comm = context.get_comm();
+    auto const is_head_node = context.is_head_node();
+    auto const unit_test_mode = (mode != static_cast<int>(CptMode::ascii)) and
+                                (mode != static_cast<int>(CptMode::binary));
+    auto const grid_size = lb_obj.get_lattice().get_grid_dimensions();
+    auto const i_max = grid_size[0];
+    auto const j_max = grid_size[1];
+    auto const k_max = grid_size[2];
+    LBWalberlaNodeState cpnode;
+    for (int i = 0; i < i_max; i++) {
+      for (int j = 0; j < j_max; j++) {
+        for (int k = 0; k < k_max; k++) {
+          auto const ind = Utils::Vector3i{{i, j, k}};
+          auto const result = get_node_checkpoint(ind);
+          if (!unit_test_mode) {
+            assert(1 == boost::mpi::all_reduce(comm, static_cast<int>(!!result),
+                                               std::plus<>()) &&
+                   "Incorrect number of return values");
+          }
+          if (is_head_node) {
+            if (result) {
+              cpnode = *result;
+            } else {
+              comm.recv(boost::mpi::any_source, 42, cpnode);
+            }
+            auto &cpfile = *cpfile_ptr;
+            cpfile.write(cpnode.populations);
+            cpfile.write(cpnode.last_applied_force);
+            cpfile.write(cpnode.is_boundary);
+            if (cpnode.is_boundary) {
+              cpfile.write(cpnode.slip_velocity);
+            }
+            boost::mpi::broadcast(comm, failure, 0);
+          } else {
+            if (result) {
+              comm.send(0, 42, *result);
+            }
+            boost::mpi::broadcast(comm, failure, 0);
+            if (failure) {
+              return;
+            }
+          }
+        }
+      }
+    }
+  };
+
+  save_checkpoint_common(*context(), "LB", filename, mode, write_metadata,
+                         write_data, on_failure);
+}
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/LBFluid.hpp b/src/script_interface/walberla/LBFluid.hpp
new file mode 100644
index 00000000000..43477e187ea
--- /dev/null
+++ b/src/script_interface/walberla/LBFluid.hpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "LatticeModel.hpp"
+#include "LatticeWalberla.hpp"
+#include "VTKHandle.hpp"
+
+#include "core/grid_based_algorithms/lb_walberla_instance.hpp"
+
+#include <script_interface/ScriptInterface.hpp>
+
+#include <walberla_bridge/LatticeModel.hpp>
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaNodeState.hpp>
+
+#include <utils/Vector.hpp>
+#include <utils/math/int_pow.hpp>
+
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+class LBVTKHandle;
+
+class LBFluid : public LatticeModel<::LBWalberlaBase, LBVTKHandle> {
+  using Base = LatticeModel<::LBWalberlaBase, LBVTKHandle>;
+  std::shared_ptr<::LBWalberlaParams> m_lb_params;
+  bool m_is_active;
+  int m_seed;
+  double m_conv_dist;
+  double m_conv_visc;
+  double m_conv_dens;
+  double m_conv_speed;
+  double m_conv_press;
+  double m_conv_force;
+  double m_conv_force_dens;
+  double m_conv_energy;
+
+public:
+  LBFluid() {
+    add_parameters({
+        {"lattice", AutoParameter::read_only, [this]() { return m_lattice; }},
+        {"single_precision", AutoParameter::read_only,
+         [this]() { return not m_instance->is_double_precision(); }},
+        {"is_active", AutoParameter::read_only,
+         [this]() { return m_is_active; }},
+        {"agrid", AutoParameter::read_only,
+         [this]() { return m_lb_params->get_agrid(); }},
+        {"tau", AutoParameter::read_only,
+         [this]() { return m_lb_params->get_tau(); }},
+        {"shape", AutoParameter::read_only,
+         [this]() { return m_instance->get_lattice().get_grid_dimensions(); }},
+        {"kT", AutoParameter::read_only,
+         [this]() { return m_instance->get_kT() / m_conv_energy; }},
+        {"seed", AutoParameter::read_only, [this]() { return m_seed; }},
+        {"rng_state",
+         [this](Variant const &v) {
+           auto const rng_state = get_value<int>(v);
+           context()->parallel_try_catch([&]() {
+             if (rng_state < 0) {
+               throw std::domain_error("Parameter 'rng_state' must be >= 0");
+             }
+             m_instance->set_rng_state(static_cast<uint64_t>(rng_state));
+           });
+         },
+         [this]() {
+           auto const opt = m_instance->get_rng_state();
+           return (opt) ? Variant{static_cast<int>(*opt)} : Variant{None{}};
+         }},
+        {"density", AutoParameter::read_only,
+         [this]() { return m_instance->get_density() / m_conv_dens; }},
+        {"kinematic_viscosity",
+         [this](Variant const &v) {
+           auto const visc = m_conv_visc * get_value<double>(v);
+           m_instance->set_viscosity(visc);
+         },
+         [this]() { return m_instance->get_viscosity() / m_conv_visc; }},
+        {"ext_force_density",
+         [this](Variant const &v) {
+           auto const ext_f = m_conv_force_dens * get_value<Utils::Vector3d>(v);
+           m_instance->set_external_force(ext_f);
+         },
+         [this]() {
+           return m_instance->get_external_force() / m_conv_force_dens;
+         }},
+        {"vtk_writers", AutoParameter::read_only,
+         [this]() { return serialize_vtk_writers(); }},
+    });
+  }
+
+  void do_construct(VariantMap const &params) override;
+
+  Variant do_call_method(std::string const &name,
+                         VariantMap const &params) override;
+
+  [[nodiscard]] auto get_lb_fluid() const { return m_instance; }
+  [[nodiscard]] auto get_lb_params() const { return m_lb_params; }
+
+  ::LatticeModel::units_map get_latice_to_md_units_conversion() const override {
+    return {
+        {"density", 1. / m_conv_dens},
+        {"velocity", 1. / m_conv_speed},
+        {"pressure", 1. / m_conv_press},
+    };
+  }
+
+private:
+  void load_checkpoint(std::string const &filename, int mode);
+  void save_checkpoint(std::string const &filename, int mode);
+  std::vector<Variant> get_average_pressure_tensor() const;
+  Variant get_interpolated_velocity(Utils::Vector3d const &pos) const;
+};
+
+class LBVTKHandle : public VTKHandleBase<::LBWalberlaBase> {
+  static std::unordered_map<std::string, int> const obs_map;
+
+  std::unordered_map<std::string, int> const &get_obs_map() const override {
+    return obs_map;
+  }
+};
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/LBFluidNode.cpp b/src/script_interface/walberla/LBFluidNode.cpp
new file mode 100644
index 00000000000..9d073b134a2
--- /dev/null
+++ b/src/script_interface/walberla/LBFluidNode.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "LBFluidNode.hpp"
+
+#include <script_interface/communication.hpp>
+
+#include <utils/Vector.hpp>
+#include <utils/matrix.hpp>
+
+#include <boost/mpi/collectives/all_reduce.hpp>
+#include <boost/mpi/communicator.hpp>
+#include <boost/optional.hpp>
+
+#include <string>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+static bool is_boundary_all_reduce(boost::mpi::communicator const &comm,
+                                   boost::optional<bool> const &is_boundary) {
+  return boost::mpi::all_reduce(comm, is_boundary ? *is_boundary : false,
+                                std::logical_or<>());
+}
+
+Variant LBFluidNode::do_call_method(std::string const &name,
+                                    VariantMap const &params) {
+  if (name == "set_velocity_at_boundary") {
+    if (is_none(params.at("value"))) {
+      m_lb_fluid->remove_node_from_boundary(m_index);
+      m_lb_fluid->ghost_communication();
+    } else {
+      auto const u =
+          get_value<Utils::Vector3d>(params, "value") * m_conv_velocity;
+      m_lb_fluid->set_node_velocity_at_boundary(m_index, u);
+      m_lb_fluid->ghost_communication();
+    }
+    return {};
+  }
+  if (name == "get_velocity_at_boundary") {
+    auto const boundary_opt = m_lb_fluid->get_node_is_boundary(m_index);
+    if (is_boundary_all_reduce(context()->get_comm(), boundary_opt)) {
+      auto const result = m_lb_fluid->get_node_velocity_at_boundary(m_index);
+      return mpi_reduce_optional(context()->get_comm(), result) /
+             m_conv_velocity;
+    }
+    return Variant{None{}};
+  }
+  if (name == "get_density") {
+    auto const result = m_lb_fluid->get_node_density(m_index);
+    return mpi_reduce_optional(context()->get_comm(), result) / m_conv_dens;
+  }
+  if (name == "set_density") {
+    auto const dens = get_value<double>(params, "value");
+    m_lb_fluid->set_node_density(m_index, dens * m_conv_dens);
+    m_lb_fluid->ghost_communication();
+    return {};
+  }
+  if (name == "get_population") {
+    auto const result = m_lb_fluid->get_node_population(m_index);
+    return mpi_reduce_optional(context()->get_comm(), result);
+  }
+  if (name == "set_population") {
+    auto const pop = get_value<std::vector<double>>(params, "value");
+    m_lb_fluid->set_node_population(m_index, pop);
+    m_lb_fluid->ghost_communication();
+    return {};
+  }
+  if (name == "get_velocity") {
+    auto const result = m_lb_fluid->get_node_velocity(m_index);
+    return mpi_reduce_optional(context()->get_comm(), result) / m_conv_velocity;
+  }
+  if (name == "set_velocity") {
+    auto const u =
+        get_value<Utils::Vector3d>(params, "value") * m_conv_velocity;
+    m_lb_fluid->set_node_velocity(m_index, u);
+    m_lb_fluid->ghost_communication();
+    return {};
+  }
+  if (name == "get_is_boundary") {
+    auto const result = m_lb_fluid->get_node_is_boundary(m_index);
+    return mpi_reduce_optional(context()->get_comm(), result);
+  }
+  if (name == "get_boundary_force") {
+    auto const boundary_opt = m_lb_fluid->get_node_is_boundary(m_index);
+    if (is_boundary_all_reduce(context()->get_comm(), boundary_opt)) {
+      auto result = m_lb_fluid->get_node_boundary_force(m_index);
+      return mpi_reduce_optional(context()->get_comm(), result) / m_conv_force;
+    }
+    return Variant{None{}};
+  }
+  if (name == "get_pressure_tensor") {
+    auto const result = m_lb_fluid->get_node_pressure_tensor(m_index);
+    auto value = boost::optional<std::vector<double>>{};
+    if (result) {
+      value = (*result / m_conv_press).as_vector();
+    }
+    auto const vec = mpi_reduce_optional(context()->get_comm(), value);
+    if (context()->is_head_node()) {
+      auto tensor = Utils::Matrix<double, 3, 3>{};
+      std::copy(vec.begin(), vec.end(), tensor.m_data.begin());
+      return std::vector<Variant>{tensor.row<0>().as_vector(),
+                                  tensor.row<1>().as_vector(),
+                                  tensor.row<2>().as_vector()};
+    }
+    return {};
+  }
+  if (name == "get_last_applied_force") {
+    auto const result = m_lb_fluid->get_node_last_applied_force(m_index);
+    return mpi_reduce_optional(context()->get_comm(), result) / m_conv_force;
+  }
+  if (name == "set_last_applied_force") {
+    auto const f = get_value<Utils::Vector3d>(params, "value");
+    m_lb_fluid->set_node_last_applied_force(m_index, f * m_conv_force);
+    m_lb_fluid->ghost_communication();
+    return {};
+  }
+  if (name == "get_lattice_speed") {
+    return 1. / m_conv_velocity;
+  }
+
+  return {};
+}
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/LBFluidNode.hpp b/src/script_interface/walberla/LBFluidNode.hpp
new file mode 100644
index 00000000000..9ad6b09ae6b
--- /dev/null
+++ b/src/script_interface/walberla/LBFluidNode.hpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "LBFluid.hpp"
+
+#include "LatticeIndices.hpp"
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+
+#include <utils/Vector.hpp>
+#include <utils/math/int_pow.hpp>
+
+#include <cassert>
+#include <memory>
+#include <stdexcept>
+#include <string>
+
+namespace ScriptInterface::walberla {
+
+class LBFluidNode : public AutoParameters<LBFluidNode, LatticeIndices> {
+  std::shared_ptr<::LBWalberlaBase> m_lb_fluid;
+  Utils::Vector3i m_index;
+  Utils::Vector3i m_grid_size;
+  double m_conv_dens;
+  double m_conv_press;
+  double m_conv_force;
+  double m_conv_velocity;
+
+public:
+  LBFluidNode() {
+    add_parameters(
+        {{"_index", AutoParameter::read_only, [this]() { return m_index; }}});
+  }
+
+  void do_construct(VariantMap const &params) override {
+    auto const lb_sip =
+        get_value<std::shared_ptr<LBFluid>>(params, "parent_sip");
+    m_lb_fluid = lb_sip->get_lb_fluid();
+    auto const lb_params = lb_sip->get_lb_params();
+    auto const tau = lb_params->get_tau();
+    auto const agrid = lb_params->get_agrid();
+    m_conv_dens = Utils::int_pow<3>(agrid);
+    m_conv_press = Utils::int_pow<1>(agrid) * Utils::int_pow<2>(tau);
+    m_conv_force = Utils::int_pow<2>(tau) / Utils::int_pow<1>(agrid);
+    m_conv_velocity = Utils::int_pow<1>(tau) / Utils::int_pow<1>(agrid);
+    m_grid_size = m_lb_fluid->get_lattice().get_grid_dimensions();
+    m_index = get_mapped_index(get_value<Utils::Vector3i>(params, "index"),
+                               m_grid_size);
+  }
+
+  Variant do_call_method(std::string const &name,
+                         VariantMap const &params) override;
+};
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/LBFluidSlice.cpp b/src/script_interface/walberla/LBFluidSlice.cpp
new file mode 100644
index 00000000000..f87457a78b6
--- /dev/null
+++ b/src/script_interface/walberla/LBFluidSlice.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "LBFluidSlice.hpp"
+
+#include "LatticeSlice.impl.hpp"
+
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+Variant LBFluidSlice::do_call_method(std::string const &name,
+                                     VariantMap const &params) {
+  if (name == "get_slice_size") {
+    return {m_slice_upper_corner - m_slice_lower_corner};
+  }
+  if (name == "get_slice_ranges") {
+    return {std::vector<Variant>{m_slice_lower_corner, m_slice_upper_corner}};
+  }
+  if (name == "get_lb_sip") {
+    return {m_lb_sip};
+  }
+  if (name == "get_value_shape") {
+    auto const name = get_value<std::string>(params, "name");
+    if (m_shape_val.count(name) == 0) {
+      context()->parallel_try_catch([&]() {
+        throw std::runtime_error("Unknown fluid property '" + name + "'");
+      });
+    }
+    return m_shape_val.at(name);
+  }
+  if (name == "get_lattice_speed") {
+    return 1. / m_conv_velocity;
+  }
+
+  // slice getter/setter callback
+  auto const call = [this, params](auto method_ptr,
+                                   std::vector<int> const &data_dims,
+                                   double units = 1.) -> Variant {
+    auto &obj = *m_lb_fluid;
+    if constexpr (std::is_invocable_v<decltype(method_ptr), LatticeModel *,
+                                      Utils::Vector3i const &,
+                                      Utils::Vector3i const &>) {
+      return gather_3d(params, data_dims, obj, method_ptr, units);
+    } else {
+      scatter_3d(params, data_dims, obj, method_ptr, units);
+      return {};
+    }
+  };
+
+  if (name == "get_population") {
+    auto const pop_size = m_shape_val.at("population");
+    return call(&LatticeModel::get_slice_population, pop_size);
+  }
+  if (name == "set_population") {
+    auto const pop_size = m_shape_val.at("population");
+    return call(&LatticeModel::set_slice_population, pop_size);
+  }
+  if (name == "get_density") {
+    return call(&LatticeModel::get_slice_density, {1}, 1. / m_conv_dens);
+  }
+  if (name == "set_density") {
+    return call(&LatticeModel::set_slice_density, {1}, m_conv_dens);
+  }
+  if (name == "get_velocity") {
+    return call(&LatticeModel::get_slice_velocity, {3}, 1. / m_conv_velocity);
+  }
+  if (name == "set_velocity") {
+    return call(&LatticeModel::set_slice_velocity, {3}, m_conv_velocity);
+  }
+  if (name == "get_is_boundary") {
+    return call(&LatticeModel::get_slice_is_boundary, {1});
+  }
+  if (name == "get_velocity_at_boundary") {
+    return call(&LatticeModel::get_slice_velocity_at_boundary, {1},
+                1. / m_conv_velocity);
+  }
+  if (name == "set_velocity_at_boundary") {
+    return call(&LatticeModel::set_slice_velocity_at_boundary, {1},
+                m_conv_velocity);
+  }
+  if (name == "get_pressure_tensor") {
+    return call(&LatticeModel::get_slice_pressure_tensor, {3, 3},
+                1. / m_conv_press);
+  }
+  if (name == "get_last_applied_force") {
+    return call(&LatticeModel::get_slice_last_applied_force, {3},
+                1. / m_conv_force);
+  }
+  if (name == "set_last_applied_force") {
+    return call(&LatticeModel::set_slice_last_applied_force, {3}, m_conv_force);
+  }
+
+  return {};
+}
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/LBFluidSlice.hpp b/src/script_interface/walberla/LBFluidSlice.hpp
new file mode 100644
index 00000000000..9d69c728226
--- /dev/null
+++ b/src/script_interface/walberla/LBFluidSlice.hpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "LBFluid.hpp"
+
+#include "LatticeSlice.hpp"
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+
+#include <utils/Vector.hpp>
+#include <utils/math/int_pow.hpp>
+
+#include <cassert>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+using VelocityBounceBackType = boost::optional<Utils::Vector3d>;
+
+struct LBFieldSerializer {
+  template <typename T> static Variant serialize(std::vector<T> const &values) {
+    if constexpr (std::is_same_v<T, VelocityBounceBackType>) {
+      std::vector<Variant> vec;
+      vec.reserve(values.size());
+      for (auto const &opt : values) {
+        if (opt) {
+          vec.emplace_back(Variant{*opt});
+        } else {
+          vec.emplace_back(Variant{None{}});
+        }
+      }
+      return {vec};
+    } else if constexpr (std::is_same_v<T, int> or std::is_same_v<T, double>) {
+      return {values};
+    } else {
+      return make_vector_of_variants(values);
+    }
+  }
+
+  template <typename T>
+  static std::vector<T> deserialize(Variant const &variant) {
+    std::vector<T> values;
+    if constexpr (std::is_same_v<T, VelocityBounceBackType>) {
+      auto const vector_variants = get_value<std::vector<Variant>>(variant);
+      for (auto const &value : vector_variants) {
+        if (is_none(value)) {
+          values.emplace_back(boost::none);
+        } else {
+          values.emplace_back(get_value<Utils::Vector3d>(value));
+        }
+      }
+    } else if constexpr (std::is_same_v<T, double>) {
+      if (is_type<std::vector<int>>(variant)) {
+        auto const values_int = get_value<std::vector<int>>(variant);
+        values.reserve(values_int.size());
+        for (auto const val : values_int) {
+          values.emplace_back(static_cast<double>(val));
+        }
+      } else {
+        values = get_value<std::vector<T>>(variant);
+      }
+    } else {
+      values = get_value<std::vector<T>>(variant);
+    }
+    return values;
+  }
+};
+
+class LBFluidSlice : public LatticeSlice<LBFieldSerializer> {
+  using LatticeModel = ::LBWalberlaBase;
+  std::shared_ptr<LatticeModel> m_lb_fluid;
+  std::shared_ptr<LBFluid> m_lb_sip;
+  double m_conv_dens;
+  double m_conv_press;
+  double m_conv_force;
+  double m_conv_velocity;
+  std::unordered_map<std::string, std::vector<int>> m_shape_val;
+
+public:
+  void do_construct(VariantMap const &params) override {
+    m_lb_sip = get_value<std::shared_ptr<LBFluid>>(params, "parent_sip");
+    m_lb_fluid = m_lb_sip->get_lb_fluid();
+    auto const lb_params = m_lb_sip->get_lb_params();
+    auto const tau = lb_params->get_tau();
+    auto const agrid = lb_params->get_agrid();
+    m_conv_dens = Utils::int_pow<3>(agrid);
+    m_conv_press = Utils::int_pow<1>(agrid) * Utils::int_pow<2>(tau);
+    m_conv_force = Utils::int_pow<2>(tau) / Utils::int_pow<1>(agrid);
+    m_conv_velocity = Utils::int_pow<1>(tau) / Utils::int_pow<1>(agrid);
+    m_shape = get_value<std::vector<int>>(params, "shape");
+    m_slice_lower_corner =
+        get_value<Utils::Vector3i>(params, "slice_lower_corner");
+    m_slice_upper_corner =
+        get_value<Utils::Vector3i>(params, "slice_upper_corner");
+    m_shape_val["density"] = {1};
+    m_shape_val["population"] = {static_cast<int>(m_lb_fluid->stencil_size())};
+    m_shape_val["velocity"] = {3};
+    m_shape_val["velocity_at_boundary"] = {1};
+    m_shape_val["is_boundary"] = {1};
+    m_shape_val["last_applied_force"] = {3};
+    m_shape_val["pressure_tensor"] = {3, 3};
+  }
+
+  Variant do_call_method(std::string const &name,
+                         VariantMap const &params) override;
+
+  ::LatticeWalberla const &get_lattice() const override {
+    return m_lb_fluid->get_lattice();
+  }
+};
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/LatticeIndices.hpp b/src/script_interface/walberla/LatticeIndices.hpp
new file mode 100644
index 00000000000..5d594f0614d
--- /dev/null
+++ b/src/script_interface/walberla/LatticeIndices.hpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <script_interface/ScriptInterface.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <initializer_list>
+#include <sstream>
+#include <stdexcept>
+
+namespace ScriptInterface {
+
+/** @brief Interface to carry out simple operations on lattice indices. */
+class LatticeIndices : public ObjectHandle {
+protected:
+  [[nodiscard]] bool is_index_valid(Utils::Vector3i const &index,
+                                    Utils::Vector3i const &shape) const {
+    return index < shape and index >= Utils::Vector3i{};
+  }
+
+  void throw_invalid_index(Utils::Vector3i const &index,
+                           Utils::Vector3i const &shape) const {
+    if (context()->is_head_node()) {
+      auto constexpr formatter = Utils::Vector3i::formatter(", ");
+      std::stringstream ss;
+      ss << "provided index [" << formatter << index << "] is out of range "
+         << "for shape [" << formatter << shape << "]";
+      throw std::out_of_range(ss.str());
+    }
+    throw Exception("");
+  }
+
+  [[nodiscard]] Utils::Vector3i
+  get_mapped_index(Utils::Vector3i const &index,
+                   Utils::Vector3i const &shape) const {
+    auto output = index;
+    for (auto i : {0u, 1u, 2u}) {
+      if (output[i] < 0) {
+        output[i] += shape[i];
+      }
+    }
+    if (not is_index_valid(output, shape)) {
+      throw_invalid_index(index, shape);
+    }
+    return output;
+  }
+};
+
+} // namespace ScriptInterface
diff --git a/src/script_interface/walberla/LatticeModel.hpp b/src/script_interface/walberla/LatticeModel.hpp
new file mode 100644
index 00000000000..c5830ab82ad
--- /dev/null
+++ b/src/script_interface/walberla/LatticeModel.hpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "LatticeWalberla.hpp"
+
+#include <walberla_bridge/LatticeModel.hpp>
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+
+#include <algorithm>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+template <class Method, class VTKHandle>
+class LatticeModel : public AutoParameters<LatticeModel<Method, VTKHandle>> {
+protected:
+  std::shared_ptr<LatticeWalberla> m_lattice;
+  std::shared_ptr<Method> m_instance;
+  std::vector<std::shared_ptr<VTKHandle>> m_vtk_writers;
+
+  virtual ::LatticeModel::units_map
+  get_latice_to_md_units_conversion() const = 0;
+
+  auto find_vtk(std::shared_ptr<VTKHandle> const &vtk) const {
+    return std::find(m_vtk_writers.begin(), m_vtk_writers.end(), vtk);
+  }
+
+  auto serialize_vtk_writers() const {
+    return make_vector_of_variants(m_vtk_writers);
+  }
+
+public:
+  Variant do_call_method(std::string const &method_name,
+                         VariantMap const &params) override {
+    if (method_name == "add_vtk_writer") {
+      auto vtk = get_value<std::shared_ptr<VTKHandle>>(params, "vtk");
+      auto const needle = find_vtk(vtk);
+      ObjectHandle::context()->parallel_try_catch([&]() {
+        if (needle != m_vtk_writers.end()) {
+          throw std::runtime_error(
+              "VTK object is already attached to this lattice");
+        }
+        vtk->attach_to_lattice(m_instance, get_latice_to_md_units_conversion());
+        m_vtk_writers.emplace_back(vtk);
+      });
+      return {};
+    }
+    if (method_name == "remove_vtk_writer") {
+      auto const vtk = get_value<std::shared_ptr<VTKHandle>>(params, "vtk");
+      auto const needle = find_vtk(vtk);
+      ObjectHandle::context()->parallel_try_catch([&]() {
+        if (needle == m_vtk_writers.end()) {
+          throw std::runtime_error(
+              "VTK object is not attached to this lattice");
+        }
+        vtk->detach_from_lattice();
+      });
+      m_vtk_writers.erase(needle);
+      return {};
+    }
+    if (method_name == "clear_vtk_writers") {
+      for (auto const &vtk : m_vtk_writers) {
+        vtk->detach_from_lattice();
+      }
+      m_vtk_writers.clear();
+    }
+    return {};
+  }
+};
+
+} // namespace ScriptInterface::walberla
diff --git a/src/script_interface/walberla/LatticeSlice.hpp b/src/script_interface/walberla/LatticeSlice.hpp
new file mode 100644
index 00000000000..0a201ac51ae
--- /dev/null
+++ b/src/script_interface/walberla/LatticeSlice.hpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "LatticeIndices.hpp"
+
+#include <script_interface/ScriptInterface.hpp>
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <cassert>
+#include <tuple>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+template <class FieldSerializer> class LatticeSlice : public LatticeIndices {
+protected:
+  Utils::Vector3i m_slice_lower_corner;
+  Utils::Vector3i m_slice_upper_corner;
+  std::vector<int> m_shape;
+
+public:
+  virtual ::LatticeWalberla const &get_lattice() const = 0;
+
+private:
+  auto get_sentinel_index(::LatticeWalberla const &lattice) const {
+    return -(static_cast<int>(lattice.get_ghost_layers()) + 1);
+  }
+
+  auto get_slices_bounding_boxes() const {
+    auto const &lattice = get_lattice();
+    auto const &slice_lower_corner = m_slice_lower_corner;
+    auto const &slice_upper_corner = m_slice_upper_corner;
+    assert(slice_upper_corner <= lattice.get_grid_dimensions());
+    assert(slice_lower_corner >= Utils::Vector3i::broadcast(0));
+    auto const sentinel = get_sentinel_index(lattice);
+    auto [local_lower_corner, local_upper_corner] =
+        lattice.get_local_grid_range();
+    for (auto const i : {0, 1, 2}) {
+      if (local_lower_corner[i] >= slice_upper_corner[i] or
+          slice_lower_corner[i] >= local_upper_corner[i]) {
+        local_lower_corner[i] = sentinel;
+        local_upper_corner[i] = sentinel;
+      } else {
+        if (slice_lower_corner[i] > local_lower_corner[i]) {
+          local_lower_corner[i] = slice_lower_corner[i];
+        }
+        if (slice_upper_corner[i] < local_upper_corner[i]) {
+          local_upper_corner[i] = slice_upper_corner[i];
+        }
+      }
+    }
+    return std::make_tuple(slice_lower_corner, slice_upper_corner,
+                           local_lower_corner, local_upper_corner);
+  }
+
+protected:
+  template <class LatticeModel, typename T>
+  Variant gather_3d(VariantMap const &params, std::vector<int> const &data_dims,
+                    LatticeModel const &lattice_model,
+                    std::vector<T> (LatticeModel::*getter)(
+                        Utils::Vector3i const &, Utils::Vector3i const &) const,
+                    double units_conversion = 1.) const;
+
+  template <class LatticeModel, typename T>
+  void scatter_3d(VariantMap const &params, std::vector<int> const &data_dims,
+                  LatticeModel &lattice_model,
+                  void (LatticeModel::*setter)(Utils::Vector3i const &,
+                                               Utils::Vector3i const &,
+                                               std::vector<T> const &),
+                  double units_conversion = 1.);
+};
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/LatticeSlice.impl.hpp b/src/script_interface/walberla/LatticeSlice.impl.hpp
new file mode 100644
index 00000000000..5d81b247734
--- /dev/null
+++ b/src/script_interface/walberla/LatticeSlice.impl.hpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <utils/Vector.hpp>
+
+#include <boost/mpi/collectives/gather.hpp>
+#include <boost/mpi/collectives/scatter.hpp>
+#include <boost/mpi/communicator.hpp>
+#include <boost/multi_array.hpp>
+#include <boost/optional.hpp>
+#include <boost/serialization/optional.hpp>
+#include <boost/serialization/vector.hpp>
+
+#include <algorithm>
+#include <functional>
+#include <stdexcept>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+namespace detail {
+
+// boundary types are always boost::optional types
+template <class> struct is_optional_type : public std::false_type {};
+template <class T>
+struct is_optional_type<boost::optional<T>> : public std::true_type {};
+
+template <class ArrayView, typename T>
+void unflatten_grid(ArrayView &view, std::vector<T> const &values) {
+  using array_type = boost::multi_array<T, 4>;
+  auto it = values.begin();
+  auto const dim_i = static_cast<typename array_type::index>(view.shape()[0]);
+  auto const dim_j = static_cast<typename array_type::index>(view.shape()[1]);
+  auto const dim_k = static_cast<typename array_type::index>(view.shape()[2]);
+  auto const dim_t = static_cast<typename array_type::index>(view.shape()[3]);
+  for (typename array_type::index i = 0; i != dim_i; ++i) {
+    for (typename array_type::index j = 0; j != dim_j; ++j) {
+      for (typename array_type::index k = 0; k != dim_k; ++k) {
+        for (typename array_type::index t = 0; t != dim_t; ++t) {
+          view[i][j][k][t] = *it;
+          ++it;
+        }
+      }
+    }
+  }
+}
+
+template <class FieldSerializer, class ArrayView, typename T>
+void flatten_grid(ArrayView const &view, std::vector<T> &out,
+                  double units_conversion) {
+  using array_type = boost::multi_array<T, 4>;
+  out.reserve(view.num_elements());
+  auto const dim_i = static_cast<typename array_type::index>(view.shape()[0]);
+  auto const dim_j = static_cast<typename array_type::index>(view.shape()[1]);
+  auto const dim_k = static_cast<typename array_type::index>(view.shape()[2]);
+  auto const dim_t = static_cast<typename array_type::index>(view.shape()[3]);
+  for (typename array_type::index i = 0; i != dim_i; ++i) {
+    for (typename array_type::index j = 0; j != dim_j; ++j) {
+      for (typename array_type::index k = 0; k != dim_k; ++k) {
+        for (typename array_type::index t = 0; t != dim_t; ++t) {
+          if constexpr (std::is_floating_point_v<T>) {
+            out.emplace_back(view[i][j][k][t] * units_conversion);
+          } else if constexpr (is_optional_type<T>{}) {
+            if (view[i][j][k][t]) {
+              out.emplace_back(*(view[i][j][k][t]) * units_conversion);
+            } else {
+              out.emplace_back(boost::none);
+            }
+          } else {
+            out.emplace_back(view[i][j][k][t]);
+          }
+        }
+      }
+    }
+  }
+}
+
+} // namespace detail
+
+inline auto gather_slices_topology(boost::mpi::communicator const &comm,
+                                   Utils::Vector3i const &local_lower_corner,
+                                   Utils::Vector3i const &local_upper_corner) {
+  std::vector<Utils::Vector3i> nodes_lower_corners;
+  std::vector<Utils::Vector3i> nodes_upper_corners;
+  boost::mpi::gather(comm, local_lower_corner, nodes_lower_corners, 0);
+  boost::mpi::gather(comm, local_upper_corner, nodes_upper_corners, 0);
+  return std::make_tuple(nodes_lower_corners, nodes_upper_corners);
+}
+
+template <class FieldSerializer>
+template <class LatticeModel, typename T>
+Variant LatticeSlice<FieldSerializer>::gather_3d(
+    VariantMap const &params, std::vector<int> const &data_dims,
+    LatticeModel const &lattice_model,
+    std::vector<T> (LatticeModel::*getter)(Utils::Vector3i const &,
+                                           Utils::Vector3i const &) const,
+    double units_conversion) const {
+  auto const &comm = context()->get_comm();
+  auto const [slice_lower_corner, slice_upper_corner, local_lower_corner,
+              local_upper_corner] = get_slices_bounding_boxes();
+  auto const [nodes_lower_corners, nodes_upper_corners] =
+      gather_slices_topology(comm, local_lower_corner, local_upper_corner);
+  auto const data_size = std::accumulate(data_dims.cbegin(), data_dims.cend(),
+                                         1, std::multiplies<>());
+  auto const local_values =
+      (lattice_model.*getter)(local_lower_corner, local_upper_corner);
+  std::vector<std::vector<T>> nodes_values;
+  boost::mpi::gather(comm, local_values, nodes_values, 0);
+  if (comm.rank() == 0) {
+    auto const dims = slice_upper_corner - slice_lower_corner;
+    using index_range = boost::multi_array_types::index_range;
+    using array_type = boost::multi_array<T, 4>;
+    array_type array(boost::extents[dims[0]][dims[1]][dims[2]][data_size]);
+    // populate the 3D array with data from each node
+    for (std::size_t rank = 0; rank < nodes_values.size(); ++rank) {
+      if (nodes_values[rank].empty()) {
+        continue;
+      }
+      auto const range_lower_corner =
+          nodes_lower_corners[rank] - slice_lower_corner;
+      auto const range_upper_corner =
+          nodes_upper_corners[rank] - slice_lower_corner;
+      auto const local_range = [&](int j) {
+        return index_range(range_lower_corner[j], range_upper_corner[j]);
+      };
+      typename array_type::template array_view<4>::type view =
+          array[boost::indices[local_range(0)][local_range(1)][local_range(2)]
+                              [index_range()]];
+      detail::unflatten_grid(view, nodes_values[rank]);
+    }
+    // create the output flat array
+    std::vector<T> out;
+    detail::flatten_grid<FieldSerializer>(array, out, units_conversion);
+    std::vector<int> shape = {m_shape.begin(), m_shape.end()};
+    if (not(data_dims.size() == 1ul and data_dims[0] == 1)) {
+      shape.insert(shape.end(), data_dims.begin(), data_dims.end());
+    }
+    auto const variant = FieldSerializer::serialize(out);
+    return {std::vector<Variant>{{variant, Variant{shape}}}};
+  }
+  return {};
+}
+
+template <class FieldSerializer>
+template <class LatticeModel, typename T>
+void LatticeSlice<FieldSerializer>::scatter_3d(
+    VariantMap const &params, std::vector<int> const &data_dims,
+    LatticeModel &lattice_model,
+    void (LatticeModel::*setter)(Utils::Vector3i const &,
+                                 Utils::Vector3i const &,
+                                 std::vector<T> const &),
+    double units_conversion) {
+  auto const &comm = context()->get_comm();
+  auto const [slice_lower_corner, slice_upper_corner, local_lower_corner,
+              local_upper_corner] = get_slices_bounding_boxes();
+  auto const [nodes_lower_corners, nodes_upper_corners] =
+      gather_slices_topology(comm, local_lower_corner, local_upper_corner);
+  auto const data_size = std::accumulate(data_dims.cbegin(), data_dims.cend(),
+                                         1, std::multiplies<>());
+  auto const sentinel = get_sentinel_index(lattice_model.get_lattice());
+  std::vector<std::vector<T>> nodes_values(comm.size());
+  if (comm.rank() == 0) {
+    auto const values =
+        FieldSerializer::template deserialize<T>(params.at("values"));
+    auto const dims = slice_upper_corner - slice_lower_corner;
+    using index_range = boost::multi_array_types::index_range;
+    using array_type = boost::multi_array<T, 4>;
+    array_type array(boost::extents[dims[0]][dims[1]][dims[2]][data_size]);
+    // populate the 3D array from the input flat array
+    detail::unflatten_grid(array, values);
+    // partition the 3D array into individual flat arrays for each MPI rank
+    for (std::size_t rank = 0; rank < nodes_lower_corners.size(); ++rank) {
+      auto const range_lower = nodes_lower_corners[rank] - slice_lower_corner;
+      auto const range_upper = nodes_upper_corners[rank] - slice_lower_corner;
+      if (not(range_lower > Utils::Vector3i::broadcast(sentinel))) {
+        continue;
+      }
+      auto const local_range = [&](int j) {
+        return index_range(range_lower[j], range_upper[j]);
+      };
+      typename array_type::template array_view<4>::type view =
+          array[boost::indices[local_range(0)][local_range(1)][local_range(2)]
+                              [index_range()]];
+      detail::flatten_grid<FieldSerializer>(view, nodes_values[rank],
+                                            units_conversion);
+    }
+  }
+  std::vector<T> local_values;
+  boost::mpi::scatter(comm, nodes_values, local_values, 0);
+  (lattice_model.*setter)(local_lower_corner, local_upper_corner, local_values);
+  lattice_model.ghost_communication();
+}
+
+} // namespace ScriptInterface::walberla
diff --git a/src/script_interface/walberla/LatticeWalberla.hpp b/src/script_interface/walberla/LatticeWalberla.hpp
new file mode 100644
index 00000000000..2a0ba74861e
--- /dev/null
+++ b/src/script_interface/walberla/LatticeWalberla.hpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "core/grid.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+
+#include <cmath>
+#include <memory>
+#include <stdexcept>
+#include <string>
+
+namespace ScriptInterface::walberla {
+
+class LatticeWalberla : public AutoParameters<LatticeWalberla> {
+  std::shared_ptr<::LatticeWalberla> m_lattice;
+  double m_agrid;
+
+public:
+  LatticeWalberla() {
+    add_parameters({
+        {"agrid", AutoParameter::read_only, [this]() { return m_agrid; }},
+        {"n_ghost_layers", AutoParameter::read_only,
+         [this]() { return static_cast<int>(m_lattice->get_ghost_layers()); }},
+        {"shape", AutoParameter::read_only,
+         [this]() { return m_lattice->get_grid_dimensions(); }},
+    });
+  }
+
+  void do_construct(VariantMap const &args) override {
+    m_agrid = get_value<double>(args, "agrid");
+    auto const n_ghost_layers = get_value<int>(args, "n_ghost_layers");
+
+    context()->parallel_try_catch([&]() {
+      if (m_agrid <= 0.) {
+        throw std::domain_error("Parameter 'agrid' must be > 0");
+      }
+      if (n_ghost_layers < 0) {
+        throw std::domain_error("Parameter 'n_ghost_layers' must be >= 0");
+      }
+      auto const box_size = ::box_geo.length();
+      auto const grid_dim =
+          ::LatticeWalberla::calc_grid_dimensions(box_size, m_agrid);
+      m_lattice = std::make_shared<::LatticeWalberla>(
+          grid_dim, node_grid, static_cast<unsigned int>(n_ghost_layers));
+    });
+  }
+
+  std::shared_ptr<::LatticeWalberla> lattice() { return m_lattice; }
+  std::shared_ptr<const ::LatticeWalberla> lattice() const { return m_lattice; }
+};
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/VTKHandle.hpp b/src/script_interface/walberla/VTKHandle.hpp
new file mode 100644
index 00000000000..edcf6aa5dbb
--- /dev/null
+++ b/src/script_interface/walberla/VTKHandle.hpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include <walberla_bridge/LatticeModel.hpp>
+#include <walberla_bridge/VTKHandle.hpp>
+
+#include <script_interface/ScriptInterface.hpp>
+#include <script_interface/auto_parameters/AutoParameters.hpp>
+
+#include <boost/algorithm/string/join.hpp>
+
+#include <algorithm>
+#include <filesystem>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+template <class Field>
+class VTKHandleBase : public AutoParameters<VTKHandleBase<Field>> {
+private:
+  int m_delta_N;
+  int m_flag_obs;
+  std::string m_identifier;
+  std::string m_base_folder;
+  std::string m_prefix;
+  std::shared_ptr<::VTKHandle> m_vtk_handle;
+  std::weak_ptr<Field> m_field;
+  ::LatticeModel::units_map m_units;
+  std::vector<Variant> m_pending_arguments;
+
+  [[nodiscard]] auto get_vtk_uid() const {
+    return m_base_folder + '/' + m_identifier;
+  }
+
+  [[nodiscard]] std::shared_ptr<Field> get_field_instance() const {
+    if (auto const field = m_field.lock()) {
+      return field;
+    }
+    auto const has_lattice_expired = m_pending_arguments.empty();
+    auto const err_expired = "Attempted access to an expired lattice object";
+    auto const err_detached = "This VTK object isn't attached to a lattice";
+    throw std::runtime_error(has_lattice_expired ? err_expired : err_detached);
+  }
+
+  virtual std::unordered_map<std::string, int> const &get_obs_map() const = 0;
+
+  [[nodiscard]] auto get_valid_observable_names() const {
+    std::vector<std::string> names{};
+    for (auto const &kv : get_obs_map()) {
+      names.emplace_back(kv.first);
+    }
+    std::sort(names.begin(), names.end());
+    return names;
+  }
+
+  [[nodiscard]] int
+  deserialize_obs_flag(std::vector<std::string> const &names) const {
+    int flag{0};
+    auto const &obs_map = get_obs_map();
+    for (auto const &name : names) {
+      if (obs_map.count(name) == 0) {
+        auto const valid_names = get_valid_observable_names();
+        std::stringstream message;
+        message << "Only the following VTK observables are supported: ["
+                << "'" << boost::algorithm::join(valid_names, "', '") << "'"
+                << "], got '" << name << "'";
+        throw std::invalid_argument(message.str());
+      }
+      flag |= obs_map.at(name);
+    }
+    return flag;
+  }
+
+  [[nodiscard]] Variant serialize_obs_flag(int flag) const {
+    std::vector<Variant> observables{};
+    for (auto const &kv : get_obs_map()) {
+      if (flag & kv.second) {
+        observables.emplace_back(kv.first);
+      }
+    }
+    return observables;
+  }
+
+public:
+  VTKHandleBase() {
+    constexpr auto read_only = AutoParameter::read_only;
+    AutoParameters<VTKHandleBase<Field>>::add_parameters({
+        {"enabled", read_only, [this]() { return m_vtk_handle->enabled; }},
+        {"delta_N", read_only, [this]() { return m_delta_N; }},
+        {"vtk_uid", read_only, [this]() { return get_vtk_uid(); }},
+        {"identifier", read_only, [this]() { return m_identifier; }},
+        {"base_folder", read_only, [this]() { return m_base_folder; }},
+        {"prefix", read_only, [this]() { return m_prefix; }},
+        {"observables", read_only,
+         [this]() { return serialize_obs_flag(m_flag_obs); }},
+        {"execution_count", read_only,
+         [this]() { return m_vtk_handle->execution_count; }},
+        {"units", read_only,
+         [this]() { return make_unordered_map_of_variants(m_units); }},
+    });
+  }
+
+private:
+  void do_construct(VariantMap const &params) override {
+    m_delta_N = get_value<int>(params, "delta_N");
+    m_identifier = get_value<std::string>(params, "identifier");
+    m_base_folder = get_value<std::string>(params, "base_folder");
+    m_prefix = get_value<std::string>(params, "prefix");
+    auto const is_enabled = get_value<bool>(params, "enabled");
+    auto const execution_count = get_value<int>(params, "execution_count");
+    ObjectHandle::context()->parallel_try_catch([&]() {
+      m_flag_obs = deserialize_obs_flag(
+          get_value<std::vector<std::string>>(params, "observables"));
+      if (m_delta_N < 0) {
+        throw std::domain_error("Parameter 'delta_N' must be >= 0");
+      }
+      if (m_identifier.empty()) {
+        throw std::domain_error("Parameter 'identifier' cannot be empty");
+      }
+      if (m_identifier.find(std::filesystem::path::preferred_separator) !=
+          std::string::npos) {
+        throw std::invalid_argument(
+            "Parameter 'identifier' cannot be a filepath");
+      }
+    });
+    m_pending_arguments.emplace_back(is_enabled);
+    m_pending_arguments.emplace_back(execution_count);
+  }
+
+protected:
+  Variant do_call_method(std::string const &name,
+                         VariantMap const &params) override {
+    if (name == "enable") {
+      ObjectHandle::context()->parallel_try_catch([&]() {
+        if (m_delta_N == 0) {
+          throw std::runtime_error("Manual VTK callbacks cannot be enabled");
+        }
+        get_field_instance()->switch_vtk(get_vtk_uid(), true);
+      });
+      return {};
+    }
+    if (name == "disable") {
+      ObjectHandle::context()->parallel_try_catch([&]() {
+        if (m_delta_N == 0) {
+          throw std::runtime_error("Manual VTK callbacks cannot be disabled");
+        }
+        get_field_instance()->switch_vtk(get_vtk_uid(), false);
+      });
+      return {};
+    }
+    if (name == "write") {
+      ObjectHandle::context()->parallel_try_catch([&]() {
+        if (m_delta_N) {
+          throw std::runtime_error("Automatic VTK callbacks cannot be "
+                                   "triggered manually");
+        }
+        get_field_instance()->write_vtk(get_vtk_uid());
+      });
+      return {};
+    }
+    if (name == "get_valid_observable_names") {
+      return make_vector_of_variants(get_valid_observable_names());
+    }
+
+    return {};
+  }
+
+public:
+  void detach_from_lattice() { m_field.reset(); }
+
+  void attach_to_lattice(std::weak_ptr<Field> field,
+                         ::LatticeModel::units_map const &units) {
+    auto const was_attached_once = m_pending_arguments.empty();
+    if (not m_field.expired()) {
+      throw std::runtime_error("Cannot attach VTK object to multiple lattices");
+    }
+    if (was_attached_once) {
+      throw std::runtime_error("Detached VTK objects cannot be attached again");
+    }
+    assert(m_pending_arguments.size() == 2u);
+    auto const is_enabled = get_value<bool>(m_pending_arguments[0]);
+    auto const execution_count = get_value<int>(m_pending_arguments[1]);
+    m_units = units;
+    m_field = field;
+    auto instance = get_field_instance();
+    m_vtk_handle =
+        instance->create_vtk(m_delta_N, execution_count, m_flag_obs, m_units,
+                             m_identifier, m_base_folder, m_prefix);
+    if (m_delta_N and not is_enabled) {
+      instance->switch_vtk(get_vtk_uid(), false);
+    }
+    m_pending_arguments.clear();
+  }
+};
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/WalberlaCheckpoint.hpp b/src/script_interface/walberla/WalberlaCheckpoint.hpp
new file mode 100644
index 00000000000..182edb76807
--- /dev/null
+++ b/src/script_interface/walberla/WalberlaCheckpoint.hpp
@@ -0,0 +1,250 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "script_interface/Context.hpp"
+
+#include <utils/Vector.hpp>
+
+#include <boost/mpi/collectives/broadcast.hpp>
+
+#include <fstream>
+#include <ios>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace ScriptInterface::walberla {
+
+enum class CptMode : int {
+  ascii = 0,
+  binary = 1,
+  unit_test_runtime_error = -1,
+  unit_test_ios_failure = -2
+};
+
+/** Inject code for unit tests. */
+inline void unit_test_handle(int mode) {
+  switch (mode) {
+  case static_cast<int>(CptMode::ascii):
+  case static_cast<int>(CptMode::binary):
+    return;
+  case static_cast<int>(CptMode::unit_test_runtime_error):
+    throw std::runtime_error("unit test error");
+  case static_cast<int>(CptMode::unit_test_ios_failure):
+    throw std::ios_base::failure("unit test error");
+  default:
+    throw std::domain_error("Unknown mode " + std::to_string(mode));
+  }
+}
+
+/** Handle for a checkpoint file. */
+class CheckpointFile {
+private:
+  bool m_binary;
+
+public:
+  std::fstream stream;
+
+  CheckpointFile(std::string const &filename, std::ios_base::openmode mode,
+                 bool binary) {
+    m_binary = binary;
+    auto flags = mode;
+    if (m_binary)
+      flags |= std::ios_base::binary;
+    stream.open(filename, flags);
+  }
+
+  ~CheckpointFile() = default;
+
+  template <typename T> void write(T const &value) {
+    if (m_binary) {
+      stream.write(reinterpret_cast<const char *>(&value), sizeof(T));
+    } else {
+      stream << value << "\n";
+    }
+  }
+
+  template <typename T> void write(std::vector<T> const &vector) {
+    if (m_binary) {
+      stream.write(reinterpret_cast<const char *>(vector.data()),
+                   vector.size() * sizeof(T));
+    } else {
+      for (auto const &value : vector) {
+        stream << value << "\n";
+      }
+    }
+  }
+
+  template <typename T, std::size_t N>
+  void write(Utils::Vector<T, N> const &vector) {
+    if (m_binary) {
+      stream.write(reinterpret_cast<const char *>(vector.data()),
+                   N * sizeof(T));
+    } else {
+      stream << Utils::Vector<T, N>::formatter(" ") << vector << "\n";
+    }
+  }
+
+  template <typename T> void read(T &value) {
+    if (m_binary) {
+      stream.read(reinterpret_cast<char *>(&value), sizeof(T));
+    } else {
+      stream >> value;
+    }
+  }
+
+  template <typename T, std::size_t N> void read(Utils::Vector<T, N> &vector) {
+    if (m_binary) {
+      stream.read(reinterpret_cast<char *>(vector.data()), N * sizeof(T));
+    } else {
+      for (auto &value : vector) {
+        stream >> value;
+      }
+    }
+  }
+
+  template <typename T> void read(std::vector<T> &vector) {
+    if (m_binary) {
+      stream.read(reinterpret_cast<char *>(vector.data()),
+                  vector.size() * sizeof(T));
+    } else {
+      for (auto &value : vector) {
+        stream >> value;
+      }
+    }
+  }
+};
+
+template <typename F1, typename F2, typename F3>
+void load_checkpoint_common(Context const &context, std::string const classname,
+                            std::string const &filename, int mode,
+                            F1 const read_metadata, F2 const read_data,
+                            F3 const on_success) {
+  auto const err_msg =
+      std::string("Error while reading " + classname + " checkpoint: ");
+  auto const binary = mode == static_cast<int>(CptMode::binary);
+  auto const &comm = context.get_comm();
+  auto const is_head_node = context.is_head_node();
+
+  // open file and set exceptions
+  CheckpointFile cpfile(filename, std::ios_base::in, binary);
+  if (!cpfile.stream) {
+    if (is_head_node) {
+      throw std::runtime_error(err_msg + "could not open file " + filename);
+    }
+    return;
+  }
+  cpfile.stream.exceptions(std::ios_base::failbit | std::ios_base::badbit);
+
+  try {
+    read_metadata(cpfile);
+    read_data(cpfile);
+    comm.barrier();
+    on_success();
+    // check EOF
+    if (!binary) {
+      if (cpfile.stream.peek() == '\n') {
+        static_cast<void>(cpfile.stream.get());
+      }
+    }
+    if (cpfile.stream.peek() != EOF) {
+      throw std::runtime_error(err_msg + "extra data found, expected EOF.");
+    }
+  } catch (std::ios_base::failure const &) {
+    auto const eof_error = cpfile.stream.eof();
+    cpfile.stream.close();
+    if (eof_error) {
+      if (is_head_node) {
+        throw std::runtime_error(err_msg + "EOF found.");
+      }
+      return;
+    }
+    if (is_head_node) {
+      throw std::runtime_error(err_msg + "incorrectly formatted data.");
+    }
+    return;
+  } catch (std::runtime_error const &err) {
+    cpfile.stream.close();
+    if (is_head_node) {
+      throw std::runtime_error(err_msg + err.what());
+    }
+    return;
+  }
+}
+
+template <typename F1, typename F2, typename F3>
+void save_checkpoint_common(Context const &context, std::string const classname,
+                            std::string const &filename, int mode,
+                            F1 const write_metadata, F2 const write_data,
+                            F3 const on_failure) {
+  auto const err_msg =
+      std::string("Error while writing " + classname + " checkpoint: ");
+  auto const binary = mode == static_cast<int>(CptMode::binary);
+  auto const &comm = context.get_comm();
+  auto const is_head_node = context.is_head_node();
+
+  // open file and set exceptions
+  auto failure = false;
+  std::shared_ptr<CheckpointFile> cpfile;
+  if (is_head_node) {
+    cpfile =
+        std::make_shared<CheckpointFile>(filename, std::ios_base::out, binary);
+    failure = !cpfile->stream;
+    boost::mpi::broadcast(comm, failure, 0);
+    if (failure) {
+      throw std::runtime_error(err_msg + "could not open file " + filename);
+    }
+    cpfile->stream.exceptions(std::ios_base::failbit | std::ios_base::badbit);
+    if (!binary) {
+      cpfile->stream.precision(16);
+      cpfile->stream << std::fixed;
+    }
+  } else {
+    boost::mpi::broadcast(comm, failure, 0);
+    if (failure) {
+      return;
+    }
+  }
+
+  try {
+    write_metadata(cpfile, context);
+    write_data(cpfile, context);
+  } catch (std::exception const &error) {
+    on_failure(cpfile, context);
+    if (is_head_node) {
+      cpfile->stream.close();
+      if (dynamic_cast<std::ios_base::failure const *>(&error)) {
+        throw std::runtime_error(err_msg + "could not write to " + filename);
+      }
+      throw;
+    }
+  }
+}
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/walberla/initialize.cpp b/src/script_interface/walberla/initialize.cpp
new file mode 100644
index 00000000000..fde6f8d0d1c
--- /dev/null
+++ b/src/script_interface/walberla/initialize.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "LatticeWalberla.hpp"
+
+#include "LBFluid.hpp"
+#include "LBFluidNode.hpp"
+#include "LBFluidSlice.hpp"
+
+#include "EKContainer.hpp"
+#include "EKFFT.hpp"
+#include "EKNone.hpp"
+
+#include "EKSpecies.hpp"
+#include "EKSpeciesNode.hpp"
+#include "EKSpeciesSlice.hpp"
+
+#include "EKReactant.hpp"
+#include "EKReaction.hpp"
+#include "EKReactions.hpp"
+
+#include <script_interface/ObjectHandle.hpp>
+
+#include <utils/Factory.hpp>
+
+#ifdef WALBERLA_STATIC_ASSERT
+#error "waLberla headers should not be visible to the ESPResSo script interface"
+#endif
+
+namespace ScriptInterface::walberla {
+
+void initialize(Utils::Factory<ObjectHandle> *om) {
+  om->register_new<LatticeWalberla>("walberla::LatticeWalberla");
+
+  om->register_new<LBFluid>("walberla::LBFluid");
+  om->register_new<LBFluidNode>("walberla::LBFluidNode");
+  om->register_new<LBFluidSlice>("walberla::LBFluidSlice");
+  om->register_new<LBVTKHandle>("walberla::LBVTKHandle");
+
+  om->register_new<EKContainer>("walberla::EKContainer");
+  om->register_new<EKSpecies>("walberla::EKSpecies");
+  om->register_new<EKSpeciesNode>("walberla::EKSpeciesNode");
+  om->register_new<EKSpeciesSlice>("walberla::EKSpeciesSlice");
+#ifdef WALBERLA_FFT
+  om->register_new<EKFFT>("walberla::EKFFT");
+#endif // WALBERLA_FFT
+  om->register_new<EKNone>("walberla::EKNone");
+  om->register_new<EKVTKHandle>("walberla::EKVTKHandle");
+
+  om->register_new<EKReactant>("walberla::EKReactant");
+  om->register_new<EKBulkReaction>("walberla::EKBulkReaction");
+  om->register_new<EKIndexedReaction>("walberla::EKIndexedReaction");
+  om->register_new<EKReactions>("walberla::EKReactions");
+}
+
+} // namespace ScriptInterface::walberla
+
+#endif // WALBERLA
diff --git a/src/script_interface/lbboundaries/initialize.hpp b/src/script_interface/walberla/initialize.hpp
similarity index 67%
rename from src/script_interface/lbboundaries/initialize.hpp
rename to src/script_interface/walberla/initialize.hpp
index 083fd848510..0257406f3eb 100644
--- a/src/script_interface/lbboundaries/initialize.hpp
+++ b/src/script_interface/walberla/initialize.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015-2022 The ESPResSo project
+ * Copyright (C) 2021-2023 The ESPResSo project
  *
  * This file is part of ESPResSo.
  *
@@ -17,19 +17,14 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef SCRIPT_INTERFACE_LBBOUNDARIES_INITIALIZE_HPP
-#define SCRIPT_INTERFACE_LBBOUNDARIES_INITIALIZE_HPP
+#pragma once
 
-#include <script_interface/ObjectHandle.hpp>
+#include "config/config.hpp"
 
-#include <utils/Factory.hpp>
-
-namespace ScriptInterface {
-namespace LBBoundaries {
+#ifdef WALBERLA
 
+namespace ScriptInterface::walberla {
 void initialize(Utils::Factory<ObjectHandle> *om);
+} // namespace ScriptInterface::walberla
 
-} /* namespace LBBoundaries */
-} /* namespace ScriptInterface */
-
-#endif
+#endif // WALBERLA
diff --git a/src/shapes/CMakeLists.txt b/src/shapes/CMakeLists.txt
index 6d71e19cfa0..85a04f4743e 100644
--- a/src/shapes/CMakeLists.txt
+++ b/src/shapes/CMakeLists.txt
@@ -20,8 +20,8 @@
 add_library(
   espresso_shapes SHARED
   src/HollowConicalFrustum.cpp src/Cylinder.cpp src/Ellipsoid.cpp
-  src/Rhomboid.cpp src/SimplePore.cpp src/Slitpore.cpp src/Sphere.cpp
-  src/SpheroCylinder.cpp src/Torus.cpp src/Wall.cpp)
+  src/Rhomboid.cpp src/Shape.cpp src/SimplePore.cpp src/Slitpore.cpp
+  src/Sphere.cpp src/SpheroCylinder.cpp src/Torus.cpp src/Wall.cpp)
 add_library(espresso::shapes ALIAS espresso_shapes)
 set_target_properties(espresso_shapes PROPERTIES CXX_CLANG_TIDY
                                                  "${ESPRESSO_CXX_CLANG_TIDY}")
diff --git a/src/shapes/include/shapes/Shape.hpp b/src/shapes/include/shapes/Shape.hpp
index ba4b9e60932..2068150bc44 100644
--- a/src/shapes/include/shapes/Shape.hpp
+++ b/src/shapes/include/shapes/Shape.hpp
@@ -52,6 +52,15 @@ class Shape {
     calculate_dist(pos, dist, vec);
     return dist <= 0.0;
   }
+  /**
+   * @brief Rasterize a shape on a regular grid.
+   * @param grid_size     Number of grid points in every direction.
+   * @param grid_spacing  %Lattice distance.
+   * @param grid_offset   %Lattice offset.
+   * @return Flattened 3D matrix with 1's inside the shape and 0's outside.
+   */
+  std::vector<int> rasterize(Utils::Vector3i const &grid_size,
+                             double grid_spacing, double grid_offset) const;
   virtual ~Shape() = default;
 };
 
diff --git a/src/shapes/src/Shape.cpp b/src/shapes/src/Shape.cpp
new file mode 100644
index 00000000000..5050e9c000f
--- /dev/null
+++ b/src/shapes/src/Shape.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <shapes/Shape.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <boost/multi_array.hpp>
+
+#include <vector>
+
+namespace Shapes {
+std::vector<int> Shape::rasterize(Utils::Vector3i const &grid_size,
+                                  double grid_spacing,
+                                  double grid_offset) const {
+  boost::multi_array<int, 3> raster(grid_size);
+  for (int i = 0; i < grid_size[0]; ++i) {
+    for (int j = 0; j < grid_size[1]; ++j) {
+      for (int k = 0; k < grid_size[2]; ++k) {
+        auto const pos = Utils::Vector3d{{(i + grid_offset) * grid_spacing,
+                                          (j + grid_offset) * grid_spacing,
+                                          (k + grid_offset) * grid_spacing}};
+        raster[i][j][k] = is_inside(pos);
+      }
+    }
+  }
+  return {raster.data(), raster.data() + raster.num_elements()};
+}
+} // namespace Shapes
diff --git a/src/shapes/unit_tests/Wall_test.cpp b/src/shapes/unit_tests/Wall_test.cpp
index e81c5cec0f8..7c5a6d5f0d1 100644
--- a/src/shapes/unit_tests/Wall_test.cpp
+++ b/src/shapes/unit_tests/Wall_test.cpp
@@ -53,3 +53,50 @@ BOOST_AUTO_TEST_CASE(dist_function) {
     }
   }
 }
+
+BOOST_AUTO_TEST_CASE(rasterize_function) {
+  {
+    Shapes::Wall shape;
+    shape.set_normal(Utils::Vector3d{1., 0., 0.});
+    shape.d() = 1.0;
+    auto const agrid = 1.0;
+
+    auto const raster = shape.rasterize({5, 5, 5}, agrid, 0.5);
+    for (int i = 0; i < 25; ++i) {
+      BOOST_REQUIRE_EQUAL(raster[i], 1);
+    }
+    for (int i = 25; i < 125; ++i) {
+      BOOST_REQUIRE_EQUAL(raster[i], 0);
+    }
+  }
+  // edge case: wall right before the second slice of LB nodes
+  {
+    Shapes::Wall shape;
+    shape.set_normal(Utils::Vector3d{1., 0., 0.});
+    shape.d() = 1.49999999;
+    auto const agrid = 1.0;
+
+    auto const raster = shape.rasterize({5, 5, 5}, agrid, 0.5);
+    for (int i = 0; i < 25; ++i) {
+      BOOST_REQUIRE_EQUAL(raster[i], 1);
+    }
+    for (int i = 25; i < 125; ++i) {
+      BOOST_REQUIRE_EQUAL(raster[i], 0);
+    }
+  }
+  // edge case: wall right on the second slice of LB nodes
+  {
+    Shapes::Wall shape;
+    shape.set_normal(Utils::Vector3d{1., 0., 0.});
+    shape.d() = 1.50000000;
+    auto const agrid = 1.0;
+
+    auto const raster = shape.rasterize({5, 5, 5}, agrid, 0.5);
+    for (int i = 0; i < 2 * 25; ++i) {
+      BOOST_REQUIRE_EQUAL(raster[i], 1);
+    }
+    for (int i = 2 * 25; i < 125; ++i) {
+      BOOST_REQUIRE_EQUAL(raster[i], 0);
+    }
+  }
+}
diff --git a/src/utils/include/utils/Vector.hpp b/src/utils/include/utils/Vector.hpp
index 9f12e6d7a05..288e459b83c 100644
--- a/src/utils/include/utils/Vector.hpp
+++ b/src/utils/include/utils/Vector.hpp
@@ -158,7 +158,6 @@ using Vector3d = VectorXd<3>;
 using Vector4d = VectorXd<4>;
 using Vector6d = VectorXd<6>;
 using Vector9d = VectorXd<9>;
-using Vector19d = VectorXd<19>;
 
 template <std::size_t N> using VectorXf = Vector<float, N>;
 using Vector3f = VectorXf<3>;
diff --git a/src/walberla_bridge/CMakeLists.txt b/src/walberla_bridge/CMakeLists.txt
new file mode 100644
index 00000000000..78040cab5f8
--- /dev/null
+++ b/src/walberla_bridge/CMakeLists.txt
@@ -0,0 +1,64 @@
+#
+# Copyright (C) 2020-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+add_library(espresso_walberla SHARED)
+add_library(espresso::walberla ALIAS espresso_walberla)
+
+set_target_properties(espresso_walberla PROPERTIES CXX_CLANG_TIDY "")
+target_include_directories(espresso_walberla PUBLIC include)
+
+add_library(espresso_walberla_cpp_flags INTERFACE)
+set_target_properties(espresso_walberla_cpp_flags PROPERTIES CXX_CLANG_TIDY "")
+add_library(espresso::walberla::cpp_flags ALIAS espresso_walberla_cpp_flags)
+if(ESPRESSO_BUILD_WITH_WALBERLA_AVX)
+  target_link_libraries(espresso_walberla_cpp_flags
+                        INTERFACE espresso::avx_flags)
+endif()
+install(TARGETS espresso_walberla
+        LIBRARY DESTINATION ${ESPRESSO_INSTALL_PYTHON}/espressomd)
+
+if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA)
+  espresso_add_gpu_library(espresso_walberla_cuda SHARED)
+  add_library(espresso::walberla_cuda ALIAS espresso_walberla_cuda)
+  target_link_libraries(espresso_walberla_cuda PRIVATE CUDA::cuda_driver
+                                                       CUDA::cudart)
+  target_link_libraries(espresso_walberla_cuda PUBLIC espresso::utils
+                        PRIVATE ${WALBERLA_LIBS})
+  target_include_directories(espresso_walberla_cuda PUBLIC include)
+  target_include_directories(
+    espresso_walberla_cuda PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
+    PRIVATE ${WALBERLA_INCLUDE_DIRS} ${walberla_BINARY_DIR}/src)
+  install(TARGETS espresso_walberla_cuda
+          LIBRARY DESTINATION ${ESPRESSO_INSTALL_PYTHON}/espressomd)
+  target_link_libraries(espresso_walberla PUBLIC espresso::walberla_cuda)
+endif()
+
+target_link_libraries(
+  espresso_walberla PUBLIC MPI::MPI_CXX espresso::utils
+  PRIVATE Boost::boost espresso::cpp_flags espresso::walberla::cpp_flags
+          ${WALBERLA_LIBS})
+target_include_directories(
+  espresso_walberla PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
+  PRIVATE ${WALBERLA_INCLUDE_DIRS} ${walberla_BINARY_DIR}/src)
+
+add_subdirectory(src)
+
+if(ESPRESSO_BUILD_TESTS)
+  add_subdirectory(tests)
+endif()
diff --git a/src/walberla_bridge/include/walberla_bridge/Architecture.hpp b/src/walberla_bridge/include/walberla_bridge/Architecture.hpp
new file mode 100644
index 00000000000..b7939279cce
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/Architecture.hpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+namespace lbmpy {
+enum class Arch { CPU, GPU };
+}
diff --git a/src/walberla_bridge/include/walberla_bridge/BlockAndCell.hpp b/src/walberla_bridge/include/walberla_bridge/BlockAndCell.hpp
new file mode 100644
index 00000000000..1ad71b75b5e
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/BlockAndCell.hpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2020-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <blockforest/StructuredBlockForest.h>
+#include <core/DataTypes.h>
+#include <core/cell/Cell.h>
+#include <domain_decomposition/IBlock.h>
+
+#include "LatticeWalberla.hpp"
+
+#include <boost/optional.hpp>
+
+#include <memory>
+
+namespace walberla {
+// Helpers to retrieve blocks and cells
+struct BlockAndCell {
+  IBlock *block;
+  Cell cell;
+};
+
+template <typename T>
+IBlock *get_block_extended(std::shared_ptr<StructuredBlockForest> const &blocks,
+                           Utils::Vector<T, 3> const &pos,
+                           unsigned int n_ghost_layers) {
+  for (auto block = blocks->begin(); block != blocks->end(); ++block) {
+    if (block->getAABB()
+            .getExtended(real_c(n_ghost_layers))
+            .contains(real_c(pos[0]), real_c(pos[1]), real_c(pos[2]))) {
+      return &(*block);
+    }
+  }
+  // Cell not in local blocks
+  return nullptr;
+}
+
+inline boost::optional<BlockAndCell>
+get_block_and_cell(::LatticeWalberla const &lattice,
+                   Utils::Vector3i const &node, bool consider_ghost_layers) {
+  // Get block and local cell
+  auto const blocks = lattice.get_blocks();
+  Cell global_cell{uint_c(node[0]), uint_c(node[1]), uint_c(node[2])};
+  auto block = blocks->getBlock(global_cell, 0);
+  // Return if we don't have the cell
+  if (consider_ghost_layers and !block) {
+    // Try to find a block which has the cell as ghost layer
+    block = get_block_extended(blocks, node, lattice.get_ghost_layers());
+  }
+  if (!block)
+    return {boost::none};
+
+  // Transform coords to block local
+  Cell local_cell;
+  blocks->transformGlobalToBlockLocalCell(local_cell, *block, global_cell);
+  return {{block, local_cell}};
+}
+
+inline IBlock *get_block(::LatticeWalberla const &lattice,
+                         Utils::Vector3d const &pos,
+                         bool consider_ghost_layers) {
+  // Get block
+  auto const blocks = lattice.get_blocks();
+  auto block = blocks->getBlock(real_c(pos[0]), real_c(pos[1]), real_c(pos[2]));
+  if (consider_ghost_layers and !block) {
+    block = get_block_extended(blocks, pos, lattice.get_ghost_layers());
+  }
+  return block;
+}
+
+} // namespace walberla
diff --git a/src/walberla_bridge/include/walberla_bridge/LatticeModel.hpp b/src/walberla_bridge/include/walberla_bridge/LatticeModel.hpp
new file mode 100644
index 00000000000..52826ed1ee3
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/LatticeModel.hpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/VTKHandle.hpp>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+/** @brief Abstract representation of a lattice-based model. */
+class LatticeModel {
+public:
+  using units_map = std::unordered_map<std::string, double>;
+
+protected:
+  /** VTK writers that are executed automatically */
+  std::map<std::string, std::shared_ptr<VTKHandle>> m_vtk_auto;
+  /** VTK writers that are executed manually */
+  std::map<std::string, std::shared_ptr<VTKHandle>> m_vtk_manual;
+
+  /** Register VTK writers. Use the multi-piece uniform grid format. */
+  virtual void register_vtk_field_writers(walberla::vtk::VTKOutput &vtk_obj,
+                                          units_map const &units_conversion,
+                                          int flag_observables) = 0;
+
+  virtual void
+  register_vtk_field_filters(walberla::vtk::VTKOutput &vtk_obj) = 0;
+
+  virtual void integrate_vtk_writers() = 0;
+
+public:
+  virtual ~LatticeModel() = default;
+
+  /** @brief Get the underlying lattice. */
+  virtual LatticeWalberla const &get_lattice() const noexcept = 0;
+
+  /** @brief Create a VTK observable.
+   *
+   *  @param delta_N          Write frequency, if 0 write a single frame,
+   *                          otherwise add a callback to write every
+   *                          @p delta_N EK steps to a new file
+   *  @param initial_count    Initial execution count
+   *  @param flag_observables Which observables to measure (OR'ing of
+   *                          @ref OutputVTK or @ref EKOutputVTK values)
+   *  @param units_conversion Lattice-to-MD units conversion
+   *  @param identifier       Name of the VTK dataset
+   *  @param base_folder      Path to the VTK folder
+   *  @param prefix           Prefix of the VTK files
+   */
+  std::shared_ptr<VTKHandle>
+  create_vtk(int delta_N, int initial_count, int flag_observables,
+             units_map const &units_conversion, std::string const &identifier,
+             std::string const &base_folder, std::string const &prefix);
+
+  /** @brief Write a VTK observable to disk.
+   *
+   *  @param vtk_uid          Name of the VTK object
+   */
+  void write_vtk(std::string const &vtk_uid);
+
+  /** @brief Toggle a VTK observable on/off.
+   *
+   *  @param vtk_uid          Name of the VTK object
+   *  @param status           @c true to switch on, @c false to switch off
+   */
+  void switch_vtk(std::string const &vtk_uid, bool status);
+};
diff --git a/src/walberla_bridge/include/walberla_bridge/LatticeWalberla.hpp b/src/walberla_bridge/include/walberla_bridge/LatticeWalberla.hpp
new file mode 100644
index 00000000000..da5c4e1c8bf
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/LatticeWalberla.hpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <utils/Vector.hpp>
+
+#include <cassert>
+#include <cmath>
+#include <initializer_list>
+#include <memory>
+#include <utility>
+
+namespace walberla::blockforest {
+// forward declare
+class StructuredBlockForest;
+} // namespace walberla::blockforest
+
+/** Class that runs and controls the BlockForest in waLBerla. */
+class LatticeWalberla {
+public:
+  using Lattice_T = walberla::blockforest::StructuredBlockForest;
+
+private:
+  Utils::Vector3i m_grid_dimensions;
+  unsigned int m_n_ghost_layers;
+
+  /** Block forest */
+  std::shared_ptr<Lattice_T> m_blocks;
+
+public:
+  LatticeWalberla(Utils::Vector3i const &grid_dimensions,
+                  Utils::Vector3i const &node_grid,
+                  unsigned int n_ghost_layers);
+
+  // Grid, domain, halo
+  [[nodiscard]] auto get_ghost_layers() const { return m_n_ghost_layers; }
+  [[nodiscard]] auto get_grid_dimensions() const { return m_grid_dimensions; }
+  [[nodiscard]] auto get_blocks() const { return m_blocks; }
+  [[nodiscard]] std::pair<Utils::Vector3d, Utils::Vector3d>
+  get_local_domain() const;
+  [[nodiscard]] auto get_local_grid_range() const {
+    auto const conversion = [](Utils::Vector3d const &pos) -> Utils::Vector3i {
+      auto const dim =
+          Utils::Vector3i{{static_cast<int>(pos[0]), static_cast<int>(pos[1]),
+                           static_cast<int>(pos[2])}};
+#ifndef NDEBUG
+      for (auto const i : {0u, 1u, 2u}) {
+        assert(std::abs(static_cast<double>(dim[i]) - pos[i]) < 1e-10);
+      }
+#endif
+      return dim;
+    };
+    auto const [lower_corner, upper_corner] = get_local_domain();
+    return std::make_pair(conversion(lower_corner), conversion(upper_corner));
+  }
+
+  [[nodiscard]] bool node_in_local_domain(Utils::Vector3i const &node) const;
+  [[nodiscard]] bool node_in_local_halo(Utils::Vector3i const &node) const;
+  [[nodiscard]] bool pos_in_local_domain(Utils::Vector3d const &pos) const;
+  [[nodiscard]] bool pos_in_local_halo(Utils::Vector3d const &pos) const;
+  [[nodiscard]] static Utils::Vector3i
+  calc_grid_dimensions(Utils::Vector3d const &box_size, double agrid);
+};
diff --git a/src/walberla_bridge/include/walberla_bridge/VTKHandle.hpp b/src/walberla_bridge/include/walberla_bridge/VTKHandle.hpp
new file mode 100644
index 00000000000..40f20e78ca7
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/VTKHandle.hpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2020-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/utils/ResourceManager.hpp>
+#include <walberla_bridge/walberla_init.hpp>
+
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+namespace walberla::vtk {
+// forward declare
+class VTKOutput;
+} // namespace walberla::vtk
+
+/** @brief Handle to a VTK object */
+class VTKHandle {
+  std::unique_ptr<ResourceManager> m_vtk_resources_lock;
+
+public:
+  VTKHandle(std::shared_ptr<walberla::vtk::VTKOutput> sp, int ec, bool en)
+      : ptr(std::move(sp)), execution_count(ec), enabled(en) {
+    m_vtk_resources_lock = ::walberla::get_vtk_dependent_resources();
+  }
+  ~VTKHandle() {
+    // vtk objects must be cleared *before* the MPI resources can be freed,
+    // because file handles need to closed on all ranks
+    ptr.reset();
+    m_vtk_resources_lock.reset();
+  }
+
+  std::shared_ptr<walberla::vtk::VTKOutput> ptr;
+  int execution_count;
+  bool enabled;
+};
+
+/** @brief LB statistics to write to VTK files */
+enum class OutputVTK : int {
+  density = 1 << 0,
+  velocity_vector = 1 << 1,
+  pressure_tensor = 1 << 2,
+};
+
+/** @brief EK statistics to write to VTK files */
+enum class EKOutputVTK : int {
+  density = 1 << 0,
+};
+
+class vtk_runtime_error : public std::runtime_error {
+public:
+  explicit vtk_runtime_error(std::string const &vtk_uid,
+                             std::string const &reason)
+      : std::runtime_error("VTKOutput object '" + vtk_uid + "' " + reason) {}
+};
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/EKContainer.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/EKContainer.hpp
new file mode 100644
index 00000000000..83a5b861c84
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/EKContainer.hpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/PoissonSolver/PoissonSolver.hpp>
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+template <class EKSpecies> class EKContainer {
+  using container_type = std::vector<std::shared_ptr<EKSpecies>>;
+
+public:
+  using value_type = typename container_type::value_type;
+  using iterator = typename container_type::iterator;
+  using const_iterator = typename container_type::const_iterator;
+
+private:
+  container_type m_ekcontainer;
+  double m_tau{};
+  std::shared_ptr<walberla::PoissonSolver> m_poisson_solver;
+
+  bool lattice_equal(LatticeWalberla const &lhs,
+                     LatticeWalberla const &rhs) const {
+    return (lhs.get_ghost_layers() == rhs.get_ghost_layers()) and
+           (lhs.get_grid_dimensions() == rhs.get_grid_dimensions());
+  }
+
+  void sanity_checks(std::shared_ptr<EKSpecies> const &new_ek_epecies) const {
+    if (m_tau == 0.) {
+      throw std::runtime_error("EKContainer parameter 'tau' needs to be set");
+    }
+    if (is_poisson_solver_set()) {
+      if (not lattice_equal(new_ek_epecies->get_lattice(),
+                            m_poisson_solver->get_lattice())) {
+        throw std::runtime_error("EKSpecies lattice incompatible with existing "
+                                 "Poisson solver lattice");
+      }
+    }
+    if (not m_ekcontainer.empty()) {
+      auto const &old_ek_species = m_ekcontainer.front();
+      if (not lattice_equal(new_ek_epecies->get_lattice(),
+                            old_ek_species->get_lattice())) {
+        throw std::runtime_error(
+            "EKSpecies lattice incompatible with existing EKSpecies lattice");
+      }
+    }
+  }
+
+  void sanity_checks(
+      std::shared_ptr<walberla::PoissonSolver> const &new_solver) const {
+    if (not m_ekcontainer.empty()) {
+      auto const &old_ek_species = m_ekcontainer.front();
+      if (not lattice_equal(new_solver->get_lattice(),
+                            old_ek_species->get_lattice())) {
+        throw std::runtime_error("Poisson solver lattice incompatible with "
+                                 "existing EKSpecies lattice");
+      }
+    }
+  }
+
+public:
+  void add(std::shared_ptr<EKSpecies> const &ek_species) {
+    assert(std::find(m_ekcontainer.begin(), m_ekcontainer.end(), ek_species) ==
+           m_ekcontainer.end());
+
+    sanity_checks(ek_species);
+    m_ekcontainer.emplace_back(ek_species);
+  }
+
+  void remove(std::shared_ptr<EKSpecies> const &ek_species) {
+    assert(std::find(m_ekcontainer.begin(), m_ekcontainer.end(), ek_species) !=
+           m_ekcontainer.end());
+    m_ekcontainer.erase(
+        std::remove(m_ekcontainer.begin(), m_ekcontainer.end(), ek_species),
+        m_ekcontainer.end());
+  }
+
+  iterator begin() noexcept { return m_ekcontainer.begin(); }
+  iterator end() noexcept { return m_ekcontainer.end(); }
+  const_iterator begin() const noexcept { return m_ekcontainer.begin(); }
+  const_iterator end() const noexcept { return m_ekcontainer.end(); }
+  [[nodiscard]] bool empty() const noexcept { return m_ekcontainer.empty(); }
+
+  void
+  set_poisson_solver(std::shared_ptr<walberla::PoissonSolver> const &solver) {
+    if (solver != nullptr) {
+      sanity_checks(solver);
+    }
+    m_poisson_solver = solver;
+  }
+
+  [[nodiscard]] bool is_poisson_solver_set() const noexcept {
+    return m_poisson_solver != nullptr;
+  }
+
+  [[nodiscard]] double get_tau() const noexcept { return m_tau; }
+
+  void set_tau(double tau) noexcept { m_tau = tau; }
+
+  void reset_charge() const { m_poisson_solver->reset_charge_field(); }
+
+  void add_charge(std::size_t const id, double valency,
+                  bool is_double_precision) const {
+    m_poisson_solver->add_charge_to_field(id, valency, is_double_precision);
+  }
+
+  void solve_poisson() const { m_poisson_solver->solve(); }
+
+  [[nodiscard]] std::size_t get_potential_field_id() const {
+    return m_poisson_solver->get_potential_field_id();
+  }
+};
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/EKWalberlaNodeState.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/EKWalberlaNodeState.hpp
new file mode 100644
index 00000000000..6c4987c5418
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/EKWalberlaNodeState.hpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <utils/Vector.hpp>
+
+/** Checkpoint data for a EK node. */
+struct EKWalberlaNodeState {
+  double density;
+  bool is_boundary_density;
+  double density_boundary;
+  bool is_boundary_flux;
+  Utils::Vector3d flux_boundary;
+
+private:
+  friend boost::serialization::access;
+  template <typename Archive>
+  void serialize(Archive &ar, long int /* version */) {
+    ar &density &is_boundary_density &density_boundary &is_boundary_flux
+        &flux_boundary;
+  }
+};
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/EKinWalberlaBase.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/EKinWalberlaBase.hpp
new file mode 100644
index 00000000000..baa6a65cbab
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/EKinWalberlaBase.hpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/LatticeModel.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <boost/optional.hpp>
+
+#include <cstddef>
+#include <vector>
+
+/** @brief Interface of a lattice-based electrokinetic model. */
+class EKinWalberlaBase : public LatticeModel {
+public:
+  /** @brief Integrate EKin for one time step */
+  virtual void integrate(std::size_t potential_id, std::size_t velocity_id,
+                         std::size_t force_id) = 0;
+
+  /** @brief perform ghost communication of densities */
+  virtual void ghost_communication() = 0;
+
+  /** @brief Number of discretized fluxes */
+  [[nodiscard]] virtual std::size_t stencil_size() const noexcept = 0;
+
+  /** @brief Set node density. */
+  virtual bool set_node_density(Utils::Vector3i const &node,
+                                double density) = 0;
+
+  /** @brief Get node density. */
+  [[nodiscard]] virtual boost::optional<double>
+  get_node_density(Utils::Vector3i const &node,
+                   bool consider_ghosts = false) const = 0;
+
+  /** @brief Set slice density. */
+  virtual void set_slice_density(Utils::Vector3i const &lower_corner,
+                                 Utils::Vector3i const &upper_corner,
+                                 std::vector<double> const &density) = 0;
+
+  /** @brief Get slice density. */
+  [[nodiscard]] virtual std::vector<double>
+  get_slice_density(Utils::Vector3i const &lower_corner,
+                    Utils::Vector3i const &upper_corner) const = 0;
+
+  /** @brief Set node flux boundary conditions. */
+  virtual bool set_node_flux_boundary(Utils::Vector3i const &node,
+                                      Utils::Vector3d const &flux) = 0;
+
+  /** @brief Get node flux boundary conditions. */
+  [[nodiscard]] virtual boost::optional<Utils::Vector3d>
+  get_node_flux_at_boundary(Utils::Vector3i const &node,
+                            bool consider_ghosts = false) const = 0;
+
+  /** @brief Set slice flux boundary conditions. */
+  virtual void set_slice_flux_boundary(
+      Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner,
+      std::vector<boost::optional<Utils::Vector3d>> const &flux) = 0;
+
+  /** @brief Get slice flux boundary conditions. */
+  [[nodiscard]] virtual std::vector<boost::optional<Utils::Vector3d>>
+  get_slice_flux_at_boundary(Utils::Vector3i const &lower_corner,
+                             Utils::Vector3i const &upper_corner) const = 0;
+
+  virtual bool remove_node_from_flux_boundary(Utils::Vector3i const &node) = 0;
+
+  /** @brief Set node density boundary conditions. */
+  virtual bool set_node_density_boundary(Utils::Vector3i const &node,
+                                         double density) = 0;
+
+  /** @brief Get node density boundary conditions. */
+  [[nodiscard]] virtual boost::optional<double>
+  get_node_density_at_boundary(Utils::Vector3i const &node,
+                               bool consider_ghosts = false) const = 0;
+
+  /** @brief Set slice density boundary conditions. */
+  virtual void set_slice_density_boundary(
+      Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner,
+      std::vector<boost::optional<double>> const &density) = 0;
+
+  /** @brief Get slice density boundary conditions. */
+  [[nodiscard]] virtual std::vector<boost::optional<double>>
+  get_slice_density_at_boundary(Utils::Vector3i const &lower_corner,
+                                Utils::Vector3i const &upper_corner) const = 0;
+
+  virtual bool
+  remove_node_from_density_boundary(Utils::Vector3i const &node) = 0;
+
+  /** @brief Check if node has flux boundary conditions. */
+  [[nodiscard]] virtual boost::optional<bool>
+  get_node_is_flux_boundary(Utils::Vector3i const &node,
+                            bool consider_ghosts = false) const = 0;
+
+  /** @brief Check if node has density boundary conditions. */
+  [[nodiscard]] virtual boost::optional<bool>
+  get_node_is_density_boundary(Utils::Vector3i const &node,
+                               bool consider_ghosts = false) const = 0;
+
+  /** @brief Check if node has any boundary conditions. */
+  [[nodiscard]] virtual boost::optional<bool>
+  get_node_is_boundary(Utils::Vector3i const &node,
+                       bool consider_ghosts = false) const = 0;
+
+  /** @brief Check if slice has any boundary conditions. */
+  [[nodiscard]] virtual std::vector<bool>
+  get_slice_is_boundary(Utils::Vector3i const &lower_corner,
+                        Utils::Vector3i const &upper_corner) const = 0;
+
+  virtual void clear_flux_boundaries() = 0;
+  virtual void clear_density_boundaries() = 0;
+
+  virtual void update_flux_boundary_from_shape(std::vector<int> const &,
+                                               std::vector<double> const &) = 0;
+  virtual void
+  update_density_boundary_from_shape(std::vector<int> const &,
+                                     std::vector<double> const &) = 0;
+
+  // Global parameters
+  [[nodiscard]] virtual double get_diffusion() const noexcept = 0;
+  [[nodiscard]] virtual double get_kT() const noexcept = 0;
+  [[nodiscard]] virtual double get_valency() const noexcept = 0;
+  [[nodiscard]] virtual bool get_advection() const noexcept = 0;
+  [[nodiscard]] virtual bool get_friction_coupling() const noexcept = 0;
+  [[nodiscard]] virtual Utils::Vector3d get_ext_efield() const noexcept = 0;
+  [[nodiscard]] virtual bool is_double_precision() const noexcept = 0;
+
+  virtual void set_diffusion(double diffusion) = 0;
+  virtual void set_kT(double kT) = 0;
+  virtual void set_valency(double valency) = 0;
+  virtual void set_advection(bool advection) = 0;
+  virtual void set_friction_coupling(bool friction_coupling) = 0;
+  virtual void set_ext_efield(Utils::Vector3d const &field) = 0;
+
+  [[nodiscard]] virtual std::size_t get_density_id() const noexcept = 0;
+
+  ~EKinWalberlaBase() override = default;
+};
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/FFT.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/FFT.hpp
new file mode 100644
index 00000000000..a4e7025dbed
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/FFT.hpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "PoissonSolver.hpp"
+
+#include <blockforest/communication/UniformBufferedScheme.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <fft/Fft.h>
+#include <field/AddToStorage.h>
+#include <field/GhostLayerField.h>
+#include <field/communication/PackInfo.h>
+#include <stencil/D3Q27.h>
+
+#include <utils/constants.hpp>
+
+#include <cmath>
+#include <cstddef>
+#include <memory>
+#include <utility>
+
+namespace walberla {
+
+template <typename FloatType> class FFT : public PoissonSolver {
+private:
+  template <typename T> FloatType FloatType_c(T t) {
+    return numeric_cast<FloatType>(t);
+  }
+
+  domain_decomposition::BlockDataID m_potential_field_id;
+
+  using PotentialField = GhostLayerField<FloatType, 1>;
+
+  std::shared_ptr<fft::FourierTransform<PotentialField>> m_ft;
+  std::shared_ptr<blockforest::StructuredBlockForest> m_blocks;
+
+  using FullCommunicator = blockforest::communication::UniformBufferedScheme<
+      typename stencil::D3Q27>;
+  std::shared_ptr<FullCommunicator> m_full_communication;
+
+public:
+  FFT(std::shared_ptr<LatticeWalberla> lattice, double permittivity)
+      : PoissonSolver(std::move(lattice), permittivity) {
+    m_blocks = get_lattice().get_blocks();
+
+    Vector3<uint_t> dim(m_blocks->getNumberOfXCells(),
+                        m_blocks->getNumberOfYCells(),
+                        m_blocks->getNumberOfZCells());
+    auto const greens = [dim](uint_t x, uint_t y, uint_t z) -> real_t {
+      if (x == 0u && y == 0u && z == 0u)
+        return 0.;
+      return -0.5 /
+             (std::cos(2. * Utils::pi() * real_c(x) / real_c(dim[0])) +
+              std::cos(2. * Utils::pi() * real_c(y) / real_c(dim[1])) +
+              std::cos(2. * Utils::pi() * real_c(z) / real_c(dim[2])) - 3.) /
+             real_c(dim[0] * dim[1] * dim[2]);
+    };
+
+    m_potential_field_id = field::addToStorage<PotentialField>(
+        get_lattice().get_blocks(), "potential field", 0.0, field::fzyx,
+        get_lattice().get_ghost_layers());
+
+    m_ft = std::make_shared<fft::FourierTransform<PotentialField>>(
+        m_blocks, m_potential_field_id, greens);
+
+    m_full_communication =
+        std::make_shared<FullCommunicator>(get_lattice().get_blocks());
+    m_full_communication->addPackInfo(
+        std::make_shared<field::communication::PackInfo<PotentialField>>(
+            m_potential_field_id));
+  }
+  ~FFT() override = default;
+
+  void reset_charge_field() override {
+    // the FFT-solver re-uses the potential field for the charge
+    auto const potential_id = walberla::BlockDataID(get_potential_field_id());
+
+    for (auto &block : *get_lattice().get_blocks()) {
+      auto field = block.template getData<PotentialField>(potential_id);
+      WALBERLA_FOR_ALL_CELLS_XYZ(field, field->get(x, y, z) = 0.;)
+    }
+  }
+
+  void add_charge_to_field(std::size_t id, double valency,
+                           bool is_double_precision) override {
+    auto const factor = FloatType_c(valency) / FloatType_c(get_permittivity());
+    // the FFT-solver re-uses the potential field for the charge
+    const auto charge_id = walberla::BlockDataID(get_potential_field_id());
+    const auto density_id = walberla::BlockDataID(id);
+    for (auto &block : *get_lattice().get_blocks()) {
+      auto charge_field = block.template getData<PotentialField>(charge_id);
+      if (is_double_precision) {
+        auto density_field =
+            block.template getData<walberla::GhostLayerField<double, 1>>(
+                density_id);
+        WALBERLA_FOR_ALL_CELLS_XYZ(
+            charge_field, charge_field->get(x, y, z) +=
+                          factor * FloatType_c(density_field->get(x, y, z));)
+      } else {
+        auto density_field =
+            block.template getData<walberla::GhostLayerField<float, 1>>(
+                density_id);
+        WALBERLA_FOR_ALL_CELLS_XYZ(
+            charge_field, charge_field->get(x, y, z) +=
+                          factor * FloatType_c(density_field->get(x, y, z));)
+      }
+    }
+  }
+
+  [[nodiscard]] std::size_t get_potential_field_id() const noexcept override {
+    return static_cast<std::size_t>(m_potential_field_id);
+  }
+
+  void solve() override {
+    (*m_ft)();
+    ghost_communication();
+  }
+
+private:
+  void ghost_communication() { (*m_full_communication)(); }
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/None.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/None.hpp
new file mode 100644
index 00000000000..0e49841ed47
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/None.hpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "PoissonSolver.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+
+#include <field/AddToStorage.h>
+#include <field/GhostLayerField.h>
+
+#include <cstddef>
+#include <memory>
+#include <utility>
+
+namespace walberla {
+
+template <typename FloatType> class None : public PoissonSolver {
+private:
+  BlockDataID m_potential_field_id;
+
+  using PotentialField = GhostLayerField<FloatType, 1>;
+
+public:
+  explicit None(std::shared_ptr<LatticeWalberla> lattice)
+      : PoissonSolver(std::move(lattice), 0.0) {
+    m_potential_field_id = field::addToStorage<PotentialField>(
+        get_lattice().get_blocks(), "potential field", 0.0, field::fzyx,
+        get_lattice().get_ghost_layers());
+  }
+  ~None() override = default;
+
+  void reset_charge_field() override {}
+  void add_charge_to_field(std::size_t, double, bool) override {}
+
+  [[nodiscard]] std::size_t get_potential_field_id() const noexcept override {
+    return m_potential_field_id;
+  }
+
+  void solve() override {}
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/PoissonSolver.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/PoissonSolver.hpp
new file mode 100644
index 00000000000..b74fbd1918b
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/PoissonSolver/PoissonSolver.hpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+
+#include <cstddef>
+#include <memory>
+#include <utility>
+
+namespace walberla {
+
+class PoissonSolver {
+private:
+  std::shared_ptr<LatticeWalberla> m_lattice;
+  double m_permittivity;
+
+public:
+  PoissonSolver(std::shared_ptr<LatticeWalberla> lattice, double permittivity)
+      : m_lattice(std::move(lattice)), m_permittivity(permittivity) {}
+  virtual ~PoissonSolver() = default;
+
+  virtual void reset_charge_field() = 0;
+
+  virtual void add_charge_to_field(std::size_t id, double valency,
+                                   bool is_double_precision) = 0;
+
+  [[nodiscard]] virtual std::size_t get_potential_field_id() const noexcept = 0;
+
+  void set_permittivity(double permittivity) noexcept {
+    m_permittivity = permittivity;
+  }
+
+  [[nodiscard]] double get_permittivity() const noexcept {
+    return m_permittivity;
+  }
+
+  [[nodiscard]] auto const &get_lattice() const noexcept { return *m_lattice; }
+
+  virtual void solve() = 0;
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp
new file mode 100644
index 00000000000..f0a7a2db619
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+
+#include "PoissonSolver/PoissonSolver.hpp"
+
+#include <memory>
+
+std::shared_ptr<walberla::PoissonSolver>
+new_ek_poisson_fft(std::shared_ptr<LatticeWalberla> const &lattice,
+                   double permittivity, bool single_precision);
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp
new file mode 100644
index 00000000000..af7d318989f
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+
+#include "PoissonSolver/PoissonSolver.hpp"
+
+#include <memory>
+
+std::shared_ptr<walberla::PoissonSolver>
+new_ek_poisson_none(std::shared_ptr<LatticeWalberla> const &lattice,
+                    bool single_precision);
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp
new file mode 100644
index 00000000000..fb700df86ce
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "EKinWalberlaBase.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <memory>
+
+std::shared_ptr<EKinWalberlaBase>
+new_ek_walberla(std::shared_ptr<LatticeWalberla> const &lattice,
+                double diffusion, double kT, double valency,
+                Utils::Vector3d ext_efield, double density, bool advection,
+                bool friction_coupling, bool single_precision);
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactant.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactant.hpp
new file mode 100644
index 00000000000..fb45b3fed06
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactant.hpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/electrokinetics/EKinWalberlaBase.hpp>
+
+#include <memory>
+#include <utility>
+
+namespace walberla {
+
+class EKReactant {
+private:
+  std::shared_ptr<EKinWalberlaBase> m_ekspecies;
+  double m_stoech_coeff;
+  double m_order;
+
+public:
+  EKReactant(std::shared_ptr<EKinWalberlaBase> ekspecies, double stoech_coeff,
+             double order)
+      : m_ekspecies(std::move(ekspecies)), m_stoech_coeff(stoech_coeff),
+        m_order(order) {}
+
+  void set_stoech_coefficient(double stoech_coeff) noexcept {
+    m_stoech_coeff = stoech_coeff;
+  }
+
+  [[nodiscard]] double get_stoech_coeff() const noexcept {
+    return m_stoech_coeff;
+  }
+
+  void set_order(double order) noexcept { m_order = order; }
+
+  [[nodiscard]] double get_order() const noexcept { return m_order; }
+
+  void set_species(std::shared_ptr<EKinWalberlaBase> ekspecies) noexcept {
+    m_ekspecies = std::move(ekspecies);
+  }
+
+  [[nodiscard]] auto get_species() const noexcept { return m_ekspecies; }
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp
new file mode 100644
index 00000000000..bdfb8c1eb52
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "EKReactant.hpp"
+#include <walberla_bridge/LatticeWalberla.hpp>
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+namespace walberla {
+
+class EKReactionBase {
+private:
+  std::vector<std::shared_ptr<EKReactant>> m_reactants;
+  double m_coefficient;
+
+  std::shared_ptr<LatticeWalberla> m_lattice;
+
+public:
+  EKReactionBase(std::shared_ptr<LatticeWalberla> lattice,
+                 std::vector<std::shared_ptr<EKReactant>> reactants,
+                 double coefficient)
+      : m_reactants(std::move(reactants)), m_coefficient(coefficient),
+        m_lattice(std::move(lattice)) {}
+
+  virtual ~EKReactionBase() = default;
+
+  void set_coefficient(double coefficient) noexcept {
+    m_coefficient = coefficient;
+  }
+  [[nodiscard]] double get_coefficient() const noexcept {
+    return m_coefficient;
+  }
+  [[nodiscard]] auto get_lattice() const noexcept { return m_lattice; }
+  [[nodiscard]] auto get_reactants() const noexcept { return m_reactants; }
+
+  virtual void perform_reaction() = 0;
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp b/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp
new file mode 100644
index 00000000000..c3299ef0a18
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+/**
+ * @file
+ * @ref LBWalberlaBase provides the public interface of the LB
+ * waLBerla bridge. It relies on type erasure to hide the waLBerla
+ * implementation details from the ESPResSo core. It is implemented
+ * by @ref walberla::LBWalberlaImpl.
+ */
+
+#include <walberla_bridge/LatticeModel.hpp>
+#include <walberla_bridge/lattice_boltzmann/LeesEdwardsPack.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <boost/optional.hpp>
+
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+/** @brief Interface of a lattice-based fluid model. */
+class LBWalberlaBase : public LatticeModel {
+public:
+  ~LBWalberlaBase() override = default;
+
+  /** @brief Integrate LB for one time step. */
+  virtual void integrate() = 0;
+
+  /** @brief Perform ghost communication of PDF and applied forces. */
+  virtual void ghost_communication() = 0;
+
+  /** @brief Number of discretized velocities in the PDF. */
+  virtual std::size_t stencil_size() const noexcept = 0;
+
+  /** @brief Whether kernels use double-precision floating point numbers. */
+  [[nodiscard]] virtual bool is_double_precision() const noexcept = 0;
+
+  /** @brief Get interpolated velocities at a position. */
+  virtual boost::optional<Utils::Vector3d>
+  get_velocity_at_pos(Utils::Vector3d const &position,
+                      bool consider_points_in_halo = false) const = 0;
+
+  /** @brief Get interpolated densities at a position. */
+  virtual boost::optional<double> get_interpolated_density_at_pos(
+      Utils::Vector3d const &position,
+      bool consider_points_in_halo = false) const = 0;
+
+  /**
+   * @brief Interpolate a force to the stored forces to be applied on nodes
+   * in the next time step.
+   */
+  virtual bool add_force_at_pos(Utils::Vector3d const &position,
+                                Utils::Vector3d const &force) = 0;
+
+  /** @brief Get stored force to be applied on node in the next time step. */
+  virtual boost::optional<Utils::Vector3d>
+  get_node_force_to_be_applied(Utils::Vector3i const &node) const = 0;
+
+  /** @brief Get stored force that was applied on node in the last time step. */
+  virtual boost::optional<Utils::Vector3d>
+  get_node_last_applied_force(Utils::Vector3i const &node,
+                              bool consider_ghosts = false) const = 0;
+
+  /** @brief Set stored force that was applied on node in the last time step. */
+  virtual bool set_node_last_applied_force(Utils::Vector3i const &node,
+                                           Utils::Vector3d const &force) = 0;
+
+  /** @brief Get stored force that was applied on slice in the last time step.
+   */
+  virtual std::vector<double>
+  get_slice_last_applied_force(Utils::Vector3i const &lower_corner,
+                               Utils::Vector3i const &upper_corner) const = 0;
+
+  /** @brief Set stored force that was applied on slice in the last time step.
+   */
+  virtual void
+  set_slice_last_applied_force(Utils::Vector3i const &lower_corner,
+                               Utils::Vector3i const &upper_corner,
+                               std::vector<double> const &force) = 0;
+
+  /** @brief Get node population. */
+  virtual boost::optional<std::vector<double>>
+  get_node_population(Utils::Vector3i const &node,
+                      bool consider_ghosts = false) const = 0;
+
+  /** @brief Set node population. */
+  virtual bool set_node_population(Utils::Vector3i const &node,
+                                   std::vector<double> const &population) = 0;
+
+  /** @brief Get slice population. */
+  virtual std::vector<double>
+  get_slice_population(Utils::Vector3i const &lower_corner,
+                       Utils::Vector3i const &upper_corner) const = 0;
+
+  /** @brief Set slice population. */
+  virtual void set_slice_population(Utils::Vector3i const &lower_corner,
+                                    Utils::Vector3i const &upper_corner,
+                                    std::vector<double> const &population) = 0;
+
+  /** @brief Get node velocity. */
+  virtual boost::optional<Utils::Vector3d>
+  get_node_velocity(Utils::Vector3i const &node,
+                    bool consider_ghosts = false) const = 0;
+
+  /** @brief Set node velocity. */
+  virtual bool set_node_velocity(Utils::Vector3i const &node,
+                                 Utils::Vector3d const &v) = 0;
+
+  /** @brief Get slice velocity. */
+  virtual std::vector<double>
+  get_slice_velocity(Utils::Vector3i const &lower_corner,
+                     Utils::Vector3i const &upper_corner) const = 0;
+
+  /** @brief Set slice velocity. */
+  virtual void set_slice_velocity(Utils::Vector3i const &lower_corner,
+                                  Utils::Vector3i const &upper_corner,
+                                  std::vector<double> const &velocity) = 0;
+
+  /** @brief Get node density. */
+  virtual boost::optional<double>
+  get_node_density(Utils::Vector3i const &node,
+                   bool consider_ghosts = false) const = 0;
+
+  /** @brief Set node density. */
+  virtual bool set_node_density(Utils::Vector3i const &node,
+                                double density) = 0;
+
+  /** @brief Get slice density. */
+  virtual std::vector<double>
+  get_slice_density(Utils::Vector3i const &lower_corner,
+                    Utils::Vector3i const &upper_corner) const = 0;
+
+  /** @brief Set slice density. */
+  virtual void set_slice_density(Utils::Vector3i const &lower_corner,
+                                 Utils::Vector3i const &upper_corner,
+                                 std::vector<double> const &density) = 0;
+
+  /** @brief Get node velocity boundary conditions. */
+  virtual boost::optional<Utils::Vector3d>
+  get_node_velocity_at_boundary(Utils::Vector3i const &node,
+                                bool consider_ghosts = false) const = 0;
+
+  /** @brief Set node velocity boundary conditions. */
+  virtual bool
+  set_node_velocity_at_boundary(Utils::Vector3i const &node,
+                                Utils::Vector3d const &velocity) = 0;
+
+  /** @brief Get slice velocity boundary conditions. */
+  virtual std::vector<boost::optional<Utils::Vector3d>>
+  get_slice_velocity_at_boundary(Utils::Vector3i const &lower_corner,
+                                 Utils::Vector3i const &upper_corner) const = 0;
+
+  /** @brief Set slice velocity boundary conditions. */
+  virtual void set_slice_velocity_at_boundary(
+      Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner,
+      std::vector<boost::optional<Utils::Vector3d>> const &velocity) = 0;
+
+  /** @brief Get (stored) force applied on node due to boundary condition. */
+  virtual boost::optional<Utils::Vector3d>
+  get_node_boundary_force(Utils::Vector3i const &node) const = 0;
+
+  /** @brief Remove a node from the boundaries. */
+  virtual bool remove_node_from_boundary(Utils::Vector3i const &node) = 0;
+
+  /** @brief Check if node has velocity boundary conditions. */
+  virtual boost::optional<bool>
+  get_node_is_boundary(Utils::Vector3i const &node,
+                       bool consider_ghosts = false) const = 0;
+
+  /** @brief Check if slice has velocity boundary conditions. */
+  virtual std::vector<bool>
+  get_slice_is_boundary(Utils::Vector3i const &lower_corner,
+                        Utils::Vector3i const &upper_corner) const = 0;
+
+  /** @brief Rebuild the UBB field. This is an expensive operation. */
+  virtual void reallocate_ubb_field() = 0;
+
+  /** @brief Clear the boundary flag field and the UBB field. */
+  virtual void clear_boundaries() = 0;
+
+  /** @brief Update boundary conditions from a rasterized shape. */
+  virtual void update_boundary_from_shape(std::vector<int> const &,
+                                          std::vector<double> const &) = 0;
+
+  /** @brief Configure the default collision model. */
+  virtual void set_collision_model(double kT, unsigned int seed) = 0;
+
+  /** @brief Configure a thermalized collision model for Lees-Edwards. */
+  virtual void
+  set_collision_model(std::unique_ptr<LeesEdwardsPack> &&lees_edwards_pack) = 0;
+
+  /** @brief Check Lees-Edwards boundary conditions. */
+  virtual void check_lebc(unsigned int shear_direction,
+                          unsigned int shear_plane_normal) const = 0;
+
+  /** @brief Get node pressure tensor. */
+  virtual boost::optional<Utils::VectorXd<9>>
+  get_node_pressure_tensor(Utils::Vector3i const &node) const = 0;
+
+  /** @brief Get slice pressure tensor. */
+  virtual std::vector<double>
+  get_slice_pressure_tensor(Utils::Vector3i const &lower_corner,
+                            Utils::Vector3i const &upper_corner) const = 0;
+
+  /** @brief Calculate average pressure tensor of the local domain. */
+  virtual Utils::VectorXd<9> get_pressure_tensor() const = 0;
+
+  /** @brief Calculate momentum of the local domain. */
+  virtual Utils::Vector3d get_momentum() const = 0;
+
+  /** @brief Set a global external force. */
+  virtual void set_external_force(Utils::Vector3d const &ext_force) = 0;
+
+  /** @brief Get the global external force. */
+  virtual Utils::Vector3d get_external_force() const noexcept = 0;
+
+  /** @brief Set the fluid viscosity. */
+  virtual void set_viscosity(double viscosity) = 0;
+
+  /** @brief Get the fluid viscosity. */
+  virtual double get_viscosity() const noexcept = 0;
+
+  /** @brief Get the fluid density. */
+  virtual double get_density() const noexcept = 0;
+
+  /** @brief Get the fluid temperature (if thermalized). */
+  virtual double get_kT() const noexcept = 0;
+
+  /** @brief Set the RNG counter (if thermalized). */
+  [[nodiscard]] virtual boost::optional<uint64_t> get_rng_state() const = 0;
+
+  /** @brief Set the rng state of thermalized LBs */
+  virtual void set_rng_state(uint64_t counter) = 0;
+
+  /** @brief get the velocity field id */
+  [[nodiscard]] virtual std::size_t get_velocity_field_id() const noexcept = 0;
+
+  /** @brief get the force field id */
+  [[nodiscard]] virtual std::size_t get_force_field_id() const noexcept = 0;
+};
diff --git a/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaNodeState.hpp b/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaNodeState.hpp
new file mode 100644
index 00000000000..020ffa2cd34
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaNodeState.hpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <utils/Vector.hpp>
+
+#include <vector>
+
+/** Checkpoint data for a LB node. */
+struct LBWalberlaNodeState {
+  std::vector<double> populations;
+  Utils::Vector3d last_applied_force;
+  Utils::Vector3d slip_velocity;
+  bool is_boundary;
+
+private:
+  friend boost::serialization::access;
+  template <typename Archive>
+  void serialize(Archive &ar, long int /* version */) {
+    ar &populations &last_applied_force &slip_velocity &is_boundary;
+  }
+};
diff --git a/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LeesEdwardsPack.hpp b/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LeesEdwardsPack.hpp
new file mode 100644
index 00000000000..aed9d5360d7
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LeesEdwardsPack.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <functional>
+#include <utility>
+
+/** Pack Lees-Edwards parameters for LB. */
+struct LeesEdwardsPack {
+  LeesEdwardsPack(unsigned int shear_direction, unsigned int shear_plane_normal,
+                  std::function<double()> get_pos_offset,
+                  std::function<double()> get_shear_velocity)
+      : shear_direction(shear_direction),
+        shear_plane_normal(shear_plane_normal),
+        get_pos_offset(std::move(get_pos_offset)),
+        get_shear_velocity(std::move(get_shear_velocity)) {}
+  unsigned int shear_direction;
+  unsigned int shear_plane_normal;
+  std::function<double()> get_pos_offset;
+  std::function<double()> get_shear_velocity;
+};
diff --git a/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp b/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp
new file mode 100644
index 00000000000..cf3177797f0
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "LBWalberlaBase.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+
+#include <memory>
+
+std::shared_ptr<LBWalberlaBase>
+new_lb_walberla(std::shared_ptr<LatticeWalberla> const &lattice,
+                double viscosity, double density, bool single_precision);
diff --git a/src/walberla_bridge/include/walberla_bridge/utils/ResourceManager.hpp b/src/walberla_bridge/include/walberla_bridge/utils/ResourceManager.hpp
new file mode 100644
index 00000000000..356b48557b3
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/utils/ResourceManager.hpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <list>
+#include <memory>
+#include <stack>
+
+/**
+ * @brief Manager to control the lifetime of shared resources.
+ *
+ * Resources that need to be available globally, for example
+ * via singletons, need to expire after all objects that depend
+ * on them have already expired.
+ * When static objects reside in different translation units, they
+ * can expire in any order, potentially creating race conditions
+ * if one static object relies on the other it its destructor.
+ *
+ * This class "locks" resources by storing a shared pointer to them,
+ * ensuring that the resources lifetime is extended by the lifetime
+ * of the class instance. Client code can then keep this class
+ * instance alive until the resources are no longer needed, at which
+ * point the class instance can be released. Type erasure hides
+ * implementation details of the resources being locked.
+ *
+ * Multiple resources can be locked, using a LIFO (last-in, first-out)
+ * container to ensure that resources are freed in a controlled order.
+ * This design choice avoids undefined behavior due to race conditions,
+ * which may occur if one global resource's destruction depends on the
+ * existence of another global resource. This behavior cannot be achieved
+ * with STL containers like @p std::stack or @p std::vector, since the
+ * destruction order of the stored data is under-specified.
+ */
+class ResourceManager {
+  class ResourceLock {
+  public:
+    virtual ~ResourceLock() = default;
+  };
+
+  template <typename T> class ResourceLockImpl : public ResourceLock {
+    std::shared_ptr<T> m_resource;
+
+  public:
+    explicit ResourceLockImpl(std::shared_ptr<T> const &resource)
+        : m_resource(resource) {}
+    ~ResourceLockImpl() override { m_resource.reset(); }
+  };
+
+  template <typename T> using LifoList = std::stack<T, std::list<T>>;
+
+  LifoList<std::unique_ptr<ResourceLock>> m_resources;
+
+public:
+  ResourceManager() = default;
+
+  ~ResourceManager() {
+    while (not m_resources.empty()) {
+      m_resources.pop();
+    }
+  }
+
+  template <typename T> void acquire_lock(std::shared_ptr<T> resource) {
+    m_resources.emplace(std::make_unique<ResourceLockImpl<T>>(resource));
+  }
+};
diff --git a/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp b/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp
new file mode 100644
index 00000000000..98150003f75
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2020-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "walberla_utils.hpp"
+
+#include <utils/Vector.hpp>
+
+#include <boost/multi_array/multi_array_ref.hpp>
+
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <vector>
+
+namespace walberla {
+
+inline std::vector<Utils::Vector3d>
+fill_3D_vector_array(std::vector<double> const &vec_flat,
+                     Utils::Vector3i const &grid_size) {
+  auto const n_grid_points =
+      static_cast<std::size_t>(Utils::product(grid_size));
+  assert(vec_flat.size() == 3u * n_grid_points or vec_flat.size() == 3u);
+  std::vector<Utils::Vector3d> output_vector;
+  output_vector.reserve(3u * n_grid_points);
+
+  auto const vec_begin = std::begin(vec_flat);
+  auto const vec_end = std::end(vec_flat);
+  if (vec_flat.size() == 3u) {
+    auto const uniform_vector = Utils::Vector3d(vec_begin, vec_end);
+    output_vector.assign(n_grid_points, uniform_vector);
+  } else {
+    output_vector.reserve(n_grid_points);
+    for (auto it = vec_begin; it < vec_end; it += 3u) {
+      output_vector.emplace_back(Utils::Vector3d(it, it + 3u));
+    }
+  }
+
+  return output_vector;
+}
+
+inline std::vector<double>
+fill_3D_scalar_array(std::vector<double> const &vec_flat,
+                     Utils::Vector3i const &grid_size) {
+  auto const n_grid_points =
+      static_cast<std::size_t>(Utils::product(grid_size));
+  assert(vec_flat.size() == n_grid_points or vec_flat.size() == 1u);
+  std::vector<double> output_vector;
+  output_vector.reserve(n_grid_points);
+
+  auto const vec_begin = std::begin(vec_flat);
+  auto const vec_end = std::end(vec_flat);
+  if (vec_flat.size() == 1u) {
+    auto const uniform_value = vec_flat[0];
+    output_vector.assign(n_grid_points, uniform_value);
+  } else {
+    output_vector.assign(vec_begin, vec_end);
+  }
+
+  return output_vector;
+}
+
+template <class BoundaryModel, class DataType>
+void set_boundary_from_grid(BoundaryModel &boundary,
+                            LatticeWalberla const &lattice,
+                            std::vector<int> const &raster_flat,
+                            std::vector<DataType> const &data_flat) {
+  // reshape grids
+  auto const grid_size = lattice.get_grid_dimensions();
+  assert(raster_flat.size() == Utils::product(grid_size));
+  boost::const_multi_array_ref<DataType, 3> data_grid(data_flat.data(),
+                                                      grid_size);
+  boost::const_multi_array_ref<int, 3> raster(raster_flat.data(), grid_size);
+
+  auto const &blocks = lattice.get_blocks();
+  for (auto block = blocks->begin(); block != blocks->end(); ++block) {
+    auto const [size_i, size_j, size_k] = boundary.block_dims(*block);
+    auto const offset = lattice.get_local_grid_range().first;
+    auto const off_i = offset[0];
+    auto const off_j = offset[1];
+    auto const off_k = offset[2];
+    // Get field data which knows about the indices
+    // In the loop, x,y,z are in block-local coordinates
+    auto const n_ghost_layers = lattice.get_ghost_layers();
+    auto const ghosts = static_cast<int>(n_ghost_layers);
+    for (int i = off_i - ghosts; i < size_i + off_i + ghosts; ++i) {
+      for (int j = off_j - ghosts; j < size_j + off_j + ghosts; ++j) {
+        for (int k = off_k - ghosts; k < size_k + off_k + ghosts; ++k) {
+          auto const node = Utils::Vector3i{{i, j, k}};
+          auto const idx = (node + grid_size) % grid_size;
+          if (raster(idx)) {
+            auto const bc = get_block_and_cell(lattice, node, true);
+            boundary.set_node_value_at_boundary(node, data_grid(idx), *bc);
+          }
+        }
+      }
+    }
+  }
+}
+
+} // namespace walberla
diff --git a/src/walberla_bridge/include/walberla_bridge/utils/walberla_utils.hpp b/src/walberla_bridge/include/walberla_bridge/utils/walberla_utils.hpp
new file mode 100644
index 00000000000..6f196cb57aa
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/utils/walberla_utils.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2020-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+#include <core/math/Matrix3.h>
+#include <core/math/Vector3.h>
+
+#include <utils/Vector.hpp>
+#include <utils/interpolation/bspline_3d.hpp>
+
+namespace walberla {
+
+template <typename T, typename U = T> inline U es2walberla(T v) {
+  return numeric_cast<U>(v);
+}
+template <> inline Vector3<float> es2walberla(Utils::Vector3d const v) {
+  return Vector3<float>{numeric_cast<float>(v[0]), numeric_cast<float>(v[1]),
+                        numeric_cast<float>(v[2])};
+}
+template <> inline Vector3<double> es2walberla(Utils::Vector3d const v) {
+  return Vector3<double>{v[0], v[1], v[2]};
+}
+
+template <typename T> inline T walberla2es(T v) { return v; }
+inline Utils::Vector3d walberla2es(Vector3<float> const v) {
+  return Utils::Vector3d{double_c(v[0]), double_c(v[1]), double_c(v[2])};
+}
+inline Utils::Vector3d walberla2es(Vector3<double> const v) {
+  return Utils::Vector3d{v[0], v[1], v[2]};
+}
+
+// Vector conversion helpers
+inline Utils::Vector3d to_vector3d(Vector3<float> const &v) {
+  return {double_c(v[0]), double_c(v[1]), double_c(v[2])};
+}
+inline Utils::Vector3d to_vector3d(Vector3<double> const &v) {
+  return {v[0], v[1], v[2]};
+}
+template <typename FloatType>
+inline Vector3<FloatType> to_vector3(Utils::Vector3d const &v) {
+  return Vector3<FloatType>{numeric_cast<FloatType>(v[0]),
+                            numeric_cast<FloatType>(v[1]),
+                            numeric_cast<FloatType>(v[2])};
+}
+inline Utils::VectorXd<9> to_vector9d(Matrix3<double> const &m) {
+  return {m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]};
+}
+inline Utils::VectorXd<9> to_vector9d(Matrix3<float> const &m) {
+  return {double_c(m[0]), double_c(m[1]), double_c(m[2]),
+          double_c(m[3]), double_c(m[4]), double_c(m[5]),
+          double_c(m[6]), double_c(m[7]), double_c(m[8])};
+}
+
+template <typename Function>
+void interpolate_bspline_at_pos(Utils::Vector3d const &pos, Function const &f) {
+  Utils::Interpolation::bspline_3d<2>(
+      pos, f, Utils::Vector3d::broadcast(1.), // grid spacing
+      Utils::Vector3d::broadcast(.5));        // offset
+}
+
+} // namespace walberla
diff --git a/src/walberla_bridge/include/walberla_bridge/walberla_init.hpp b/src/walberla_bridge/include/walberla_bridge/walberla_init.hpp
new file mode 100644
index 00000000000..c99379e1a2c
--- /dev/null
+++ b/src/walberla_bridge/include/walberla_bridge/walberla_init.hpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/utils/ResourceManager.hpp>
+
+#include <memory>
+
+namespace walberla {
+
+/** @brief Initialize waLBerla's MPI manager. */
+void mpi_init();
+
+/** @brief Get a lock on waLBerla's global resources for VTK. */
+std::unique_ptr<ResourceManager> get_vtk_dependent_resources();
+
+} // namespace walberla
diff --git a/src/walberla_bridge/src/BoundaryHandling.hpp b/src/walberla_bridge/src/BoundaryHandling.hpp
new file mode 100644
index 00000000000..4d004de9f89
--- /dev/null
+++ b/src/walberla_bridge/src/BoundaryHandling.hpp
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/BlockAndCell.hpp>
+#include <walberla_bridge/utils/walberla_utils.hpp>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <field/FlagField.h>
+
+#include <utils/Vector.hpp>
+
+#include <cassert>
+#include <functional>
+#include <memory>
+#include <tuple>
+#include <unordered_map>
+
+namespace walberla {
+
+/// Flag for domain cells, i.e. all cells
+FlagUID const Domain_flag("domain");
+/// Flag for boundary cells
+FlagUID const Boundary_flag("boundary");
+
+template <typename T, typename BoundaryClass> class BoundaryHandling {
+  /** Container for the map between cells and values. */
+  class DynamicValueCallback {
+  public:
+    DynamicValueCallback() {
+      m_value_boundary = std::make_shared<std::unordered_map<Cell, T>>();
+    }
+
+    [[nodiscard]] T operator()(
+        Cell const &local,
+        std::shared_ptr<blockforest::StructuredBlockForest> const &blocks,
+        IBlock &block) const {
+      Cell global;
+      blocks->transformBlockLocalToGlobalCell(global, block, local);
+      return get_value(global);
+    }
+
+    template <typename U>
+    void set_node_boundary_value(Utils::Vector3i const &node, U const &val) {
+      auto const global = Cell(node[0], node[1], node[2]);
+      (*m_value_boundary)[global] = es2walberla<U, T>(val);
+    }
+
+    void unset_node_boundary_value(Utils::Vector3i const &node) {
+      auto const global = Cell(node[0], node[1], node[2]);
+      assert(m_value_boundary->count(global));
+      m_value_boundary->erase(global);
+    }
+
+    [[nodiscard]] auto
+    get_node_boundary_value(Utils::Vector3i const &node) const {
+      auto const global = Cell(node[0], node[1], node[2]);
+      return walberla2es(get_value(global));
+    }
+
+    bool node_is_boundary(Utils::Vector3i const &node) const {
+      auto const global = Cell(node[0], node[1], node[2]);
+      return m_value_boundary->count(global) != 0;
+    }
+
+  private:
+    std::shared_ptr<std::unordered_map<Cell, T>> m_value_boundary;
+    static constexpr T default_value{};
+
+    [[nodiscard]] T get_value(Cell const &cell) const {
+      if (m_value_boundary->count(cell) == 0) {
+        return default_value;
+      }
+      return m_value_boundary->at(cell);
+    }
+  };
+
+  [[nodiscard]] inline auto get_flag_field_and_flag(IBlock *block) const {
+    auto const flag_field =
+        block->template uncheckedFastGetData<FlagField>(m_flag_field_id);
+    auto const boundary_flag = flag_field->getFlag(Boundary_flag);
+    return std::make_tuple(flag_field, boundary_flag);
+  }
+
+public:
+  using value_type = T;
+  using FlagField = field::FlagField<uint8_t>;
+
+  BoundaryHandling(std::shared_ptr<StructuredBlockForest> blocks,
+                   BlockDataID value_field_id, BlockDataID flag_field_id)
+      : m_blocks(std::move(blocks)), m_value_field_id(value_field_id),
+        m_flag_field_id(flag_field_id), m_callback(DynamicValueCallback()),
+        m_pending_changes(false) {
+    // reinitialize the flag field
+    for (auto b = m_blocks->begin(); b != m_blocks->end(); ++b) {
+      flag_reset_kernel(&*b);
+    }
+    // instantiate the boundary sweep
+    std::function callback = m_callback;
+    m_boundary =
+        std::make_shared<BoundaryClass>(m_blocks, m_value_field_id, callback);
+  }
+
+  void operator()(IBlock *block) { (*m_boundary)(block); }
+
+  [[nodiscard]] bool node_is_boundary(Utils::Vector3i const &node) const {
+    return m_callback.node_is_boundary(node);
+  }
+
+  [[nodiscard]] auto
+  get_node_value_at_boundary(Utils::Vector3i const &node) const {
+    return m_callback.get_node_boundary_value(node);
+  }
+
+  template <typename U>
+  void set_node_value_at_boundary(Utils::Vector3i const &node, U const &v,
+                                  BlockAndCell const &bc) {
+    auto [flag_field, boundary_flag] = get_flag_field_and_flag(bc.block);
+    m_callback.set_node_boundary_value(node, v);
+    flag_field->addFlag(bc.cell, boundary_flag);
+    m_pending_changes = true;
+  }
+
+  void remove_node_from_boundary(Utils::Vector3i const &node,
+                                 BlockAndCell const &bc) {
+    auto [flag_field, boundary_flag] = get_flag_field_and_flag(bc.block);
+    m_callback.unset_node_boundary_value(node);
+    flag_field->removeFlag(bc.cell, boundary_flag);
+    m_pending_changes = true;
+  }
+
+  /** Assign boundary conditions to boundary cells. */
+  void boundary_update() {
+    if (m_pending_changes) {
+      m_boundary->template fillFromFlagField<FlagField>(
+          m_blocks, m_flag_field_id, Boundary_flag, Domain_flag);
+      m_pending_changes = false;
+    }
+  }
+
+  std::tuple<int, int, int> block_dims(IBlock const &block) const {
+    auto const field = block.template getData<FlagField>(m_flag_field_id);
+    return {static_cast<int>(field->xSize()), static_cast<int>(field->ySize()),
+            static_cast<int>(field->zSize())};
+  }
+
+private:
+  std::shared_ptr<StructuredBlockForest> m_blocks;
+  BlockDataID m_value_field_id;
+  BlockDataID m_flag_field_id;
+  DynamicValueCallback m_callback;
+  std::shared_ptr<BoundaryClass> m_boundary;
+  bool m_pending_changes;
+
+  /** Register flags and set all cells to @ref Domain_flag. */
+  void flag_reset_kernel(IBlock *const block) {
+    auto flag_field = block->template getData<FlagField>(m_flag_field_id);
+    // register flags
+    if (!flag_field->flagExists(Domain_flag))
+      flag_field->registerFlag(Domain_flag);
+    if (!flag_field->flagExists(Boundary_flag))
+      flag_field->registerFlag(Boundary_flag);
+    // mark all cells as domain cells and fluid cells
+    auto domain_flag = flag_field->getFlag(Domain_flag);
+    auto boundary_flag = flag_field->getFlag(Boundary_flag);
+    for (auto it = flag_field->begin(); it != flag_field->end(); ++it) {
+      flag_field->addFlag(it.x(), it.y(), it.z(), domain_flag);
+      flag_field->removeFlag(it.x(), it.y(), it.z(), boundary_flag);
+    }
+  }
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/src/CMakeLists.txt b/src/walberla_bridge/src/CMakeLists.txt
new file mode 100644
index 00000000000..1733628c84d
--- /dev/null
+++ b/src/walberla_bridge/src/CMakeLists.txt
@@ -0,0 +1,24 @@
+#
+# Copyright (C) 2020-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+add_subdirectory(electrokinetics)
+add_subdirectory(lattice_boltzmann)
+
+target_sources(espresso_walberla PRIVATE LatticeModel.cpp LatticeWalberla.cpp
+                                         walberla_init.cpp)
diff --git a/src/walberla_bridge/src/LatticeModel.cpp b/src/walberla_bridge/src/LatticeModel.cpp
new file mode 100644
index 00000000000..d887be97e7a
--- /dev/null
+++ b/src/walberla_bridge/src/LatticeModel.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2020-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <walberla_bridge/LatticeModel.hpp>
+#include <walberla_bridge/VTKHandle.hpp>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <field/vtk/VTKWriter.h>
+#include <vtk/VTKOutput.h>
+
+#include <memory>
+#include <sstream>
+#include <string>
+
+std::shared_ptr<VTKHandle> LatticeModel::create_vtk(
+    int delta_N, int initial_count, int flag_observables,
+    units_map const &units_conversion, std::string const &identifier,
+    std::string const &base_folder, std::string const &prefix) {
+
+  using walberla::uint_c;
+
+  // VTKOutput object must be unique
+  std::stringstream unique_identifier;
+  unique_identifier << base_folder << "/" << identifier;
+  std::string const vtk_uid = unique_identifier.str();
+  if (m_vtk_auto.find(vtk_uid) != m_vtk_auto.end() or
+      m_vtk_manual.find(vtk_uid) != m_vtk_manual.end()) {
+    throw vtk_runtime_error(vtk_uid, "already exists");
+  }
+
+  // instantiate VTKOutput object
+  auto const &blocks = get_lattice().get_blocks();
+  auto const write_freq = (delta_N) ? static_cast<unsigned int>(delta_N) : 1u;
+  auto vtk_obj = walberla::vtk::createVTKOutput_BlockData(
+      blocks, identifier, uint_c(write_freq), uint_c(0), false, base_folder,
+      prefix, true, true, true, true, uint_c(initial_count));
+
+  // add filters
+  register_vtk_field_filters(*vtk_obj);
+
+  // add writers
+  register_vtk_field_writers(*vtk_obj, units_conversion, flag_observables);
+
+  auto vtk_handle = std::make_shared<VTKHandle>(vtk_obj, initial_count, true);
+  if (delta_N) {
+    m_vtk_auto[vtk_uid] = vtk_handle;
+  } else {
+    m_vtk_manual[vtk_uid] = vtk_handle;
+  }
+  return vtk_handle;
+}
+
+void LatticeModel::write_vtk(std::string const &vtk_uid) {
+  if (m_vtk_auto.find(vtk_uid) != m_vtk_auto.end()) {
+    throw vtk_runtime_error(vtk_uid, "is an automatic observable");
+  }
+  if (m_vtk_manual.find(vtk_uid) == m_vtk_manual.end()) {
+    throw vtk_runtime_error(vtk_uid, "doesn't exist");
+  }
+  auto &vtk_handle = m_vtk_manual[vtk_uid];
+  walberla::vtk::writeFiles(vtk_handle->ptr)();
+  vtk_handle->execution_count++;
+}
+
+void LatticeModel::switch_vtk(std::string const &vtk_uid, bool status) {
+  if (m_vtk_manual.find(vtk_uid) != m_vtk_manual.end()) {
+    throw vtk_runtime_error(vtk_uid, "is a manual observable");
+  }
+  if (m_vtk_auto.find(vtk_uid) == m_vtk_auto.end()) {
+    throw vtk_runtime_error(vtk_uid, "doesn't exist");
+  }
+  m_vtk_auto[vtk_uid]->enabled = status;
+}
diff --git a/src/walberla_bridge/src/LatticeWalberla.cpp b/src/walberla_bridge/src/LatticeWalberla.cpp
new file mode 100644
index 00000000000..e48bd17f6e4
--- /dev/null
+++ b/src/walberla_bridge/src/LatticeWalberla.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <walberla_bridge/BlockAndCell.hpp>
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/utils/walberla_utils.hpp>
+
+#include <blockforest/Initialization.h>
+#include <blockforest/StructuredBlockForest.h>
+#include <core/DataTypes.h>
+#include <core/cell/Cell.h>
+#include <domain_decomposition/IBlock.h>
+
+#include <utils/Vector.hpp>
+
+#include <boost/optional.hpp>
+
+#include <cmath>
+#include <initializer_list>
+#include <limits>
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions,
+                                 Utils::Vector3i const &node_grid,
+                                 unsigned int n_ghost_layers)
+    : m_grid_dimensions{grid_dimensions}, m_n_ghost_layers{n_ghost_layers} {
+  using walberla::real_t;
+  using walberla::uint_c;
+
+  for (auto const i : {0u, 1u, 2u}) {
+    if (m_grid_dimensions[i] % node_grid[i] != 0) {
+      throw std::runtime_error(
+          "Lattice grid dimensions and MPI node grid are not compatible.");
+    }
+  }
+
+  auto constexpr lattice_constant = real_t{1};
+  auto const cells_block = Utils::hadamard_division(grid_dimensions, node_grid);
+
+  m_blocks = walberla::blockforest::createUniformBlockGrid(
+      // number of blocks in each direction
+      uint_c(node_grid[0]), uint_c(node_grid[1]), uint_c(node_grid[2]),
+      // number of cells per block in each direction
+      uint_c(cells_block[0]), uint_c(cells_block[1]), uint_c(cells_block[2]),
+      lattice_constant,
+      // number of cpus per direction
+      uint_c(node_grid[0]), uint_c(node_grid[1]), uint_c(node_grid[2]),
+      // periodicity
+      true, true, true);
+}
+
+[[nodiscard]] std::pair<Utils::Vector3d, Utils::Vector3d>
+LatticeWalberla::get_local_domain() const {
+  using walberla::to_vector3d;
+  // We only have one block per mpi rank
+  assert(++(m_blocks->begin()) == m_blocks->end());
+
+  auto const ab = m_blocks->begin()->getAABB();
+  return {to_vector3d(ab.min()), to_vector3d(ab.max())};
+}
+
+[[nodiscard]] bool
+LatticeWalberla::node_in_local_domain(Utils::Vector3i const &node) const {
+  // Note: Lattice constant =1, cell centers offset by .5
+  return ::walberla::get_block_and_cell(*this, node, false) != boost::none;
+}
+[[nodiscard]] bool
+LatticeWalberla::node_in_local_halo(Utils::Vector3i const &node) const {
+  return ::walberla::get_block_and_cell(*this, node, true) != boost::none;
+}
+[[nodiscard]] bool
+LatticeWalberla::pos_in_local_domain(Utils::Vector3d const &pos) const {
+  return ::walberla::get_block(*this, pos, false) != nullptr;
+}
+[[nodiscard]] bool
+LatticeWalberla::pos_in_local_halo(Utils::Vector3d const &pos) const {
+  return ::walberla::get_block(*this, pos, true) != nullptr;
+}
+
+[[nodiscard]] Utils::Vector3i
+LatticeWalberla::calc_grid_dimensions(Utils::Vector3d const &box_size,
+                                      double agrid) {
+  auto const grid_dimensions =
+      Utils::Vector3i{{static_cast<int>(std::round(box_size[0] / agrid)),
+                       static_cast<int>(std::round(box_size[1] / agrid)),
+                       static_cast<int>(std::round(box_size[2] / agrid))}};
+  for (auto const i : {0u, 1u, 2u}) {
+    if (std::abs(grid_dimensions[i] * agrid - box_size[i]) / box_size[i] >
+        std::numeric_limits<double>::epsilon()) {
+      throw std::runtime_error(
+          "Box length not commensurate with agrid in direction " +
+          std::to_string(i) + " length " + std::to_string(box_size[i]) +
+          " agrid " + std::to_string(agrid));
+    }
+  }
+  return grid_dimensions;
+}
diff --git a/src/walberla_bridge/src/electrokinetics/CMakeLists.txt b/src/walberla_bridge/src/electrokinetics/CMakeLists.txt
new file mode 100644
index 00000000000..f03116e0b25
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/CMakeLists.txt
@@ -0,0 +1,27 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+add_subdirectory(generated_kernels)
+add_subdirectory(reactions)
+
+target_sources(espresso_walberla PRIVATE ek_walberla_init.cpp)
+target_sources(espresso_walberla PRIVATE ek_poisson_none_init.cpp)
+if(ESPRESSO_BUILD_WITH_WALBERLA_FFT)
+  target_sources(espresso_walberla PRIVATE ek_poisson_fft_init.cpp)
+endif()
diff --git a/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp b/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp
new file mode 100644
index 00000000000..5d6c1cb5d7f
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp
@@ -0,0 +1,770 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <blockforest/communication/UniformBufferedScheme.h>
+#include <field/AddToStorage.h>
+#include <field/FlagField.h>
+#include <field/GhostLayerField.h>
+#include <field/communication/PackInfo.h>
+#include <field/vtk/FlagFieldCellFilter.h>
+#include <field/vtk/VTKWriter.h>
+#include <lbm/lattice_model/D3Q27.h>
+#include <timeloop/SweepTimeloop.h>
+
+#include "../BoundaryHandling.hpp"
+#include "ek_kernels.hpp"
+
+#include <walberla_bridge/BlockAndCell.hpp>
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/EKinWalberlaBase.hpp>
+#include <walberla_bridge/utils/boundary_utils.hpp>
+#include <walberla_bridge/utils/walberla_utils.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <cstddef>
+#include <iterator>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+namespace walberla {
+
+/** @brief Class that runs and controls the EK on waLBerla. */
+template <std::size_t FluxCount = 13, typename FloatType = double>
+class EKinWalberlaImpl : public EKinWalberlaBase {
+  using ContinuityKernel =
+      typename detail::KernelTrait<FloatType>::ContinuityKernel;
+  using DiffusiveFluxKernel =
+      typename detail::KernelTrait<FloatType>::DiffusiveFluxKernel;
+  using AdvectiveFluxKernel =
+      typename detail::KernelTrait<FloatType>::AdvectiveFluxKernel;
+  using FrictionCouplingKernel =
+      typename detail::KernelTrait<FloatType>::FrictionCouplingKernel;
+  using DiffusiveFluxKernelElectrostatic =
+      typename detail::KernelTrait<FloatType>::DiffusiveFluxKernelElectrostatic;
+
+  using Dirichlet = typename detail::KernelTrait<FloatType>::Dirichlet;
+  using FixedFlux = typename detail::KernelTrait<FloatType>::FixedFlux;
+
+protected:
+  // Type definitions
+  using FluxField = GhostLayerField<FloatType, FluxCount>;
+  using FlagField = walberla::FlagField<walberla::uint8_t>;
+  using DensityField = GhostLayerField<FloatType, 1>;
+
+  using BoundaryModelDensity = BoundaryHandling<FloatType, Dirichlet>;
+  using BoundaryModelFlux = BoundaryHandling<Vector3<FloatType>, FixedFlux>;
+
+public:
+  template <typename T> FloatType FloatType_c(T t) {
+    return numeric_cast<FloatType>(t);
+  }
+
+  [[nodiscard]] std::size_t stencil_size() const noexcept override {
+    return FluxCount;
+  }
+
+  [[nodiscard]] bool is_double_precision() const noexcept override {
+    return std::is_same<FloatType, double>::value;
+  }
+
+private:
+  FloatType m_diffusion;
+  FloatType m_kT;
+  FloatType m_valency;
+  Utils::Vector3d m_ext_efield;
+  bool m_advection;
+  bool m_friction_coupling;
+
+protected:
+  // Block data access handles
+  BlockDataID m_density_field_id;
+  BlockDataID m_density_field_flattened_id;
+
+  BlockDataID m_flux_field_id;
+  BlockDataID m_flux_field_flattened_id;
+
+  BlockDataID m_flag_field_density_id;
+  BlockDataID m_flag_field_flux_id;
+
+  /** Block forest */
+  std::shared_ptr<LatticeWalberla> m_lattice;
+
+  std::unique_ptr<BoundaryModelDensity> m_boundary_density;
+  std::unique_ptr<BoundaryModelFlux> m_boundary_flux;
+
+  std::unique_ptr<ContinuityKernel> m_continuity;
+
+  // ResetFlux + external force
+  // TODO: kernel for that
+  // std::shared_ptr<ResetForce<PdfField, VectorField>> m_reset_force;
+
+  [[nodiscard]] boost::optional<CellInterval>
+  get_interval(Utils::Vector3i const &lower_corner,
+               Utils::Vector3i const &upper_corner) const {
+    auto const &lattice = get_lattice();
+    auto const &cell_min = lower_corner;
+    auto const cell_max = upper_corner - Utils::Vector3i::broadcast(1);
+    auto const lower_bc = get_block_and_cell(lattice, cell_min, true);
+    auto const upper_bc = get_block_and_cell(lattice, cell_max, true);
+    if (not lower_bc or not upper_bc) {
+      return {};
+    }
+    assert(&(*(lower_bc->block)) == &(*(upper_bc->block)));
+    return {CellInterval(lower_bc->cell, upper_bc->cell)};
+  }
+
+  void reset_density_boundary_handling() {
+    auto const &blocks = get_lattice().get_blocks();
+    m_boundary_density = std::make_unique<BoundaryModelDensity>(
+        blocks, m_density_field_id, m_flag_field_density_id);
+  }
+
+  void reset_flux_boundary_handling() {
+    auto const &blocks = get_lattice().get_blocks();
+    m_boundary_flux = std::make_unique<BoundaryModelFlux>(
+        blocks, m_flux_field_id, m_flag_field_flux_id);
+  }
+
+  using FullCommunicator = blockforest::communication::UniformBufferedScheme<
+      typename stencil::D3Q27>;
+  std::shared_ptr<FullCommunicator> m_full_communication;
+
+public:
+  EKinWalberlaImpl(std::shared_ptr<LatticeWalberla> lattice, double diffusion,
+                   double kT, double valency, Utils::Vector3d const &ext_efield,
+                   double density, bool advection, bool friction_coupling)
+      : m_diffusion(FloatType_c(diffusion)), m_kT(FloatType_c(kT)),
+        m_valency(FloatType_c(valency)), m_ext_efield(ext_efield),
+        m_advection(advection), m_friction_coupling(friction_coupling),
+        m_lattice(std::move(lattice)) {
+    m_density_field_id = field::addToStorage<DensityField>(
+        m_lattice->get_blocks(), "density field", FloatType_c(density),
+        field::fzyx, m_lattice->get_ghost_layers());
+    m_density_field_flattened_id =
+        field::addFlattenedShallowCopyToStorage<DensityField>(
+            m_lattice->get_blocks(), m_density_field_id,
+            "flattened density field");
+    m_flux_field_id = field::addToStorage<FluxField>(
+        m_lattice->get_blocks(), "flux field", FloatType{0}, field::fzyx,
+        m_lattice->get_ghost_layers());
+    m_flux_field_flattened_id =
+        field::addFlattenedShallowCopyToStorage<FluxField>(
+            m_lattice->get_blocks(), m_flux_field_id, "flattened flux field");
+
+    m_continuity = std::make_unique<ContinuityKernel>(
+        m_flux_field_flattened_id, m_density_field_flattened_id);
+
+    // Init boundary related stuff
+    m_flag_field_density_id = field::addFlagFieldToStorage<FlagField>(
+        m_lattice->get_blocks(), "flag field density",
+        m_lattice->get_ghost_layers());
+    reset_density_boundary_handling();
+
+    m_flag_field_flux_id = field::addFlagFieldToStorage<FlagField>(
+        m_lattice->get_blocks(), "flag field flux",
+        m_lattice->get_ghost_layers());
+    reset_flux_boundary_handling();
+
+    m_full_communication =
+        std::make_shared<FullCommunicator>(m_lattice->get_blocks());
+    m_full_communication->addPackInfo(
+        std::make_shared<field::communication::PackInfo<DensityField>>(
+            m_density_field_id));
+
+    // Synchronize ghost layers
+    (*m_full_communication)();
+  }
+
+  // Global parameters
+  [[nodiscard]] double get_diffusion() const noexcept override {
+    return m_diffusion;
+  }
+  [[nodiscard]] double get_kT() const noexcept override { return m_kT; }
+  [[nodiscard]] double get_valency() const noexcept override {
+    return m_valency;
+  }
+  [[nodiscard]] bool get_advection() const noexcept override {
+    return m_advection;
+  }
+  [[nodiscard]] bool get_friction_coupling() const noexcept override {
+    return m_friction_coupling;
+  }
+  [[nodiscard]] Utils::Vector3d get_ext_efield() const noexcept override {
+    return m_ext_efield;
+  }
+
+  void set_diffusion(double diffusion) override {
+    m_diffusion = FloatType_c(diffusion);
+  }
+  void set_kT(double kT) override { m_kT = FloatType_c(kT); }
+  void set_valency(double valency) override {
+    m_valency = FloatType_c(valency);
+  }
+  void set_advection(bool advection) override { m_advection = advection; }
+  void set_friction_coupling(bool friction_coupling) override {
+    m_friction_coupling = friction_coupling;
+  }
+  void set_ext_efield(Utils::Vector3d const &field) override {
+    m_ext_efield = field;
+  }
+
+  void ghost_communication() override { (*m_full_communication)(); }
+
+private:
+  void kernel_boundary_density() {
+    for (auto &block : *m_lattice->get_blocks()) {
+      (*m_boundary_density)(&block);
+    }
+  }
+
+  void kernel_boundary_flux() {
+    for (auto &block : *m_lattice->get_blocks()) {
+      (*m_boundary_flux)(&block);
+    }
+  }
+
+  void kernel_continuity() {
+    for (auto &block : *m_lattice->get_blocks()) {
+      (*m_continuity).run(&block);
+    }
+  }
+
+  void kernel_diffusion() {
+    auto kernel = DiffusiveFluxKernel(m_flux_field_flattened_id,
+                                      m_density_field_flattened_id,
+                                      FloatType_c(get_diffusion()));
+
+    for (auto &block : *m_lattice->get_blocks()) {
+      kernel.run(&block);
+    }
+  }
+
+  void kernel_advection(const std::size_t &velocity_id) {
+    auto kernel =
+        AdvectiveFluxKernel(m_flux_field_flattened_id, m_density_field_id,
+                            BlockDataID(velocity_id));
+    for (auto &block : *m_lattice->get_blocks()) {
+      kernel.run(&block);
+    }
+  }
+
+  void kernel_friction_coupling(const std::size_t &force_id) {
+    auto kernel = FrictionCouplingKernel(
+        BlockDataID(force_id), m_flux_field_flattened_id,
+        FloatType_c(get_diffusion()), FloatType_c(get_kT()));
+    for (auto &block : *m_lattice->get_blocks()) {
+      kernel.run(&block);
+    }
+  }
+
+  void kernel_diffusion_electrostatic(const std::size_t &potential_id) {
+    auto const ext_field = get_ext_efield();
+    auto kernel = DiffusiveFluxKernelElectrostatic(
+        m_flux_field_flattened_id, BlockDataID(potential_id),
+        m_density_field_flattened_id, FloatType_c(get_diffusion()),
+        FloatType_c(ext_field[0]), FloatType_c(ext_field[1]),
+        FloatType_c(ext_field[2]), FloatType_c(get_kT()),
+        FloatType_c(get_valency()));
+    for (auto &block : *m_lattice->get_blocks()) {
+      kernel.run(&block);
+    }
+  }
+
+  void kernel_migration() {}
+
+  void updated_boundary_fields() {
+    m_boundary_flux->boundary_update();
+    m_boundary_density->boundary_update();
+  }
+
+protected:
+  void integrate_vtk_writers() override {
+    for (auto const &it : m_vtk_auto) {
+      auto &vtk_handle = it.second;
+      if (vtk_handle->enabled) {
+        vtk::writeFiles(vtk_handle->ptr)();
+        vtk_handle->execution_count++;
+      }
+    }
+  }
+
+public:
+  void integrate(std::size_t potential_id, std::size_t velocity_id,
+                 std::size_t force_id) override {
+
+    updated_boundary_fields();
+
+    if (get_diffusion() == 0.)
+      return;
+
+    if (get_valency() != 0.) {
+      if (potential_id == walberla::BlockDataID{}) {
+        throw std::runtime_error("Walberla EK: electrostatic potential enabled "
+                                 "but no field accessible. potential id is " +
+                                 std::to_string(potential_id));
+      }
+      kernel_diffusion_electrostatic(potential_id);
+    } else {
+      kernel_diffusion();
+    }
+
+    kernel_migration();
+    kernel_boundary_flux();
+    // friction coupling
+    if (get_friction_coupling()) {
+      if (force_id == walberla::BlockDataID{}) {
+        throw std::runtime_error("Walberla EK: friction coupling enabled but "
+                                 "no force field accessible. force_id is " +
+                                 std::to_string(force_id) +
+                                 ". Hint: LB may be inactive.");
+      }
+      kernel_friction_coupling(force_id);
+    }
+
+    if (get_advection()) {
+      if (velocity_id == walberla::BlockDataID{}) {
+        throw std::runtime_error("Walberla EK: advection enabled but no "
+                                 "velocity field accessible. velocity_id is " +
+                                 std::to_string(velocity_id) +
+                                 ". Hint: LB may be inactive.");
+      }
+      kernel_advection(velocity_id);
+    }
+    kernel_continuity();
+
+    // is this the expected behavior when reactions are included?
+    kernel_boundary_density();
+
+    // Handle VTK writers
+    integrate_vtk_writers();
+  }
+
+  [[nodiscard]] std::size_t get_density_id() const noexcept override {
+    static_assert(std::is_same_v<std::size_t, walberla::uint_t>);
+    return static_cast<std::size_t>(m_density_field_id);
+  }
+
+  bool set_node_density(Utils::Vector3i const &node, double density) override {
+    auto bc = get_block_and_cell(get_lattice(), node, false);
+    if (!bc)
+      return false;
+
+    auto density_field =
+        bc->block->template getData<DensityField>(m_density_field_id);
+    density_field->get(bc->cell) = FloatType_c(density);
+
+    return true;
+  }
+
+  [[nodiscard]] boost::optional<double>
+  get_node_density(Utils::Vector3i const &node,
+                   bool consider_ghosts = false) const override {
+    auto bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+
+    if (!bc)
+      return {boost::none};
+
+    auto const density_field =
+        bc->block->template getData<DensityField>(m_density_field_id);
+
+    return {double_c(density_field->get(bc->cell))};
+  }
+
+  [[nodiscard]] std::vector<double>
+  get_slice_density(Utils::Vector3i const &lower_corner,
+                    Utils::Vector3i const &upper_corner) const override {
+    std::vector<double> out;
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const &block = *(lattice.get_blocks()->begin());
+      auto const density_field =
+          block.template getData<DensityField>(m_density_field_id);
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto const n_values = ci->numCells();
+      out.reserve(n_values);
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            out.emplace_back(density_field->get(Cell{x, y, z}));
+          }
+        }
+      }
+      assert(out.size() == n_values);
+    }
+    return out;
+  }
+
+  void set_slice_density(Utils::Vector3i const &lower_corner,
+                         Utils::Vector3i const &upper_corner,
+                         std::vector<double> const &density) override {
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto &block = *(lattice.get_blocks()->begin());
+      auto density_field =
+          block.template getData<DensityField>(m_density_field_id);
+      auto it = density.begin();
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      assert(density.size() == ci->numCells());
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            density_field->get(Cell{x, y, z}) = FloatType_c(*it);
+            ++it;
+          }
+        }
+      }
+    }
+  }
+
+  void clear_flux_boundaries() override { reset_flux_boundary_handling(); }
+
+  void clear_density_boundaries() override {
+    reset_density_boundary_handling();
+  }
+
+  bool set_node_flux_boundary(Utils::Vector3i const &node,
+                              Utils::Vector3d const &flux) override {
+    auto bc = get_block_and_cell(get_lattice(), node, true);
+    if (!bc)
+      return false;
+
+    m_boundary_flux->set_node_value_at_boundary(node, flux, *bc);
+
+    return true;
+  }
+
+  [[nodiscard]] boost::optional<Utils::Vector3d>
+  get_node_flux_at_boundary(Utils::Vector3i const &node,
+                            bool consider_ghosts = false) const override {
+    auto const bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+    if (!bc or !m_boundary_flux->node_is_boundary(node))
+      return {boost::none};
+
+    return {m_boundary_flux->get_node_value_at_boundary(node)};
+  }
+
+  bool remove_node_from_flux_boundary(Utils::Vector3i const &node) override {
+    auto bc = get_block_and_cell(get_lattice(), node, true);
+    if (!bc)
+      return false;
+
+    m_boundary_flux->remove_node_from_boundary(node, *bc);
+
+    return true;
+  }
+
+  bool set_node_density_boundary(Utils::Vector3i const &node,
+                                 double density) override {
+    auto bc = get_block_and_cell(get_lattice(), node, true);
+    if (!bc)
+      return false;
+
+    m_boundary_density->set_node_value_at_boundary(node, FloatType_c(density),
+                                                   *bc);
+
+    return true;
+  }
+
+  [[nodiscard]] boost::optional<double>
+  get_node_density_at_boundary(Utils::Vector3i const &node,
+                               bool consider_ghosts = false) const override {
+    auto const bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+    if (!bc or !m_boundary_density->node_is_boundary(node))
+      return {boost::none};
+
+    return {double_c(m_boundary_density->get_node_value_at_boundary(node))};
+  }
+
+  void set_slice_density_boundary(
+      Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner,
+      std::vector<boost::optional<double>> const &density) override {
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const local_offset = std::get<0>(lattice.get_local_grid_range());
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto it = density.begin();
+      assert(density.size() == ci->numCells());
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const node = local_offset + Utils::Vector3i{{x, y, z}};
+            auto const bc = get_block_and_cell(lattice, node, false);
+            auto const &opt = *it;
+            if (opt) {
+              m_boundary_density->set_node_value_at_boundary(node, *opt, *bc);
+            } else {
+              m_boundary_density->remove_node_from_boundary(node, *bc);
+            }
+            ++it;
+          }
+        }
+      }
+    }
+  }
+
+  [[nodiscard]] std::vector<boost::optional<double>>
+  get_slice_density_at_boundary(
+      Utils::Vector3i const &lower_corner,
+      Utils::Vector3i const &upper_corner) const override {
+    std::vector<boost::optional<double>> out;
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const local_offset = std::get<0>(lattice.get_local_grid_range());
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto const n_values = ci->numCells();
+      out.reserve(n_values);
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const node = local_offset + Utils::Vector3i{{x, y, z}};
+            if (m_boundary_density->node_is_boundary(node)) {
+              out.emplace_back(
+                  m_boundary_density->get_node_value_at_boundary(node));
+            } else {
+              out.emplace_back(boost::none);
+            }
+          }
+        }
+      }
+      assert(out.size() == n_values);
+    }
+    return out;
+  }
+
+  void set_slice_flux_boundary(
+      Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner,
+      std::vector<boost::optional<Utils::Vector3d>> const &flux) override {
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const local_offset = std::get<0>(lattice.get_local_grid_range());
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto it = flux.begin();
+      assert(flux.size() == ci->numCells());
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const node = local_offset + Utils::Vector3i{{x, y, z}};
+            auto const bc = get_block_and_cell(lattice, node, false);
+            auto const &opt = *it;
+            if (opt) {
+              m_boundary_flux->set_node_value_at_boundary(node, *opt, *bc);
+            } else {
+              m_boundary_flux->remove_node_from_boundary(node, *bc);
+            }
+            ++it;
+          }
+        }
+      }
+    }
+  }
+
+  [[nodiscard]] std::vector<boost::optional<Utils::Vector3d>>
+  get_slice_flux_at_boundary(
+      Utils::Vector3i const &lower_corner,
+      Utils::Vector3i const &upper_corner) const override {
+    std::vector<boost::optional<Utils::Vector3d>> out;
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const local_offset = std::get<0>(lattice.get_local_grid_range());
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto const n_values = ci->numCells();
+      out.reserve(n_values);
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const node = local_offset + Utils::Vector3i{{x, y, z}};
+            if (m_boundary_flux->node_is_boundary(node)) {
+              out.emplace_back(
+                  m_boundary_flux->get_node_value_at_boundary(node));
+            } else {
+              out.emplace_back(boost::none);
+            }
+          }
+        }
+      }
+      assert(out.size() == n_values);
+    }
+    return out;
+  }
+
+  [[nodiscard]] std::vector<bool>
+  get_slice_is_boundary(Utils::Vector3i const &lower_corner,
+                        Utils::Vector3i const &upper_corner) const override {
+    std::vector<bool> out;
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const local_offset = std::get<0>(lattice.get_local_grid_range());
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto const n_values = ci->numCells();
+      out.reserve(n_values);
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const node = local_offset + Utils::Vector3i{{x, y, z}};
+            out.emplace_back(m_boundary_density->node_is_boundary(node) or
+                             m_boundary_flux->node_is_boundary(node));
+          }
+        }
+      }
+      assert(out.size() == n_values);
+    }
+    return out;
+  }
+
+  bool remove_node_from_density_boundary(Utils::Vector3i const &node) override {
+    auto bc = get_block_and_cell(get_lattice(), node, true);
+    if (!bc)
+      return false;
+
+    m_boundary_density->remove_node_from_boundary(node, *bc);
+
+    return true;
+  }
+
+  [[nodiscard]] boost::optional<bool>
+  get_node_is_flux_boundary(Utils::Vector3i const &node,
+                            bool consider_ghosts) const override {
+    auto bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+    if (!bc)
+      return {boost::none};
+
+    return {m_boundary_flux->node_is_boundary(node)};
+  }
+
+  [[nodiscard]] boost::optional<bool>
+  get_node_is_density_boundary(Utils::Vector3i const &node,
+                               bool consider_ghosts) const override {
+    auto bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+    if (!bc)
+      return {boost::none};
+
+    return {m_boundary_density->node_is_boundary(node)};
+  }
+
+  [[nodiscard]] boost::optional<bool>
+  get_node_is_boundary(Utils::Vector3i const &node,
+                       bool consider_ghosts = false) const override {
+    auto bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+    if (!bc)
+      return {boost::none};
+
+    return {m_boundary_density->node_is_boundary(node) or
+            m_boundary_flux->node_is_boundary(node)};
+  }
+
+  void update_flux_boundary_from_shape(
+      const std::vector<int> &raster_flat,
+      const std::vector<double> &data_flat) override {
+    auto const grid_size = get_lattice().get_grid_dimensions();
+    auto const data = fill_3D_vector_array(data_flat, grid_size);
+    set_boundary_from_grid(*m_boundary_flux, get_lattice(), raster_flat, data);
+    reallocate_flux_boundary_field();
+  }
+
+  void update_density_boundary_from_shape(
+      const std::vector<int> &raster_flat,
+      const std::vector<double> &data_flat) override {
+    auto const grid_size = get_lattice().get_grid_dimensions();
+    auto const data = fill_3D_scalar_array(data_flat, grid_size);
+    set_boundary_from_grid(*m_boundary_density, get_lattice(), raster_flat,
+                           data);
+    reallocate_density_boundary_field();
+  }
+
+  void reallocate_flux_boundary_field() { m_boundary_flux->boundary_update(); }
+
+  void reallocate_density_boundary_field() {
+    m_boundary_density->boundary_update();
+  }
+
+  [[nodiscard]] LatticeWalberla const &get_lattice() const noexcept override {
+    return *m_lattice;
+  }
+
+  void register_vtk_field_filters(walberla::vtk::VTKOutput &vtk_obj) override {
+    field::FlagFieldCellFilter<FlagField> fluid_filter(m_flag_field_density_id);
+    fluid_filter.addFlag(Boundary_flag);
+    vtk_obj.addCellExclusionFilter(fluid_filter);
+  }
+
+protected:
+  template <typename Field_T, uint_t F_SIZE_ARG, typename OutputType>
+  class VTKWriter : public vtk::BlockCellDataWriter<OutputType, F_SIZE_ARG> {
+  public:
+    VTKWriter(ConstBlockDataID const &block_id, std::string const &id,
+              FloatType unit_conversion)
+        : vtk::BlockCellDataWriter<OutputType, F_SIZE_ARG>(id),
+          m_block_id(block_id), m_field(nullptr),
+          m_conversion(unit_conversion) {}
+
+  protected:
+    void configure() override {
+      WALBERLA_ASSERT_NOT_NULLPTR(this->block_);
+      m_field = this->block_->template getData<Field_T>(m_block_id);
+    }
+
+    ConstBlockDataID const m_block_id;
+    Field_T const *m_field;
+    FloatType const m_conversion;
+  };
+
+  template <typename OutputType = float,
+            class Base = VTKWriter<DensityField, 1u, OutputType>>
+  class DensityVTKWriter : public VTKWriter<DensityField, 1u, OutputType> {
+  public:
+    using VTKWriter<DensityField, 1u, OutputType>::VTKWriter;
+
+  protected:
+    OutputType evaluate(cell_idx_t const x, cell_idx_t const y,
+                        cell_idx_t const z, cell_idx_t const) override {
+      WALBERLA_ASSERT_NOT_NULLPTR(this->m_field);
+      auto const density = VectorTrait<typename DensityField::value_type>::get(
+          this->m_field->get(x, y, z, 0), uint_c(0));
+      return numeric_cast<OutputType>(this->m_conversion * density);
+    }
+  };
+
+public:
+  void register_vtk_field_writers(walberla::vtk::VTKOutput &vtk_obj,
+                                  LatticeModel::units_map const &units,
+                                  int flag_observables) override {
+    if (flag_observables & static_cast<int>(EKOutputVTK::density)) {
+      auto const unit_conversion = FloatType_c(units.at("density"));
+      vtk_obj.addCellDataWriter(make_shared<DensityVTKWriter<float>>(
+          m_density_field_id, "density", unit_conversion));
+    }
+  }
+
+  ~EKinWalberlaImpl() override = default;
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/ek_kernels.hpp b/src/walberla_bridge/src/electrokinetics/ek_kernels.hpp
new file mode 100644
index 00000000000..4397bb8be5c
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/ek_kernels.hpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "generated_kernels/AdvectiveFluxKernel_double_precision.h"
+#include "generated_kernels/AdvectiveFluxKernel_single_precision.h"
+#include "generated_kernels/ContinuityKernel_double_precision.h"
+#include "generated_kernels/ContinuityKernel_single_precision.h"
+#include "generated_kernels/DiffusiveFluxKernelWithElectrostatic_double_precision.h"
+#include "generated_kernels/DiffusiveFluxKernelWithElectrostatic_single_precision.h"
+#include "generated_kernels/DiffusiveFluxKernel_double_precision.h"
+#include "generated_kernels/DiffusiveFluxKernel_single_precision.h"
+#include "generated_kernels/FrictionCouplingKernel_double_precision.h"
+#include "generated_kernels/FrictionCouplingKernel_single_precision.h"
+
+#include "generated_kernels/Dirichlet_double_precision.h"
+#include "generated_kernels/Dirichlet_single_precision.h"
+#include "generated_kernels/FixedFlux_double_precision.h"
+#include "generated_kernels/FixedFlux_single_precision.h"
+
+namespace walberla {
+namespace detail {
+template <typename FloatType = double> struct KernelTrait {
+  using ContinuityKernel = pystencils::ContinuityKernel_double_precision;
+  using DiffusiveFluxKernel = pystencils::DiffusiveFluxKernel_double_precision;
+  using AdvectiveFluxKernel = pystencils::AdvectiveFluxKernel_double_precision;
+  using FrictionCouplingKernel =
+      pystencils::FrictionCouplingKernel_double_precision;
+  using DiffusiveFluxKernelElectrostatic =
+      pystencils::DiffusiveFluxKernelWithElectrostatic_double_precision;
+
+  using Dirichlet = pystencils::Dirichlet_double_precision;
+  using FixedFlux = pystencils::FixedFlux_double_precision;
+};
+template <> struct KernelTrait<float> {
+  using ContinuityKernel = pystencils::ContinuityKernel_single_precision;
+  using DiffusiveFluxKernel = pystencils::DiffusiveFluxKernel_single_precision;
+  using AdvectiveFluxKernel = pystencils::AdvectiveFluxKernel_single_precision;
+  using FrictionCouplingKernel =
+      pystencils::FrictionCouplingKernel_single_precision;
+  using DiffusiveFluxKernelElectrostatic =
+      pystencils::DiffusiveFluxKernelWithElectrostatic_single_precision;
+
+  using Dirichlet = pystencils::Dirichlet_single_precision;
+  using FixedFlux = pystencils::FixedFlux_single_precision;
+};
+} // namespace detail
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp b/src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp
new file mode 100644
index 00000000000..2598c6616e5
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/PoissonSolver/FFT.hpp>
+#include <walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp>
+
+#include <memory>
+
+std::shared_ptr<walberla::PoissonSolver>
+new_ek_poisson_fft(std::shared_ptr<LatticeWalberla> const &lattice,
+                   double permittivity, bool single_precision) {
+  if (single_precision) {
+    return std::make_shared<walberla::FFT<float>>(lattice, permittivity);
+  }
+  return std::make_shared<walberla::FFT<double>>(lattice, permittivity);
+}
diff --git a/src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp b/src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp
new file mode 100644
index 00000000000..f912cd49111
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/PoissonSolver/None.hpp>
+#include <walberla_bridge/electrokinetics/ek_poisson_none_init.hpp>
+
+#include <memory>
+
+std::shared_ptr<walberla::PoissonSolver>
+new_ek_poisson_none(std::shared_ptr<LatticeWalberla> const &lattice,
+                    bool single_precision) {
+  if (single_precision) {
+    return std::make_shared<walberla::None<float>>(lattice);
+  }
+  return std::make_shared<walberla::None<double>>(lattice);
+}
diff --git a/src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp b/src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp
new file mode 100644
index 00000000000..a666e8f9acf
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "EKinWalberlaImpl.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/ek_walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <memory>
+
+std::shared_ptr<EKinWalberlaBase>
+new_ek_walberla(std::shared_ptr<LatticeWalberla> const &lattice,
+                double diffusion, double kT, double valency,
+                Utils::Vector3d ext_efield, double density, bool advection,
+                bool friction_coupling, bool single_precision) {
+  if (single_precision) {
+    return std::make_shared<walberla::EKinWalberlaImpl<13, float>>(
+        lattice, diffusion, kT, valency, ext_efield, density, advection,
+        friction_coupling);
+  }
+
+  return std::make_shared<walberla::EKinWalberlaImpl<13, double>>(
+      lattice, diffusion, kT, valency, ext_efield, density, advection,
+      friction_coupling);
+}
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_double_precision.cpp
new file mode 100644
index 00000000000..17b23b9b878
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_double_precision.cpp
@@ -0,0 +1,1712 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file AdvectiveFluxKernel_double_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "AdvectiveFluxKernel_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_5255e1c780a944d646f270232511968b {
+static FUNC_PREFIX void advectivefluxkernel_double_precision_advectivefluxkernel_double_precision(double *RESTRICT const _data_j, double *RESTRICT const _data_rho, double *RESTRICT const _data_u, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3, int64_t const _stride_rho_0, int64_t const _stride_rho_1, int64_t const _stride_rho_2, int64_t const _stride_u_0, int64_t const _stride_u_1, int64_t const _stride_u_2, int64_t const _stride_u_3) {
+  {
+    {
+      {
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          double *RESTRICT _data_rho_20 = _data_rho;
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          double *RESTRICT _data_u_20_30 = _data_u;
+          double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+          double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+          double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+          double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+          double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+          double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+          double *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+          double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+          double *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+          double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+          double *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+          _data_j_20_312_10[_stride_j_0] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 < _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[0] && 0.0 > _data_u_21_32_11[0] && 0.0 < _data_u_21_30_11[0]) ? (1) : (0)))) * _data_rho_21_11[0] * _data_u_21_30_11[0] * _data_u_21_31_11[0] * _data_u_21_32_11[0] + _data_j_20_312_10[_stride_j_0];
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+            double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+            double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _data_rho_20;
+            double *RESTRICT _data_u_20_30 = _data_u;
+            double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+            double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+            double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+            double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+            double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+            double *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+            double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+            double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+            _data_j_20_312_10[_stride_j_0 * ctr_0] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 > _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * ctr_0];
+          }
+        }
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          double *RESTRICT _data_rho_20 = _data_rho;
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          double *RESTRICT _data_u_20_30 = _data_u;
+          double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+          double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+          double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+          double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+          double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+          double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+          double *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+          double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+          double *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+          double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+          double *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+          _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 > _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)];
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_10 = _stride_u_1 * ctr_1 + _data_u_21_31;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              double *RESTRICT _data_u_21_30_10 = _stride_u_1 * ctr_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_10 = _stride_u_1 * ctr_1 + _data_u_21_32;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_36_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_21_31_10[0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_10[0] && 0.0 < _data_u_21_30_10[0]) ? (1) : (0)))) * _data_rho_21_10[0] * _data_u_21_30_10[0] * _data_u_21_32_10[0] + _data_j_20_36_10[_stride_j_0];
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+              double *RESTRICT _data_u_20_30 = _data_u;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_38_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_21_30_1m1[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0] * _data_u_21_31_1m1[_stride_u_0] * _data_u_21_32_1m1[_stride_u_0] + _data_j_20_38_10[_stride_j_0];
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((double)(((0.0 > _data_u_21_32_1m1[0] && 0.0 < _data_u_21_30_1m1[0] && 0.0 < _data_u_21_31_1m1[0]) ? (1) : (0)))) * _data_rho_21_1m1[0] * _data_u_21_30_1m1[0] * _data_u_21_31_1m1[0] * _data_u_21_32_1m1[0] + _data_j_20_310_10[_stride_j_0];
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              double *RESTRICT _data_u_21_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 < _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[0] && 0.0 > _data_u_21_32_11[0] && 0.0 < _data_u_21_30_11[0]) ? (1) : (0)))) * _data_rho_21_11[0] * _data_u_21_30_11[0] * _data_u_21_31_11[0] * _data_u_21_32_11[0] + _data_j_20_312_10[_stride_j_0];
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_10 = _stride_u_1 * ctr_1 + _data_u_21_31;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              double *RESTRICT _data_u_21_30_10 = _stride_u_1 * ctr_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_10 = _stride_u_1 * ctr_1 + _data_u_21_32;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_21_31_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_10[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_30_10[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_10[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_10[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_36_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+              double *RESTRICT _data_u_20_30 = _data_u;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_21_30_1m1[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * ctr_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_38_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * ctr_0];
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              double *RESTRICT _data_u_21_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 > _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * ctr_0];
+            }
+          }
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_10 = _stride_u_1 * ctr_1 + _data_u_21_31;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              double *RESTRICT _data_u_21_30_10 = _stride_u_1 * ctr_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_10 = _stride_u_1 * ctr_1 + _data_u_21_32;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0 * fabs(_data_u_21_31_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              double *RESTRICT _data_u_21_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 > _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+            double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+            double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+            double *RESTRICT _data_u_20_30 = _data_u;
+            double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+            double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+            double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            _data_j_20_38_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_21_30_1m1[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0] * _data_u_21_31_1m1[_stride_u_0] * _data_u_21_32_1m1[_stride_u_0] + _data_j_20_38_10[_stride_j_0];
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_u_20_30 = _data_u;
+            double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+            double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+            double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+            double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+            double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+            double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+            _data_j_20_310_10[_stride_j_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((double)(((0.0 > _data_u_21_32_1m1[0] && 0.0 < _data_u_21_30_1m1[0] && 0.0 < _data_u_21_31_1m1[0]) ? (1) : (0)))) * _data_rho_21_1m1[0] * _data_u_21_30_1m1[0] * _data_u_21_31_1m1[0] * _data_u_21_32_1m1[0] + _data_j_20_310_10[_stride_j_0];
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+            double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+            double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+            double *RESTRICT _data_u_20_30 = _data_u;
+            double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+            double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+            double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            _data_j_20_38_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_21_30_1m1[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * ctr_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_38_10[_stride_j_0 * ctr_0];
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_u_20_30 = _data_u;
+            double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+            double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+            double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+            double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+            double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+            double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+            _data_j_20_310_10[_stride_j_0 * ctr_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * ctr_0];
+          }
+        }
+        if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+          double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+          double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+          double *RESTRICT _data_rho_20 = _data_rho;
+          double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          double *RESTRICT _data_u_20_30 = _data_u;
+          double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+          double *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+          double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+          double *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+          double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+          double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+          double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+          double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+          double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+          double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+          double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+          double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+          _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)];
+        }
+      }
+    }
+    for (int64_t ctr_2 = 1; ctr_2 < _size_j_2 - 1; ctr_2 += 1) {
+      double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+      double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+      double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+      double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+      double *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+      double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+      double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+      double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+      double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+      double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+      double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+      double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+      double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+      {
+        {
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              double *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_11 = _stride_u_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_11 = _stride_u_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_11 = _stride_u_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              _data_j_20_34_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] + (-1.0 * fabs(_data_u_20_32_11[0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_11[0] && 0.0 < _data_u_20_30_11[0]) ? (1) : (0)))) * _data_rho_20_11[0] * _data_u_20_30_11[0] * _data_u_20_31_11[0] + _data_j_20_34_10[_stride_j_0];
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0] && 0.0 < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((double)(((0.0 > _data_u_2m1_31_11[0] && 0.0 < _data_u_2m1_30_11[0] && 0.0 < _data_u_2m1_32_11[0]) ? (1) : (0)))) * _data_rho_2m1_11[0] * _data_u_2m1_30_11[0] * _data_u_2m1_31_11[0] * _data_u_2m1_32_11[0] + _data_j_20_311_10[_stride_j_0];
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              double *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 < _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[0] && 0.0 > _data_u_21_32_11[0] && 0.0 < _data_u_21_30_11[0]) ? (1) : (0)))) * _data_rho_21_11[0] * _data_u_21_30_11[0] * _data_u_21_31_11[0] * _data_u_21_32_11[0] + _data_j_20_312_10[_stride_j_0];
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              double *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_11 = _stride_u_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_11 = _stride_u_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_11 = _stride_u_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_20_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_20_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_20_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_20_31_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_34_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((double)(((0.0 > _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * ctr_0];
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              double *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 > _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * ctr_0];
+            }
+          }
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              double *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_11 = _stride_u_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_11 = _stride_u_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_11 = _stride_u_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0 * fabs(_data_u_20_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_20_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_20_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_20_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((double)(((0.0 > _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              double *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 > _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+          }
+        }
+        for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+          double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+          double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+          double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+          double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+          double *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+          double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+          double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+          double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+          double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+          double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+          double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+          double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+          double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+          {
+            double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+            double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+            double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+            double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+            double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+            _data_j_20_30_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_31_10[0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_10[0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_30_10[0]) ? (1) : (0)))) * _data_rho_20_10[0] * _data_u_20_30_10[0] + (-1.0 * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] + _data_j_20_30_10[_stride_j_0];
+            double *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_32;
+            double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+            double *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_30;
+            double *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_31;
+            _data_j_20_33_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] + (-1.0 * fabs(_data_u_20_32_1m1[0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_30_1m1[0] && 0.0 < _data_u_20_31_1m1[0]) ? (1) : (0)))) * _data_rho_20_1m1[0] * _data_u_20_30_1m1[0] * _data_u_20_31_1m1[0] + _data_j_20_33_10[_stride_j_0];
+            double *RESTRICT _data_u_20_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_20_32;
+            double *RESTRICT _data_rho_20_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_20;
+            double *RESTRICT _data_u_20_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_20_30;
+            double *RESTRICT _data_u_20_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_20_31;
+            _data_j_20_34_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] + (-1.0 * fabs(_data_u_20_32_11[0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_11[0] && 0.0 < _data_u_20_30_11[0]) ? (1) : (0)))) * _data_rho_20_11[0] * _data_u_20_30_11[0] * _data_u_20_31_11[0] + _data_j_20_34_10[_stride_j_0];
+            double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+            double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+            double *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+            double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+            _data_j_20_35_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_2m1_31_10[0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_30_10[0] && 0.0 < _data_u_2m1_32_10[0]) ? (1) : (0)))) * _data_rho_2m1_10[0] * _data_u_2m1_30_10[0] * _data_u_2m1_32_10[0] + _data_j_20_35_10[_stride_j_0];
+            double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_21_31_10 = _stride_u_1 * ctr_1 + _data_u_21_31;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+            double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+            double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+            double *RESTRICT _data_u_21_30_10 = _stride_u_1 * ctr_1 + _data_u_21_30;
+            double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_21_32_10 = _stride_u_1 * ctr_1 + _data_u_21_32;
+            _data_j_20_36_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_21_31_10[0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_10[0] && 0.0 < _data_u_21_30_10[0]) ? (1) : (0)))) * _data_rho_21_10[0] * _data_u_21_30_10[0] * _data_u_21_32_10[0] + _data_j_20_36_10[_stride_j_0];
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+            double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+            double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+            _data_j_20_39_10[_stride_j_0] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[0] && 0.0 < _data_u_2m1_31_1m1[0] && 0.0 < _data_u_2m1_32_1m1[0]) ? (1) : (0)))) * _data_rho_2m1_1m1[0] * _data_u_2m1_30_1m1[0] * _data_u_2m1_31_1m1[0] * _data_u_2m1_32_1m1[0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + _data_j_20_39_10[_stride_j_0];
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+            double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+            double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+            _data_j_20_310_10[_stride_j_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((double)(((0.0 > _data_u_21_32_1m1[0] && 0.0 < _data_u_21_30_1m1[0] && 0.0 < _data_u_21_31_1m1[0]) ? (1) : (0)))) * _data_rho_21_1m1[0] * _data_u_21_30_1m1[0] * _data_u_21_31_1m1[0] * _data_u_21_32_1m1[0] + _data_j_20_310_10[_stride_j_0];
+            double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_u_2m1_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_30;
+            double *RESTRICT _data_u_2m1_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_31;
+            double *RESTRICT _data_u_2m1_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_32;
+            _data_j_20_311_10[_stride_j_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0] && 0.0 < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((double)(((0.0 > _data_u_2m1_31_11[0] && 0.0 < _data_u_2m1_30_11[0] && 0.0 < _data_u_2m1_32_11[0]) ? (1) : (0)))) * _data_rho_2m1_11[0] * _data_u_2m1_30_11[0] * _data_u_2m1_31_11[0] * _data_u_2m1_32_11[0] + _data_j_20_311_10[_stride_j_0];
+            double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_u_21_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_30;
+            double *RESTRICT _data_u_21_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_31;
+            double *RESTRICT _data_u_21_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_32;
+            _data_j_20_312_10[_stride_j_0] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 < _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[0] && 0.0 > _data_u_21_32_11[0] && 0.0 < _data_u_21_30_11[0]) ? (1) : (0)))) * _data_rho_21_11[0] * _data_u_21_30_11[0] * _data_u_21_31_11[0] * _data_u_21_32_11[0] + _data_j_20_312_10[_stride_j_0];
+            {
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+                double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+                double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+                double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+                double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+                double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+                double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+                double *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_30;
+                double *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_32;
+                double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+                double *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_31;
+                _data_j_20_31_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] + (-1.0 * fabs(_data_u_20_30_1m1[_stride_u_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_1m1[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0] * _data_u_20_31_1m1[_stride_u_0] + _data_j_20_31_10[_stride_j_0];
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+                double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+                double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+                double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+                double *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+                double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+                double *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+                double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+                double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+                double *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+                double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+                double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+                double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+                double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+                double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+                _data_j_20_32_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * (-1.0 * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_2m1_30_10[_stride_u_0]) + 1.0) * (-1.0 * fabs(_data_u_2m1_31_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0] * _data_u_2m1_32_10[_stride_u_0] + _data_j_20_32_10[_stride_j_0];
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1) {
+                double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+                double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+                double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+                double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+                double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+                double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+                double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+                double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+                double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+                double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+                double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+                double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+                double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+                double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+                _data_j_20_37_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_2m1_30_1m1[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_31_1m1[_stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0] * _data_u_2m1_31_1m1[_stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0] + _data_j_20_37_10[_stride_j_0];
+              }
+              if (ctr_1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+                double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+                double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+                double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+                double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+                double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+                double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+                double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+                double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+                double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+                double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+                double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+                double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+                double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+                double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+                _data_j_20_38_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_21_30_1m1[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0] * _data_u_21_31_1m1[_stride_u_0] * _data_u_21_32_1m1[_stride_u_0] + _data_j_20_38_10[_stride_j_0];
+              }
+            }
+            for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+              _data_j_20_30_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_30_10[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_20_30_10[_stride_u_0 * ctr_0 - _stride_u_0] + (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] + _data_j_20_30_10[_stride_j_0 * ctr_0];
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_20_30_1m1[_stride_u_0 * ctr_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_1m1[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * ctr_0] * _data_u_20_31_1m1[_stride_u_0 * ctr_0] + _data_j_20_31_10[_stride_j_0 * ctr_0];
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_2m1_30_10[_stride_u_0 * ctr_0]) + 1.0) * (-1.0 * fabs(_data_u_2m1_31_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * ctr_0] * _data_u_2m1_32_10[_stride_u_0 * ctr_0] + _data_j_20_32_10[_stride_j_0 * ctr_0];
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_20_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_20_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_20_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_20_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_33_10[_stride_j_0 * ctr_0];
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_20_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_20_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_20_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_20_31_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_34_10[_stride_j_0 * ctr_0];
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_2m1_31_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_30_10[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_32_10[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_10[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_10[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_35_10[_stride_j_0 * ctr_0];
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_21_31_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_10[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_30_10[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_10[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_10[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_36_10[_stride_j_0 * ctr_0];
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_2m1_30_1m1[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_37_10[_stride_j_0 * ctr_0];
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_21_30_1m1[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * ctr_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_38_10[_stride_j_0 * ctr_0];
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + _data_j_20_39_10[_stride_j_0 * ctr_0];
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * ctr_0];
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((double)(((0.0 > _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * ctr_0];
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 > _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * ctr_0];
+            }
+            _data_j_20_30_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0) * (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] + _data_j_20_30_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0 * fabs(_data_u_20_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_20_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_20_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_20_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0 * fabs(_data_u_20_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_20_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_20_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_20_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0 * fabs(_data_u_2m1_31_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0 * fabs(_data_u_21_31_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((double)(((0.0 > _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] - 1.0 * ((double)(((0.0 > _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 > _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)];
+            {
+            }
+          }
+        }
+        {
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              double *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_31;
+              _data_j_20_31_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] + (-1.0 * fabs(_data_u_20_30_1m1[_stride_u_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_1m1[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0] * _data_u_20_31_1m1[_stride_u_0] + _data_j_20_31_10[_stride_j_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              double *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_31;
+              _data_j_20_33_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] + (-1.0 * fabs(_data_u_20_32_1m1[0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_30_1m1[0] && 0.0 < _data_u_20_31_1m1[0]) ? (1) : (0)))) * _data_rho_20_1m1[0] * _data_u_20_30_1m1[0] * _data_u_20_31_1m1[0] + _data_j_20_33_10[_stride_j_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+              _data_j_20_37_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_2m1_30_1m1[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_31_1m1[_stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0] * _data_u_2m1_31_1m1[_stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0] + _data_j_20_37_10[_stride_j_0];
+            }
+            if (_size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              _data_j_20_38_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_21_30_1m1[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0] * _data_u_21_31_1m1[_stride_u_0] * _data_u_21_32_1m1[_stride_u_0] + _data_j_20_38_10[_stride_j_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[0] && 0.0 < _data_u_2m1_31_1m1[0] && 0.0 < _data_u_2m1_32_1m1[0]) ? (1) : (0)))) * _data_rho_2m1_1m1[0] * _data_u_2m1_30_1m1[0] * _data_u_2m1_31_1m1[0] * _data_u_2m1_32_1m1[0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + _data_j_20_39_10[_stride_j_0];
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((double)(((0.0 > _data_u_21_32_1m1[0] && 0.0 < _data_u_21_30_1m1[0] && 0.0 < _data_u_21_31_1m1[0]) ? (1) : (0)))) * _data_rho_21_1m1[0] * _data_u_21_30_1m1[0] * _data_u_21_31_1m1[0] * _data_u_21_32_1m1[0] + _data_j_20_310_10[_stride_j_0];
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              double *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_31;
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_20_30_1m1[_stride_u_0 * ctr_0]) + 1.0) * (-1.0 * fabs(_data_u_20_32_1m1[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * ctr_0] * _data_u_20_31_1m1[_stride_u_0 * ctr_0] + _data_j_20_31_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              double *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_31;
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_20_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_20_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_20_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_20_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_33_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_2m1_30_1m1[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_37_10[_stride_j_0 * ctr_0];
+            }
+            if (_size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_21_30_1m1[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * ctr_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_38_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + _data_j_20_39_10[_stride_j_0 * ctr_0];
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * ctr_0];
+            }
+          }
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              double *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_32;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_31;
+              _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0 * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0 * fabs(_data_u_20_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_20_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_20_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_20_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_20_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              double *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+              double *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+              double *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((double)(((0.0 > _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+          }
+        }
+      }
+    }
+    {
+      {
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+          double *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+          double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+          double *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+          double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+          double *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+          double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+          double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+          double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+          double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+          double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+          _data_j_20_311_10[_stride_j_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0] && 0.0 < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((double)(((0.0 > _data_u_2m1_31_11[0] && 0.0 < _data_u_2m1_30_11[0] && 0.0 < _data_u_2m1_32_11[0]) ? (1) : (0)))) * _data_rho_2m1_11[0] * _data_u_2m1_30_11[0] * _data_u_2m1_31_11[0] * _data_u_2m1_32_11[0] + _data_j_20_311_10[_stride_j_0];
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+            double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+            double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+            double *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+            double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+            double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _data_rho_20;
+            double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+            double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+            double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+            double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+            double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+            _data_j_20_311_10[_stride_j_0 * ctr_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((double)(((0.0 > _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * ctr_0];
+          }
+        }
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+          double *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+          double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+          double *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+          double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+          double *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+          double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+          double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+          double *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+          double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+          double *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+          _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((double)(((0.0 > _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)];
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_32_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * (-1.0 * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_2m1_30_10[_stride_u_0]) + 1.0) * (-1.0 * fabs(_data_u_2m1_31_10[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0] * _data_u_2m1_32_10[_stride_u_0] + _data_j_20_32_10[_stride_j_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+              _data_j_20_35_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_2m1_31_10[0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_30_10[0] && 0.0 < _data_u_2m1_32_10[0]) ? (1) : (0)))) * _data_rho_2m1_10[0] * _data_u_2m1_30_10[0] * _data_u_2m1_32_10[0] + _data_j_20_35_10[_stride_j_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+              _data_j_20_37_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_2m1_30_1m1[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_31_1m1[_stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0] * _data_u_2m1_31_1m1[_stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0] + _data_j_20_37_10[_stride_j_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[0] && 0.0 < _data_u_2m1_31_1m1[0] && 0.0 < _data_u_2m1_32_1m1[0]) ? (1) : (0)))) * _data_rho_2m1_1m1[0] * _data_u_2m1_30_1m1[0] * _data_u_2m1_31_1m1[0] * _data_u_2m1_32_1m1[0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + _data_j_20_39_10[_stride_j_0];
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0] && 0.0 < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((double)(((0.0 > _data_u_2m1_31_11[0] && 0.0 < _data_u_2m1_30_11[0] && 0.0 < _data_u_2m1_32_11[0]) ? (1) : (0)))) * _data_rho_2m1_11[0] * _data_u_2m1_30_11[0] * _data_u_2m1_31_11[0] * _data_u_2m1_32_11[0] + _data_j_20_311_10[_stride_j_0];
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_2m1_30_10[_stride_u_0 * ctr_0]) + 1.0) * (-1.0 * fabs(_data_u_2m1_31_10[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * ctr_0] * _data_u_2m1_32_10[_stride_u_0 * ctr_0] + _data_j_20_32_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_2m1_31_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_30_10[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_32_10[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_10[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_10[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_35_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_2m1_30_1m1[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_37_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + _data_j_20_39_10[_stride_j_0 * ctr_0];
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0] && 0.0 < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((double)(((0.0 > _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * ctr_0];
+            }
+          }
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+              _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0 * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0) * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0 * fabs(_data_u_2m1_31_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              double *RESTRICT _data_u_2m1_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_30;
+              double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              double *RESTRICT _data_u_2m1_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_31;
+              double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              double *RESTRICT _data_u_2m1_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_32;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              double *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              double *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((double)(((0.0 > _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+            double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+            double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+            double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+            double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+            _data_j_20_37_10[_stride_j_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0 * fabs(_data_u_2m1_30_1m1[_stride_u_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_31_1m1[_stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0] * _data_u_2m1_31_1m1[_stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0] + _data_j_20_37_10[_stride_j_0];
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+            double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+            double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+            double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+            double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+            double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            _data_j_20_39_10[_stride_j_0] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[0] && 0.0 < _data_u_2m1_31_1m1[0] && 0.0 < _data_u_2m1_32_1m1[0]) ? (1) : (0)))) * _data_rho_2m1_1m1[0] * _data_u_2m1_30_1m1[0] * _data_u_2m1_31_1m1[0] * _data_u_2m1_32_1m1[0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0] && 0.0 > _data_u_20_31_10[_stride_u_0] && 0.0 > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + _data_j_20_39_10[_stride_j_0];
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+            double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+            double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+            double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+            double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+            _data_j_20_37_10[_stride_j_0 * ctr_0] = (-1.0 * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0) * ((double)(((0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0 * fabs(_data_u_2m1_30_1m1[_stride_u_0 * ctr_0]) + 1.0) * -1.0 * ((double)(((0.0 < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_37_10[_stride_j_0 * ctr_0];
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+            double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+            double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+            double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+            double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+            double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+            double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+            double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            _data_j_20_39_10[_stride_j_0 * ctr_0] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0 > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + _data_j_20_39_10[_stride_j_0 * ctr_0];
+          }
+        }
+        if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+          double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+          double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+          double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+          double *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+          double *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+          double *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+          double *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+          double *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+          double *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+          double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          double *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+          double *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+          double *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+          double *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+          double *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+          _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0 * ((double)(((0.0 < _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0 < _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] - 1.0 * ((double)(((0.0 > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0 > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)];
+        }
+      }
+    }
+  }
+}
+} // namespace internal_5255e1c780a944d646f270232511968b
+
+void AdvectiveFluxKernel_double_precision::run(IBlock *block) {
+  auto u = block->getData<field::GhostLayerField<double, 3>>(uID);
+  auto rho = block->getData<field::GhostLayerField<double, 1>>(rhoID);
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()));
+  double *RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(rho->nrOfGhostLayers()));
+  double *RESTRICT const _data_rho = rho->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(u->nrOfGhostLayers()));
+  double *RESTRICT const _data_u = u->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(j->xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(j->xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(j->ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(j->ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(j->zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(j->zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  const int64_t _stride_u_0 = int64_t(u->xStride());
+  const int64_t _stride_u_1 = int64_t(u->yStride());
+  const int64_t _stride_u_2 = int64_t(u->zStride());
+  const int64_t _stride_u_3 = int64_t(1 * int64_t(u->fStride()));
+  internal_5255e1c780a944d646f270232511968b::advectivefluxkernel_double_precision_advectivefluxkernel_double_precision(_data_j, _data_rho, _data_u, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2, _stride_u_0, _stride_u_1, _stride_u_2, _stride_u_3);
+}
+
+void AdvectiveFluxKernel_double_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto u = block->getData<field::GhostLayerField<double, 3>>(uID);
+  auto rho = block->getData<field::GhostLayerField<double, 1>>(rhoID);
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()));
+  double *RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  double *RESTRICT const _data_rho = rho->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(u->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(u->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(u->nrOfGhostLayers()));
+  double *RESTRICT const _data_u = u->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  const int64_t _stride_u_0 = int64_t(u->xStride());
+  const int64_t _stride_u_1 = int64_t(u->yStride());
+  const int64_t _stride_u_2 = int64_t(u->zStride());
+  const int64_t _stride_u_3 = int64_t(1 * int64_t(u->fStride()));
+  internal_5255e1c780a944d646f270232511968b::advectivefluxkernel_double_precision_advectivefluxkernel_double_precision(_data_j, _data_rho, _data_u, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2, _stride_u_0, _stride_u_1, _stride_u_2, _stride_u_3);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_double_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_double_precision.h
new file mode 100644
index 00000000000..d4e9f42423c
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_double_precision.h
@@ -0,0 +1,104 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file AdvectiveFluxKernel_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class AdvectiveFluxKernel_double_precision {
+public:
+  AdvectiveFluxKernel_double_precision(BlockDataID jID_, BlockDataID rhoID_,
+                                       BlockDataID uID_)
+      : jID(jID_), rhoID(rhoID_), uID(uID_){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<AdvectiveFluxKernel_double_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<AdvectiveFluxKernel_double_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID jID;
+  BlockDataID rhoID;
+  BlockDataID uID;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_single_precision.cpp
new file mode 100644
index 00000000000..2648cf12931
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_single_precision.cpp
@@ -0,0 +1,1712 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file AdvectiveFluxKernel_single_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "AdvectiveFluxKernel_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_47df4b171f276b8c3a55fc08d45e245e {
+static FUNC_PREFIX void advectivefluxkernel_single_precision_advectivefluxkernel_single_precision(float *RESTRICT const _data_j, float *RESTRICT const _data_rho, float *RESTRICT const _data_u, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3, int64_t const _stride_rho_0, int64_t const _stride_rho_1, int64_t const _stride_rho_2, int64_t const _stride_u_0, int64_t const _stride_u_1, int64_t const _stride_u_2, int64_t const _stride_u_3) {
+  {
+    {
+      {
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          float *RESTRICT _data_rho_20 = _data_rho;
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          float *RESTRICT _data_u_20_30 = _data_u;
+          float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+          float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+          float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+          float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+          float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+          float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+          float *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+          float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+          float *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+          float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+          float *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+          _data_j_20_312_10[_stride_j_0] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f < _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[0] && 0.0f > _data_u_21_32_11[0] && 0.0f < _data_u_21_30_11[0]) ? (1) : (0)))) * _data_rho_21_11[0] * _data_u_21_30_11[0] * _data_u_21_31_11[0] * _data_u_21_32_11[0] + _data_j_20_312_10[_stride_j_0];
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+            float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+            float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _data_rho_20;
+            float *RESTRICT _data_u_20_30 = _data_u;
+            float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+            float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+            float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+            float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+            float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+            float *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+            float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+            float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+            _data_j_20_312_10[_stride_j_0 * ctr_0] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f > _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * ctr_0];
+          }
+        }
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          float *RESTRICT _data_rho_20 = _data_rho;
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          float *RESTRICT _data_u_20_30 = _data_u;
+          float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+          float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+          float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+          float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+          float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+          float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+          float *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+          float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+          float *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+          float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+          float *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+          _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f > _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)];
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_10 = _stride_u_1 * ctr_1 + _data_u_21_31;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              float *RESTRICT _data_u_21_30_10 = _stride_u_1 * ctr_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_10 = _stride_u_1 * ctr_1 + _data_u_21_32;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_36_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_21_31_10[0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_10[0] && 0.0f < _data_u_21_30_10[0]) ? (1) : (0)))) * _data_rho_21_10[0] * _data_u_21_30_10[0] * _data_u_21_32_10[0] + _data_j_20_36_10[_stride_j_0];
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+              float *RESTRICT _data_u_20_30 = _data_u;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_38_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_21_30_1m1[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0] * _data_u_21_31_1m1[_stride_u_0] * _data_u_21_32_1m1[_stride_u_0] + _data_j_20_38_10[_stride_j_0];
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((float)(((0.0f > _data_u_21_32_1m1[0] && 0.0f < _data_u_21_30_1m1[0] && 0.0f < _data_u_21_31_1m1[0]) ? (1) : (0)))) * _data_rho_21_1m1[0] * _data_u_21_30_1m1[0] * _data_u_21_31_1m1[0] * _data_u_21_32_1m1[0] + _data_j_20_310_10[_stride_j_0];
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              float *RESTRICT _data_u_21_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f < _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[0] && 0.0f > _data_u_21_32_11[0] && 0.0f < _data_u_21_30_11[0]) ? (1) : (0)))) * _data_rho_21_11[0] * _data_u_21_30_11[0] * _data_u_21_31_11[0] * _data_u_21_32_11[0] + _data_j_20_312_10[_stride_j_0];
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_10 = _stride_u_1 * ctr_1 + _data_u_21_31;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              float *RESTRICT _data_u_21_30_10 = _stride_u_1 * ctr_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_10 = _stride_u_1 * ctr_1 + _data_u_21_32;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_21_31_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_10[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_30_10[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_10[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_10[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_36_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+              float *RESTRICT _data_u_20_30 = _data_u;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_21_30_1m1[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * ctr_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_38_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * ctr_0];
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              float *RESTRICT _data_u_21_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f > _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * ctr_0];
+            }
+          }
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_10 = _stride_u_1 * ctr_1 + _data_u_21_31;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              float *RESTRICT _data_u_21_30_10 = _stride_u_1 * ctr_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_10 = _stride_u_1 * ctr_1 + _data_u_21_32;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0f * fabs(_data_u_21_31_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+              float *RESTRICT _data_u_21_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f > _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+            float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+            float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+            float *RESTRICT _data_u_20_30 = _data_u;
+            float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+            float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+            float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            _data_j_20_38_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_21_30_1m1[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0] * _data_u_21_31_1m1[_stride_u_0] * _data_u_21_32_1m1[_stride_u_0] + _data_j_20_38_10[_stride_j_0];
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_u_20_30 = _data_u;
+            float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+            float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+            float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+            float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+            float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+            float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+            _data_j_20_310_10[_stride_j_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((float)(((0.0f > _data_u_21_32_1m1[0] && 0.0f < _data_u_21_30_1m1[0] && 0.0f < _data_u_21_31_1m1[0]) ? (1) : (0)))) * _data_rho_21_1m1[0] * _data_u_21_30_1m1[0] * _data_u_21_31_1m1[0] * _data_u_21_32_1m1[0] + _data_j_20_310_10[_stride_j_0];
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+            float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+            float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+            float *RESTRICT _data_u_20_30 = _data_u;
+            float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+            float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+            float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            _data_j_20_38_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_21_30_1m1[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * ctr_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_38_10[_stride_j_0 * ctr_0];
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_u_20_30 = _data_u;
+            float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+            float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+            float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+            float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+            float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+            float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+            _data_j_20_310_10[_stride_j_0 * ctr_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * ctr_0];
+          }
+        }
+        if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+          float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+          float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+          float *RESTRICT _data_rho_20 = _data_rho;
+          float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          float *RESTRICT _data_u_20_30 = _data_u;
+          float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+          float *RESTRICT _data_u_20_31 = _data_u + _stride_u_3;
+          float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+          float *RESTRICT _data_u_20_32 = _data_u + 2 * _stride_u_3;
+          float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+          float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+          float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2;
+          float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+          float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 + _stride_u_3;
+          float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+          float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 + 2 * _stride_u_3;
+          float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+          _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)];
+        }
+      }
+    }
+    for (int64_t ctr_2 = 1; ctr_2 < _size_j_2 - 1; ctr_2 += 1) {
+      float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+      float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+      float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+      float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+      float *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+      float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+      float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+      float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+      float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+      float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+      float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+      float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+      float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+      {
+        {
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              float *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_11 = _stride_u_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_11 = _stride_u_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_11 = _stride_u_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              _data_j_20_34_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] + (-1.0f * fabs(_data_u_20_32_11[0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_11[0] && 0.0f < _data_u_20_30_11[0]) ? (1) : (0)))) * _data_rho_20_11[0] * _data_u_20_30_11[0] * _data_u_20_31_11[0] + _data_j_20_34_10[_stride_j_0];
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0] && 0.0f < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((float)(((0.0f > _data_u_2m1_31_11[0] && 0.0f < _data_u_2m1_30_11[0] && 0.0f < _data_u_2m1_32_11[0]) ? (1) : (0)))) * _data_rho_2m1_11[0] * _data_u_2m1_30_11[0] * _data_u_2m1_31_11[0] * _data_u_2m1_32_11[0] + _data_j_20_311_10[_stride_j_0];
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              float *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f < _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[0] && 0.0f > _data_u_21_32_11[0] && 0.0f < _data_u_21_30_11[0]) ? (1) : (0)))) * _data_rho_21_11[0] * _data_u_21_30_11[0] * _data_u_21_31_11[0] * _data_u_21_32_11[0] + _data_j_20_312_10[_stride_j_0];
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              float *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_11 = _stride_u_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_11 = _stride_u_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_11 = _stride_u_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_20_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_20_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_20_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_20_31_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_34_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((float)(((0.0f > _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * ctr_0];
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              float *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f > _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * ctr_0];
+            }
+          }
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              float *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_11 = _stride_u_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_11 = _stride_u_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_11 = _stride_u_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0f * fabs(_data_u_20_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_20_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_20_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_20_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((float)(((0.0f > _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              float *RESTRICT _data_u_21_30_11 = _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_11 = _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_11 = _stride_u_1 + _data_u_21_32;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f > _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+          }
+        }
+        for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+          float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+          float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+          float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+          float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+          float *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+          float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+          float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+          float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+          float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+          float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+          float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+          float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+          float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+          {
+            float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+            float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+            float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+            float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+            float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+            _data_j_20_30_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_31_10[0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_10[0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_30_10[0]) ? (1) : (0)))) * _data_rho_20_10[0] * _data_u_20_30_10[0] + (-1.0f * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] + _data_j_20_30_10[_stride_j_0];
+            float *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_32;
+            float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+            float *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_30;
+            float *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_31;
+            _data_j_20_33_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] + (-1.0f * fabs(_data_u_20_32_1m1[0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_30_1m1[0] && 0.0f < _data_u_20_31_1m1[0]) ? (1) : (0)))) * _data_rho_20_1m1[0] * _data_u_20_30_1m1[0] * _data_u_20_31_1m1[0] + _data_j_20_33_10[_stride_j_0];
+            float *RESTRICT _data_u_20_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_20_32;
+            float *RESTRICT _data_rho_20_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_20;
+            float *RESTRICT _data_u_20_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_20_30;
+            float *RESTRICT _data_u_20_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_20_31;
+            _data_j_20_34_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] + (-1.0f * fabs(_data_u_20_32_11[0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_11[0] && 0.0f < _data_u_20_30_11[0]) ? (1) : (0)))) * _data_rho_20_11[0] * _data_u_20_30_11[0] * _data_u_20_31_11[0] + _data_j_20_34_10[_stride_j_0];
+            float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+            float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+            float *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+            float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+            _data_j_20_35_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_2m1_31_10[0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_30_10[0] && 0.0f < _data_u_2m1_32_10[0]) ? (1) : (0)))) * _data_rho_2m1_10[0] * _data_u_2m1_30_10[0] * _data_u_2m1_32_10[0] + _data_j_20_35_10[_stride_j_0];
+            float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_21_31_10 = _stride_u_1 * ctr_1 + _data_u_21_31;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+            float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+            float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+            float *RESTRICT _data_u_21_30_10 = _stride_u_1 * ctr_1 + _data_u_21_30;
+            float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_21_32_10 = _stride_u_1 * ctr_1 + _data_u_21_32;
+            _data_j_20_36_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_21_31_10[0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_10[0] && 0.0f < _data_u_21_30_10[0]) ? (1) : (0)))) * _data_rho_21_10[0] * _data_u_21_30_10[0] * _data_u_21_32_10[0] + _data_j_20_36_10[_stride_j_0];
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+            float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+            float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+            _data_j_20_39_10[_stride_j_0] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[0] && 0.0f < _data_u_2m1_31_1m1[0] && 0.0f < _data_u_2m1_32_1m1[0]) ? (1) : (0)))) * _data_rho_2m1_1m1[0] * _data_u_2m1_30_1m1[0] * _data_u_2m1_31_1m1[0] * _data_u_2m1_32_1m1[0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + _data_j_20_39_10[_stride_j_0];
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+            float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+            float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+            _data_j_20_310_10[_stride_j_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((float)(((0.0f > _data_u_21_32_1m1[0] && 0.0f < _data_u_21_30_1m1[0] && 0.0f < _data_u_21_31_1m1[0]) ? (1) : (0)))) * _data_rho_21_1m1[0] * _data_u_21_30_1m1[0] * _data_u_21_31_1m1[0] * _data_u_21_32_1m1[0] + _data_j_20_310_10[_stride_j_0];
+            float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_u_2m1_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_30;
+            float *RESTRICT _data_u_2m1_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_31;
+            float *RESTRICT _data_u_2m1_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_32;
+            _data_j_20_311_10[_stride_j_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0] && 0.0f < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((float)(((0.0f > _data_u_2m1_31_11[0] && 0.0f < _data_u_2m1_30_11[0] && 0.0f < _data_u_2m1_32_11[0]) ? (1) : (0)))) * _data_rho_2m1_11[0] * _data_u_2m1_30_11[0] * _data_u_2m1_31_11[0] * _data_u_2m1_32_11[0] + _data_j_20_311_10[_stride_j_0];
+            float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_u_21_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_30;
+            float *RESTRICT _data_u_21_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_31;
+            float *RESTRICT _data_u_21_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_21_32;
+            _data_j_20_312_10[_stride_j_0] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f < _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[0] && 0.0f > _data_u_21_32_11[0] && 0.0f < _data_u_21_30_11[0]) ? (1) : (0)))) * _data_rho_21_11[0] * _data_u_21_30_11[0] * _data_u_21_31_11[0] * _data_u_21_32_11[0] + _data_j_20_312_10[_stride_j_0];
+            {
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+                float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+                float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+                float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+                float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+                float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+                float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+                float *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_30;
+                float *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_32;
+                float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+                float *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_20_31;
+                _data_j_20_31_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] + (-1.0f * fabs(_data_u_20_30_1m1[_stride_u_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_1m1[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0] * _data_u_20_31_1m1[_stride_u_0] + _data_j_20_31_10[_stride_j_0];
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+                float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+                float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+                float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+                float *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+                float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+                float *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+                float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+                float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+                float *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+                float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+                float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+                float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+                float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+                float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+                _data_j_20_32_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_2m1_30_10[_stride_u_0]) + 1.0f) * (-1.0f * fabs(_data_u_2m1_31_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0] * _data_u_2m1_32_10[_stride_u_0] + _data_j_20_32_10[_stride_j_0];
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1) {
+                float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+                float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+                float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+                float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+                float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+                float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+                float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+                float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+                float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+                float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+                float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+                float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+                float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+                float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+                _data_j_20_37_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_2m1_30_1m1[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_31_1m1[_stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0] * _data_u_2m1_31_1m1[_stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0] + _data_j_20_37_10[_stride_j_0];
+              }
+              if (ctr_1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+                float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+                float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+                float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_30;
+                float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+                float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+                float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+                float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_31;
+                float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+                float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_21_32;
+                float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+                float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+                float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+                float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+                float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+                _data_j_20_38_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_21_30_1m1[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0] * _data_u_21_31_1m1[_stride_u_0] * _data_u_21_32_1m1[_stride_u_0] + _data_j_20_38_10[_stride_j_0];
+              }
+            }
+            for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+              _data_j_20_30_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_30_10[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_20_30_10[_stride_u_0 * ctr_0 - _stride_u_0] + (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] + _data_j_20_30_10[_stride_j_0 * ctr_0];
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_20_30_1m1[_stride_u_0 * ctr_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_1m1[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * ctr_0] * _data_u_20_31_1m1[_stride_u_0 * ctr_0] + _data_j_20_31_10[_stride_j_0 * ctr_0];
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_2m1_30_10[_stride_u_0 * ctr_0]) + 1.0f) * (-1.0f * fabs(_data_u_2m1_31_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * ctr_0] * _data_u_2m1_32_10[_stride_u_0 * ctr_0] + _data_j_20_32_10[_stride_j_0 * ctr_0];
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_20_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_20_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_20_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_20_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_33_10[_stride_j_0 * ctr_0];
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_20_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_20_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_20_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_20_31_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_34_10[_stride_j_0 * ctr_0];
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_2m1_31_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_30_10[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_32_10[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_10[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_10[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_35_10[_stride_j_0 * ctr_0];
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_21_31_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_10[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_30_10[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_10[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_10[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_36_10[_stride_j_0 * ctr_0];
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_2m1_30_1m1[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_37_10[_stride_j_0 * ctr_0];
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_21_30_1m1[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * ctr_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_38_10[_stride_j_0 * ctr_0];
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + _data_j_20_39_10[_stride_j_0 * ctr_0];
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * ctr_0];
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((float)(((0.0f > _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * ctr_0];
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f > _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * ctr_0];
+            }
+            _data_j_20_30_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] + _data_j_20_30_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0f * fabs(_data_u_20_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_20_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_20_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_20_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0f * fabs(_data_u_20_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_20_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_20_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_20_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0f * fabs(_data_u_2m1_31_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0f * fabs(_data_u_21_31_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((float)(((0.0f > _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)];
+            _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] - 1.0f * ((float)(((0.0f > _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f > _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)];
+            {
+            }
+          }
+        }
+        {
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              float *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_31;
+              _data_j_20_31_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] + (-1.0f * fabs(_data_u_20_30_1m1[_stride_u_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_1m1[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0] * _data_u_20_31_1m1[_stride_u_0] + _data_j_20_31_10[_stride_j_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              float *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_31;
+              _data_j_20_33_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] + (-1.0f * fabs(_data_u_20_32_1m1[0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_30_1m1[0] && 0.0f < _data_u_20_31_1m1[0]) ? (1) : (0)))) * _data_rho_20_1m1[0] * _data_u_20_30_1m1[0] * _data_u_20_31_1m1[0] + _data_j_20_33_10[_stride_j_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+              _data_j_20_37_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_2m1_30_1m1[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_31_1m1[_stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0] * _data_u_2m1_31_1m1[_stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0] + _data_j_20_37_10[_stride_j_0];
+            }
+            if (_size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              _data_j_20_38_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_21_30_1m1[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0] * _data_u_21_31_1m1[_stride_u_0] * _data_u_21_32_1m1[_stride_u_0] + _data_j_20_38_10[_stride_j_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[0] && 0.0f < _data_u_2m1_31_1m1[0] && 0.0f < _data_u_2m1_32_1m1[0]) ? (1) : (0)))) * _data_rho_2m1_1m1[0] * _data_u_2m1_30_1m1[0] * _data_u_2m1_31_1m1[0] * _data_u_2m1_32_1m1[0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + _data_j_20_39_10[_stride_j_0];
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f < _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((float)(((0.0f > _data_u_21_32_1m1[0] && 0.0f < _data_u_21_30_1m1[0] && 0.0f < _data_u_21_31_1m1[0]) ? (1) : (0)))) * _data_rho_21_1m1[0] * _data_u_21_30_1m1[0] * _data_u_21_31_1m1[0] * _data_u_21_32_1m1[0] + _data_j_20_310_10[_stride_j_0];
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              float *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_31;
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_20_30_1m1[_stride_u_0 * ctr_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_32_1m1[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * ctr_0] * _data_u_20_31_1m1[_stride_u_0 * ctr_0] + _data_j_20_31_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              float *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_31;
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_20_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_20_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_20_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_20_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_33_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_2m1_30_1m1[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_37_10[_stride_j_0 * ctr_0];
+            }
+            if (_size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_21_30_1m1[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * ctr_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_38_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + _data_j_20_39_10[_stride_j_0 * ctr_0];
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * ctr_0];
+            }
+          }
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              float *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_32;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_20_31;
+              _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0f * fabs(_data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0f * fabs(_data_u_20_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_20_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_20_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_20_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_20_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * ctr_2 - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * ctr_2;
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * ctr_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_u_21_30 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2;
+              float *RESTRICT _data_u_21_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_30;
+              float *RESTRICT _data_u_21_31 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_21_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_31;
+              float *RESTRICT _data_u_21_32 = _data_u + _stride_u_2 * ctr_2 + _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_21_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_21_32;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((float)(((0.0f > _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_21_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_21_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+          }
+        }
+      }
+    }
+    {
+      {
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+          float *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+          float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+          float *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+          float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+          float *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+          float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+          float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+          float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+          float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+          float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+          _data_j_20_311_10[_stride_j_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0] && 0.0f < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((float)(((0.0f > _data_u_2m1_31_11[0] && 0.0f < _data_u_2m1_30_11[0] && 0.0f < _data_u_2m1_32_11[0]) ? (1) : (0)))) * _data_rho_2m1_11[0] * _data_u_2m1_30_11[0] * _data_u_2m1_31_11[0] * _data_u_2m1_32_11[0] + _data_j_20_311_10[_stride_j_0];
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+            float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+            float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+            float *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+            float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+            float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _data_rho_20;
+            float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+            float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+            float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+            float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+            float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+            _data_j_20_311_10[_stride_j_0 * ctr_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((float)(((0.0f > _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * ctr_0];
+          }
+        }
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+          float *RESTRICT _data_u_2m1_30_11 = _stride_u_1 + _data_u_2m1_30;
+          float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+          float *RESTRICT _data_u_2m1_31_11 = _stride_u_1 + _data_u_2m1_31;
+          float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+          float *RESTRICT _data_u_2m1_32_11 = _stride_u_1 + _data_u_2m1_32;
+          float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_u_20_30_10 = _data_u_20_30;
+          float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+          float *RESTRICT _data_u_20_31_10 = _data_u_20_31;
+          float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+          float *RESTRICT _data_u_20_32_10 = _data_u_20_32;
+          _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((float)(((0.0f > _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)];
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_32_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_2m1_30_10[_stride_u_0]) + 1.0f) * (-1.0f * fabs(_data_u_2m1_31_10[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0] * _data_u_2m1_32_10[_stride_u_0] + _data_j_20_32_10[_stride_j_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+              _data_j_20_35_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_2m1_31_10[0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_30_10[0] && 0.0f < _data_u_2m1_32_10[0]) ? (1) : (0)))) * _data_rho_2m1_10[0] * _data_u_2m1_30_10[0] * _data_u_2m1_32_10[0] + _data_j_20_35_10[_stride_j_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+              _data_j_20_37_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_2m1_30_1m1[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_31_1m1[_stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0] * _data_u_2m1_31_1m1[_stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0] + _data_j_20_37_10[_stride_j_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[0] && 0.0f < _data_u_2m1_31_1m1[0] && 0.0f < _data_u_2m1_32_1m1[0]) ? (1) : (0)))) * _data_rho_2m1_1m1[0] * _data_u_2m1_30_1m1[0] * _data_u_2m1_31_1m1[0] * _data_u_2m1_32_1m1[0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + _data_j_20_39_10[_stride_j_0];
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0] && 0.0f < _data_u_20_31_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + ((float)(((0.0f > _data_u_2m1_31_11[0] && 0.0f < _data_u_2m1_30_11[0] && 0.0f < _data_u_2m1_32_11[0]) ? (1) : (0)))) * _data_rho_2m1_11[0] * _data_u_2m1_30_11[0] * _data_u_2m1_31_11[0] * _data_u_2m1_32_11[0] + _data_j_20_311_10[_stride_j_0];
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_2m1_30_10[_stride_u_0 * ctr_0]) + 1.0f) * (-1.0f * fabs(_data_u_2m1_31_10[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * ctr_0] * _data_u_2m1_32_10[_stride_u_0 * ctr_0] + _data_j_20_32_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_2m1_31_10[_stride_u_0 * ctr_0 - _stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_30_10[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_32_10[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_10[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_10[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_35_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_2m1_30_1m1[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_37_10[_stride_j_0 * ctr_0];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + _data_j_20_39_10[_stride_j_0 * ctr_0];
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0] && 0.0f < _data_u_20_31_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + ((float)(((0.0f > _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * ctr_0 - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * ctr_0];
+            }
+          }
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_10 = _stride_u_1 * ctr_1 + _data_u_2m1_31;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_10 = _stride_u_1 * ctr_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_10 = _stride_u_1 * ctr_1 + _data_u_2m1_32;
+              _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = (-1.0f * fabs(_data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) + 1.0f) * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + (-1.0f * fabs(_data_u_2m1_31_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_10[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * ctr_1 - _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+              float *RESTRICT _data_u_2m1_30_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_30;
+              float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+              float *RESTRICT _data_u_2m1_31_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_31;
+              float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+              float *RESTRICT _data_u_2m1_32_11 = _stride_u_1 * ctr_1 + _stride_u_1 + _data_u_2m1_32;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_u_20_30_10 = _stride_u_1 * ctr_1 + _data_u_20_30;
+              float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+              float *RESTRICT _data_u_20_31_10 = _stride_u_1 * ctr_1 + _data_u_20_31;
+              float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+              float *RESTRICT _data_u_20_32_10 = _stride_u_1 * ctr_1 + _data_u_20_32;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f < _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + ((float)(((0.0f > _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_11[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] + _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)];
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+            float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+            float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+            float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+            float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+            _data_j_20_37_10[_stride_j_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + (-1.0f * fabs(_data_u_2m1_30_1m1[_stride_u_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_31_1m1[_stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0] * _data_u_2m1_31_1m1[_stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0] + _data_j_20_37_10[_stride_j_0];
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+            float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+            float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+            float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+            float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+            float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            _data_j_20_39_10[_stride_j_0] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[0] && 0.0f < _data_u_2m1_31_1m1[0] && 0.0f < _data_u_2m1_32_1m1[0]) ? (1) : (0)))) * _data_rho_2m1_1m1[0] * _data_u_2m1_30_1m1[0] * _data_u_2m1_31_1m1[0] * _data_u_2m1_32_1m1[0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0] && 0.0f > _data_u_20_31_10[_stride_u_0] && 0.0f > _data_u_20_32_10[_stride_u_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0] * _data_u_20_30_10[_stride_u_0] * _data_u_20_31_10[_stride_u_0] * _data_u_20_32_10[_stride_u_0] + _data_j_20_39_10[_stride_j_0];
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+            float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+            float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+            float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+            float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+            _data_j_20_37_10[_stride_j_0 * ctr_0] = (-1.0f * fabs(_data_u_20_30_10[_stride_u_0 * ctr_0]) + 1.0f) * ((float)(((0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + (-1.0f * fabs(_data_u_2m1_30_1m1[_stride_u_0 * ctr_0]) + 1.0f) * -1.0f * ((float)(((0.0f < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0] + _data_j_20_37_10[_stride_j_0 * ctr_0];
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+            float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+            float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+            float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+            float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+            float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+            float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+            float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+            float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+            float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+            _data_j_20_39_10[_stride_j_0 * ctr_0] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * ctr_0 - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * ctr_0 - _stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_31_10[_stride_u_0 * ctr_0] && 0.0f > _data_u_20_32_10[_stride_u_0 * ctr_0]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * ctr_0] * _data_u_20_30_10[_stride_u_0 * ctr_0] * _data_u_20_31_10[_stride_u_0 * ctr_0] * _data_u_20_32_10[_stride_u_0 * ctr_0] + _data_j_20_39_10[_stride_j_0 * ctr_0];
+          }
+        }
+        if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+          float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+          float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+          float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+          float *RESTRICT _data_u_2m1_30 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2;
+          float *RESTRICT _data_u_2m1_30_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_30;
+          float *RESTRICT _data_u_2m1_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + _stride_u_3;
+          float *RESTRICT _data_u_2m1_31_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_31;
+          float *RESTRICT _data_u_2m1_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) - _stride_u_2 + 2 * _stride_u_3;
+          float *RESTRICT _data_u_2m1_32_1m1 = _stride_u_1 * (_size_j_1 - 1) - _stride_u_1 + _data_u_2m1_32;
+          float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          float *RESTRICT _data_u_20_30 = _data_u + _stride_u_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_u_20_30_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_30;
+          float *RESTRICT _data_u_20_31 = _data_u + _stride_u_2 * (_size_j_2 - 1) + _stride_u_3;
+          float *RESTRICT _data_u_20_31_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_31;
+          float *RESTRICT _data_u_20_32 = _data_u + _stride_u_2 * (_size_j_2 - 1) + 2 * _stride_u_3;
+          float *RESTRICT _data_u_20_32_10 = _stride_u_1 * (_size_j_1 - 1) + _data_u_20_32;
+          _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = -1.0f * ((float)(((0.0f < _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] && 0.0f < _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0]) ? (1) : (0)))) * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] * _data_u_2m1_30_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_31_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] * _data_u_2m1_32_1m1[_stride_u_0 * (_size_j_0 - 1) - _stride_u_0] - 1.0f * ((float)(((0.0f > _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] && 0.0f > _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)]) ? (1) : (0)))) * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] * _data_u_20_30_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_31_10[_stride_u_0 * (_size_j_0 - 1)] * _data_u_20_32_10[_stride_u_0 * (_size_j_0 - 1)] + _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)];
+        }
+      }
+    }
+  }
+}
+} // namespace internal_47df4b171f276b8c3a55fc08d45e245e
+
+void AdvectiveFluxKernel_single_precision::run(IBlock *block) {
+  auto u = block->getData<field::GhostLayerField<float, 3>>(uID);
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+  auto rho = block->getData<field::GhostLayerField<float, 1>>(rhoID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()));
+  float *RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(rho->nrOfGhostLayers()));
+  float *RESTRICT const _data_rho = rho->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(u->nrOfGhostLayers()));
+  float *RESTRICT const _data_u = u->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(j->xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(j->xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(j->ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(j->ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(j->zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(j->zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  const int64_t _stride_u_0 = int64_t(u->xStride());
+  const int64_t _stride_u_1 = int64_t(u->yStride());
+  const int64_t _stride_u_2 = int64_t(u->zStride());
+  const int64_t _stride_u_3 = int64_t(1 * int64_t(u->fStride()));
+  internal_47df4b171f276b8c3a55fc08d45e245e::advectivefluxkernel_single_precision_advectivefluxkernel_single_precision(_data_j, _data_rho, _data_u, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2, _stride_u_0, _stride_u_1, _stride_u_2, _stride_u_3);
+}
+
+void AdvectiveFluxKernel_single_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto u = block->getData<field::GhostLayerField<float, 3>>(uID);
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+  auto rho = block->getData<field::GhostLayerField<float, 1>>(rhoID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()));
+  float *RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  float *RESTRICT const _data_rho = rho->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(u->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(u->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(u->nrOfGhostLayers()));
+  float *RESTRICT const _data_u = u->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  const int64_t _stride_u_0 = int64_t(u->xStride());
+  const int64_t _stride_u_1 = int64_t(u->yStride());
+  const int64_t _stride_u_2 = int64_t(u->zStride());
+  const int64_t _stride_u_3 = int64_t(1 * int64_t(u->fStride()));
+  internal_47df4b171f276b8c3a55fc08d45e245e::advectivefluxkernel_single_precision_advectivefluxkernel_single_precision(_data_j, _data_rho, _data_u, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2, _stride_u_0, _stride_u_1, _stride_u_2, _stride_u_3);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_single_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_single_precision.h
new file mode 100644
index 00000000000..ee59500015b
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/AdvectiveFluxKernel_single_precision.h
@@ -0,0 +1,104 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file AdvectiveFluxKernel_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class AdvectiveFluxKernel_single_precision {
+public:
+  AdvectiveFluxKernel_single_precision(BlockDataID jID_, BlockDataID rhoID_,
+                                       BlockDataID uID_)
+      : jID(jID_), rhoID(rhoID_), uID(uID_){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<AdvectiveFluxKernel_single_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<AdvectiveFluxKernel_single_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID jID;
+  BlockDataID rhoID;
+  BlockDataID uID;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/CMakeLists.txt b/src/walberla_bridge/src/electrokinetics/generated_kernels/CMakeLists.txt
new file mode 100644
index 00000000000..13ba02765e0
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/CMakeLists.txt
@@ -0,0 +1,29 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+foreach(precision double_precision single_precision)
+  target_sources(
+    espresso_walberla
+    PRIVATE DiffusiveFluxKernel_${precision}.cpp
+            DiffusiveFluxKernelWithElectrostatic_${precision}.cpp
+            ContinuityKernel_${precision}.cpp
+            AdvectiveFluxKernel_${precision}.cpp
+            FrictionCouplingKernel_${precision}.cpp FixedFlux_${precision}.cpp
+            Dirichlet_${precision}.cpp)
+endforeach()
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_double_precision.cpp
new file mode 100644
index 00000000000..46777265348
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_double_precision.cpp
@@ -0,0 +1,179 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ContinuityKernel_double_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ContinuityKernel_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_5c5e903f8ea7925cf790d7c2318b2c56 {
+static FUNC_PREFIX void continuitykernel_double_precision_continuitykernel_double_precision(double *RESTRICT const _data_j, double *RESTRICT _data_rho, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3, int64_t const _stride_rho_0, int64_t const _stride_rho_1, int64_t const _stride_rho_2) {
+  for (int64_t ctr_2 = 1; ctr_2 < _size_j_2 - 1; ctr_2 += 1) {
+    double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+    double *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+    double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_2m1_36 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 6 * _stride_j_3;
+    double *RESTRICT _data_j_2m1_310 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_2m1_38 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 8 * _stride_j_3;
+    double *RESTRICT _data_j_2m1_312 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 12 * _stride_j_3;
+    double *RESTRICT _data_j_21_35 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 5 * _stride_j_3;
+    double *RESTRICT _data_j_21_39 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_21_37 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 7 * _stride_j_3;
+    double *RESTRICT _data_j_21_311 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_j_21_32 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 2 * _stride_j_3;
+    for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+      double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+      double *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+      double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_2m1_36_10 = _stride_j_1 * ctr_1 + _data_j_2m1_36;
+      double *RESTRICT _data_j_2m1_310_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_2m1_310;
+      double *RESTRICT _data_j_2m1_38_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_2m1_38;
+      double *RESTRICT _data_j_2m1_312_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_2m1_312;
+      double *RESTRICT _data_j_20_33_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_20_33;
+      double *RESTRICT _data_j_20_31_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_20_31;
+      double *RESTRICT _data_j_20_34_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_20_34;
+      double *RESTRICT _data_j_21_35_10 = _stride_j_1 * ctr_1 + _data_j_21_35;
+      double *RESTRICT _data_j_21_39_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_21_39;
+      double *RESTRICT _data_j_21_37_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_21_37;
+      double *RESTRICT _data_j_21_311_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_21_311;
+      double *RESTRICT _data_j_21_32_10 = _stride_j_1 * ctr_1 + _data_j_21_32;
+      for (int64_t ctr_0 = 1; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+        _data_rho_20_10[_stride_rho_0 * ctr_0] = -1.0 * _data_j_20_30_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_310_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_311_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_312_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_31_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_32_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_33_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_34_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_35_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_36_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_37_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_38_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_39_10[_stride_j_0 * ctr_0] + _data_j_20_30_10[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_20_31_11[_stride_j_0 * ctr_0] + _data_j_20_33_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_20_34_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_21_311_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_21_32_10[_stride_j_0 * ctr_0] + _data_j_21_35_10[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_21_37_11[_stride_j_0 * ctr_0] + _data_j_21_39_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_310_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_312_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_36_10[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_38_11[_stride_j_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_5c5e903f8ea7925cf790d7c2318b2c56
+
+void ContinuityKernel_double_precision::run(IBlock *block) {
+  auto rho = block->getData<field::GhostLayerField<double, 1>>(rhoID);
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()));
+  double *RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(rho->nrOfGhostLayers()));
+  double *RESTRICT _data_rho = rho->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(j->xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(j->xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(j->ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(j->ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(j->zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(j->zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_5c5e903f8ea7925cf790d7c2318b2c56::continuitykernel_double_precision_continuitykernel_double_precision(_data_j, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2);
+}
+
+void ContinuityKernel_double_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho = block->getData<field::GhostLayerField<double, 1>>(rhoID);
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()));
+  double *RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  double *RESTRICT _data_rho = rho->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_5c5e903f8ea7925cf790d7c2318b2c56::continuitykernel_double_precision_continuitykernel_double_precision(_data_j, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_double_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_double_precision.h
new file mode 100644
index 00000000000..96c5b05f3ae
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_double_precision.h
@@ -0,0 +1,102 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ContinuityKernel_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ContinuityKernel_double_precision {
+public:
+  ContinuityKernel_double_precision(BlockDataID jID_, BlockDataID rhoID_)
+      : jID(jID_), rhoID(rhoID_){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ContinuityKernel_double_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ContinuityKernel_double_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID jID;
+  BlockDataID rhoID;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_single_precision.cpp
new file mode 100644
index 00000000000..4660e9ba857
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_single_precision.cpp
@@ -0,0 +1,179 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ContinuityKernel_single_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ContinuityKernel_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_990034b4e4dd57d2802b4bcb5f716e46 {
+static FUNC_PREFIX void continuitykernel_single_precision_continuitykernel_single_precision(float *RESTRICT const _data_j, float *RESTRICT _data_rho, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3, int64_t const _stride_rho_0, int64_t const _stride_rho_1, int64_t const _stride_rho_2) {
+  for (int64_t ctr_2 = 1; ctr_2 < _size_j_2 - 1; ctr_2 += 1) {
+    float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+    float *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+    float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_2m1_36 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 6 * _stride_j_3;
+    float *RESTRICT _data_j_2m1_310 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_2m1_38 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 8 * _stride_j_3;
+    float *RESTRICT _data_j_2m1_312 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 12 * _stride_j_3;
+    float *RESTRICT _data_j_21_35 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 5 * _stride_j_3;
+    float *RESTRICT _data_j_21_39 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_21_37 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 7 * _stride_j_3;
+    float *RESTRICT _data_j_21_311 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_j_21_32 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 2 * _stride_j_3;
+    for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+      float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+      float *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+      float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_2m1_36_10 = _stride_j_1 * ctr_1 + _data_j_2m1_36;
+      float *RESTRICT _data_j_2m1_310_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_2m1_310;
+      float *RESTRICT _data_j_2m1_38_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_2m1_38;
+      float *RESTRICT _data_j_2m1_312_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_2m1_312;
+      float *RESTRICT _data_j_20_33_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_20_33;
+      float *RESTRICT _data_j_20_31_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_20_31;
+      float *RESTRICT _data_j_20_34_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_20_34;
+      float *RESTRICT _data_j_21_35_10 = _stride_j_1 * ctr_1 + _data_j_21_35;
+      float *RESTRICT _data_j_21_39_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_21_39;
+      float *RESTRICT _data_j_21_37_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_21_37;
+      float *RESTRICT _data_j_21_311_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_21_311;
+      float *RESTRICT _data_j_21_32_10 = _stride_j_1 * ctr_1 + _data_j_21_32;
+      for (int64_t ctr_0 = 1; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+        _data_rho_20_10[_stride_rho_0 * ctr_0] = -1.0f * _data_j_20_30_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_310_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_311_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_312_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_31_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_32_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_33_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_34_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_35_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_36_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_37_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_38_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_39_10[_stride_j_0 * ctr_0] + _data_j_20_30_10[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_20_31_11[_stride_j_0 * ctr_0] + _data_j_20_33_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_20_34_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_21_311_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_21_32_10[_stride_j_0 * ctr_0] + _data_j_21_35_10[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_21_37_11[_stride_j_0 * ctr_0] + _data_j_21_39_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_310_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_312_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_36_10[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_38_11[_stride_j_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_990034b4e4dd57d2802b4bcb5f716e46
+
+void ContinuityKernel_single_precision::run(IBlock *block) {
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+  auto rho = block->getData<field::GhostLayerField<float, 1>>(rhoID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()));
+  float *RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(rho->nrOfGhostLayers()));
+  float *RESTRICT _data_rho = rho->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(j->xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(j->xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(j->ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(j->ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(j->zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(j->zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_990034b4e4dd57d2802b4bcb5f716e46::continuitykernel_single_precision_continuitykernel_single_precision(_data_j, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2);
+}
+
+void ContinuityKernel_single_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+  auto rho = block->getData<field::GhostLayerField<float, 1>>(rhoID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()));
+  float *RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  float *RESTRICT _data_rho = rho->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_990034b4e4dd57d2802b4bcb5f716e46::continuitykernel_single_precision_continuitykernel_single_precision(_data_j, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_single_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_single_precision.h
new file mode 100644
index 00000000000..9b246cd1828
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/ContinuityKernel_single_precision.h
@@ -0,0 +1,102 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ContinuityKernel_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ContinuityKernel_single_precision {
+public:
+  ContinuityKernel_single_precision(BlockDataID jID_, BlockDataID rhoID_)
+      : jID(jID_), rhoID(rhoID_){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ContinuityKernel_single_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ContinuityKernel_single_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID jID;
+  BlockDataID rhoID;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp
new file mode 100644
index 00000000000..39b73e77a9d
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp
@@ -0,0 +1,1484 @@
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include "DensityPackInfo_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "stencil/Directions.h"
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+using walberla::cell::CellInterval;
+using walberla::stencil::Direction;
+
+namespace internal_pack_BSW {
+static FUNC_PREFIX void pack_BSW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BSW
+
+namespace internal_pack_SW {
+static FUNC_PREFIX void pack_SW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_SW
+
+namespace internal_pack_TSW {
+static FUNC_PREFIX void pack_TSW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TSW
+
+namespace internal_pack_BW {
+static FUNC_PREFIX void pack_BW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BW
+
+namespace internal_pack_W {
+static FUNC_PREFIX void pack_W(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    double *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      double *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3] = _data_j_20_35_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 4] = _data_j_20_30_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5] = _data_j_20_36_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7] = _data_j_20_34_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_W
+
+namespace internal_pack_TW {
+static FUNC_PREFIX void pack_TW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TW
+
+namespace internal_pack_BNW {
+static FUNC_PREFIX void pack_BNW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BNW
+
+namespace internal_pack_NW {
+static FUNC_PREFIX void pack_NW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_NW
+
+namespace internal_pack_TNW {
+static FUNC_PREFIX void pack_TNW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TNW
+
+namespace internal_pack_BS {
+static FUNC_PREFIX void pack_BS(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_37_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BS
+
+namespace internal_pack_S {
+static FUNC_PREFIX void pack_S(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0];
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4] = _data_j_20_31_10[_stride_j_0 * ctr_0];
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5] = _data_j_20_38_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_S
+
+namespace internal_pack_TS {
+static FUNC_PREFIX void pack_TS(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_38_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TS
+
+namespace internal_pack_B {
+static FUNC_PREFIX void pack_B(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0];
+        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0];
+        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 4] = _data_j_20_32_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_B
+
+namespace internal_pack_T {
+static FUNC_PREFIX void pack_T(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0];
+        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3] = _data_j_20_38_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_T
+
+namespace internal_pack_BN {
+static FUNC_PREFIX void pack_BN(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BN
+
+namespace internal_pack_N {
+static FUNC_PREFIX void pack_N(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_N
+
+namespace internal_pack_TN {
+static FUNC_PREFIX void pack_TN(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TN
+
+namespace internal_unpack_BSW {
+static FUNC_PREFIX void unpack_BSW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BSW
+
+namespace internal_unpack_SW {
+static FUNC_PREFIX void unpack_SW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
+        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_SW
+
+namespace internal_unpack_TSW {
+static FUNC_PREFIX void unpack_TSW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TSW
+
+namespace internal_unpack_BW {
+static FUNC_PREFIX void unpack_BW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
+        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BW
+
+namespace internal_unpack_W {
+static FUNC_PREFIX void unpack_W(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    double *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      double *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0];
+        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1];
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2];
+        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3];
+        _data_j_20_30_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 4];
+        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5];
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6];
+        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7];
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_W
+
+namespace internal_unpack_TW {
+static FUNC_PREFIX void unpack_TW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
+        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TW
+
+namespace internal_unpack_BNW {
+static FUNC_PREFIX void unpack_BNW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BNW
+
+namespace internal_unpack_NW {
+static FUNC_PREFIX void unpack_NW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
+        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_NW
+
+namespace internal_unpack_TNW {
+static FUNC_PREFIX void unpack_TNW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TNW
+
+namespace internal_unpack_BS {
+static FUNC_PREFIX void unpack_BS(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0];
+        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BS
+
+namespace internal_unpack_S {
+static FUNC_PREFIX void unpack_S(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0];
+        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1];
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 2];
+        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3];
+        _data_j_20_31_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4];
+        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_S
+
+namespace internal_unpack_TS {
+static FUNC_PREFIX void unpack_TS(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0];
+        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TS
+
+namespace internal_unpack_B {
+static FUNC_PREFIX void unpack_B(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0];
+        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1];
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2];
+        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3];
+        _data_j_20_32_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 4];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_B
+
+namespace internal_unpack_T {
+static FUNC_PREFIX void unpack_T(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0];
+        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1];
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2];
+        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_T
+
+namespace internal_unpack_BN {
+static FUNC_PREFIX void unpack_BN(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BN
+
+namespace internal_unpack_N {
+static FUNC_PREFIX void unpack_N(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
+        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_N
+
+namespace internal_unpack_TN {
+static FUNC_PREFIX void unpack_TN(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TN
+
+void DensityPackInfo_double_precision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const {
+  double *buffer = reinterpret_cast<double *>(byte_buffer);
+
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  CellInterval ci;
+  j->getSliceBeforeGhostLayer(dir, ci, 1, false);
+
+  switch (dir) {
+  case stencil::BSW: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_BSW::pack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::SW: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_SW::pack_SW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TSW: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_TSW::pack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BW: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_BW::pack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::W: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_W::pack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TW: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_TW::pack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BNW: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_BNW::pack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::NW: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_NW::pack_NW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TNW: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_TNW::pack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BS: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_BS::pack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::S: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_S::pack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TS: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_TS::pack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::B: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_B::pack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::T: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_T::pack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BN: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_BN::pack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::N: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_N::pack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TN: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_TN::pack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  default:
+    WALBERLA_ASSERT(false);
+  }
+}
+
+void DensityPackInfo_double_precision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const {
+  double *buffer = reinterpret_cast<double *>(byte_buffer);
+
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  CellInterval ci;
+  j->getGhostRegion(dir, ci, 1, false);
+  auto communciationDirection = stencil::inverseDir[dir];
+
+  switch (communciationDirection) {
+  case stencil::BSW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_BSW::unpack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::SW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_SW::unpack_SW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TSW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_TSW::unpack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_BW::unpack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::W: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_W::unpack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_TW::unpack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BNW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_BNW::unpack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::NW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_NW::unpack_NW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TNW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_TNW::unpack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BS: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_BS::unpack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::S: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_S::unpack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TS: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_TS::unpack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::B: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_B::unpack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::T: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_T::unpack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BN: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_BN::unpack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::N: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_N::unpack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TN: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_TN::unpack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  default:
+    WALBERLA_ASSERT(false);
+  }
+}
+
+uint_t DensityPackInfo_double_precision::size(stencil::Direction dir, const IBlock *block) const {
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  CellInterval ci;
+  j->getGhostRegion(dir, ci, 1, false);
+
+  uint_t elementsPerCell = 0;
+
+  switch (dir) {
+  case stencil::BSW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::SW:
+    elementsPerCell = 3;
+    break;
+
+  case stencil::TSW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BW:
+    elementsPerCell = 3;
+    break;
+
+  case stencil::W:
+    elementsPerCell = 9;
+    break;
+
+  case stencil::TW:
+    elementsPerCell = 3;
+    break;
+
+  case stencil::BNW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::NW:
+    elementsPerCell = 3;
+    break;
+
+  case stencil::TNW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BS:
+    elementsPerCell = 2;
+    break;
+
+  case stencil::S:
+    elementsPerCell = 6;
+    break;
+
+  case stencil::TS:
+    elementsPerCell = 2;
+    break;
+
+  case stencil::B:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::T:
+    elementsPerCell = 4;
+    break;
+
+  case stencil::BN:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::N:
+    elementsPerCell = 3;
+    break;
+
+  case stencil::TN:
+    elementsPerCell = 1;
+    break;
+
+  default:
+    elementsPerCell = 0;
+  }
+  return ci.numCells() * elementsPerCell * sizeof(double);
+}
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h
new file mode 100644
index 00000000000..d5cb19678b2
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h
@@ -0,0 +1,67 @@
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "communication/UniformPackInfo.h"
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "domain_decomposition/IBlock.h"
+#include "field/GhostLayerField.h"
+#include "stencil/Directions.h"
+
+#define FUNC_PREFIX
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class DensityPackInfo_double_precision
+    : public ::walberla::communication::UniformPackInfo {
+public:
+  DensityPackInfo_double_precision(BlockDataID jID_) : jID(jID_){};
+  virtual ~DensityPackInfo_double_precision() {}
+
+  bool constantDataExchange() const { return true; }
+  bool threadsafeReceiving() const { return true; }
+
+  void unpackData(IBlock *receiver, stencil::Direction dir,
+                  mpi::RecvBuffer &buffer) {
+    const auto dataSize = size(dir, receiver);
+    unpack(dir, buffer.skip(dataSize), receiver);
+  }
+
+  void communicateLocal(const IBlock *sender, IBlock *receiver,
+                        stencil::Direction dir) {
+    // TODO: optimize by generating kernel for this case
+    mpi::SendBuffer sBuffer;
+    packData(sender, dir, sBuffer);
+    mpi::RecvBuffer rBuffer(sBuffer);
+    unpackData(receiver, stencil::inverseDir[dir], rBuffer);
+  }
+
+  void packDataImpl(const IBlock *sender, stencil::Direction dir,
+                    mpi::SendBuffer &outBuffer) const {
+    const auto dataSize = size(dir, sender);
+    pack(dir, outBuffer.forward(dataSize), const_cast<IBlock *>(sender));
+  }
+
+  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const;
+  void unpack(stencil::Direction dir, unsigned char *buffer,
+              IBlock *block) const;
+  uint_t size(stencil::Direction dir, const IBlock *block) const;
+
+private:
+  BlockDataID jID;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp
new file mode 100644
index 00000000000..fab1ca30230
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp
@@ -0,0 +1,1484 @@
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include "DensityPackInfo_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "stencil/Directions.h"
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+using walberla::cell::CellInterval;
+using walberla::stencil::Direction;
+
+namespace internal_pack_BSW {
+static FUNC_PREFIX void pack_BSW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BSW
+
+namespace internal_pack_SW {
+static FUNC_PREFIX void pack_SW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_SW
+
+namespace internal_pack_TSW {
+static FUNC_PREFIX void pack_TSW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TSW
+
+namespace internal_pack_BW {
+static FUNC_PREFIX void pack_BW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BW
+
+namespace internal_pack_W {
+static FUNC_PREFIX void pack_W(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    float *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      float *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3] = _data_j_20_35_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 4] = _data_j_20_30_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5] = _data_j_20_36_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7] = _data_j_20_34_10[_stride_j_0 * ctr_0];
+        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_W
+
+namespace internal_pack_TW {
+static FUNC_PREFIX void pack_TW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TW
+
+namespace internal_pack_BNW {
+static FUNC_PREFIX void pack_BNW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BNW
+
+namespace internal_pack_NW {
+static FUNC_PREFIX void pack_NW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_NW
+
+namespace internal_pack_TNW {
+static FUNC_PREFIX void pack_TNW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TNW
+
+namespace internal_pack_BS {
+static FUNC_PREFIX void pack_BS(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_37_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BS
+
+namespace internal_pack_S {
+static FUNC_PREFIX void pack_S(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0];
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4] = _data_j_20_31_10[_stride_j_0 * ctr_0];
+        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5] = _data_j_20_38_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_S
+
+namespace internal_pack_TS {
+static FUNC_PREFIX void pack_TS(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_38_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TS
+
+namespace internal_pack_B {
+static FUNC_PREFIX void pack_B(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
+        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0];
+        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0];
+        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 4] = _data_j_20_32_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_B
+
+namespace internal_pack_T {
+static FUNC_PREFIX void pack_T(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
+        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0];
+        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3] = _data_j_20_38_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_T
+
+namespace internal_pack_BN {
+static FUNC_PREFIX void pack_BN(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BN
+
+namespace internal_pack_N {
+static FUNC_PREFIX void pack_N(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0];
+        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_N
+
+namespace internal_pack_TN {
+static FUNC_PREFIX void pack_TN(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TN
+
+namespace internal_unpack_BSW {
+static FUNC_PREFIX void unpack_BSW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BSW
+
+namespace internal_unpack_SW {
+static FUNC_PREFIX void unpack_SW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
+        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_SW
+
+namespace internal_unpack_TSW {
+static FUNC_PREFIX void unpack_TSW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TSW
+
+namespace internal_unpack_BW {
+static FUNC_PREFIX void unpack_BW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
+        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BW
+
+namespace internal_unpack_W {
+static FUNC_PREFIX void unpack_W(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    float *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      float *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0];
+        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1];
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2];
+        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3];
+        _data_j_20_30_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 4];
+        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5];
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6];
+        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7];
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_W
+
+namespace internal_unpack_TW {
+static FUNC_PREFIX void unpack_TW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
+        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TW
+
+namespace internal_unpack_BNW {
+static FUNC_PREFIX void unpack_BNW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BNW
+
+namespace internal_unpack_NW {
+static FUNC_PREFIX void unpack_NW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
+        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_NW
+
+namespace internal_unpack_TNW {
+static FUNC_PREFIX void unpack_TNW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TNW
+
+namespace internal_unpack_BS {
+static FUNC_PREFIX void unpack_BS(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0];
+        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BS
+
+namespace internal_unpack_S {
+static FUNC_PREFIX void unpack_S(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0];
+        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1];
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 2];
+        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3];
+        _data_j_20_31_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4];
+        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_S
+
+namespace internal_unpack_TS {
+static FUNC_PREFIX void unpack_TS(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0];
+        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TS
+
+namespace internal_unpack_B {
+static FUNC_PREFIX void unpack_B(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0];
+        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1];
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2];
+        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3];
+        _data_j_20_32_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 4];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_B
+
+namespace internal_unpack_T {
+static FUNC_PREFIX void unpack_T(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0];
+        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1];
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2];
+        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_T
+
+namespace internal_unpack_BN {
+static FUNC_PREFIX void unpack_BN(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BN
+
+namespace internal_unpack_N {
+static FUNC_PREFIX void unpack_N(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
+        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_N
+
+namespace internal_unpack_TN {
+static FUNC_PREFIX void unpack_TN(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
+        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TN
+
+void DensityPackInfo_single_precision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const {
+  float *buffer = reinterpret_cast<float *>(byte_buffer);
+
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+
+  CellInterval ci;
+  j->getSliceBeforeGhostLayer(dir, ci, 1, false);
+
+  switch (dir) {
+  case stencil::BSW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_BSW::pack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::SW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_SW::pack_SW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TSW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_TSW::pack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_BW::pack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::W: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_W::pack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_TW::pack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BNW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_BNW::pack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::NW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_NW::pack_NW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TNW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_TNW::pack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BS: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_BS::pack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::S: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_S::pack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TS: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_TS::pack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::B: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_B::pack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::T: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_T::pack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BN: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_BN::pack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::N: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_N::pack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TN: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_pack_TN::pack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  default:
+    WALBERLA_ASSERT(false);
+  }
+}
+
+void DensityPackInfo_single_precision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const {
+  float *buffer = reinterpret_cast<float *>(byte_buffer);
+
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+
+  CellInterval ci;
+  j->getGhostRegion(dir, ci, 1, false);
+  auto communciationDirection = stencil::inverseDir[dir];
+
+  switch (communciationDirection) {
+  case stencil::BSW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_BSW::unpack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::SW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_SW::unpack_SW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TSW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_TSW::unpack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_BW::unpack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::W: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_W::unpack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_TW::unpack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BNW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_BNW::unpack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::NW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_NW::unpack_NW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TNW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_TNW::unpack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BS: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_BS::unpack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::S: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_S::unpack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TS: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_TS::unpack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::B: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_B::unpack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::T: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_T::unpack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::BN: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_BN::unpack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::N: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_N::unpack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  case stencil::TN: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
+    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+    const int64_t _stride_j_0 = int64_t(j->xStride());
+    const int64_t _stride_j_1 = int64_t(j->yStride());
+    const int64_t _stride_j_2 = int64_t(j->zStride());
+    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+    internal_unpack_TN::unpack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
+    break;
+  }
+
+  default:
+    WALBERLA_ASSERT(false);
+  }
+}
+
+uint_t DensityPackInfo_single_precision::size(stencil::Direction dir, const IBlock *block) const {
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+
+  CellInterval ci;
+  j->getGhostRegion(dir, ci, 1, false);
+
+  uint_t elementsPerCell = 0;
+
+  switch (dir) {
+  case stencil::BSW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::SW:
+    elementsPerCell = 3;
+    break;
+
+  case stencil::TSW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BW:
+    elementsPerCell = 3;
+    break;
+
+  case stencil::W:
+    elementsPerCell = 9;
+    break;
+
+  case stencil::TW:
+    elementsPerCell = 3;
+    break;
+
+  case stencil::BNW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::NW:
+    elementsPerCell = 3;
+    break;
+
+  case stencil::TNW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BS:
+    elementsPerCell = 2;
+    break;
+
+  case stencil::S:
+    elementsPerCell = 6;
+    break;
+
+  case stencil::TS:
+    elementsPerCell = 2;
+    break;
+
+  case stencil::B:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::T:
+    elementsPerCell = 4;
+    break;
+
+  case stencil::BN:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::N:
+    elementsPerCell = 3;
+    break;
+
+  case stencil::TN:
+    elementsPerCell = 1;
+    break;
+
+  default:
+    elementsPerCell = 0;
+  }
+  return ci.numCells() * elementsPerCell * sizeof(float);
+}
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h
new file mode 100644
index 00000000000..08ea0c09882
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h
@@ -0,0 +1,67 @@
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "communication/UniformPackInfo.h"
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "domain_decomposition/IBlock.h"
+#include "field/GhostLayerField.h"
+#include "stencil/Directions.h"
+
+#define FUNC_PREFIX
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class DensityPackInfo_single_precision
+    : public ::walberla::communication::UniformPackInfo {
+public:
+  DensityPackInfo_single_precision(BlockDataID jID_) : jID(jID_){};
+  virtual ~DensityPackInfo_single_precision() {}
+
+  bool constantDataExchange() const { return true; }
+  bool threadsafeReceiving() const { return true; }
+
+  void unpackData(IBlock *receiver, stencil::Direction dir,
+                  mpi::RecvBuffer &buffer) {
+    const auto dataSize = size(dir, receiver);
+    unpack(dir, buffer.skip(dataSize), receiver);
+  }
+
+  void communicateLocal(const IBlock *sender, IBlock *receiver,
+                        stencil::Direction dir) {
+    // TODO: optimize by generating kernel for this case
+    mpi::SendBuffer sBuffer;
+    packData(sender, dir, sBuffer);
+    mpi::RecvBuffer rBuffer(sBuffer);
+    unpackData(receiver, stencil::inverseDir[dir], rBuffer);
+  }
+
+  void packDataImpl(const IBlock *sender, stencil::Direction dir,
+                    mpi::SendBuffer &outBuffer) const {
+    const auto dataSize = size(dir, sender);
+    pack(dir, outBuffer.forward(dataSize), const_cast<IBlock *>(sender));
+  }
+
+  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const;
+  void unpack(stencil::Direction dir, unsigned char *buffer,
+              IBlock *block) const;
+  uint_t size(stencil::Direction dir, const IBlock *block) const;
+
+private:
+  BlockDataID jID;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_double_precision.cpp
new file mode 100644
index 00000000000..6306281641a
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_double_precision.cpp
@@ -0,0 +1,1200 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file DiffusiveFluxKernelWithElectrostatic_double_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "DiffusiveFluxKernelWithElectrostatic_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_32fac7f834b08f4a768ccef85dadf7a1 {
+static FUNC_PREFIX void diffusivefluxkernelwithelectrostatic_double_precision_diffusivefluxkernelwithelectrostatic_double_precision(double D, double *RESTRICT const _data_j, double *RESTRICT const _data_phi, double *RESTRICT const _data_rho, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3, int64_t const _stride_phi_0, int64_t const _stride_phi_1, int64_t const _stride_phi_2, int64_t const _stride_rho_0, int64_t const _stride_rho_1, int64_t const _stride_rho_2, double f_ext_0, double f_ext_1, double f_ext_2, double kT, double z) {
+  {
+    {
+      {
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          double *RESTRICT _data_phi_20 = _data_phi;
+          double *RESTRICT _data_phi_20_10 = _data_phi_20;
+          double *RESTRICT _data_rho_20 = _data_rho;
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+          double *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+          _data_j_20_312_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_21_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_21_11[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_11[0] + kT * -2.0 * _data_rho_21_11[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_11[0] * _data_rho_21_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_11[0]) * 0.04703213011469496 * ((1.0) / (kT));
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+            double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+            double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+            double *RESTRICT _data_phi_20 = _data_phi;
+            double *RESTRICT _data_phi_20_10 = _data_phi_20;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _data_rho_20;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+            double *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+            _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+          }
+        }
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          double *RESTRICT _data_phi_20 = _data_phi;
+          double *RESTRICT _data_phi_20_10 = _data_phi_20;
+          double *RESTRICT _data_rho_20 = _data_rho;
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+          double *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+          _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              double *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+              double *RESTRICT _data_phi_20 = _data_phi;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              _data_j_20_36_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * 2.0 + kT * (-1.0 * _data_rho_21_10[0] + _data_rho_20_10[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * (-1.0 * _data_phi_20_10[0] - 1.0 * _data_phi_21_10[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * (-1.0 * _data_phi_21_10[0] - 1.0 * _data_phi_21_10[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_phi_20 = _data_phi;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+              _data_j_20_38_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              double *RESTRICT _data_phi_20 = _data_phi;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_1m1[0] + kT * -2.0 * _data_rho_21_1m1[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_1m1[0] * _data_rho_21_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_1m1[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              double *RESTRICT _data_phi_20 = _data_phi;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              double *RESTRICT _data_phi_21_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_21_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_21_11[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_11[0] + kT * -2.0 * _data_rho_21_11[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_11[0] * _data_rho_21_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_11[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              double *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+              double *RESTRICT _data_phi_20 = _data_phi;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_21_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_21_10[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_phi_20 = _data_phi;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * 2.0 + kT * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              double *RESTRICT _data_phi_20 = _data_phi;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              double *RESTRICT _data_phi_20 = _data_phi;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              double *RESTRICT _data_phi_21_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              double *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+              double *RESTRICT _data_phi_20 = _data_phi;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              double *RESTRICT _data_phi_20 = _data_phi;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              double *RESTRICT _data_phi_20 = _data_phi;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              double *RESTRICT _data_phi_21_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_phi_20 = _data_phi;
+            double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+            double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+            double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+            double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            double *RESTRICT _data_phi_21_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_21;
+            _data_j_20_38_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0])) * 0.028801180074297286 * ((1.0) / (kT));
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            double *RESTRICT _data_phi_20 = _data_phi;
+            double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+            double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+            _data_j_20_310_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_1m1[0] + kT * -2.0 * _data_rho_21_1m1[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_1m1[0] * _data_rho_21_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_1m1[0]) * 0.04703213011469496 * ((1.0) / (kT));
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_phi_20 = _data_phi;
+            double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+            double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+            double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+            double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            double *RESTRICT _data_phi_21_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_21;
+            _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * 2.0 + kT * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0])) * 0.028801180074297286 * ((1.0) / (kT));
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            double *RESTRICT _data_phi_20 = _data_phi;
+            double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+            double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+            _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+          }
+        }
+        if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+          double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+          double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+          double *RESTRICT _data_phi_20 = _data_phi;
+          double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+          double *RESTRICT _data_rho_20 = _data_rho;
+          double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+          double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+          double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+          _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+        }
+      }
+    }
+    for (int64_t ctr_2 = 1; ctr_2 < _size_j_2 - 1; ctr_2 += 1) {
+      double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+      double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+      double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+      double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+      double *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+      double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+      double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+      double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+      double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+      double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+      double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+      double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+      double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+      {
+        {
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              double *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_11 = _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_20_10 = _data_phi_20;
+              _data_j_20_34_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * 2.0 + kT * (-1.0 * _data_rho_20_11[0] + _data_rho_20_10[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * (-1.0 * _data_phi_20_10[0] - 1.0 * _data_phi_20_11[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_11[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * (-1.0 * _data_phi_20_11[0] - 1.0 * _data_phi_20_11[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_2m1_11[0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[0] + kT * -2.0 * _data_rho_2m1_11[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_11[0] * _data_rho_2m1_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_11[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              double *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_21_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_21_11[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_11[0] + kT * -2.0 * _data_rho_21_11[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_11[0] * _data_rho_21_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_11[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              double *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_11 = _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_20_10 = _data_phi_20;
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_20_11[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_11[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_11[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_20_11[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              double *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              double *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_11 = _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_20_10 = _data_phi_20;
+              _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              double *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+        }
+        for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+          double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+          double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+          double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+          double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+          double *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+          double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+          double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+          double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+          double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+          double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+          double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+          double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+          double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+          {
+            double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+            double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+            _data_j_20_30_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[0] + _data_rho_20_10[_stride_rho_0]) * -1.0 + kT * (-1.0 * _data_rho_20_10[0] + _data_rho_20_10[_stride_rho_0]) * 2.0 + z * (-1.0 * _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0]) * (_data_rho_20_10[0] + _data_rho_20_10[_stride_rho_0])) * 0.081462038946841925 * ((1.0) / (kT));
+            double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+            double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+            _data_j_20_33_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * -2.0 + kT * (-1.0 * _data_rho_20_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * (-1.0 * _data_phi_20_10[0] - 1.0 * _data_phi_20_1m1[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * (-1.0 * _data_phi_20_1m1[0] - 1.0 * _data_phi_20_1m1[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            double *RESTRICT _data_rho_20_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_20;
+            double *RESTRICT _data_phi_20_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_20;
+            _data_j_20_34_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * 2.0 + kT * (-1.0 * _data_rho_20_11[0] + _data_rho_20_10[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * (-1.0 * _data_phi_20_10[0] - 1.0 * _data_phi_20_11[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_11[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * (-1.0 * _data_phi_20_11[0] - 1.0 * _data_phi_20_11[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+            double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+            double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+            _data_j_20_35_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * (-1.0 * _data_phi_20_10[0] - 1.0 * _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[0] + _data_phi_2m1_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * (-1.0 * _data_phi_20_10[0] - 1.0 * _data_phi_2m1_10[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+            double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+            double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+            double *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+            _data_j_20_36_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * 2.0 + kT * (-1.0 * _data_rho_21_10[0] + _data_rho_20_10[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * (-1.0 * _data_phi_20_10[0] - 1.0 * _data_phi_21_10[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * (-1.0 * _data_phi_21_10[0] - 1.0 * _data_phi_21_10[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_39_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[0] + kT * -2.0 * _data_rho_2m1_1m1[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_1m1[0] * _data_rho_2m1_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_1m1[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+            _data_j_20_310_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_1m1[0] + kT * -2.0 * _data_rho_21_1m1[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_1m1[0] * _data_rho_21_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_1m1[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_phi_2m1_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_311_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_2m1_11[0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[0] + kT * -2.0 * _data_rho_2m1_11[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_11[0] * _data_rho_2m1_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_11[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_phi_21_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_21;
+            _data_j_20_312_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_21_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_21_11[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_11[0] + kT * -2.0 * _data_rho_21_11[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_11[0] * _data_rho_21_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_11[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            {
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+                double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+                double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+                double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+                double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+                _data_j_20_31_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[_stride_rho_0]) * -1.0 + kT * (-1.0 * _data_rho_20_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 2.0 + z * (-1.0 * _data_phi_20_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0]) * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[_stride_rho_0])) * 0.081462038946841925 * ((1.0) / (kT));
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+                double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+                double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+                double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+                double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+                double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+                double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+                double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                _data_j_20_32_10[_stride_j_0] = D * (f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0]) + kT * (-1.0 * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0]) * 2.0 + z * (-1.0 * _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0])) * -0.081462038946841925 * ((1.0) / (kT));
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1) {
+                double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+                double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+                double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+                double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+                double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+                double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+                double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+                double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+                _data_j_20_37_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0] - 1.0 * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+              }
+              if (ctr_1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+                double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+                double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+                double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+                double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+                double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+                double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+                double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+                double *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+                _data_j_20_38_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0])) * 0.028801180074297286 * ((1.0) / (kT));
+              }
+            }
+            for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+              _data_j_20_30_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * -1.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 2.0 + z * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0]) * (_data_rho_20_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0])) * 0.081462038946841925 * ((1.0) / (kT));
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0]) * -1.0 + kT * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 2.0 + z * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0]) * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0])) * 0.081462038946841925 * ((1.0) / (kT));
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = D * (f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0]) + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0]) * 2.0 + z * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0])) * -0.081462038946841925 * ((1.0) / (kT));
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0 + kT * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286 * ((1.0) / (kT));
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_20_11[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_11[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_11[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_20_11[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286 * ((1.0) / (kT));
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_21_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_21_10[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286 * ((1.0) / (kT));
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * 2.0 + kT * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0])) * 0.028801180074297286 * ((1.0) / (kT));
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            _data_j_20_30_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * -1.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 2.0 + z * (-1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)]) * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)])) * 0.081462038946841925 * ((1.0) / (kT));
+            _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0 + kT * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286 * ((1.0) / (kT));
+            _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286 * ((1.0) / (kT));
+            _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+            _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286 * ((1.0) / (kT));
+            _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            {
+            }
+          }
+        }
+        {
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              double *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              _data_j_20_31_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[_stride_rho_0]) * -1.0 + kT * (-1.0 * _data_rho_20_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 2.0 + z * (-1.0 * _data_phi_20_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0]) * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[_stride_rho_0])) * 0.081462038946841925 * ((1.0) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              double *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              _data_j_20_33_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * -2.0 + kT * (-1.0 * _data_rho_20_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * (-1.0 * _data_phi_20_10[0] - 1.0 * _data_phi_20_1m1[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * (-1.0 * _data_phi_20_1m1[0] - 1.0 * _data_phi_20_1m1[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+              double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_37_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0] - 1.0 * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (_size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_phi_21_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_21;
+              _data_j_20_38_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[0] + kT * -2.0 * _data_rho_2m1_1m1[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_1m1[0] * _data_rho_2m1_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_1m1[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_1m1[0] + kT * -2.0 * _data_rho_21_1m1[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_21_1m1[0] * _data_rho_21_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_1m1[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              double *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0]) * -1.0 + kT * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 2.0 + z * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0]) * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0])) * 0.081462038946841925 * ((1.0) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              double *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0 + kT * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+              double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (_size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_phi_21_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_21;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * -2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * 2.0 + kT * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_21_10[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              double *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0 + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0 + kT * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              double *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+        }
+      }
+    }
+    {
+      {
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_phi_20_10 = _data_phi_20;
+          double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+          double *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+          _data_j_20_311_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_2m1_11[0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[0] + kT * -2.0 * _data_rho_2m1_11[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_11[0] * _data_rho_2m1_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_11[0]) * 0.04703213011469496 * ((1.0) / (kT));
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+            double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+            double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+            double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_phi_20_10 = _data_phi_20;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _data_rho_20;
+            double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+            double *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+          }
+        }
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_phi_20_10 = _data_phi_20;
+          double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+          double *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+          _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_32_10[_stride_j_0] = D * (f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0]) + kT * (-1.0 * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0]) * 2.0 + z * (-1.0 * _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0])) * -0.081462038946841925 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              _data_j_20_35_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * (-1.0 * _data_phi_20_10[0] - 1.0 * _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[0] + _data_phi_2m1_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * (-1.0 * _data_phi_20_10[0] - 1.0 * _data_phi_2m1_10[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_37_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0] - 1.0 * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[0] + kT * -2.0 * _data_rho_2m1_1m1[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_1m1[0] * _data_rho_2m1_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_1m1[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_2m1_11[0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[0] + kT * -2.0 * _data_rho_2m1_11[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_11[0] * _data_rho_2m1_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_11[0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = D * (f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0]) + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0]) * 2.0 + z * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0])) * -0.081462038946841925 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0 * _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0 * _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              double *RESTRICT _data_phi_2m1_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+            double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+            double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+            double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_37_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0] - 1.0 * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+            double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_39_10[_stride_j_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[0] + kT * -2.0 * _data_rho_2m1_1m1[0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0 * _data_phi_2m1_1m1[0] * _data_rho_2m1_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_1m1[0]) * 0.04703213011469496 * ((1.0) / (kT));
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            double *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+            double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+            double *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+            double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0 + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0 + kT * (-1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 4.0 + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_10[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0 * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0) * -0.028801180074297286 * ((1.0) / (kT));
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+            double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+          }
+        }
+        if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+          double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+          double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+          double *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+          double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+          double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          double *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+          double *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+          _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0 * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496 * ((1.0) / (kT));
+        }
+      }
+    }
+  }
+}
+} // namespace internal_32fac7f834b08f4a768ccef85dadf7a1
+
+void DiffusiveFluxKernelWithElectrostatic_double_precision::run(IBlock *block) {
+  auto rho = block->getData<field::GhostLayerField<double, 1>>(rhoID);
+  auto phi = block->getData<field::GhostLayerField<double, 1>>(phiID);
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  auto &f_ext_2 = this->f_ext_2_;
+  auto &z = this->z_;
+  auto &f_ext_1 = this->f_ext_1_;
+  auto &f_ext_0 = this->f_ext_0_;
+  auto &kT = this->kT_;
+  auto &D = this->D_;
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()));
+  double *RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(phi->nrOfGhostLayers()));
+  double *RESTRICT const _data_phi = phi->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(rho->nrOfGhostLayers()));
+  double *RESTRICT const _data_rho = rho->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(j->xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(j->xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(j->ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(j->ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(j->zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(j->zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_phi_0 = int64_t(phi->xStride());
+  const int64_t _stride_phi_1 = int64_t(phi->yStride());
+  const int64_t _stride_phi_2 = int64_t(phi->zStride());
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_32fac7f834b08f4a768ccef85dadf7a1::diffusivefluxkernelwithelectrostatic_double_precision_diffusivefluxkernelwithelectrostatic_double_precision(D, _data_j, _data_phi, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_phi_0, _stride_phi_1, _stride_phi_2, _stride_rho_0, _stride_rho_1, _stride_rho_2, f_ext_0, f_ext_1, f_ext_2, kT, z);
+}
+
+void DiffusiveFluxKernelWithElectrostatic_double_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho = block->getData<field::GhostLayerField<double, 1>>(rhoID);
+  auto phi = block->getData<field::GhostLayerField<double, 1>>(phiID);
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  auto &f_ext_2 = this->f_ext_2_;
+  auto &z = this->z_;
+  auto &f_ext_1 = this->f_ext_1_;
+  auto &f_ext_0 = this->f_ext_0_;
+  auto &kT = this->kT_;
+  auto &D = this->D_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()));
+  double *RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(phi->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(phi->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(phi->nrOfGhostLayers()));
+  double *RESTRICT const _data_phi = phi->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  double *RESTRICT const _data_rho = rho->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_phi_0 = int64_t(phi->xStride());
+  const int64_t _stride_phi_1 = int64_t(phi->yStride());
+  const int64_t _stride_phi_2 = int64_t(phi->zStride());
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_32fac7f834b08f4a768ccef85dadf7a1::diffusivefluxkernelwithelectrostatic_double_precision_diffusivefluxkernelwithelectrostatic_double_precision(D, _data_j, _data_phi, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_phi_0, _stride_phi_1, _stride_phi_2, _stride_rho_0, _stride_rho_1, _stride_rho_2, f_ext_0, f_ext_1, f_ext_2, kT, z);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_double_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_double_precision.h
new file mode 100644
index 00000000000..fe4b396409d
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_double_precision.h
@@ -0,0 +1,114 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file DiffusiveFluxKernelWithElectrostatic_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class DiffusiveFluxKernelWithElectrostatic_double_precision {
+public:
+  DiffusiveFluxKernelWithElectrostatic_double_precision(
+      BlockDataID jID_, BlockDataID phiID_, BlockDataID rhoID_, double D,
+      double f_ext_0, double f_ext_1, double f_ext_2, double kT, double z)
+      : jID(jID_), phiID(phiID_), rhoID(rhoID_), D_(D), f_ext_0_(f_ext_0),
+        f_ext_1_(f_ext_1), f_ext_2_(f_ext_2), kT_(kT), z_(z){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)> getSweep(
+      const shared_ptr<DiffusiveFluxKernelWithElectrostatic_double_precision>
+          &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<DiffusiveFluxKernelWithElectrostatic_double_precision>
+          &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID jID;
+  BlockDataID phiID;
+  BlockDataID rhoID;
+  double D_;
+  double f_ext_0_;
+  double f_ext_1_;
+  double f_ext_2_;
+  double kT_;
+  double z_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_single_precision.cpp
new file mode 100644
index 00000000000..328cba1c3fe
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_single_precision.cpp
@@ -0,0 +1,1218 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file DiffusiveFluxKernelWithElectrostatic_single_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "DiffusiveFluxKernelWithElectrostatic_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_823ab2463d465630661d5edc8f90930c {
+static FUNC_PREFIX void diffusivefluxkernelwithelectrostatic_single_precision_diffusivefluxkernelwithelectrostatic_single_precision(float D, float *RESTRICT const _data_j, float *RESTRICT const _data_phi, float *RESTRICT const _data_rho, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3, int64_t const _stride_phi_0, int64_t const _stride_phi_1, int64_t const _stride_phi_2, int64_t const _stride_rho_0, int64_t const _stride_rho_1, int64_t const _stride_rho_2, float f_ext_0, float f_ext_1, float f_ext_2, float kT, float z) {
+  {
+    {
+      {
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          float *RESTRICT _data_phi_20 = _data_phi;
+          float *RESTRICT _data_phi_20_10 = _data_phi_20;
+          float *RESTRICT _data_rho_20 = _data_rho;
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+          float *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+          _data_j_20_312_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_21_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_21_11[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_11[0] + kT * -2.0f * _data_rho_21_11[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_11[0] * _data_rho_21_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_11[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+            float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+            float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+            float *RESTRICT _data_phi_20 = _data_phi;
+            float *RESTRICT _data_phi_20_10 = _data_phi_20;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _data_rho_20;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+            float *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+            _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+          }
+        }
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          float *RESTRICT _data_phi_20 = _data_phi;
+          float *RESTRICT _data_phi_20_10 = _data_phi_20;
+          float *RESTRICT _data_rho_20 = _data_rho;
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+          float *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+          _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              float *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+              float *RESTRICT _data_phi_20 = _data_phi;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              _data_j_20_36_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * 2.0f + kT * (-1.0f * _data_rho_21_10[0] + _data_rho_20_10[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_21_10[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * (-1.0f * _data_phi_21_10[0] - 1.0f * _data_phi_21_10[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_phi_20 = _data_phi;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+              _data_j_20_38_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              float *RESTRICT _data_phi_20 = _data_phi;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_1m1[0] + kT * -2.0f * _data_rho_21_1m1[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_1m1[0] * _data_rho_21_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_1m1[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              float *RESTRICT _data_phi_20 = _data_phi;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              float *RESTRICT _data_phi_21_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_21_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_21_11[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_11[0] + kT * -2.0f * _data_rho_21_11[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_11[0] * _data_rho_21_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_11[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              float *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+              float *RESTRICT _data_phi_20 = _data_phi;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_21_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_21_10[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_phi_20 = _data_phi;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * 2.0f + kT * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              float *RESTRICT _data_phi_20 = _data_phi;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              float *RESTRICT _data_phi_20 = _data_phi;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              float *RESTRICT _data_phi_21_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              float *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+              float *RESTRICT _data_phi_20 = _data_phi;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              float *RESTRICT _data_phi_20 = _data_phi;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              float *RESTRICT _data_phi_20 = _data_phi;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+              float *RESTRICT _data_phi_21_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_phi_20 = _data_phi;
+            float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+            float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+            float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+            float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            float *RESTRICT _data_phi_21_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_21;
+            _data_j_20_38_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            float *RESTRICT _data_phi_20 = _data_phi;
+            float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+            float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+            _data_j_20_310_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_1m1[0] + kT * -2.0f * _data_rho_21_1m1[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_1m1[0] * _data_rho_21_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_1m1[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_phi_20 = _data_phi;
+            float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+            float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+            float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+            float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            float *RESTRICT _data_phi_21_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_21;
+            _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * 2.0f + kT * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            float *RESTRICT _data_phi_20 = _data_phi;
+            float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+            float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+            _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+          }
+        }
+        if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+          float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+          float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+          float *RESTRICT _data_phi_20 = _data_phi;
+          float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+          float *RESTRICT _data_rho_20 = _data_rho;
+          float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+          float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2;
+          float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+          _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+        }
+      }
+    }
+    for (int64_t ctr_2 = 1; ctr_2 < _size_j_2 - 1; ctr_2 += 1) {
+      float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+      float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+      float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+      float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+      float *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+      float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+      float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+      float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+      float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+      float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+      float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+      float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+      float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+      {
+        {
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              float *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_11 = _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _data_phi_20;
+              _data_j_20_34_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * 2.0f + kT * (-1.0f * _data_rho_20_11[0] + _data_rho_20_10[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_20_11[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_11[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * (-1.0f * _data_phi_20_11[0] - 1.0f * _data_phi_20_11[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_10 = _data_phi_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_2m1_11[0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[0] + kT * -2.0f * _data_rho_2m1_11[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_2m1_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_2m1_11[0] * _data_rho_2m1_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_11[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_10 = _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              float *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_21_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_21_11[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_11[0] + kT * -2.0f * _data_rho_21_11[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_11[0] * _data_rho_21_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_11[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              float *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_11 = _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _data_phi_20;
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_11[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_11[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_11[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_11[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_10 = _data_phi_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_10 = _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              float *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              float *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_11 = _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _data_phi_20;
+              _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_10 = _data_phi_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_10 = _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              float *RESTRICT _data_phi_21_11 = _stride_phi_1 + _data_phi_21;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+        }
+        for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+          float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+          float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+          float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+          float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+          float *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+          float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+          float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+          float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+          float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+          float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+          float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+          float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+          float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+          {
+            float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+            float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+            _data_j_20_30_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[0] + _data_rho_20_10[_stride_rho_0]) * -1.0f + kT * (-1.0f * _data_rho_20_10[0] + _data_rho_20_10[_stride_rho_0]) * 2.0f + z * (-1.0f * _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0]) * (_data_rho_20_10[0] + _data_rho_20_10[_stride_rho_0])) * 0.081462038946841925f * ((1.0f) / (kT));
+            float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+            float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+            _data_j_20_33_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * -2.0f + kT * (-1.0f * _data_rho_20_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_20_1m1[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * (-1.0f * _data_phi_20_1m1[0] - 1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            float *RESTRICT _data_rho_20_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_20;
+            float *RESTRICT _data_phi_20_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_20;
+            _data_j_20_34_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * 2.0f + kT * (-1.0f * _data_rho_20_11[0] + _data_rho_20_10[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_20_11[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_11[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_11[0]) * (-1.0f * _data_phi_20_11[0] - 1.0f * _data_phi_20_11[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+            float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+            float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+            _data_j_20_35_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[0] + _data_phi_2m1_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_2m1_10[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+            float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+            float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+            float *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+            _data_j_20_36_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * 2.0f + kT * (-1.0f * _data_rho_21_10[0] + _data_rho_20_10[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_21_10[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_10[0]) * (-1.0f * _data_phi_21_10[0] - 1.0f * _data_phi_21_10[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_39_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_1m1[0] - 1.0f * _data_phi_2m1_10[0] + _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_1m1[0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+            _data_j_20_310_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_1m1[0] + kT * -2.0f * _data_rho_21_1m1[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_1m1[0] * _data_rho_21_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_1m1[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_phi_2m1_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_311_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_2m1_11[0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[0] + kT * -2.0f * _data_rho_2m1_11[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_2m1_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_2m1_11[0] * _data_rho_2m1_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_11[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_phi_21_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_21;
+            _data_j_20_312_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_21_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_21_11[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_11[0] + kT * -2.0f * _data_rho_21_11[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_11[0] * _data_rho_21_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_11[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            {
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+                float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+                float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+                float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+                float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+                _data_j_20_31_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[_stride_rho_0]) * -1.0f + kT * (-1.0f * _data_rho_20_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 2.0f + z * (-1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0]) * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[_stride_rho_0])) * 0.081462038946841925f * ((1.0f) / (kT));
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+                float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+                float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+                float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+                float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+                float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+                float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+                float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                _data_j_20_32_10[_stride_j_0] = D * (f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0]) + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0]) * 2.0f + z * (-1.0f * _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0])) * -0.081462038946841925f * ((1.0f) / (kT));
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1) {
+                float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+                float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+                float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+                float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+                float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+                float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+                float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+                float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+                _data_j_20_37_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+              }
+              if (ctr_1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+                float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+                float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+                float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+                float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+                float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+                float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_21;
+                float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+                float *RESTRICT _data_phi_21_10 = _stride_phi_1 * ctr_1 + _data_phi_21;
+                _data_j_20_38_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+              }
+            }
+            for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+              _data_j_20_30_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * -1.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 2.0f + z * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0]) * (_data_rho_20_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0])) * 0.081462038946841925f * ((1.0f) / (kT));
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0]) * -1.0f + kT * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 2.0f + z * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0]) * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0])) * 0.081462038946841925f * ((1.0f) / (kT));
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = D * (f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0]) + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0]) * 2.0f + z * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0])) * -0.081462038946841925f * ((1.0f) / (kT));
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0f + kT * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_11[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_11[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_11[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_11[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_21_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_21_10[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * 2.0f + kT * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+            _data_j_20_30_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * -1.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 2.0f + z * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)]) * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)])) * 0.081462038946841925f * ((1.0f) / (kT));
+            _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0f + kT * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286f * ((1.0f) / (kT));
+            _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_11[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286f * ((1.0f) / (kT));
+            _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+            _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_21_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286f * ((1.0f) / (kT));
+            _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+            _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            {
+            }
+          }
+        }
+        {
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              float *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              _data_j_20_31_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[_stride_rho_0]) * -1.0f + kT * (-1.0f * _data_rho_20_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 2.0f + z * (-1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0]) * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[_stride_rho_0])) * 0.081462038946841925f * ((1.0f) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              float *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              _data_j_20_33_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * -2.0f + kT * (-1.0f * _data_rho_20_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_20_1m1[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_20_1m1[0]) * (-1.0f * _data_phi_20_1m1[0] - 1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_20_10[_stride_phi_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+              float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_37_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (_size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_phi_21_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_21;
+              _data_j_20_38_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_21_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_21_1m1[_stride_rho_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_20_1m1[_stride_phi_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_1m1[0] - 1.0f * _data_phi_2m1_10[0] + _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_1m1[0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * _data_rho_21_1m1[0] + kT * -2.0f * _data_rho_21_1m1[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_1m1[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_21_1m1[0] * _data_rho_21_1m1[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_21_1m1[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              float *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0]) * -1.0f + kT * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 2.0f + z * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0]) * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0])) * 0.081462038946841925f * ((1.0f) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              float *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * -2.0f + kT * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+              float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (_size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_phi_21_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_21;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * -2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * 2.0f + kT * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_21_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_21_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_21_10[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              float *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * -2.0f + kT * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)])) * 0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * ctr_2 - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * ctr_2;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_phi_21 = _data_phi + _stride_phi_2 * ctr_2 + _stride_phi_2;
+              float *RESTRICT _data_phi_21_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_21;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * -1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_21_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+        }
+      }
+    }
+    {
+      {
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_phi_20_10 = _data_phi_20;
+          float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+          float *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+          _data_j_20_311_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_2m1_11[0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[0] + kT * -2.0f * _data_rho_2m1_11[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_2m1_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_2m1_11[0] * _data_rho_2m1_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_11[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+            float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+            float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+            float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_phi_20_10 = _data_phi_20;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _data_rho_20;
+            float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+            float *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+          }
+        }
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_phi_20_10 = _data_phi_20;
+          float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+          float *RESTRICT _data_phi_2m1_11 = _stride_phi_1 + _data_phi_2m1;
+          _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_32_10[_stride_j_0] = D * (f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0]) + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0]) * 2.0f + z * (-1.0f * _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[_stride_rho_0])) * -0.081462038946841925f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              _data_j_20_35_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[0] + _data_phi_2m1_10[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_10[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_2m1_10[0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_37_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_1m1[0] - 1.0f * _data_phi_2m1_10[0] + _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_1m1[0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0] + f_ext_1 * z * _data_rho_2m1_11[0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[0] + kT * -2.0f * _data_rho_2m1_11[0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_2m1_11[0] * _data_rho_20_10[_stride_rho_0] + z * -1.0f * _data_phi_2m1_11[0] * _data_rho_2m1_11[0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_20_10[_stride_rho_0] + z * _data_phi_20_10[_stride_phi_0] * _data_rho_2m1_11[0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = D * (f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0]) + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0]) * 2.0f + z * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0])) * -0.081462038946841925f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * -2.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * ctr_0 - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_20_10[_stride_rho_0 * ctr_0] + z * _data_phi_20_10[_stride_phi_0 * ctr_0] * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_20;
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * ctr_1 - _stride_phi_1 + _data_phi_2m1;
+              float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * ctr_1 + _data_phi_2m1;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_phi_20_10 = _stride_phi_1 * ctr_1 + _data_phi_20;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+              float *RESTRICT _data_phi_2m1_11 = _stride_phi_1 * ctr_1 + _stride_phi_1 + _data_phi_2m1;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_0 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_1 * z * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_1 * z * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + f_ext_2 * z * -1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + f_ext_2 * z * -1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * -2.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + kT * 2.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * -1.0f * _data_phi_2m1_11[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + z * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1)] * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 0.04703213011469496f * ((1.0f) / (kT));
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+            float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+            float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+            float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_37_10[_stride_j_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[_stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+            float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+            float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+            float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+            _data_j_20_39_10[_stride_j_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_10[0] - 1.0f * _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[0] + _data_phi_2m1_1m1[_stride_phi_0]) + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_1m1[0] - 1.0f * _data_phi_2m1_10[0] + _data_phi_20_1m1[_stride_phi_0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0] + _data_rho_2m1_1m1[0]) * (-1.0f * _data_phi_20_1m1[0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0] + _data_phi_20_10[0] + _data_phi_2m1_10[_stride_phi_0]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+            float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+            float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+            float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+            _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f) * -0.028801180074297286f * ((1.0f) / (kT));
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+            float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+            float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+            float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+            float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+            _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0 * ctr_0]) + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_20_1m1[_stride_phi_0 * ctr_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0 * ctr_0] + _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * ctr_0 - _stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * ctr_0] + _data_phi_20_10[_stride_phi_0 * ctr_0 - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * ctr_0]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+          }
+        }
+        if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+          float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+          float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+          float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+          float *RESTRICT _data_phi_20 = _data_phi + _stride_phi_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_phi_20_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_20;
+          float *RESTRICT _data_phi_20_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_20;
+          float *RESTRICT _data_phi_2m1 = _data_phi + _stride_phi_2 * (_size_j_2 - 1) - _stride_phi_2;
+          float *RESTRICT _data_phi_2m1_1m1 = _stride_phi_1 * (_size_j_1 - 1) - _stride_phi_1 + _data_phi_2m1;
+          float *RESTRICT _data_phi_2m1_10 = _stride_phi_1 * (_size_j_1 - 1) + _data_phi_2m1;
+          _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (f_ext_0 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + f_ext_1 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + f_ext_2 * z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 2.0f + kT * (-1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * 4.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1)]) + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0f + z * (_data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)] + _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0]) * (-1.0f * _data_phi_20_1m1[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] - 1.0f * _data_phi_2m1_1m1[_stride_phi_0 * (_size_j_0 - 1)] + _data_phi_20_10[_stride_phi_0 * (_size_j_0 - 1) - _stride_phi_0] + _data_phi_2m1_10[_stride_phi_0 * (_size_j_0 - 1)]) * -1.0f) * -0.02351606505734748f * ((1.0f) / (kT));
+        }
+      }
+    }
+  }
+}
+} // namespace internal_823ab2463d465630661d5edc8f90930c
+
+void DiffusiveFluxKernelWithElectrostatic_single_precision::run(IBlock *block) {
+  auto phi = block->getData<field::GhostLayerField<float, 1>>(phiID);
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+  auto rho = block->getData<field::GhostLayerField<float, 1>>(rhoID);
+
+  auto &kT = this->kT_;
+  auto &f_ext_2 = this->f_ext_2_;
+  auto &f_ext_1 = this->f_ext_1_;
+  auto &D = this->D_;
+  auto &z = this->z_;
+  auto &f_ext_0 = this->f_ext_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()));
+  float *RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(phi->nrOfGhostLayers()));
+  float *RESTRICT const _data_phi = phi->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(rho->nrOfGhostLayers()));
+  float *RESTRICT const _data_rho = rho->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(j->xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(j->xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(j->ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(j->ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(j->zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(j->zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_phi_0 = int64_t(phi->xStride());
+  const int64_t _stride_phi_1 = int64_t(phi->yStride());
+  const int64_t _stride_phi_2 = int64_t(phi->zStride());
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_823ab2463d465630661d5edc8f90930c::diffusivefluxkernelwithelectrostatic_single_precision_diffusivefluxkernelwithelectrostatic_single_precision(D, _data_j, _data_phi, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_phi_0, _stride_phi_1, _stride_phi_2, _stride_rho_0, _stride_rho_1, _stride_rho_2, f_ext_0, f_ext_1, f_ext_2, kT, z);
+}
+
+void DiffusiveFluxKernelWithElectrostatic_single_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto phi = block->getData<field::GhostLayerField<float, 1>>(phiID);
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+  auto rho = block->getData<field::GhostLayerField<float, 1>>(rhoID);
+
+  auto &kT = this->kT_;
+  auto &f_ext_2 = this->f_ext_2_;
+  auto &f_ext_1 = this->f_ext_1_;
+  auto &D = this->D_;
+  auto &z = this->z_;
+  auto &f_ext_0 = this->f_ext_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()));
+  float *RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(phi->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(phi->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(phi->nrOfGhostLayers()));
+  float *RESTRICT const _data_phi = phi->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  float *RESTRICT const _data_rho = rho->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_phi_0 = int64_t(phi->xStride());
+  const int64_t _stride_phi_1 = int64_t(phi->yStride());
+  const int64_t _stride_phi_2 = int64_t(phi->zStride());
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_823ab2463d465630661d5edc8f90930c::diffusivefluxkernelwithelectrostatic_single_precision_diffusivefluxkernelwithelectrostatic_single_precision(D, _data_j, _data_phi, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_phi_0, _stride_phi_1, _stride_phi_2, _stride_rho_0, _stride_rho_1, _stride_rho_2, f_ext_0, f_ext_1, f_ext_2, kT, z);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_single_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_single_precision.h
new file mode 100644
index 00000000000..a47f59d8c72
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernelWithElectrostatic_single_precision.h
@@ -0,0 +1,114 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file DiffusiveFluxKernelWithElectrostatic_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class DiffusiveFluxKernelWithElectrostatic_single_precision {
+public:
+  DiffusiveFluxKernelWithElectrostatic_single_precision(
+      BlockDataID jID_, BlockDataID phiID_, BlockDataID rhoID_, float D,
+      float f_ext_0, float f_ext_1, float f_ext_2, float kT, float z)
+      : jID(jID_), phiID(phiID_), rhoID(rhoID_), D_(D), f_ext_0_(f_ext_0),
+        f_ext_1_(f_ext_1), f_ext_2_(f_ext_2), kT_(kT), z_(z){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)> getSweep(
+      const shared_ptr<DiffusiveFluxKernelWithElectrostatic_single_precision>
+          &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<DiffusiveFluxKernelWithElectrostatic_single_precision>
+          &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID jID;
+  BlockDataID phiID;
+  BlockDataID rhoID;
+  float D_;
+  float f_ext_0_;
+  float f_ext_1_;
+  float f_ext_2_;
+  float kT_;
+  float z_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_double_precision.cpp
new file mode 100644
index 00000000000..75d69ad67b0
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_double_precision.cpp
@@ -0,0 +1,873 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file DiffusiveFluxKernel_double_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "DiffusiveFluxKernel_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_e5e04d1215f19faa51f3c55db6d456a2 {
+static FUNC_PREFIX void diffusivefluxkernel_double_precision_diffusivefluxkernel_double_precision(double D, double *RESTRICT const _data_j, double *RESTRICT const _data_rho, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3, int64_t const _stride_rho_0, int64_t const _stride_rho_1, int64_t const _stride_rho_2) {
+  {
+    {
+      {
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          double *RESTRICT _data_rho_20 = _data_rho;
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          _data_j_20_312_10[_stride_j_0] = D * (-1.0 * _data_rho_21_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+            double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+            double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _data_rho_20;
+            _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+          }
+        }
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          double *RESTRICT _data_rho_20 = _data_rho;
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_36_10[_stride_j_0] = D * (-1.0 * _data_rho_21_10[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_38_10[_stride_j_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0] = D * (-1.0 * _data_rho_21_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_312_10[_stride_j_0] = D * (-1.0 * _data_rho_21_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+            }
+          }
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914;
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_38_10[_stride_j_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_310_10[_stride_j_0] = D * (-1.0 * _data_rho_21_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            double *RESTRICT _data_rho_20 = _data_rho;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+          }
+        }
+        if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+          double *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+          double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+          double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+          double *RESTRICT _data_rho_20 = _data_rho;
+          double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+        }
+      }
+    }
+    for (int64_t ctr_2 = 1; ctr_2 < _size_j_2 - 1; ctr_2 += 1) {
+      double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+      double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+      double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+      double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+      double *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+      double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+      double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+      double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+      double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+      double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+      double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+      double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+      double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+      {
+        {
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              double *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_34_10[_stride_j_0] = D * (-1.0 * _data_rho_20_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_311_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_312_10[_stride_j_0] = D * (-1.0 * _data_rho_21_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              double *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+            }
+          }
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              double *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914;
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              double *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            }
+          }
+        }
+        for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+          double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+          double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+          double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+          double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+          double *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+          double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+          double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+          double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+          double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+          double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+          double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+          double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+          double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+          {
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+            _data_j_20_30_10[_stride_j_0] = D * (-1.0 * _data_rho_20_10[0] + _data_rho_20_10[_stride_rho_0]) * 0.16292407789368385;
+            double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+            _data_j_20_33_10[_stride_j_0] = D * (-1.0 * _data_rho_20_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            double *RESTRICT _data_rho_20_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_20;
+            _data_j_20_34_10[_stride_j_0] = D * (-1.0 * _data_rho_20_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+            _data_j_20_35_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_10[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+            double *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+            _data_j_20_36_10[_stride_j_0] = D * (-1.0 * _data_rho_21_10[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+            _data_j_20_39_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+            _data_j_20_310_10[_stride_j_0] = D * (-1.0 * _data_rho_21_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+            _data_j_20_311_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            double *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+            _data_j_20_312_10[_stride_j_0] = D * (-1.0 * _data_rho_21_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            {
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+                double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                _data_j_20_31_10[_stride_j_0] = D * (-1.0 * _data_rho_20_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.16292407789368385;
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+                double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+                double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+                double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                _data_j_20_32_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_10[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.16292407789368385;
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1) {
+                double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+                double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+                double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                _data_j_20_37_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+              }
+              if (ctr_1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+                double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+                double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+                double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+                double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                _data_j_20_38_10[_stride_j_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+              }
+            }
+            for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+              _data_j_20_30_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_20_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.16292407789368385;
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.16292407789368385;
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_10[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.16292407789368385;
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+            }
+            _data_j_20_30_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.16292407789368385;
+            _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914;
+            _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914;
+            _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914;
+            _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914;
+            _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            {
+            }
+          }
+        }
+        {
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              double *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_31_10[_stride_j_0] = D * (-1.0 * _data_rho_20_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.16292407789368385;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              double *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_33_10[_stride_j_0] = D * (-1.0 * _data_rho_20_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_37_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            }
+            if (_size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_38_10[_stride_j_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0] = D * (-1.0 * _data_rho_21_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              double *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.16292407789368385;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              double *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+            }
+            if (_size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              double *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+            }
+          }
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              double *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              double *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              double *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              double *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            }
+          }
+        }
+      }
+    }
+    {
+      {
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          _data_j_20_311_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+            double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+            double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _data_rho_20;
+            _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+          }
+        }
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          double *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_rho_20_10 = _data_rho_20;
+          _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_32_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_10[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.16292407789368385;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_35_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_10[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_37_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_311_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_10[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.16292407789368385;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+            }
+          }
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              double *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              double *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_37_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914;
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_39_10[_stride_j_0] = D * (-1.0 * _data_rho_2m1_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992;
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+            double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            double *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914;
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992;
+          }
+        }
+        if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+          double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+          double *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+          double *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          double *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+          double *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          double *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0 * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992;
+        }
+      }
+    }
+  }
+}
+} // namespace internal_e5e04d1215f19faa51f3c55db6d456a2
+
+void DiffusiveFluxKernel_double_precision::run(IBlock *block) {
+  auto rho = block->getData<field::GhostLayerField<double, 1>>(rhoID);
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  auto &D = this->D_;
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()));
+  double *RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(rho->nrOfGhostLayers()));
+  double *RESTRICT const _data_rho = rho->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(j->xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(j->xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(j->ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(j->ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(j->zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(j->zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_e5e04d1215f19faa51f3c55db6d456a2::diffusivefluxkernel_double_precision_diffusivefluxkernel_double_precision(D, _data_j, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2);
+}
+
+void DiffusiveFluxKernel_double_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho = block->getData<field::GhostLayerField<double, 1>>(rhoID);
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  auto &D = this->D_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()));
+  double *RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  double *RESTRICT const _data_rho = rho->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_e5e04d1215f19faa51f3c55db6d456a2::diffusivefluxkernel_double_precision_diffusivefluxkernel_double_precision(D, _data_j, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_double_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_double_precision.h
new file mode 100644
index 00000000000..eb27e3acb74
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_double_precision.h
@@ -0,0 +1,104 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file DiffusiveFluxKernel_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class DiffusiveFluxKernel_double_precision {
+public:
+  DiffusiveFluxKernel_double_precision(BlockDataID jID_, BlockDataID rhoID_,
+                                       double D)
+      : jID(jID_), rhoID(rhoID_), D_(D){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<DiffusiveFluxKernel_double_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<DiffusiveFluxKernel_double_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID jID;
+  BlockDataID rhoID;
+  double D_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_single_precision.cpp
new file mode 100644
index 00000000000..4a699edee0b
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_single_precision.cpp
@@ -0,0 +1,873 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file DiffusiveFluxKernel_single_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "DiffusiveFluxKernel_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_2fab63cfdbacde4ac630f257442231a8 {
+static FUNC_PREFIX void diffusivefluxkernel_single_precision_diffusivefluxkernel_single_precision(float D, float *RESTRICT const _data_j, float *RESTRICT const _data_rho, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3, int64_t const _stride_rho_0, int64_t const _stride_rho_1, int64_t const _stride_rho_2) {
+  {
+    {
+      {
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          float *RESTRICT _data_rho_20 = _data_rho;
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          _data_j_20_312_10[_stride_j_0] = D * (-1.0f * _data_rho_21_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+            float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+            float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _data_rho_20;
+            _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+          }
+        }
+        if (0 < _size_j_1 - 1 && 0 < _size_j_2 - 1) {
+          float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+          float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+          float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+          float *RESTRICT _data_rho_20 = _data_rho;
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_36_10[_stride_j_0] = D * (-1.0f * _data_rho_21_10[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_38_10[_stride_j_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0] = D * (-1.0f * _data_rho_21_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_312_10[_stride_j_0] = D * (-1.0f * _data_rho_21_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+            }
+          }
+          {
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_36 = _data_j + 6 * _stride_j_3;
+              float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914f;
+            }
+            if (ctr_1 > 0 && 0 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            }
+            if (0 < _size_j_2 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && 1 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_38_10[_stride_j_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_310_10[_stride_j_0] = D * (-1.0f * _data_rho_21_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1 && ctr_0 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_38 = _data_j + 8 * _stride_j_3;
+            float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+          }
+          if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+            float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+            float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+            float *RESTRICT _data_rho_20 = _data_rho;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+          }
+        }
+        if (_size_j_1 - 1 > 0 && 0 < _size_j_2 - 1) {
+          float *RESTRICT _data_j_20_310 = _data_j + 10 * _stride_j_3;
+          float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+          float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2;
+          float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+          float *RESTRICT _data_rho_20 = _data_rho;
+          float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+        }
+      }
+    }
+    for (int64_t ctr_2 = 1; ctr_2 < _size_j_2 - 1; ctr_2 += 1) {
+      float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+      float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+      float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+      float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+      float *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+      float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+      float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+      float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+      float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+      float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+      float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+      float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+      float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+      {
+        {
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              float *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_34_10[_stride_j_0] = D * (-1.0f * _data_rho_20_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_311_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_312_10[_stride_j_0] = D * (-1.0f * _data_rho_21_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              float *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+            }
+          }
+          {
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+              float *RESTRICT _data_j_20_34_10 = _data_j_20_34;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_11 = _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914f;
+            }
+            if (ctr_2 > 0 && 0 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            }
+            if (0 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+              float *RESTRICT _data_j_20_312_10 = _data_j_20_312;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_11 = _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _data_rho_20;
+              _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            }
+          }
+        }
+        for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+          float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+          float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+          float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+          float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+          float *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+          float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+          float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+          float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+          float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+          float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+          float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+          float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+          float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+          {
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+            _data_j_20_30_10[_stride_j_0] = D * (-1.0f * _data_rho_20_10[0] + _data_rho_20_10[_stride_rho_0]) * 0.16292407789368385f;
+            float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+            _data_j_20_33_10[_stride_j_0] = D * (-1.0f * _data_rho_20_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            float *RESTRICT _data_rho_20_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_20;
+            _data_j_20_34_10[_stride_j_0] = D * (-1.0f * _data_rho_20_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+            _data_j_20_35_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_10[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+            float *RESTRICT _data_rho_21_10 = _stride_rho_1 * ctr_1 + _data_rho_21;
+            _data_j_20_36_10[_stride_j_0] = D * (-1.0f * _data_rho_21_10[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+            _data_j_20_39_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+            _data_j_20_310_10[_stride_j_0] = D * (-1.0f * _data_rho_21_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+            _data_j_20_311_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            float *RESTRICT _data_rho_21_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_21;
+            _data_j_20_312_10[_stride_j_0] = D * (-1.0f * _data_rho_21_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            {
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+                float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_20;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                _data_j_20_31_10[_stride_j_0] = D * (-1.0f * _data_rho_20_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.16292407789368385f;
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+                float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+                float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+                float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                _data_j_20_32_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_10[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.16292407789368385f;
+              }
+              if (ctr_1 > 0 && ctr_2 > 0 && 1 < _size_j_0 - 1) {
+                float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+                float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+                float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+                float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                _data_j_20_37_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+              }
+              if (ctr_1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+                float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+                float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+                float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+                float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_21;
+                float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+                float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+                _data_j_20_38_10[_stride_j_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+              }
+            }
+            for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+              _data_j_20_30_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_20_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.16292407789368385f;
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.16292407789368385f;
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_10[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.16292407789368385f;
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+              _data_j_20_34_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_20_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+              _data_j_20_36_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+              _data_j_20_312_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+            }
+            _data_j_20_30_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.16292407789368385f;
+            _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914f;
+            _data_j_20_34_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_20_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914f;
+            _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914f;
+            _data_j_20_36_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_21_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914f;
+            _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            _data_j_20_312_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_21_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            {
+            }
+          }
+        }
+        {
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              float *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_31_10[_stride_j_0] = D * (-1.0f * _data_rho_20_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.16292407789368385f;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              float *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_33_10[_stride_j_0] = D * (-1.0f * _data_rho_20_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && 1 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_37_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            }
+            if (_size_j_1 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_38_10[_stride_j_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0] = D * (-1.0f * _data_rho_21_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+              float *RESTRICT _data_j_20_31_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_31;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_31_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.16292407789368385f;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              float *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_33_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+            }
+            if (_size_j_1 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+              float *RESTRICT _data_j_20_38_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_38;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_38_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+            }
+          }
+          {
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+              float *RESTRICT _data_j_20_33_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_33;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_20;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_33_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_20_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914f;
+            }
+            if (ctr_2 > 0 && _size_j_1 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * ctr_2 - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            }
+            if (_size_j_1 - 1 > 0 && ctr_2 < _size_j_2 - 1) {
+              float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+              float *RESTRICT _data_j_20_310_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_310;
+              float *RESTRICT _data_rho_21 = _data_rho + _stride_rho_2 * ctr_2 + _stride_rho_2;
+              float *RESTRICT _data_rho_21_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_21;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * ctr_2;
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+              _data_j_20_310_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_21_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            }
+          }
+        }
+      }
+    }
+    {
+      {
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          _data_j_20_311_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+            float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+            float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _data_rho_20;
+            _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+          }
+        }
+        if (_size_j_2 - 1 > 0 && 0 < _size_j_1 - 1) {
+          float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+          float *RESTRICT _data_j_20_311_10 = _data_j_20_311;
+          float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 + _data_rho_2m1;
+          float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_rho_20_10 = _data_rho_20;
+          _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+        }
+      }
+      for (int64_t ctr_1 = 1; ctr_1 < _size_j_1 - 1; ctr_1 += 1) {
+        {
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_32_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_10[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.16292407789368385f;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_35_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_10[0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_37_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_311_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_11[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+            }
+          }
+          for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 2 * _stride_j_3;
+              float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_32_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_10[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.16292407789368385f;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_35_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_10[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+              float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+              float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_311_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_11[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+            }
+          }
+          {
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 5 * _stride_j_3;
+              float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_10 = _stride_rho_1 * ctr_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_35_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_2m1_10[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.11520472029718914f;
+            }
+            if (ctr_1 > 0 && _size_j_2 - 1 > 0) {
+              float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+              float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * ctr_1 - _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            }
+            if (_size_j_2 - 1 > 0 && ctr_1 < _size_j_1 - 1) {
+              float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 11 * _stride_j_3;
+              float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+              float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+              float *RESTRICT _data_rho_2m1_11 = _stride_rho_1 * ctr_1 + _stride_rho_1 + _data_rho_2m1;
+              float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+              float *RESTRICT _data_rho_20_10 = _stride_rho_1 * ctr_1 + _data_rho_20;
+              _data_j_20_311_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_2m1_11[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+            }
+          }
+        }
+      }
+      {
+        {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && 1 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_37_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0] + _data_rho_20_10[_stride_rho_0]) * 0.11520472029718914f;
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_39_10[_stride_j_0] = D * (-1.0f * _data_rho_2m1_1m1[0] + _data_rho_20_10[_stride_rho_0]) * 0.09406426022938992f;
+          }
+        }
+        for (int64_t ctr_0 = 2; ctr_0 < _size_j_0 - 1; ctr_0 += 1) {
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0 && ctr_0 < _size_j_0 - 1) {
+            float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 7 * _stride_j_3;
+            float *RESTRICT _data_j_20_37_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_37;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_37_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.11520472029718914f;
+          }
+          if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+            float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+            float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+            float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+            float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+            float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+            float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+            _data_j_20_39_10[_stride_j_0 * ctr_0] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * ctr_0 - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * ctr_0]) * 0.09406426022938992f;
+          }
+        }
+        if (_size_j_1 - 1 > 0 && _size_j_2 - 1 > 0) {
+          float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * (_size_j_2 - 1) + 9 * _stride_j_3;
+          float *RESTRICT _data_j_20_39_10 = _stride_j_1 * (_size_j_1 - 1) + _data_j_20_39;
+          float *RESTRICT _data_rho_2m1 = _data_rho + _stride_rho_2 * (_size_j_2 - 1) - _stride_rho_2;
+          float *RESTRICT _data_rho_2m1_1m1 = _stride_rho_1 * (_size_j_1 - 1) - _stride_rho_1 + _data_rho_2m1;
+          float *RESTRICT _data_rho_20 = _data_rho + _stride_rho_2 * (_size_j_2 - 1);
+          float *RESTRICT _data_rho_20_10 = _stride_rho_1 * (_size_j_1 - 1) + _data_rho_20;
+          _data_j_20_39_10[_stride_j_0 * (_size_j_0 - 1)] = D * (-1.0f * _data_rho_2m1_1m1[_stride_rho_0 * (_size_j_0 - 1) - _stride_rho_0] + _data_rho_20_10[_stride_rho_0 * (_size_j_0 - 1)]) * 0.09406426022938992f;
+        }
+      }
+    }
+  }
+}
+} // namespace internal_2fab63cfdbacde4ac630f257442231a8
+
+void DiffusiveFluxKernel_single_precision::run(IBlock *block) {
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+  auto rho = block->getData<field::GhostLayerField<float, 1>>(rhoID);
+
+  auto &D = this->D_;
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()));
+  float *RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(rho->nrOfGhostLayers()));
+  float *RESTRICT const _data_rho = rho->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(j->xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(j->xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(j->ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(j->ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(j->zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(j->zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_2fab63cfdbacde4ac630f257442231a8::diffusivefluxkernel_single_precision_diffusivefluxkernel_single_precision(D, _data_j, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2);
+}
+
+void DiffusiveFluxKernel_single_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+  auto rho = block->getData<field::GhostLayerField<float, 1>>(rhoID);
+
+  auto &D = this->D_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()));
+  float *RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(rho->nrOfGhostLayers()));
+  float *RESTRICT const _data_rho = rho->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  const int64_t _stride_rho_0 = int64_t(rho->xStride());
+  const int64_t _stride_rho_1 = int64_t(rho->yStride());
+  const int64_t _stride_rho_2 = int64_t(rho->zStride());
+  internal_2fab63cfdbacde4ac630f257442231a8::diffusivefluxkernel_single_precision_diffusivefluxkernel_single_precision(D, _data_j, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_single_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_single_precision.h
new file mode 100644
index 00000000000..8b2542beb06
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/DiffusiveFluxKernel_single_precision.h
@@ -0,0 +1,104 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file DiffusiveFluxKernel_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class DiffusiveFluxKernel_single_precision {
+public:
+  DiffusiveFluxKernel_single_precision(BlockDataID jID_, BlockDataID rhoID_,
+                                       float D)
+      : jID(jID_), rhoID(rhoID_), D_(D){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<DiffusiveFluxKernel_single_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<DiffusiveFluxKernel_single_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID jID;
+  BlockDataID rhoID;
+  float D_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_double_precision.cpp
new file mode 100644
index 00000000000..6fc3791e4e1
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_double_precision.cpp
@@ -0,0 +1,110 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file Dirichlet_double_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "Dirichlet_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_74da74b67a122b7887d3d21c7ea5f414 {
+static FUNC_PREFIX void dirichlet_double_precision_boundary_Dirichlet_double_precision(double *RESTRICT _data_field, uint8_t *RESTRICT const _data_indexVector, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int32_t indexVectorSize) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[24 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[24 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[24 * ctr_0 + 8]));
+
+    const int32_t cx[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1, 1, -1, 1, -1, 1, -1, 1, -1};
+    const int32_t cy[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1};
+    const int32_t cz[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1};
+    const int32_t invdir[] = {0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13, 26, 25, 24, 23, 22, 21, 20, 19};
+
+    const int32_t dir = *((int32_t *)(&_data_indexVector[24 * ctr_0 + 12]));
+    _data_field[_stride_field_0 * x + _stride_field_1 * y + _stride_field_2 * z] = *((double *)(&_data_indexVector[24 * ctr_0 + 16]));
+  }
+}
+} // namespace internal_74da74b67a122b7887d3d21c7ea5f414
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void Dirichlet_double_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto field = block->getData<field::GhostLayerField<double, 1>>(fieldID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(field->nrOfGhostLayers()));
+  double *RESTRICT _data_field = field->dataAt(0, 0, 0, 0);
+  const int64_t _stride_field_0 = int64_t(field->xStride());
+  const int64_t _stride_field_1 = int64_t(field->yStride());
+  const int64_t _stride_field_2 = int64_t(field->zStride());
+  internal_74da74b67a122b7887d3d21c7ea5f414::dirichlet_double_precision_boundary_Dirichlet_double_precision(_data_field, _data_indexVector, _stride_field_0, _stride_field_1, _stride_field_2, indexVectorSize);
+}
+
+void Dirichlet_double_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void Dirichlet_double_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void Dirichlet_double_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_double_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_double_precision.h
new file mode 100644
index 00000000000..7506958518e
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_double_precision.h
@@ -0,0 +1,190 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file Dirichlet_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "blockforest/StructuredBlockForest.h"
+#include "core/debug/Debug.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "field/FlagField.h"
+#include "field/GhostLayerField.h"
+
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class Dirichlet_double_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    int32_t dir;
+    double value;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
+        : x(x_), y(y_), z(z_), dir(dir_), value() {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z && dir == o.dir &&
+             floatIsEqual(value, o.value);
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  Dirichlet_double_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID fieldID_,
+      std::function<double(const Cell &,
+                           const shared_ptr<StructuredBlockForest> &, IBlock &)>
+          &dirichletCallback)
+      : elementInitaliser(dirichletCallback), fieldID(fieldID_) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_Dirichlet_double_precision");
+  };
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(blocks, &*blockIt, flagFieldID,
+                                     boundaryFlagUID, domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+        double InitialisatonAdditionalData =
+            elementInitaliser(Cell(it.x(), it.y(), it.z()), blocks, *block);
+        element.value = InitialisatonAdditionalData;
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+  std::function<double(const Cell &, const shared_ptr<StructuredBlockForest> &,
+                       IBlock &)>
+      elementInitaliser;
+
+public:
+  BlockDataID fieldID;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_single_precision.cpp
new file mode 100644
index 00000000000..fdde1576cc6
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_single_precision.cpp
@@ -0,0 +1,110 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file Dirichlet_single_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "Dirichlet_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_43f4eae176e72ad2d9db0f0468064c30 {
+static FUNC_PREFIX void dirichlet_single_precision_boundary_Dirichlet_single_precision(float *RESTRICT _data_field, uint8_t *RESTRICT const _data_indexVector, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int32_t indexVectorSize) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[20 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[20 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[20 * ctr_0 + 8]));
+
+    const int32_t cx[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1, 1, -1, 1, -1, 1, -1, 1, -1};
+    const int32_t cy[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1};
+    const int32_t cz[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1};
+    const int32_t invdir[] = {0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13, 26, 25, 24, 23, 22, 21, 20, 19};
+
+    const int32_t dir = *((int32_t *)(&_data_indexVector[20 * ctr_0 + 12]));
+    _data_field[_stride_field_0 * x + _stride_field_1 * y + _stride_field_2 * z] = *((float *)(&_data_indexVector[20 * ctr_0 + 16]));
+  }
+}
+} // namespace internal_43f4eae176e72ad2d9db0f0468064c30
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void Dirichlet_single_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto field = block->getData<field::GhostLayerField<float, 1>>(fieldID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(field->nrOfGhostLayers()));
+  float *RESTRICT _data_field = field->dataAt(0, 0, 0, 0);
+  const int64_t _stride_field_0 = int64_t(field->xStride());
+  const int64_t _stride_field_1 = int64_t(field->yStride());
+  const int64_t _stride_field_2 = int64_t(field->zStride());
+  internal_43f4eae176e72ad2d9db0f0468064c30::dirichlet_single_precision_boundary_Dirichlet_single_precision(_data_field, _data_indexVector, _stride_field_0, _stride_field_1, _stride_field_2, indexVectorSize);
+}
+
+void Dirichlet_single_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void Dirichlet_single_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void Dirichlet_single_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_single_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_single_precision.h
new file mode 100644
index 00000000000..59a01a06db9
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/Dirichlet_single_precision.h
@@ -0,0 +1,190 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file Dirichlet_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "blockforest/StructuredBlockForest.h"
+#include "core/debug/Debug.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "field/FlagField.h"
+#include "field/GhostLayerField.h"
+
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class Dirichlet_single_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    int32_t dir;
+    float value;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
+        : x(x_), y(y_), z(z_), dir(dir_), value() {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z && dir == o.dir &&
+             floatIsEqual(value, o.value);
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  Dirichlet_single_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID fieldID_,
+      std::function<float(const Cell &,
+                          const shared_ptr<StructuredBlockForest> &, IBlock &)>
+          &dirichletCallback)
+      : elementInitaliser(dirichletCallback), fieldID(fieldID_) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_Dirichlet_single_precision");
+  };
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(blocks, &*blockIt, flagFieldID,
+                                     boundaryFlagUID, domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+        float InitialisatonAdditionalData =
+            elementInitaliser(Cell(it.x(), it.y(), it.z()), blocks, *block);
+        element.value = InitialisatonAdditionalData;
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+  std::function<float(const Cell &, const shared_ptr<StructuredBlockForest> &,
+                      IBlock &)>
+      elementInitaliser;
+
+public:
+  BlockDataID fieldID;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_double_precision.cpp
new file mode 100644
index 00000000000..7110c5922b5
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_double_precision.cpp
@@ -0,0 +1,213 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file FixedFlux_double_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "FixedFlux_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_bdec58bfeb737088cd218660bd069d85 {
+static FUNC_PREFIX void fixedflux_double_precision_boundary_FixedFlux_double_precision(double *RESTRICT const _data_flux, uint8_t *RESTRICT const _data_indexVector, int64_t const _stride_flux_0, int64_t const _stride_flux_1, int64_t const _stride_flux_2, int64_t const _stride_flux_3, int32_t indexVectorSize) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[40 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[40 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[40 * ctr_0 + 8]));
+
+    const int32_t cx[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1, 1, -1, 1, -1, 1, -1, 1, -1};
+    const int32_t cy[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1};
+    const int32_t cz[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1};
+    const int32_t invdir[] = {0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13, 26, 25, 24, 23, 22, 21, 20, 19};
+
+    const int32_t dir = *((int32_t *)(&_data_indexVector[40 * ctr_0 + 12]));
+    if (((dir) == (26))) {
+      _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 9 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+    } else {
+      if (((dir) == (25))) {
+        _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y - _stride_flux_1 + _stride_flux_2 * z - _stride_flux_2 + 12 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+      } else {
+        if (((dir) == (24))) {
+          _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 11 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24]));
+        } else {
+          if (((dir) == (23))) {
+            _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z - _stride_flux_2 + 10 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+          } else {
+            if (((dir) == (22))) {
+              _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 10 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+            } else {
+              if (((dir) == (21))) {
+                _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y - _stride_flux_1 + _stride_flux_2 * z + _stride_flux_2 + 11 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24]));
+              } else {
+                if (((dir) == (20))) {
+                  _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 12 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                } else {
+                  if (((dir) == (19))) {
+                    _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z + _stride_flux_2 + 9 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                  } else {
+                    if (((dir) == (18))) {
+                      _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_2 * z - _stride_flux_2 + 6 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                    } else {
+                      if (((dir) == (17))) {
+                        _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 5 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                      } else {
+                        if (((dir) == (16))) {
+                          _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 7 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                        } else {
+                          if (((dir) == (15))) {
+                            _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z - _stride_flux_2 + 8 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                          } else {
+                            if (((dir) == (14))) {
+                              _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_2 * z + _stride_flux_2 + 5 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                            } else {
+                              if (((dir) == (13))) {
+                                _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 6 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                              } else {
+                                if (((dir) == (12))) {
+                                  _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 8 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                                } else {
+                                  if (((dir) == (11))) {
+                                    _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z + _stride_flux_2 + 7 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                                  } else {
+                                    if (((dir) == (10))) {
+                                      _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y - _stride_flux_1 + _stride_flux_2 * z + 4 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24]));
+                                    } else {
+                                      if (((dir) == (9))) {
+                                        _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 3 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24]));
+                                      } else {
+                                        if (((dir) == (8))) {
+                                          _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z + 3 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) - 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24]));
+                                        } else {
+                                          if (((dir) == (7))) {
+                                            _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 4 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) + 0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24]));
+                                          } else {
+                                            if (((dir) == (6))) {
+                                              _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 2 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                                            } else {
+                                              if (((dir) == (5))) {
+                                                _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + _stride_flux_2 + 2 * _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 32]));
+                                              } else {
+                                                if (((dir) == (4))) {
+                                                  _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_2 * z] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16]));
+                                                } else {
+                                                  if (((dir) == (3))) {
+                                                    _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 16]));
+                                                  } else {
+                                                    if (((dir) == (2))) {
+                                                      _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24]));
+                                                    } else {
+                                                      if (((dir) == (1))) {
+                                                        _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z + _stride_flux_3] = -0.1111111111111111 * *((double *)(&_data_indexVector[40 * ctr_0 + 24]));
+                                                      }
+                                                    }
+                                                  }
+                                                }
+                                              }
+                                            }
+                                          }
+                                        }
+                                      }
+                                    }
+                                  }
+                                }
+                              }
+                            }
+                          }
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+} // namespace internal_bdec58bfeb737088cd218660bd069d85
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void FixedFlux_double_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto flux = block->getData<field::GhostLayerField<double, 13>>(fluxID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(flux->nrOfGhostLayers()));
+  double *RESTRICT const _data_flux = flux->dataAt(0, 0, 0, 0);
+  const int64_t _stride_flux_0 = int64_t(flux->xStride());
+  const int64_t _stride_flux_1 = int64_t(flux->yStride());
+  const int64_t _stride_flux_2 = int64_t(flux->zStride());
+  const int64_t _stride_flux_3 = int64_t(1 * int64_t(flux->fStride()));
+  internal_bdec58bfeb737088cd218660bd069d85::fixedflux_double_precision_boundary_FixedFlux_double_precision(_data_flux, _data_indexVector, _stride_flux_0, _stride_flux_1, _stride_flux_2, _stride_flux_3, indexVectorSize);
+}
+
+void FixedFlux_double_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void FixedFlux_double_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void FixedFlux_double_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_double_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_double_precision.h
new file mode 100644
index 00000000000..66d690c8e7a
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_double_precision.h
@@ -0,0 +1,737 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file FixedFlux_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "blockforest/StructuredBlockForest.h"
+#include "core/debug/Debug.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "field/FlagField.h"
+#include "field/GhostLayerField.h"
+
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class FixedFlux_double_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    int32_t dir;
+    double flux_0;
+    double flux_1;
+    double flux_2;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
+        : x(x_), y(y_), z(z_), dir(dir_), flux_0(), flux_1(), flux_2() {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z && dir == o.dir &&
+             floatIsEqual(flux_0, o.flux_0) && floatIsEqual(flux_1, o.flux_1) &&
+             floatIsEqual(flux_2, o.flux_2);
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  FixedFlux_double_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID fluxID_,
+      std::function<Vector3<double>(const Cell &,
+                                    const shared_ptr<StructuredBlockForest> &,
+                                    IBlock &)> &fluxCallback)
+      : elementInitaliser(fluxCallback), fluxID(fluxID_) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_FixedFlux_double_precision");
+  };
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(blocks, &*blockIt, flagFieldID,
+                                     boundaryFlagUID, domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 1);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 2);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + -1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 3);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 0, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 4);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 5);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 6);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 7);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 8);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 9);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + -1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 10);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + -1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 11);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 12);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + -1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 13);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 0, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 14);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 15);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 16);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + -1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 17);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 0, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 18);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 19);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 20);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, -1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 21);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + -1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, -1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 22);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + -1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 23);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 24);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, -1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 25);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + -1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, -1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 26);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + -1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+  std::function<Vector3<double>(
+      const Cell &, const shared_ptr<StructuredBlockForest> &, IBlock &)>
+      elementInitaliser;
+
+public:
+  BlockDataID fluxID;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_single_precision.cpp
new file mode 100644
index 00000000000..3f3af21ebc3
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_single_precision.cpp
@@ -0,0 +1,213 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file FixedFlux_single_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "FixedFlux_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_11b127eed0b5044a0a655e8444f26034 {
+static FUNC_PREFIX void fixedflux_single_precision_boundary_FixedFlux_single_precision(float *RESTRICT const _data_flux, uint8_t *RESTRICT const _data_indexVector, int64_t const _stride_flux_0, int64_t const _stride_flux_1, int64_t const _stride_flux_2, int64_t const _stride_flux_3, int32_t indexVectorSize) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[28 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[28 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[28 * ctr_0 + 8]));
+
+    const int32_t cx[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1, 1, -1, 1, -1, 1, -1, 1, -1};
+    const int32_t cy[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1};
+    const int32_t cz[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1};
+    const int32_t invdir[] = {0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13, 26, 25, 24, 23, 22, 21, 20, 19};
+
+    const int32_t dir = *((int32_t *)(&_data_indexVector[28 * ctr_0 + 12]));
+    if (((dir) == (26))) {
+      _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 9 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+    } else {
+      if (((dir) == (25))) {
+        _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y - _stride_flux_1 + _stride_flux_2 * z - _stride_flux_2 + 12 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+      } else {
+        if (((dir) == (24))) {
+          _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 11 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20]));
+        } else {
+          if (((dir) == (23))) {
+            _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z - _stride_flux_2 + 10 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+          } else {
+            if (((dir) == (22))) {
+              _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 10 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+            } else {
+              if (((dir) == (21))) {
+                _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y - _stride_flux_1 + _stride_flux_2 * z + _stride_flux_2 + 11 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20]));
+              } else {
+                if (((dir) == (20))) {
+                  _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 12 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                } else {
+                  if (((dir) == (19))) {
+                    _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z + _stride_flux_2 + 9 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                  } else {
+                    if (((dir) == (18))) {
+                      _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_2 * z - _stride_flux_2 + 6 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                    } else {
+                      if (((dir) == (17))) {
+                        _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 5 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                      } else {
+                        if (((dir) == (16))) {
+                          _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 7 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                        } else {
+                          if (((dir) == (15))) {
+                            _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z - _stride_flux_2 + 8 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                          } else {
+                            if (((dir) == (14))) {
+                              _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_2 * z + _stride_flux_2 + 5 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                            } else {
+                              if (((dir) == (13))) {
+                                _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 6 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                              } else {
+                                if (((dir) == (12))) {
+                                  _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 8 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                                } else {
+                                  if (((dir) == (11))) {
+                                    _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z + _stride_flux_2 + 7 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                                  } else {
+                                    if (((dir) == (10))) {
+                                      _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y - _stride_flux_1 + _stride_flux_2 * z + 4 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20]));
+                                    } else {
+                                      if (((dir) == (9))) {
+                                        _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 3 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20]));
+                                      } else {
+                                        if (((dir) == (8))) {
+                                          _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z + 3 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) - 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20]));
+                                        } else {
+                                          if (((dir) == (7))) {
+                                            _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 4 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) + 0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20]));
+                                          } else {
+                                            if (((dir) == (6))) {
+                                              _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + 2 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                                            } else {
+                                              if (((dir) == (5))) {
+                                                _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + _stride_flux_2 + 2 * _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 24]));
+                                              } else {
+                                                if (((dir) == (4))) {
+                                                  _data_flux[_stride_flux_0 * x + _stride_flux_0 + _stride_flux_1 * y + _stride_flux_2 * z] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16]));
+                                                } else {
+                                                  if (((dir) == (3))) {
+                                                    _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 16]));
+                                                  } else {
+                                                    if (((dir) == (2))) {
+                                                      _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_2 * z + _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20]));
+                                                    } else {
+                                                      if (((dir) == (1))) {
+                                                        _data_flux[_stride_flux_0 * x + _stride_flux_1 * y + _stride_flux_1 + _stride_flux_2 * z + _stride_flux_3] = -0.1111111111111111f * *((float *)(&_data_indexVector[28 * ctr_0 + 20]));
+                                                      }
+                                                    }
+                                                  }
+                                                }
+                                              }
+                                            }
+                                          }
+                                        }
+                                      }
+                                    }
+                                  }
+                                }
+                              }
+                            }
+                          }
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+} // namespace internal_11b127eed0b5044a0a655e8444f26034
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void FixedFlux_single_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto flux = block->getData<field::GhostLayerField<float, 13>>(fluxID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(flux->nrOfGhostLayers()));
+  float *RESTRICT const _data_flux = flux->dataAt(0, 0, 0, 0);
+  const int64_t _stride_flux_0 = int64_t(flux->xStride());
+  const int64_t _stride_flux_1 = int64_t(flux->yStride());
+  const int64_t _stride_flux_2 = int64_t(flux->zStride());
+  const int64_t _stride_flux_3 = int64_t(1 * int64_t(flux->fStride()));
+  internal_11b127eed0b5044a0a655e8444f26034::fixedflux_single_precision_boundary_FixedFlux_single_precision(_data_flux, _data_indexVector, _stride_flux_0, _stride_flux_1, _stride_flux_2, _stride_flux_3, indexVectorSize);
+}
+
+void FixedFlux_single_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void FixedFlux_single_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void FixedFlux_single_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_single_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_single_precision.h
new file mode 100644
index 00000000000..fa4f89e5e2a
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/FixedFlux_single_precision.h
@@ -0,0 +1,737 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file FixedFlux_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "blockforest/StructuredBlockForest.h"
+#include "core/debug/Debug.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "field/FlagField.h"
+#include "field/GhostLayerField.h"
+
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class FixedFlux_single_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    int32_t dir;
+    float flux_0;
+    float flux_1;
+    float flux_2;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
+        : x(x_), y(y_), z(z_), dir(dir_), flux_0(), flux_1(), flux_2() {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z && dir == o.dir &&
+             floatIsEqual(flux_0, o.flux_0) && floatIsEqual(flux_1, o.flux_1) &&
+             floatIsEqual(flux_2, o.flux_2);
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  FixedFlux_single_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID fluxID_,
+      std::function<Vector3<float>(const Cell &,
+                                   const shared_ptr<StructuredBlockForest> &,
+                                   IBlock &)> &fluxCallback)
+      : elementInitaliser(fluxCallback), fluxID(fluxID_) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_FixedFlux_single_precision");
+  };
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(blocks, &*blockIt, flagFieldID,
+                                     boundaryFlagUID, domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 1);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 2);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + -1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 3);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 0, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 4);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 5);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 6);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 7);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 8);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 9);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + -1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 10);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + -1, it.z() + 0), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 11);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 12);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + -1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 13);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 0, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 14);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 15);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 16);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + -1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 17);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 0, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 18);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 19);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 20);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, -1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 21);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + -1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, -1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 22);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + -1, it.z() + 1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 23);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 24);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + 1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, -1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 25);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + -1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, -1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 26);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + -1, it.y() + -1, it.z() + -1), blocks, *block);
+        element.flux_0 = InitialisatonAdditionalData[0];
+        element.flux_1 = InitialisatonAdditionalData[1];
+        element.flux_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+  std::function<Vector3<float>(
+      const Cell &, const shared_ptr<StructuredBlockForest> &, IBlock &)>
+      elementInitaliser;
+
+public:
+  BlockDataID fluxID;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_double_precision.cpp
new file mode 100644
index 00000000000..994031ebbdc
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_double_precision.cpp
@@ -0,0 +1,191 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file FrictionCouplingKernel_double_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "FrictionCouplingKernel_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_828cb3fbc90c26a23ae54639862a1401 {
+static FUNC_PREFIX void frictioncouplingkernel_double_precision_frictioncouplingkernel_double_precision(double D, double *RESTRICT _data_f, double *RESTRICT const _data_j, int64_t const _size_f_0, int64_t const _size_f_1, int64_t const _size_f_2, int64_t const _stride_f_0, int64_t const _stride_f_1, int64_t const _stride_f_2, int64_t const _stride_f_3, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3, double kT) {
+  for (int64_t ctr_2 = 1; ctr_2 < _size_f_2 - 1; ctr_2 += 1) {
+    double *RESTRICT _data_f_20_30 = _data_f + _stride_f_2 * ctr_2;
+    double *RESTRICT _data_j_2m1_36 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 6 * _stride_j_3;
+    double *RESTRICT _data_j_2m1_310 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_2m1_312 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 12 * _stride_j_3;
+    double *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_21_35 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 5 * _stride_j_3;
+    double *RESTRICT _data_j_21_39 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 9 * _stride_j_3;
+    double *RESTRICT _data_j_21_311 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 11 * _stride_j_3;
+    double *RESTRICT _data_f_20_31 = _data_f + _stride_f_2 * ctr_2 + _stride_f_3;
+    double *RESTRICT _data_j_2m1_38 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 8 * _stride_j_3;
+    double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    double *RESTRICT _data_j_21_37 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 7 * _stride_j_3;
+    double *RESTRICT _data_f_20_32 = _data_f + _stride_f_2 * ctr_2 + 2 * _stride_f_3;
+    double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+    double *RESTRICT _data_j_21_32 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 2 * _stride_j_3;
+    for (int64_t ctr_1 = 1; ctr_1 < _size_f_1 - 1; ctr_1 += 1) {
+      double *RESTRICT _data_f_20_30_10 = _stride_f_1 * ctr_1 + _data_f_20_30;
+      double *RESTRICT _data_j_2m1_36_10 = _stride_j_1 * ctr_1 + _data_j_2m1_36;
+      double *RESTRICT _data_j_2m1_310_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_2m1_310;
+      double *RESTRICT _data_j_2m1_312_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_2m1_312;
+      double *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      double *RESTRICT _data_j_20_33_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_20_33;
+      double *RESTRICT _data_j_20_34_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_20_34;
+      double *RESTRICT _data_j_21_35_10 = _stride_j_1 * ctr_1 + _data_j_21_35;
+      double *RESTRICT _data_j_21_39_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_21_39;
+      double *RESTRICT _data_j_21_311_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_21_311;
+      double *RESTRICT _data_f_20_31_10 = _stride_f_1 * ctr_1 + _data_f_20_31;
+      double *RESTRICT _data_j_2m1_38_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_2m1_38;
+      double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      double *RESTRICT _data_j_20_31_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_20_31;
+      double *RESTRICT _data_j_21_37_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_21_37;
+      double *RESTRICT _data_f_20_32_10 = _stride_f_1 * ctr_1 + _data_f_20_32;
+      double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+      double *RESTRICT _data_j_21_32_10 = _stride_j_1 * ctr_1 + _data_j_21_32;
+      for (int64_t ctr_0 = 1; ctr_0 < _size_f_0 - 1; ctr_0 += 1) {
+        _data_f_20_30_10[_stride_f_0 * ctr_0] = kT * (-1.0 * _data_j_20_30_10[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_20_30_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_310_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_311_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_312_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_33_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_33_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_20_34_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_34_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_20_35_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_36_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_39_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_21_311_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_21_35_10[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_21_39_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_2m1_310_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_2m1_312_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_2m1_36_10[_stride_j_0 * ctr_0 + _stride_j_0]) * 0.5 * ((1.0) / (D));
+        _data_f_20_31_10[_stride_f_0 * ctr_0] = kT * (-1.0 * _data_j_20_310_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_31_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_31_11[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_33_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_33_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_20_37_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_38_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_39_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_21_37_11[_stride_j_0 * ctr_0] - 1.0 * _data_j_21_39_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_2m1_310_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_2m1_38_11[_stride_j_0 * ctr_0] + _data_j_20_311_10[_stride_j_0 * ctr_0] + _data_j_20_312_10[_stride_j_0 * ctr_0] + _data_j_20_34_10[_stride_j_0 * ctr_0] + _data_j_20_34_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_21_311_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_312_1m1[_stride_j_0 * ctr_0 + _stride_j_0]) * 0.5 * ((1.0) / (D));
+        _data_f_20_32_10[_stride_f_0 * ctr_0] = kT * (-1.0 * _data_j_20_311_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_32_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_35_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_37_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_20_39_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_21_311_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_21_32_10[_stride_j_0 * ctr_0] - 1.0 * _data_j_21_35_10[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0 * _data_j_21_37_11[_stride_j_0 * ctr_0] - 1.0 * _data_j_21_39_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_20_310_10[_stride_j_0 * ctr_0] + _data_j_20_312_10[_stride_j_0 * ctr_0] + _data_j_20_36_10[_stride_j_0 * ctr_0] + _data_j_20_38_10[_stride_j_0 * ctr_0] + _data_j_2m1_310_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_312_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_36_10[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_38_11[_stride_j_0 * ctr_0]) * 0.5 * ((1.0) / (D));
+      }
+    }
+  }
+}
+} // namespace internal_828cb3fbc90c26a23ae54639862a1401
+
+void FrictionCouplingKernel_double_precision::run(IBlock *block) {
+  auto f = block->getData<field::GhostLayerField<double, 3>>(fID);
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  auto &D = this->D_;
+  auto &kT = this->kT_;
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(f->nrOfGhostLayers()));
+  double *RESTRICT _data_f = f->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()));
+  double *RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->xSizeWithGhostLayer(), int64_t(cell_idx_c(f->xSize()) + 2));
+  const int64_t _size_f_0 = int64_t(cell_idx_c(f->xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->ySizeWithGhostLayer(), int64_t(cell_idx_c(f->ySize()) + 2));
+  const int64_t _size_f_1 = int64_t(cell_idx_c(f->ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->zSizeWithGhostLayer(), int64_t(cell_idx_c(f->zSize()) + 2));
+  const int64_t _size_f_2 = int64_t(cell_idx_c(f->zSize()) + 2);
+  const int64_t _stride_f_0 = int64_t(f->xStride());
+  const int64_t _stride_f_1 = int64_t(f->yStride());
+  const int64_t _stride_f_2 = int64_t(f->zStride());
+  const int64_t _stride_f_3 = int64_t(1 * int64_t(f->fStride()));
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  internal_828cb3fbc90c26a23ae54639862a1401::frictioncouplingkernel_double_precision_frictioncouplingkernel_double_precision(D, _data_f, _data_j, _size_f_0, _size_f_1, _size_f_2, _stride_f_0, _stride_f_1, _stride_f_2, _stride_f_3, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, kT);
+}
+
+void FrictionCouplingKernel_double_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto f = block->getData<field::GhostLayerField<double, 3>>(fID);
+  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
+
+  auto &D = this->D_;
+  auto &kT = this->kT_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(f->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(f->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(f->nrOfGhostLayers()));
+  double *RESTRICT _data_f = f->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()));
+  double *RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_f_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_f_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_f_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  const int64_t _stride_f_0 = int64_t(f->xStride());
+  const int64_t _stride_f_1 = int64_t(f->yStride());
+  const int64_t _stride_f_2 = int64_t(f->zStride());
+  const int64_t _stride_f_3 = int64_t(1 * int64_t(f->fStride()));
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  internal_828cb3fbc90c26a23ae54639862a1401::frictioncouplingkernel_double_precision_frictioncouplingkernel_double_precision(D, _data_f, _data_j, _size_f_0, _size_f_1, _size_f_2, _stride_f_0, _stride_f_1, _stride_f_2, _stride_f_3, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, kT);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_double_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_double_precision.h
new file mode 100644
index 00000000000..750191b3613
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_double_precision.h
@@ -0,0 +1,105 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file FrictionCouplingKernel_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class FrictionCouplingKernel_double_precision {
+public:
+  FrictionCouplingKernel_double_precision(BlockDataID fID_, BlockDataID jID_,
+                                          double D, double kT)
+      : fID(fID_), jID(jID_), D_(D), kT_(kT){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<FrictionCouplingKernel_double_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<FrictionCouplingKernel_double_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID fID;
+  BlockDataID jID;
+  double D_;
+  double kT_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_single_precision.cpp
new file mode 100644
index 00000000000..c2a3a0b6cb2
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_single_precision.cpp
@@ -0,0 +1,191 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file FrictionCouplingKernel_single_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "FrictionCouplingKernel_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_910e9429dc8b77dbed969a16d3f227fb {
+static FUNC_PREFIX void frictioncouplingkernel_single_precision_frictioncouplingkernel_single_precision(float D, float *RESTRICT _data_f, float *RESTRICT const _data_j, int64_t const _size_f_0, int64_t const _size_f_1, int64_t const _size_f_2, int64_t const _stride_f_0, int64_t const _stride_f_1, int64_t const _stride_f_2, int64_t const _stride_f_3, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3, float kT) {
+  for (int64_t ctr_2 = 1; ctr_2 < _size_f_2 - 1; ctr_2 += 1) {
+    float *RESTRICT _data_f_20_30 = _data_f + _stride_f_2 * ctr_2;
+    float *RESTRICT _data_j_2m1_36 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 6 * _stride_j_3;
+    float *RESTRICT _data_j_2m1_310 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_2m1_312 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 12 * _stride_j_3;
+    float *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
+    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
+    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
+    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
+    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
+    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
+    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
+    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_21_35 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 5 * _stride_j_3;
+    float *RESTRICT _data_j_21_39 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 9 * _stride_j_3;
+    float *RESTRICT _data_j_21_311 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 11 * _stride_j_3;
+    float *RESTRICT _data_f_20_31 = _data_f + _stride_f_2 * ctr_2 + _stride_f_3;
+    float *RESTRICT _data_j_2m1_38 = _data_j + _stride_j_2 * ctr_2 - _stride_j_2 + 8 * _stride_j_3;
+    float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
+    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
+    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
+    float *RESTRICT _data_j_21_37 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 7 * _stride_j_3;
+    float *RESTRICT _data_f_20_32 = _data_f + _stride_f_2 * ctr_2 + 2 * _stride_f_3;
+    float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
+    float *RESTRICT _data_j_21_32 = _data_j + _stride_j_2 * ctr_2 + _stride_j_2 + 2 * _stride_j_3;
+    for (int64_t ctr_1 = 1; ctr_1 < _size_f_1 - 1; ctr_1 += 1) {
+      float *RESTRICT _data_f_20_30_10 = _stride_f_1 * ctr_1 + _data_f_20_30;
+      float *RESTRICT _data_j_2m1_36_10 = _stride_j_1 * ctr_1 + _data_j_2m1_36;
+      float *RESTRICT _data_j_2m1_310_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_2m1_310;
+      float *RESTRICT _data_j_2m1_312_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_2m1_312;
+      float *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
+      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
+      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
+      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
+      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
+      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
+      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
+      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
+      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
+      float *RESTRICT _data_j_20_33_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_20_33;
+      float *RESTRICT _data_j_20_34_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_20_34;
+      float *RESTRICT _data_j_21_35_10 = _stride_j_1 * ctr_1 + _data_j_21_35;
+      float *RESTRICT _data_j_21_39_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_21_39;
+      float *RESTRICT _data_j_21_311_1m1 = _stride_j_1 * ctr_1 - _stride_j_1 + _data_j_21_311;
+      float *RESTRICT _data_f_20_31_10 = _stride_f_1 * ctr_1 + _data_f_20_31;
+      float *RESTRICT _data_j_2m1_38_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_2m1_38;
+      float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
+      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
+      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
+      float *RESTRICT _data_j_20_31_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_20_31;
+      float *RESTRICT _data_j_21_37_11 = _stride_j_1 * ctr_1 + _stride_j_1 + _data_j_21_37;
+      float *RESTRICT _data_f_20_32_10 = _stride_f_1 * ctr_1 + _data_f_20_32;
+      float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
+      float *RESTRICT _data_j_21_32_10 = _stride_j_1 * ctr_1 + _data_j_21_32;
+      for (int64_t ctr_0 = 1; ctr_0 < _size_f_0 - 1; ctr_0 += 1) {
+        _data_f_20_30_10[_stride_f_0 * ctr_0] = kT * (-1.0f * _data_j_20_30_10[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_20_30_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_310_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_311_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_312_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_33_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_33_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_20_34_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_34_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_20_35_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_36_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_39_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_311_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_21_35_10[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_21_39_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_2m1_310_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_2m1_312_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_2m1_36_10[_stride_j_0 * ctr_0 + _stride_j_0]) * 0.5f * ((1.0f) / (D));
+        _data_f_20_31_10[_stride_f_0 * ctr_0] = kT * (-1.0f * _data_j_20_310_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_31_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_31_11[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_33_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_33_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_20_37_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_38_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_39_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_37_11[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_39_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_2m1_310_11[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_2m1_38_11[_stride_j_0 * ctr_0] + _data_j_20_311_10[_stride_j_0 * ctr_0] + _data_j_20_312_10[_stride_j_0 * ctr_0] + _data_j_20_34_10[_stride_j_0 * ctr_0] + _data_j_20_34_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_21_311_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_312_1m1[_stride_j_0 * ctr_0 + _stride_j_0]) * 0.5f * ((1.0f) / (D));
+        _data_f_20_32_10[_stride_f_0 * ctr_0] = kT * (-1.0f * _data_j_20_311_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_32_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_35_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_37_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_20_39_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_311_1m1[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_21_32_10[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_35_10[_stride_j_0 * ctr_0 + _stride_j_0] - 1.0f * _data_j_21_37_11[_stride_j_0 * ctr_0] - 1.0f * _data_j_21_39_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_20_310_10[_stride_j_0 * ctr_0] + _data_j_20_312_10[_stride_j_0 * ctr_0] + _data_j_20_36_10[_stride_j_0 * ctr_0] + _data_j_20_38_10[_stride_j_0 * ctr_0] + _data_j_2m1_310_11[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_312_1m1[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_36_10[_stride_j_0 * ctr_0 + _stride_j_0] + _data_j_2m1_38_11[_stride_j_0 * ctr_0]) * 0.5f * ((1.0f) / (D));
+      }
+    }
+  }
+}
+} // namespace internal_910e9429dc8b77dbed969a16d3f227fb
+
+void FrictionCouplingKernel_single_precision::run(IBlock *block) {
+  auto f = block->getData<field::GhostLayerField<float, 3>>(fID);
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+
+  auto &kT = this->kT_;
+  auto &D = this->D_;
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(f->nrOfGhostLayers()));
+  float *RESTRICT _data_f = f->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()));
+  float *RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->xSizeWithGhostLayer(), int64_t(cell_idx_c(f->xSize()) + 2));
+  const int64_t _size_f_0 = int64_t(cell_idx_c(f->xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->ySizeWithGhostLayer(), int64_t(cell_idx_c(f->ySize()) + 2));
+  const int64_t _size_f_1 = int64_t(cell_idx_c(f->ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->zSizeWithGhostLayer(), int64_t(cell_idx_c(f->zSize()) + 2));
+  const int64_t _size_f_2 = int64_t(cell_idx_c(f->zSize()) + 2);
+  const int64_t _stride_f_0 = int64_t(f->xStride());
+  const int64_t _stride_f_1 = int64_t(f->yStride());
+  const int64_t _stride_f_2 = int64_t(f->zStride());
+  const int64_t _stride_f_3 = int64_t(1 * int64_t(f->fStride()));
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  internal_910e9429dc8b77dbed969a16d3f227fb::frictioncouplingkernel_single_precision_frictioncouplingkernel_single_precision(D, _data_f, _data_j, _size_f_0, _size_f_1, _size_f_2, _stride_f_0, _stride_f_1, _stride_f_2, _stride_f_3, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, kT);
+}
+
+void FrictionCouplingKernel_single_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto f = block->getData<field::GhostLayerField<float, 3>>(fID);
+  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
+
+  auto &kT = this->kT_;
+  auto &D = this->D_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(f->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(f->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(f->nrOfGhostLayers()));
+  float *RESTRICT _data_f = f->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()));
+  float *RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_f_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_f_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_GREATER_EQUAL(f->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_f_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  const int64_t _stride_f_0 = int64_t(f->xStride());
+  const int64_t _stride_f_1 = int64_t(f->yStride());
+  const int64_t _stride_f_2 = int64_t(f->zStride());
+  const int64_t _stride_f_3 = int64_t(1 * int64_t(f->fStride()));
+  const int64_t _stride_j_0 = int64_t(j->xStride());
+  const int64_t _stride_j_1 = int64_t(j->yStride());
+  const int64_t _stride_j_2 = int64_t(j->zStride());
+  const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
+  internal_910e9429dc8b77dbed969a16d3f227fb::frictioncouplingkernel_single_precision_frictioncouplingkernel_single_precision(D, _data_f, _data_j, _size_f_0, _size_f_1, _size_f_2, _stride_f_0, _stride_f_1, _stride_f_2, _stride_f_3, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, kT);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_single_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_single_precision.h
new file mode 100644
index 00000000000..56202acd62d
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/generated_kernels/FrictionCouplingKernel_single_precision.h
@@ -0,0 +1,105 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file FrictionCouplingKernel_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class FrictionCouplingKernel_single_precision {
+public:
+  FrictionCouplingKernel_single_precision(BlockDataID fID_, BlockDataID jID_,
+                                          float D, float kT)
+      : fID(fID_), jID(jID_), D_(D), kT_(kT){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<FrictionCouplingKernel_single_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<FrictionCouplingKernel_single_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID fID;
+  BlockDataID jID;
+  float D_;
+  float kT_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt b/src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt
new file mode 100644
index 00000000000..6559b5c9ff4
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+add_subdirectory(generated_kernels)
+
+target_sources(espresso_walberla PRIVATE EKReactionImplBulk.cpp
+                                         EKReactionImplIndexed.cpp)
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.cpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.cpp
new file mode 100644
index 00000000000..800a84af573
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "EKReactionImplBulk.hpp"
+
+#include "generated_kernels/ReactionKernelBulk_all.h"
+
+#include <blockforest/StructuredBlockForest.h>
+
+namespace walberla {
+
+void EKReactionImplBulk::perform_reaction() {
+  // TODO: if my understanding is correct:
+  //  the kernels need to either run in the ghost layers and do the
+  //  synchronization before or not run and do a synchronization afterwards.
+  //  The better solution is probably the latter one. Not sure why it fails
+  //  atm.
+
+  auto kernel = detail::ReactionKernelBulkSelector::get_kernel(
+      get_reactants(), get_coefficient());
+
+  for (auto &block : *get_lattice()->get_blocks()) {
+    kernel(&block);
+  }
+}
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp
new file mode 100644
index 00000000000..33f7e21770f
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactant.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp>
+
+#include <memory>
+#include <vector>
+
+namespace walberla {
+
+class EKReactionImplBulk : public EKReactionBase {
+public:
+  EKReactionImplBulk(const std::shared_ptr<LatticeWalberla> &lattice,
+                     const std::vector<std::shared_ptr<EKReactant>> &reactants,
+                     double coefficient)
+      : EKReactionBase(lattice, reactants, coefficient) {}
+  ~EKReactionImplBulk() override = default;
+
+  using EKReactionBase::get_coefficient;
+  using EKReactionBase::get_lattice;
+  using EKReactionBase::get_reactants;
+
+  void perform_reaction() override;
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
new file mode 100644
index 00000000000..23b9d219918
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "EKReactionImplIndexed.hpp"
+
+#include "generated_kernels/ReactionKernelIndexed_all.h"
+
+#include <walberla_bridge/BlockAndCell.hpp>
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactant.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp>
+
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/AddToStorage.h>
+
+#include <boost/optional.hpp>
+
+#include <cstddef>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+namespace walberla {
+
+/// Flag for domain cells, i.e. all cells
+FlagUID const Domain_flag("domain");
+/// Flag for boundary cells
+FlagUID const Boundary_flag("boundary");
+
+namespace detail {
+// FlagField to use
+using FlagField = FlagField<uint8_t>;
+
+template <typename FlagField>
+inline auto
+get_flag_field_and_flag(IBlock *block,
+                        domain_decomposition::BlockDataID const &flagfield_id) {
+  auto const flag_field =
+      block->template uncheckedFastGetData<FlagField>(flagfield_id);
+  auto const boundary_flag = flag_field->getFlag(Boundary_flag);
+  return std::make_tuple(flag_field, boundary_flag);
+}
+
+template <typename FlagField, typename IndexVectors, typename IndexInfo>
+void fillFromFlagField(IBlock *block, BlockDataID indexVectorID,
+                       ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                       FlagUID domainFlagUID) {
+  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
+  auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+  auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+  auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+  auto *flagField = block->getData<FlagField>(flagFieldID);
+
+  if (!(flagField->flagExists(boundaryFlagUID) &&
+        flagField->flagExists(domainFlagUID)))
+    return;
+
+  auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+  auto domainFlag = flagField->getFlag(domainFlagUID);
+
+  auto inner = flagField->xyzSize();
+  inner.expand(cell_idx_t(-1));
+
+  indexVectorAll.clear();
+  indexVectorInner.clear();
+  indexVectorOuter.clear();
+
+  auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+  for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+       ++it) {
+
+    if (!isFlagSet(it, boundaryFlag))
+      continue;
+    if (flagWithGLayers.contains(it.x(), it.y(), it.z()) &&
+        isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+      auto element = IndexInfo(it.x(), it.y(), it.z());
+
+      indexVectorAll.push_back(element);
+      if (inner.contains(it.x(), it.y(), it.z()))
+        indexVectorInner.push_back(element);
+      else
+        indexVectorOuter.push_back(element);
+    }
+  }
+
+  indexVectors->syncGPU();
+}
+
+template <typename FlagField, typename IndexVectors, typename IndexInfo>
+void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                       BlockDataID indexVectorID, ConstBlockDataID flagFieldID,
+                       FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+  for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+    fillFromFlagField<FlagField, IndexVectors, IndexInfo>(
+        blockIt.get(), indexVectorID, flagFieldID, boundaryFlagUID,
+        domainFlagUID);
+}
+} // namespace detail
+
+EKReactionImplIndexed::EKReactionImplIndexed(
+    std::shared_ptr<LatticeWalberla> lattice,
+    std::vector<std::shared_ptr<EKReactant>> reactants, double coefficient)
+    : EKReactionBase(lattice, reactants, coefficient),
+      m_pending_changes(false) {
+  m_flagfield_id =
+      static_cast<std::size_t>(field::addFlagFieldToStorage<detail::FlagField>(
+          get_lattice()->get_blocks(), "flag field reaction",
+          get_lattice()->get_ghost_layers()));
+
+  // take one IndexVector as a dummy-value
+  using IndexVectors = detail::ReactionKernelIndexedSelector::KernelTrait<>::
+      ReactionKernelIndexed::IndexVectors;
+
+  auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+    return new IndexVectors();
+  };
+  m_indexvector_id = static_cast<std::size_t>(
+      get_lattice()
+          ->get_blocks()
+          ->template addStructuredBlockData<IndexVectors>(createIdxVector,
+                                                          "IndexField"));
+
+  for (auto &block : *get_lattice()->get_blocks()) {
+    auto flag_field =
+        block.template getData<detail::FlagField>(BlockDataID(m_flagfield_id));
+    // register flags
+    flag_field->registerFlag(Domain_flag);
+    flag_field->registerFlag(Boundary_flag);
+    // mark all cells as domain cells and fluid cells
+    auto domain_flag = flag_field->getFlag(Domain_flag);
+    auto boundary_flag = flag_field->getFlag(Boundary_flag);
+    for (auto it = flag_field->begin(); it != flag_field->end(); ++it) {
+      flag_field->addFlag(it.x(), it.y(), it.z(), domain_flag);
+      flag_field->removeFlag(it.x(), it.y(), it.z(), boundary_flag);
+    }
+  }
+}
+
+void EKReactionImplIndexed::perform_reaction() {
+  boundary_update();
+
+  auto kernel = detail::ReactionKernelIndexedSelector::get_kernel(
+      get_reactants(), get_coefficient(), BlockDataID(get_indexvector_id()));
+
+  for (auto &block : *get_lattice()->get_blocks()) {
+    kernel(&block);
+  }
+}
+
+void EKReactionImplIndexed::set_node_is_boundary(Utils::Vector3i const &node,
+                                                 bool is_boundary) {
+  auto bc = get_block_and_cell(*get_lattice(), node, true);
+  if (!bc)
+    return;
+
+  auto [flag_field, boundary_flag] =
+      detail::get_flag_field_and_flag<detail::FlagField>(
+          bc->block, BlockDataID(get_flagfield_id()));
+  if (is_boundary) {
+    flag_field->addFlag(bc->cell, boundary_flag);
+  } else {
+    flag_field->removeFlag(bc->cell, boundary_flag);
+  }
+  m_pending_changes = true;
+}
+
+boost::optional<bool>
+EKReactionImplIndexed::get_node_is_boundary(Utils::Vector3i const &node) {
+  auto bc = get_block_and_cell(*get_lattice(), node, true);
+  if (!bc)
+    return {boost::none};
+
+  auto [flag_field, boundary_flag] =
+      detail::get_flag_field_and_flag<detail::FlagField>(
+          bc->block, BlockDataID(get_flagfield_id()));
+  return {flag_field->isFlagSet(bc->cell, boundary_flag)};
+}
+
+void EKReactionImplIndexed::boundary_update() {
+  // take one IndexVector/IndexInfo as a dummy-value
+  using IndexVectors = detail::ReactionKernelIndexedSelector::KernelTrait<>::
+      ReactionKernelIndexed::IndexVectors;
+  using IndexInfo = detail::ReactionKernelIndexedSelector::KernelTrait<>::
+      ReactionKernelIndexed::IndexInfo;
+
+  if (m_pending_changes) {
+    detail::fillFromFlagField<detail::FlagField, IndexVectors, IndexInfo>(
+        get_lattice()->get_blocks(), BlockDataID(get_indexvector_id()),
+        BlockDataID(get_flagfield_id()), Boundary_flag, Domain_flag);
+    m_pending_changes = false;
+  }
+}
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp
new file mode 100644
index 00000000000..b62f85d3015
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactant.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <boost/optional.hpp>
+
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+namespace walberla {
+
+class EKReactionImplIndexed : public EKReactionBase {
+private:
+  std::size_t m_flagfield_id;
+  std::size_t m_indexvector_id;
+
+  bool m_pending_changes;
+
+public:
+  EKReactionImplIndexed(std::shared_ptr<LatticeWalberla> lattice,
+                        std::vector<std::shared_ptr<EKReactant>> reactants,
+                        double coefficient);
+  ~EKReactionImplIndexed() override = default;
+
+  using EKReactionBase::get_coefficient;
+  using EKReactionBase::get_lattice;
+  using EKReactionBase::get_reactants;
+
+  void perform_reaction() override;
+
+  void set_node_is_boundary(Utils::Vector3i const &node, bool is_boundary);
+  [[nodiscard]] boost::optional<bool>
+  get_node_is_boundary(Utils::Vector3i const &node);
+
+  [[nodiscard]] auto get_indexvector_id() const noexcept {
+    return m_indexvector_id;
+  }
+  [[nodiscard]] auto get_flagfield_id() const noexcept {
+    return m_flagfield_id;
+  }
+
+  void boundary_update();
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/CMakeLists.txt b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/CMakeLists.txt
new file mode 100644
index 00000000000..d8e7950e9a8
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/CMakeLists.txt
@@ -0,0 +1,27 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+foreach(n_species RANGE 1 5)
+  foreach(precision double_precision single_precision)
+    target_sources(
+      espresso_walberla
+      PRIVATE ReactionKernelBulk_${n_species}_${precision}.cpp
+              ReactionKernelIndexed_${n_species}_${precision}.cpp)
+  endforeach()
+endforeach()
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_double_precision.cpp
new file mode 100644
index 00000000000..a363a6b016e
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_double_precision.cpp
@@ -0,0 +1,124 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_1_double_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelBulk_1_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_a94c3c474646ee6905a4b90e8ccc47e6 {
+static FUNC_PREFIX void reactionkernelbulk_1_double_precision_reactionkernelbulk_1_double_precision(double *RESTRICT _data_rho_0, int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, double order_0, double rate_coefficient, double stoech_0) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_rho_0_2; ctr_2 += 1) {
+    double *RESTRICT _data_rho_0_20 = _data_rho_0 + _stride_rho_0_2 * ctr_2;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_rho_0_1; ctr_1 += 1) {
+      double *RESTRICT _data_rho_0_20_10 = _stride_rho_0_1 * ctr_1 + _data_rho_0_20;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_rho_0_0; ctr_0 += 1) {
+        const double local_rho_0 = _data_rho_0_20_10[_stride_rho_0_0 * ctr_0];
+        const double rate_factor = pow(local_rho_0, order_0) * rate_coefficient;
+        _data_rho_0_20_10[_stride_rho_0_0 * ctr_0] = local_rho_0 + rate_factor * stoech_0;
+      }
+    }
+  }
+}
+} // namespace internal_a94c3c474646ee6905a4b90e8ccc47e6
+
+void ReactionKernelBulk_1_double_precision::run(IBlock *block) {
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+
+  auto &stoech_0 = this->stoech_0_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &order_0 = this->order_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(rho_0->xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(rho_0->ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(rho_0->zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  internal_a94c3c474646ee6905a4b90e8ccc47e6::reactionkernelbulk_1_double_precision_reactionkernelbulk_1_double_precision(_data_rho_0, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, order_0, rate_coefficient, stoech_0);
+}
+
+void ReactionKernelBulk_1_double_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+
+  auto &stoech_0 = this->stoech_0_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &order_0 = this->order_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  internal_a94c3c474646ee6905a4b90e8ccc47e6::reactionkernelbulk_1_double_precision_reactionkernelbulk_1_double_precision(_data_rho_0, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, order_0, rate_coefficient, stoech_0);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_double_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_double_precision.h
new file mode 100644
index 00000000000..8f53639e934
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_double_precision.h
@@ -0,0 +1,107 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_1_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelBulk_1_double_precision {
+public:
+  ReactionKernelBulk_1_double_precision(BlockDataID rho_0ID_, double order_0,
+                                        double rate_coefficient,
+                                        double stoech_0)
+      : rho_0ID(rho_0ID_), order_0_(order_0),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ReactionKernelBulk_1_double_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ReactionKernelBulk_1_double_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID rho_0ID;
+  double order_0_;
+  double rate_coefficient_;
+  double stoech_0_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_single_precision.cpp
new file mode 100644
index 00000000000..1016073971a
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_single_precision.cpp
@@ -0,0 +1,124 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_1_single_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelBulk_1_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_2510542a202e1c11a0d58a7e565459bb {
+static FUNC_PREFIX void reactionkernelbulk_1_single_precision_reactionkernelbulk_1_single_precision(float *RESTRICT _data_rho_0, int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, float order_0, float rate_coefficient, float stoech_0) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_rho_0_2; ctr_2 += 1) {
+    float *RESTRICT _data_rho_0_20 = _data_rho_0 + _stride_rho_0_2 * ctr_2;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_rho_0_1; ctr_1 += 1) {
+      float *RESTRICT _data_rho_0_20_10 = _stride_rho_0_1 * ctr_1 + _data_rho_0_20;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_rho_0_0; ctr_0 += 1) {
+        const float local_rho_0 = _data_rho_0_20_10[_stride_rho_0_0 * ctr_0];
+        const float rate_factor = rate_coefficient * powf(local_rho_0, order_0);
+        _data_rho_0_20_10[_stride_rho_0_0 * ctr_0] = local_rho_0 + rate_factor * stoech_0;
+      }
+    }
+  }
+}
+} // namespace internal_2510542a202e1c11a0d58a7e565459bb
+
+void ReactionKernelBulk_1_single_precision::run(IBlock *block) {
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_0 = this->order_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(rho_0->xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(rho_0->ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(rho_0->zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  internal_2510542a202e1c11a0d58a7e565459bb::reactionkernelbulk_1_single_precision_reactionkernelbulk_1_single_precision(_data_rho_0, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, order_0, rate_coefficient, stoech_0);
+}
+
+void ReactionKernelBulk_1_single_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_0 = this->order_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  internal_2510542a202e1c11a0d58a7e565459bb::reactionkernelbulk_1_single_precision_reactionkernelbulk_1_single_precision(_data_rho_0, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, order_0, rate_coefficient, stoech_0);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_single_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_single_precision.h
new file mode 100644
index 00000000000..d7e59bf1999
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_1_single_precision.h
@@ -0,0 +1,106 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_1_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelBulk_1_single_precision {
+public:
+  ReactionKernelBulk_1_single_precision(BlockDataID rho_0ID_, float order_0,
+                                        float rate_coefficient, float stoech_0)
+      : rho_0ID(rho_0ID_), order_0_(order_0),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ReactionKernelBulk_1_single_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ReactionKernelBulk_1_single_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID rho_0ID;
+  float order_0_;
+  float rate_coefficient_;
+  float stoech_0_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_double_precision.cpp
new file mode 100644
index 00000000000..87c25aba058
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_double_precision.cpp
@@ -0,0 +1,146 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_2_double_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelBulk_2_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_2cb10021ef8890fa965cb94996ae1510 {
+static FUNC_PREFIX void reactionkernelbulk_2_double_precision_reactionkernelbulk_2_double_precision(double *RESTRICT _data_rho_0, double *RESTRICT _data_rho_1, int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, double order_0, double order_1, double rate_coefficient, double stoech_0, double stoech_1) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_rho_0_2; ctr_2 += 1) {
+    double *RESTRICT _data_rho_0_20 = _data_rho_0 + _stride_rho_0_2 * ctr_2;
+    double *RESTRICT _data_rho_1_20 = _data_rho_1 + _stride_rho_1_2 * ctr_2;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_rho_0_1; ctr_1 += 1) {
+      double *RESTRICT _data_rho_0_20_10 = _stride_rho_0_1 * ctr_1 + _data_rho_0_20;
+      double *RESTRICT _data_rho_1_20_10 = _stride_rho_1_1 * ctr_1 + _data_rho_1_20;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_rho_0_0; ctr_0 += 1) {
+        const double local_rho_0 = _data_rho_0_20_10[_stride_rho_0_0 * ctr_0];
+        const double local_rho_1 = _data_rho_1_20_10[_stride_rho_1_0 * ctr_0];
+        const double rate_factor = pow(local_rho_0, order_0) * pow(local_rho_1, order_1) * rate_coefficient;
+        _data_rho_0_20_10[_stride_rho_0_0 * ctr_0] = local_rho_0 + rate_factor * stoech_0;
+        _data_rho_1_20_10[_stride_rho_1_0 * ctr_0] = local_rho_1 + rate_factor * stoech_1;
+      }
+    }
+  }
+}
+} // namespace internal_2cb10021ef8890fa965cb94996ae1510
+
+void ReactionKernelBulk_2_double_precision::run(IBlock *block) {
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+
+  auto &stoech_0 = this->stoech_0_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &order_1 = this->order_1_;
+  auto &order_0 = this->order_0_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(rho_0->xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(rho_0->ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(rho_0->zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  internal_2cb10021ef8890fa965cb94996ae1510::reactionkernelbulk_2_double_precision_reactionkernelbulk_2_double_precision(_data_rho_0, _data_rho_1, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, order_0, order_1, rate_coefficient, stoech_0, stoech_1);
+}
+
+void ReactionKernelBulk_2_double_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+
+  auto &stoech_0 = this->stoech_0_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &order_1 = this->order_1_;
+  auto &order_0 = this->order_0_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  internal_2cb10021ef8890fa965cb94996ae1510::reactionkernelbulk_2_double_precision_reactionkernelbulk_2_double_precision(_data_rho_0, _data_rho_1, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, order_0, order_1, rate_coefficient, stoech_0, stoech_1);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_double_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_double_precision.h
new file mode 100644
index 00000000000..f14ab07729d
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_double_precision.h
@@ -0,0 +1,112 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_2_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelBulk_2_double_precision {
+public:
+  ReactionKernelBulk_2_double_precision(BlockDataID rho_0ID_,
+                                        BlockDataID rho_1ID_, double order_0,
+                                        double order_1, double rate_coefficient,
+                                        double stoech_0, double stoech_1)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), order_0_(order_0),
+        order_1_(order_1), rate_coefficient_(rate_coefficient),
+        stoech_0_(stoech_0), stoech_1_(stoech_1){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ReactionKernelBulk_2_double_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ReactionKernelBulk_2_double_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  double order_0_;
+  double order_1_;
+  double rate_coefficient_;
+  double stoech_0_;
+  double stoech_1_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_single_precision.cpp
new file mode 100644
index 00000000000..a557a71accf
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_single_precision.cpp
@@ -0,0 +1,146 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_2_single_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelBulk_2_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_6de92b64acc501777cd14903620af26b {
+static FUNC_PREFIX void reactionkernelbulk_2_single_precision_reactionkernelbulk_2_single_precision(float *RESTRICT _data_rho_0, float *RESTRICT _data_rho_1, int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, float order_0, float order_1, float rate_coefficient, float stoech_0, float stoech_1) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_rho_0_2; ctr_2 += 1) {
+    float *RESTRICT _data_rho_0_20 = _data_rho_0 + _stride_rho_0_2 * ctr_2;
+    float *RESTRICT _data_rho_1_20 = _data_rho_1 + _stride_rho_1_2 * ctr_2;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_rho_0_1; ctr_1 += 1) {
+      float *RESTRICT _data_rho_0_20_10 = _stride_rho_0_1 * ctr_1 + _data_rho_0_20;
+      float *RESTRICT _data_rho_1_20_10 = _stride_rho_1_1 * ctr_1 + _data_rho_1_20;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_rho_0_0; ctr_0 += 1) {
+        const float local_rho_0 = _data_rho_0_20_10[_stride_rho_0_0 * ctr_0];
+        const float local_rho_1 = _data_rho_1_20_10[_stride_rho_1_0 * ctr_0];
+        const float rate_factor = rate_coefficient * powf(local_rho_0, order_0) * powf(local_rho_1, order_1);
+        _data_rho_0_20_10[_stride_rho_0_0 * ctr_0] = local_rho_0 + rate_factor * stoech_0;
+        _data_rho_1_20_10[_stride_rho_1_0 * ctr_0] = local_rho_1 + rate_factor * stoech_1;
+      }
+    }
+  }
+}
+} // namespace internal_6de92b64acc501777cd14903620af26b
+
+void ReactionKernelBulk_2_single_precision::run(IBlock *block) {
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+
+  auto &order_1 = this->order_1_;
+  auto &order_0 = this->order_0_;
+  auto &stoech_0 = this->stoech_0_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(rho_0->xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(rho_0->ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(rho_0->zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  internal_6de92b64acc501777cd14903620af26b::reactionkernelbulk_2_single_precision_reactionkernelbulk_2_single_precision(_data_rho_0, _data_rho_1, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, order_0, order_1, rate_coefficient, stoech_0, stoech_1);
+}
+
+void ReactionKernelBulk_2_single_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+
+  auto &order_1 = this->order_1_;
+  auto &order_0 = this->order_0_;
+  auto &stoech_0 = this->stoech_0_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  internal_6de92b64acc501777cd14903620af26b::reactionkernelbulk_2_single_precision_reactionkernelbulk_2_single_precision(_data_rho_0, _data_rho_1, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, order_0, order_1, rate_coefficient, stoech_0, stoech_1);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_single_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_single_precision.h
new file mode 100644
index 00000000000..9c80acfab12
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_2_single_precision.h
@@ -0,0 +1,112 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_2_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelBulk_2_single_precision {
+public:
+  ReactionKernelBulk_2_single_precision(BlockDataID rho_0ID_,
+                                        BlockDataID rho_1ID_, float order_0,
+                                        float order_1, float rate_coefficient,
+                                        float stoech_0, float stoech_1)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), order_0_(order_0),
+        order_1_(order_1), rate_coefficient_(rate_coefficient),
+        stoech_0_(stoech_0), stoech_1_(stoech_1){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ReactionKernelBulk_2_single_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ReactionKernelBulk_2_single_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  float order_0_;
+  float order_1_;
+  float rate_coefficient_;
+  float stoech_0_;
+  float stoech_1_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_double_precision.cpp
new file mode 100644
index 00000000000..4a21053f0ad
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_double_precision.cpp
@@ -0,0 +1,168 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_3_double_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelBulk_3_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_d3ec035b42efecb474f5d17499de6537 {
+static FUNC_PREFIX void reactionkernelbulk_3_double_precision_reactionkernelbulk_3_double_precision(double *RESTRICT _data_rho_0, double *RESTRICT _data_rho_1, double *RESTRICT _data_rho_2, int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, double order_0, double order_1, double order_2, double rate_coefficient, double stoech_0, double stoech_1, double stoech_2) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_rho_0_2; ctr_2 += 1) {
+    double *RESTRICT _data_rho_0_20 = _data_rho_0 + _stride_rho_0_2 * ctr_2;
+    double *RESTRICT _data_rho_1_20 = _data_rho_1 + _stride_rho_1_2 * ctr_2;
+    double *RESTRICT _data_rho_2_20 = _data_rho_2 + _stride_rho_2_2 * ctr_2;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_rho_0_1; ctr_1 += 1) {
+      double *RESTRICT _data_rho_0_20_10 = _stride_rho_0_1 * ctr_1 + _data_rho_0_20;
+      double *RESTRICT _data_rho_1_20_10 = _stride_rho_1_1 * ctr_1 + _data_rho_1_20;
+      double *RESTRICT _data_rho_2_20_10 = _stride_rho_2_1 * ctr_1 + _data_rho_2_20;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_rho_0_0; ctr_0 += 1) {
+        const double local_rho_0 = _data_rho_0_20_10[_stride_rho_0_0 * ctr_0];
+        const double local_rho_1 = _data_rho_1_20_10[_stride_rho_1_0 * ctr_0];
+        const double local_rho_2 = _data_rho_2_20_10[_stride_rho_2_0 * ctr_0];
+        const double rate_factor = pow(local_rho_0, order_0) * pow(local_rho_1, order_1) * pow(local_rho_2, order_2) * rate_coefficient;
+        _data_rho_0_20_10[_stride_rho_0_0 * ctr_0] = local_rho_0 + rate_factor * stoech_0;
+        _data_rho_1_20_10[_stride_rho_1_0 * ctr_0] = local_rho_1 + rate_factor * stoech_1;
+        _data_rho_2_20_10[_stride_rho_2_0 * ctr_0] = local_rho_2 + rate_factor * stoech_2;
+      }
+    }
+  }
+}
+} // namespace internal_d3ec035b42efecb474f5d17499de6537
+
+void ReactionKernelBulk_3_double_precision::run(IBlock *block) {
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+  auto rho_2 = block->getData<field::GhostLayerField<double, 1>>(rho_2ID);
+
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &order_1 = this->order_1_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &order_0 = this->order_0_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(rho_0->xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(rho_0->ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(rho_0->zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  internal_d3ec035b42efecb474f5d17499de6537::reactionkernelbulk_3_double_precision_reactionkernelbulk_3_double_precision(_data_rho_0, _data_rho_1, _data_rho_2, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, order_0, order_1, order_2, rate_coefficient, stoech_0, stoech_1, stoech_2);
+}
+
+void ReactionKernelBulk_3_double_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+  auto rho_2 = block->getData<field::GhostLayerField<double, 1>>(rho_2ID);
+
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &order_1 = this->order_1_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &order_0 = this->order_0_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_2->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_2 = rho_2->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  internal_d3ec035b42efecb474f5d17499de6537::reactionkernelbulk_3_double_precision_reactionkernelbulk_3_double_precision(_data_rho_0, _data_rho_1, _data_rho_2, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, order_0, order_1, order_2, rate_coefficient, stoech_0, stoech_1, stoech_2);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_double_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_double_precision.h
new file mode 100644
index 00000000000..889b20f2ef4
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_double_precision.h
@@ -0,0 +1,116 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_3_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelBulk_3_double_precision {
+public:
+  ReactionKernelBulk_3_double_precision(
+      BlockDataID rho_0ID_, BlockDataID rho_1ID_, BlockDataID rho_2ID_,
+      double order_0, double order_1, double order_2, double rate_coefficient,
+      double stoech_0, double stoech_1, double stoech_2)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        order_0_(order_0), order_1_(order_1), order_2_(order_2),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ReactionKernelBulk_3_double_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ReactionKernelBulk_3_double_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  double order_0_;
+  double order_1_;
+  double order_2_;
+  double rate_coefficient_;
+  double stoech_0_;
+  double stoech_1_;
+  double stoech_2_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_single_precision.cpp
new file mode 100644
index 00000000000..a88dbaf9a7f
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_single_precision.cpp
@@ -0,0 +1,168 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_3_single_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelBulk_3_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_54fb5dcfe8687c5ab8c7b22acb6d285f {
+static FUNC_PREFIX void reactionkernelbulk_3_single_precision_reactionkernelbulk_3_single_precision(float *RESTRICT _data_rho_0, float *RESTRICT _data_rho_1, float *RESTRICT _data_rho_2, int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, float order_0, float order_1, float order_2, float rate_coefficient, float stoech_0, float stoech_1, float stoech_2) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_rho_0_2; ctr_2 += 1) {
+    float *RESTRICT _data_rho_0_20 = _data_rho_0 + _stride_rho_0_2 * ctr_2;
+    float *RESTRICT _data_rho_1_20 = _data_rho_1 + _stride_rho_1_2 * ctr_2;
+    float *RESTRICT _data_rho_2_20 = _data_rho_2 + _stride_rho_2_2 * ctr_2;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_rho_0_1; ctr_1 += 1) {
+      float *RESTRICT _data_rho_0_20_10 = _stride_rho_0_1 * ctr_1 + _data_rho_0_20;
+      float *RESTRICT _data_rho_1_20_10 = _stride_rho_1_1 * ctr_1 + _data_rho_1_20;
+      float *RESTRICT _data_rho_2_20_10 = _stride_rho_2_1 * ctr_1 + _data_rho_2_20;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_rho_0_0; ctr_0 += 1) {
+        const float local_rho_0 = _data_rho_0_20_10[_stride_rho_0_0 * ctr_0];
+        const float local_rho_1 = _data_rho_1_20_10[_stride_rho_1_0 * ctr_0];
+        const float local_rho_2 = _data_rho_2_20_10[_stride_rho_2_0 * ctr_0];
+        const float rate_factor = rate_coefficient * powf(local_rho_0, order_0) * powf(local_rho_1, order_1) * powf(local_rho_2, order_2);
+        _data_rho_0_20_10[_stride_rho_0_0 * ctr_0] = local_rho_0 + rate_factor * stoech_0;
+        _data_rho_1_20_10[_stride_rho_1_0 * ctr_0] = local_rho_1 + rate_factor * stoech_1;
+        _data_rho_2_20_10[_stride_rho_2_0 * ctr_0] = local_rho_2 + rate_factor * stoech_2;
+      }
+    }
+  }
+}
+} // namespace internal_54fb5dcfe8687c5ab8c7b22acb6d285f
+
+void ReactionKernelBulk_3_single_precision::run(IBlock *block) {
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<float, 1>>(rho_2ID);
+
+  auto &order_1 = this->order_1_;
+  auto &order_0 = this->order_0_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(rho_0->xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(rho_0->ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(rho_0->zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  internal_54fb5dcfe8687c5ab8c7b22acb6d285f::reactionkernelbulk_3_single_precision_reactionkernelbulk_3_single_precision(_data_rho_0, _data_rho_1, _data_rho_2, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, order_0, order_1, order_2, rate_coefficient, stoech_0, stoech_1, stoech_2);
+}
+
+void ReactionKernelBulk_3_single_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<float, 1>>(rho_2ID);
+
+  auto &order_1 = this->order_1_;
+  auto &order_0 = this->order_0_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_2->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_2 = rho_2->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  internal_54fb5dcfe8687c5ab8c7b22acb6d285f::reactionkernelbulk_3_single_precision_reactionkernelbulk_3_single_precision(_data_rho_0, _data_rho_1, _data_rho_2, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, order_0, order_1, order_2, rate_coefficient, stoech_0, stoech_1, stoech_2);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_single_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_single_precision.h
new file mode 100644
index 00000000000..75a6c72c558
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_3_single_precision.h
@@ -0,0 +1,118 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_3_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelBulk_3_single_precision {
+public:
+  ReactionKernelBulk_3_single_precision(BlockDataID rho_0ID_,
+                                        BlockDataID rho_1ID_,
+                                        BlockDataID rho_2ID_, float order_0,
+                                        float order_1, float order_2,
+                                        float rate_coefficient, float stoech_0,
+                                        float stoech_1, float stoech_2)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        order_0_(order_0), order_1_(order_1), order_2_(order_2),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ReactionKernelBulk_3_single_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ReactionKernelBulk_3_single_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  float order_0_;
+  float order_1_;
+  float order_2_;
+  float rate_coefficient_;
+  float stoech_0_;
+  float stoech_1_;
+  float stoech_2_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_double_precision.cpp
new file mode 100644
index 00000000000..5f91a7eb3dd
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_double_precision.cpp
@@ -0,0 +1,190 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_4_double_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelBulk_4_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_49f1aaa6789b7fa16fb103a21ce6fe12 {
+static FUNC_PREFIX void reactionkernelbulk_4_double_precision_reactionkernelbulk_4_double_precision(double *RESTRICT _data_rho_0, double *RESTRICT _data_rho_1, double *RESTRICT _data_rho_2, double *RESTRICT _data_rho_3, int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, int64_t const _stride_rho_3_0, int64_t const _stride_rho_3_1, int64_t const _stride_rho_3_2, double order_0, double order_1, double order_2, double order_3, double rate_coefficient, double stoech_0, double stoech_1, double stoech_2, double stoech_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_rho_0_2; ctr_2 += 1) {
+    double *RESTRICT _data_rho_0_20 = _data_rho_0 + _stride_rho_0_2 * ctr_2;
+    double *RESTRICT _data_rho_1_20 = _data_rho_1 + _stride_rho_1_2 * ctr_2;
+    double *RESTRICT _data_rho_2_20 = _data_rho_2 + _stride_rho_2_2 * ctr_2;
+    double *RESTRICT _data_rho_3_20 = _data_rho_3 + _stride_rho_3_2 * ctr_2;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_rho_0_1; ctr_1 += 1) {
+      double *RESTRICT _data_rho_0_20_10 = _stride_rho_0_1 * ctr_1 + _data_rho_0_20;
+      double *RESTRICT _data_rho_1_20_10 = _stride_rho_1_1 * ctr_1 + _data_rho_1_20;
+      double *RESTRICT _data_rho_2_20_10 = _stride_rho_2_1 * ctr_1 + _data_rho_2_20;
+      double *RESTRICT _data_rho_3_20_10 = _stride_rho_3_1 * ctr_1 + _data_rho_3_20;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_rho_0_0; ctr_0 += 1) {
+        const double local_rho_0 = _data_rho_0_20_10[_stride_rho_0_0 * ctr_0];
+        const double local_rho_1 = _data_rho_1_20_10[_stride_rho_1_0 * ctr_0];
+        const double local_rho_2 = _data_rho_2_20_10[_stride_rho_2_0 * ctr_0];
+        const double local_rho_3 = _data_rho_3_20_10[_stride_rho_3_0 * ctr_0];
+        const double rate_factor = pow(local_rho_0, order_0) * pow(local_rho_1, order_1) * pow(local_rho_2, order_2) * pow(local_rho_3, order_3) * rate_coefficient;
+        _data_rho_0_20_10[_stride_rho_0_0 * ctr_0] = local_rho_0 + rate_factor * stoech_0;
+        _data_rho_1_20_10[_stride_rho_1_0 * ctr_0] = local_rho_1 + rate_factor * stoech_1;
+        _data_rho_2_20_10[_stride_rho_2_0 * ctr_0] = local_rho_2 + rate_factor * stoech_2;
+        _data_rho_3_20_10[_stride_rho_3_0 * ctr_0] = local_rho_3 + rate_factor * stoech_3;
+      }
+    }
+  }
+}
+} // namespace internal_49f1aaa6789b7fa16fb103a21ce6fe12
+
+void ReactionKernelBulk_4_double_precision::run(IBlock *block) {
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+  auto rho_2 = block->getData<field::GhostLayerField<double, 1>>(rho_2ID);
+  auto rho_3 = block->getData<field::GhostLayerField<double, 1>>(rho_3ID);
+
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &order_1 = this->order_1_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &order_0 = this->order_0_;
+  auto &stoech_3 = this->stoech_3_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &order_3 = this->order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_3->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_3 = rho_3->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(rho_0->xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(rho_0->ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(rho_0->zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  internal_49f1aaa6789b7fa16fb103a21ce6fe12::reactionkernelbulk_4_double_precision_reactionkernelbulk_4_double_precision(_data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, order_0, order_1, order_2, order_3, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3);
+}
+
+void ReactionKernelBulk_4_double_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+  auto rho_2 = block->getData<field::GhostLayerField<double, 1>>(rho_2ID);
+  auto rho_3 = block->getData<field::GhostLayerField<double, 1>>(rho_3ID);
+
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &order_1 = this->order_1_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &order_0 = this->order_0_;
+  auto &stoech_3 = this->stoech_3_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &order_3 = this->order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_2->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_2 = rho_2->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_3->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_3->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_3->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_3 = rho_3->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  internal_49f1aaa6789b7fa16fb103a21ce6fe12::reactionkernelbulk_4_double_precision_reactionkernelbulk_4_double_precision(_data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, order_0, order_1, order_2, order_3, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_double_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_double_precision.h
new file mode 100644
index 00000000000..04117076294
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_double_precision.h
@@ -0,0 +1,121 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_4_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelBulk_4_double_precision {
+public:
+  ReactionKernelBulk_4_double_precision(
+      BlockDataID rho_0ID_, BlockDataID rho_1ID_, BlockDataID rho_2ID_,
+      BlockDataID rho_3ID_, double order_0, double order_1, double order_2,
+      double order_3, double rate_coefficient, double stoech_0, double stoech_1,
+      double stoech_2, double stoech_3)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        rho_3ID(rho_3ID_), order_0_(order_0), order_1_(order_1),
+        order_2_(order_2), order_3_(order_3),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ReactionKernelBulk_4_double_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ReactionKernelBulk_4_double_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  BlockDataID rho_3ID;
+  double order_0_;
+  double order_1_;
+  double order_2_;
+  double order_3_;
+  double rate_coefficient_;
+  double stoech_0_;
+  double stoech_1_;
+  double stoech_2_;
+  double stoech_3_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_single_precision.cpp
new file mode 100644
index 00000000000..1f34057b82f
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_single_precision.cpp
@@ -0,0 +1,190 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_4_single_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelBulk_4_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_716e8e56e191f2f523734cd5e32cecbb {
+static FUNC_PREFIX void reactionkernelbulk_4_single_precision_reactionkernelbulk_4_single_precision(float *RESTRICT _data_rho_0, float *RESTRICT _data_rho_1, float *RESTRICT _data_rho_2, float *RESTRICT _data_rho_3, int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, int64_t const _stride_rho_3_0, int64_t const _stride_rho_3_1, int64_t const _stride_rho_3_2, float order_0, float order_1, float order_2, float order_3, float rate_coefficient, float stoech_0, float stoech_1, float stoech_2, float stoech_3) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_rho_0_2; ctr_2 += 1) {
+    float *RESTRICT _data_rho_0_20 = _data_rho_0 + _stride_rho_0_2 * ctr_2;
+    float *RESTRICT _data_rho_1_20 = _data_rho_1 + _stride_rho_1_2 * ctr_2;
+    float *RESTRICT _data_rho_2_20 = _data_rho_2 + _stride_rho_2_2 * ctr_2;
+    float *RESTRICT _data_rho_3_20 = _data_rho_3 + _stride_rho_3_2 * ctr_2;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_rho_0_1; ctr_1 += 1) {
+      float *RESTRICT _data_rho_0_20_10 = _stride_rho_0_1 * ctr_1 + _data_rho_0_20;
+      float *RESTRICT _data_rho_1_20_10 = _stride_rho_1_1 * ctr_1 + _data_rho_1_20;
+      float *RESTRICT _data_rho_2_20_10 = _stride_rho_2_1 * ctr_1 + _data_rho_2_20;
+      float *RESTRICT _data_rho_3_20_10 = _stride_rho_3_1 * ctr_1 + _data_rho_3_20;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_rho_0_0; ctr_0 += 1) {
+        const float local_rho_0 = _data_rho_0_20_10[_stride_rho_0_0 * ctr_0];
+        const float local_rho_1 = _data_rho_1_20_10[_stride_rho_1_0 * ctr_0];
+        const float local_rho_2 = _data_rho_2_20_10[_stride_rho_2_0 * ctr_0];
+        const float local_rho_3 = _data_rho_3_20_10[_stride_rho_3_0 * ctr_0];
+        const float rate_factor = rate_coefficient * powf(local_rho_0, order_0) * powf(local_rho_1, order_1) * powf(local_rho_2, order_2) * powf(local_rho_3, order_3);
+        _data_rho_0_20_10[_stride_rho_0_0 * ctr_0] = local_rho_0 + rate_factor * stoech_0;
+        _data_rho_1_20_10[_stride_rho_1_0 * ctr_0] = local_rho_1 + rate_factor * stoech_1;
+        _data_rho_2_20_10[_stride_rho_2_0 * ctr_0] = local_rho_2 + rate_factor * stoech_2;
+        _data_rho_3_20_10[_stride_rho_3_0 * ctr_0] = local_rho_3 + rate_factor * stoech_3;
+      }
+    }
+  }
+}
+} // namespace internal_716e8e56e191f2f523734cd5e32cecbb
+
+void ReactionKernelBulk_4_single_precision::run(IBlock *block) {
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<float, 1>>(rho_2ID);
+  auto rho_3 = block->getData<field::GhostLayerField<float, 1>>(rho_3ID);
+
+  auto &order_1 = this->order_1_;
+  auto &order_0 = this->order_0_;
+  auto &stoech_3 = this->stoech_3_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &order_3 = this->order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_3->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_3 = rho_3->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(rho_0->xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(rho_0->ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(rho_0->zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  internal_716e8e56e191f2f523734cd5e32cecbb::reactionkernelbulk_4_single_precision_reactionkernelbulk_4_single_precision(_data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, order_0, order_1, order_2, order_3, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3);
+}
+
+void ReactionKernelBulk_4_single_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<float, 1>>(rho_2ID);
+  auto rho_3 = block->getData<field::GhostLayerField<float, 1>>(rho_3ID);
+
+  auto &order_1 = this->order_1_;
+  auto &order_0 = this->order_0_;
+  auto &stoech_3 = this->stoech_3_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &order_3 = this->order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_2->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_2 = rho_2->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_3->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_3->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_3->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_3 = rho_3->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  internal_716e8e56e191f2f523734cd5e32cecbb::reactionkernelbulk_4_single_precision_reactionkernelbulk_4_single_precision(_data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, order_0, order_1, order_2, order_3, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_single_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_single_precision.h
new file mode 100644
index 00000000000..f8114b73bbd
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_4_single_precision.h
@@ -0,0 +1,121 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_4_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelBulk_4_single_precision {
+public:
+  ReactionKernelBulk_4_single_precision(
+      BlockDataID rho_0ID_, BlockDataID rho_1ID_, BlockDataID rho_2ID_,
+      BlockDataID rho_3ID_, float order_0, float order_1, float order_2,
+      float order_3, float rate_coefficient, float stoech_0, float stoech_1,
+      float stoech_2, float stoech_3)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        rho_3ID(rho_3ID_), order_0_(order_0), order_1_(order_1),
+        order_2_(order_2), order_3_(order_3),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ReactionKernelBulk_4_single_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ReactionKernelBulk_4_single_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  BlockDataID rho_3ID;
+  float order_0_;
+  float order_1_;
+  float order_2_;
+  float order_3_;
+  float rate_coefficient_;
+  float stoech_0_;
+  float stoech_1_;
+  float stoech_2_;
+  float stoech_3_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_double_precision.cpp
new file mode 100644
index 00000000000..31edecb7aee
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_double_precision.cpp
@@ -0,0 +1,212 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_5_double_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelBulk_5_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_5119d69793e3096feaaca816d627c080 {
+static FUNC_PREFIX void reactionkernelbulk_5_double_precision_reactionkernelbulk_5_double_precision(double *RESTRICT _data_rho_0, double *RESTRICT _data_rho_1, double *RESTRICT _data_rho_2, double *RESTRICT _data_rho_3, double *RESTRICT _data_rho_4, int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, int64_t const _stride_rho_3_0, int64_t const _stride_rho_3_1, int64_t const _stride_rho_3_2, int64_t const _stride_rho_4_0, int64_t const _stride_rho_4_1, int64_t const _stride_rho_4_2, double order_0, double order_1, double order_2, double order_3, double order_4, double rate_coefficient, double stoech_0, double stoech_1, double stoech_2, double stoech_3, double stoech_4) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_rho_0_2; ctr_2 += 1) {
+    double *RESTRICT _data_rho_0_20 = _data_rho_0 + _stride_rho_0_2 * ctr_2;
+    double *RESTRICT _data_rho_1_20 = _data_rho_1 + _stride_rho_1_2 * ctr_2;
+    double *RESTRICT _data_rho_2_20 = _data_rho_2 + _stride_rho_2_2 * ctr_2;
+    double *RESTRICT _data_rho_3_20 = _data_rho_3 + _stride_rho_3_2 * ctr_2;
+    double *RESTRICT _data_rho_4_20 = _data_rho_4 + _stride_rho_4_2 * ctr_2;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_rho_0_1; ctr_1 += 1) {
+      double *RESTRICT _data_rho_0_20_10 = _stride_rho_0_1 * ctr_1 + _data_rho_0_20;
+      double *RESTRICT _data_rho_1_20_10 = _stride_rho_1_1 * ctr_1 + _data_rho_1_20;
+      double *RESTRICT _data_rho_2_20_10 = _stride_rho_2_1 * ctr_1 + _data_rho_2_20;
+      double *RESTRICT _data_rho_3_20_10 = _stride_rho_3_1 * ctr_1 + _data_rho_3_20;
+      double *RESTRICT _data_rho_4_20_10 = _stride_rho_4_1 * ctr_1 + _data_rho_4_20;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_rho_0_0; ctr_0 += 1) {
+        const double local_rho_0 = _data_rho_0_20_10[_stride_rho_0_0 * ctr_0];
+        const double local_rho_1 = _data_rho_1_20_10[_stride_rho_1_0 * ctr_0];
+        const double local_rho_2 = _data_rho_2_20_10[_stride_rho_2_0 * ctr_0];
+        const double local_rho_3 = _data_rho_3_20_10[_stride_rho_3_0 * ctr_0];
+        const double local_rho_4 = _data_rho_4_20_10[_stride_rho_4_0 * ctr_0];
+        const double rate_factor = pow(local_rho_0, order_0) * pow(local_rho_1, order_1) * pow(local_rho_2, order_2) * pow(local_rho_3, order_3) * pow(local_rho_4, order_4) * rate_coefficient;
+        _data_rho_0_20_10[_stride_rho_0_0 * ctr_0] = local_rho_0 + rate_factor * stoech_0;
+        _data_rho_1_20_10[_stride_rho_1_0 * ctr_0] = local_rho_1 + rate_factor * stoech_1;
+        _data_rho_2_20_10[_stride_rho_2_0 * ctr_0] = local_rho_2 + rate_factor * stoech_2;
+        _data_rho_3_20_10[_stride_rho_3_0 * ctr_0] = local_rho_3 + rate_factor * stoech_3;
+        _data_rho_4_20_10[_stride_rho_4_0 * ctr_0] = local_rho_4 + rate_factor * stoech_4;
+      }
+    }
+  }
+}
+} // namespace internal_5119d69793e3096feaaca816d627c080
+
+void ReactionKernelBulk_5_double_precision::run(IBlock *block) {
+  auto rho_3 = block->getData<field::GhostLayerField<double, 1>>(rho_3ID);
+  auto rho_4 = block->getData<field::GhostLayerField<double, 1>>(rho_4ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<double, 1>>(rho_2ID);
+
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &stoech_4 = this->stoech_4_;
+  auto &order_1 = this->order_1_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &order_0 = this->order_0_;
+  auto &order_4 = this->order_4_;
+  auto &stoech_3 = this->stoech_3_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &order_3 = this->order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_3->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_3 = rho_3->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_4->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_4 = rho_4->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(rho_0->xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(rho_0->ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(rho_0->zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  const int64_t _stride_rho_4_0 = int64_t(rho_4->xStride());
+  const int64_t _stride_rho_4_1 = int64_t(rho_4->yStride());
+  const int64_t _stride_rho_4_2 = int64_t(rho_4->zStride());
+  internal_5119d69793e3096feaaca816d627c080::reactionkernelbulk_5_double_precision_reactionkernelbulk_5_double_precision(_data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _data_rho_4, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, _stride_rho_4_0, _stride_rho_4_1, _stride_rho_4_2, order_0, order_1, order_2, order_3, order_4, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3, stoech_4);
+}
+
+void ReactionKernelBulk_5_double_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho_3 = block->getData<field::GhostLayerField<double, 1>>(rho_3ID);
+  auto rho_4 = block->getData<field::GhostLayerField<double, 1>>(rho_4ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<double, 1>>(rho_2ID);
+
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &stoech_4 = this->stoech_4_;
+  auto &order_1 = this->order_1_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &order_0 = this->order_0_;
+  auto &order_4 = this->order_4_;
+  auto &stoech_3 = this->stoech_3_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &order_3 = this->order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_2->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_2 = rho_2->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_3->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_3->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_3->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_3 = rho_3->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_4->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_4->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_4->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_4 = rho_4->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  const int64_t _stride_rho_4_0 = int64_t(rho_4->xStride());
+  const int64_t _stride_rho_4_1 = int64_t(rho_4->yStride());
+  const int64_t _stride_rho_4_2 = int64_t(rho_4->zStride());
+  internal_5119d69793e3096feaaca816d627c080::reactionkernelbulk_5_double_precision_reactionkernelbulk_5_double_precision(_data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _data_rho_4, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, _stride_rho_4_0, _stride_rho_4_1, _stride_rho_4_2, order_0, order_1, order_2, order_3, order_4, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3, stoech_4);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_double_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_double_precision.h
new file mode 100644
index 00000000000..8d8482d72f6
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_double_precision.h
@@ -0,0 +1,126 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_5_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelBulk_5_double_precision {
+public:
+  ReactionKernelBulk_5_double_precision(
+      BlockDataID rho_0ID_, BlockDataID rho_1ID_, BlockDataID rho_2ID_,
+      BlockDataID rho_3ID_, BlockDataID rho_4ID_, double order_0,
+      double order_1, double order_2, double order_3, double order_4,
+      double rate_coefficient, double stoech_0, double stoech_1,
+      double stoech_2, double stoech_3, double stoech_4)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        rho_3ID(rho_3ID_), rho_4ID(rho_4ID_), order_0_(order_0),
+        order_1_(order_1), order_2_(order_2), order_3_(order_3),
+        order_4_(order_4), rate_coefficient_(rate_coefficient),
+        stoech_0_(stoech_0), stoech_1_(stoech_1), stoech_2_(stoech_2),
+        stoech_3_(stoech_3), stoech_4_(stoech_4){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ReactionKernelBulk_5_double_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ReactionKernelBulk_5_double_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  BlockDataID rho_3ID;
+  BlockDataID rho_4ID;
+  double order_0_;
+  double order_1_;
+  double order_2_;
+  double order_3_;
+  double order_4_;
+  double rate_coefficient_;
+  double stoech_0_;
+  double stoech_1_;
+  double stoech_2_;
+  double stoech_3_;
+  double stoech_4_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_single_precision.cpp
new file mode 100644
index 00000000000..0cb8f2d722d
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_single_precision.cpp
@@ -0,0 +1,212 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_5_single_precision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelBulk_5_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_737d6904f7c65dcfc320d619f189641b {
+static FUNC_PREFIX void reactionkernelbulk_5_single_precision_reactionkernelbulk_5_single_precision(float *RESTRICT _data_rho_0, float *RESTRICT _data_rho_1, float *RESTRICT _data_rho_2, float *RESTRICT _data_rho_3, float *RESTRICT _data_rho_4, int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, int64_t const _stride_rho_3_0, int64_t const _stride_rho_3_1, int64_t const _stride_rho_3_2, int64_t const _stride_rho_4_0, int64_t const _stride_rho_4_1, int64_t const _stride_rho_4_2, float order_0, float order_1, float order_2, float order_3, float order_4, float rate_coefficient, float stoech_0, float stoech_1, float stoech_2, float stoech_3, float stoech_4) {
+  for (int64_t ctr_2 = 0; ctr_2 < _size_rho_0_2; ctr_2 += 1) {
+    float *RESTRICT _data_rho_0_20 = _data_rho_0 + _stride_rho_0_2 * ctr_2;
+    float *RESTRICT _data_rho_1_20 = _data_rho_1 + _stride_rho_1_2 * ctr_2;
+    float *RESTRICT _data_rho_2_20 = _data_rho_2 + _stride_rho_2_2 * ctr_2;
+    float *RESTRICT _data_rho_3_20 = _data_rho_3 + _stride_rho_3_2 * ctr_2;
+    float *RESTRICT _data_rho_4_20 = _data_rho_4 + _stride_rho_4_2 * ctr_2;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_rho_0_1; ctr_1 += 1) {
+      float *RESTRICT _data_rho_0_20_10 = _stride_rho_0_1 * ctr_1 + _data_rho_0_20;
+      float *RESTRICT _data_rho_1_20_10 = _stride_rho_1_1 * ctr_1 + _data_rho_1_20;
+      float *RESTRICT _data_rho_2_20_10 = _stride_rho_2_1 * ctr_1 + _data_rho_2_20;
+      float *RESTRICT _data_rho_3_20_10 = _stride_rho_3_1 * ctr_1 + _data_rho_3_20;
+      float *RESTRICT _data_rho_4_20_10 = _stride_rho_4_1 * ctr_1 + _data_rho_4_20;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_rho_0_0; ctr_0 += 1) {
+        const float local_rho_0 = _data_rho_0_20_10[_stride_rho_0_0 * ctr_0];
+        const float local_rho_1 = _data_rho_1_20_10[_stride_rho_1_0 * ctr_0];
+        const float local_rho_2 = _data_rho_2_20_10[_stride_rho_2_0 * ctr_0];
+        const float local_rho_3 = _data_rho_3_20_10[_stride_rho_3_0 * ctr_0];
+        const float local_rho_4 = _data_rho_4_20_10[_stride_rho_4_0 * ctr_0];
+        const float rate_factor = rate_coefficient * powf(local_rho_0, order_0) * powf(local_rho_1, order_1) * powf(local_rho_2, order_2) * powf(local_rho_3, order_3) * powf(local_rho_4, order_4);
+        _data_rho_0_20_10[_stride_rho_0_0 * ctr_0] = local_rho_0 + rate_factor * stoech_0;
+        _data_rho_1_20_10[_stride_rho_1_0 * ctr_0] = local_rho_1 + rate_factor * stoech_1;
+        _data_rho_2_20_10[_stride_rho_2_0 * ctr_0] = local_rho_2 + rate_factor * stoech_2;
+        _data_rho_3_20_10[_stride_rho_3_0 * ctr_0] = local_rho_3 + rate_factor * stoech_3;
+        _data_rho_4_20_10[_stride_rho_4_0 * ctr_0] = local_rho_4 + rate_factor * stoech_4;
+      }
+    }
+  }
+}
+} // namespace internal_737d6904f7c65dcfc320d619f189641b
+
+void ReactionKernelBulk_5_single_precision::run(IBlock *block) {
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<float, 1>>(rho_2ID);
+  auto rho_3 = block->getData<field::GhostLayerField<float, 1>>(rho_3ID);
+  auto rho_4 = block->getData<field::GhostLayerField<float, 1>>(rho_4ID);
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+
+  auto &order_1 = this->order_1_;
+  auto &order_4 = this->order_4_;
+  auto &order_0 = this->order_0_;
+  auto &stoech_3 = this->stoech_3_;
+  auto &stoech_4 = this->stoech_4_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &order_3 = this->order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_3->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_3 = rho_3->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_4->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_4 = rho_4->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(rho_0->xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(rho_0->ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(rho_0->zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(rho_0->zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  const int64_t _stride_rho_4_0 = int64_t(rho_4->xStride());
+  const int64_t _stride_rho_4_1 = int64_t(rho_4->yStride());
+  const int64_t _stride_rho_4_2 = int64_t(rho_4->zStride());
+  internal_737d6904f7c65dcfc320d619f189641b::reactionkernelbulk_5_single_precision_reactionkernelbulk_5_single_precision(_data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _data_rho_4, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, _stride_rho_4_0, _stride_rho_4_1, _stride_rho_4_2, order_0, order_1, order_2, order_3, order_4, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3, stoech_4);
+}
+
+void ReactionKernelBulk_5_single_precision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<float, 1>>(rho_2ID);
+  auto rho_3 = block->getData<field::GhostLayerField<float, 1>>(rho_3ID);
+  auto rho_4 = block->getData<field::GhostLayerField<float, 1>>(rho_4ID);
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+
+  auto &order_1 = this->order_1_;
+  auto &order_4 = this->order_4_;
+  auto &order_0 = this->order_0_;
+  auto &stoech_3 = this->stoech_3_;
+  auto &stoech_4 = this->stoech_4_;
+  auto &stoech_2 = this->stoech_2_;
+  auto &stoech_0 = this->stoech_0_;
+  auto &order_2 = this->order_2_;
+  auto &stoech_1 = this->stoech_1_;
+  auto &rate_coefficient = this->rate_coefficient_;
+  auto &order_3 = this->order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_1->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_2->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_2->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_2 = rho_2->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_3->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_3->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_3->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_3 = rho_3->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_4->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_4->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_4->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_4 = rho_4->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_rho_0_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_rho_0_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_rho_0_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  const int64_t _stride_rho_4_0 = int64_t(rho_4->xStride());
+  const int64_t _stride_rho_4_1 = int64_t(rho_4->yStride());
+  const int64_t _stride_rho_4_2 = int64_t(rho_4->zStride());
+  internal_737d6904f7c65dcfc320d619f189641b::reactionkernelbulk_5_single_precision_reactionkernelbulk_5_single_precision(_data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _data_rho_4, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, _stride_rho_4_0, _stride_rho_4_1, _stride_rho_4_2, order_0, order_1, order_2, order_3, order_4, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3, stoech_4);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_single_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_single_precision.h
new file mode 100644
index 00000000000..57c31b2a03b
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_5_single_precision.h
@@ -0,0 +1,126 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelBulk_5_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelBulk_5_single_precision {
+public:
+  ReactionKernelBulk_5_single_precision(
+      BlockDataID rho_0ID_, BlockDataID rho_1ID_, BlockDataID rho_2ID_,
+      BlockDataID rho_3ID_, BlockDataID rho_4ID_, float order_0, float order_1,
+      float order_2, float order_3, float order_4, float rate_coefficient,
+      float stoech_0, float stoech_1, float stoech_2, float stoech_3,
+      float stoech_4)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        rho_3ID(rho_3ID_), rho_4ID(rho_4ID_), order_0_(order_0),
+        order_1_(order_1), order_2_(order_2), order_3_(order_3),
+        order_4_(order_4), rate_coefficient_(rate_coefficient),
+        stoech_0_(stoech_0), stoech_1_(stoech_1), stoech_2_(stoech_2),
+        stoech_3_(stoech_3), stoech_4_(stoech_4){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<ReactionKernelBulk_5_single_precision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<ReactionKernelBulk_5_single_precision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  BlockDataID rho_3ID;
+  BlockDataID rho_4ID;
+  float order_0_;
+  float order_1_;
+  float order_2_;
+  float order_3_;
+  float order_4_;
+  float rate_coefficient_;
+  float stoech_0_;
+  float stoech_1_;
+  float stoech_2_;
+  float stoech_3_;
+  float stoech_4_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_all.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_all.h
new file mode 100644
index 00000000000..2b4b1f97a2b
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelBulk_all.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+
+#include "ReactionKernelBulk_1_double_precision.h"
+#include "ReactionKernelBulk_1_single_precision.h"
+
+#include "ReactionKernelBulk_2_double_precision.h"
+#include "ReactionKernelBulk_2_single_precision.h"
+
+#include "ReactionKernelBulk_3_double_precision.h"
+#include "ReactionKernelBulk_3_single_precision.h"
+
+#include "ReactionKernelBulk_4_double_precision.h"
+#include "ReactionKernelBulk_4_single_precision.h"
+
+#include "ReactionKernelBulk_5_double_precision.h"
+#include "ReactionKernelBulk_5_single_precision.h"
+
+#include <domain_decomposition/BlockDataID.h>
+
+#include <cstddef>
+#include <memory>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+namespace walberla {
+namespace detail {
+namespace ReactionKernelBulkSelector {
+
+template <typename FloatType = double, std::size_t N = 1> struct KernelTrait {
+  using ReactionKernelBulk = pystencils::ReactionKernelBulk_1_double_precision;
+};
+
+template <> struct KernelTrait<double, 2> {
+  using ReactionKernelBulk = pystencils::ReactionKernelBulk_2_double_precision;
+};
+
+template <> struct KernelTrait<double, 3> {
+  using ReactionKernelBulk = pystencils::ReactionKernelBulk_3_double_precision;
+};
+
+template <> struct KernelTrait<double, 4> {
+  using ReactionKernelBulk = pystencils::ReactionKernelBulk_4_double_precision;
+};
+
+template <> struct KernelTrait<double, 5> {
+  using ReactionKernelBulk = pystencils::ReactionKernelBulk_5_double_precision;
+};
+
+template <> struct KernelTrait<float, 1> {
+  using ReactionKernelBulk = pystencils::ReactionKernelBulk_1_single_precision;
+};
+
+template <> struct KernelTrait<float, 2> {
+  using ReactionKernelBulk = pystencils::ReactionKernelBulk_2_single_precision;
+};
+
+template <> struct KernelTrait<float, 3> {
+  using ReactionKernelBulk = pystencils::ReactionKernelBulk_3_single_precision;
+};
+
+template <> struct KernelTrait<float, 4> {
+  using ReactionKernelBulk = pystencils::ReactionKernelBulk_4_single_precision;
+};
+
+template <> struct KernelTrait<float, 5> {
+  using ReactionKernelBulk = pystencils::ReactionKernelBulk_5_single_precision;
+};
+
+template <typename FloatType, class Reactant, std::size_t... ints>
+auto get_kernel_impl(const std::vector<std::shared_ptr<Reactant>> &reactants,
+                     const double coefficient,
+                     std::index_sequence<ints...> int_seq) {
+  auto kernel = std::make_shared<
+      typename KernelTrait<FloatType, int_seq.size()>::ReactionKernelBulk>(
+      walberla::BlockDataID(
+          reactants[ints]->get_species()->get_density_id())...,
+      numeric_cast<FloatType>(reactants[ints]->get_order())...,
+      numeric_cast<FloatType>(coefficient),
+      numeric_cast<FloatType>(reactants[ints]->get_stoech_coeff())...);
+
+  std::function<void(IBlock *)> sweep = [kernel](IBlock *b) { kernel->run(b); };
+  return sweep;
+}
+
+template <typename FloatType, class Reactant, class... Args>
+auto get_kernel_impl(const std::vector<std::shared_ptr<Reactant>> &reactants,
+                     Args... args) {
+  switch (reactants.size()) {
+
+  case 1:
+    return get_kernel_impl<FloatType>(reactants, args...,
+                                      std::make_index_sequence<1>{});
+
+  case 2:
+    return get_kernel_impl<FloatType>(reactants, args...,
+                                      std::make_index_sequence<2>{});
+
+  case 3:
+    return get_kernel_impl<FloatType>(reactants, args...,
+                                      std::make_index_sequence<3>{});
+
+  case 4:
+    return get_kernel_impl<FloatType>(reactants, args...,
+                                      std::make_index_sequence<4>{});
+
+  case 5:
+    return get_kernel_impl<FloatType>(reactants, args...,
+                                      std::make_index_sequence<5>{});
+
+  default:
+    throw std::runtime_error("reactions of this size are not implemented!");
+  }
+}
+
+template <class Reactant, class... Args>
+auto get_kernel(const std::vector<std::shared_ptr<Reactant>> &reactants,
+                Args... args) {
+
+  const auto is_double_precision =
+      reactants[0]->get_species()->is_double_precision();
+
+  if (is_double_precision) {
+    return get_kernel_impl<double>(reactants, args...);
+  }
+
+  return get_kernel_impl<float>(reactants, args...);
+}
+
+} // namespace ReactionKernelBulkSelector
+} // namespace detail
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_double_precision.cpp
new file mode 100644
index 00000000000..e10963c3de9
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_double_precision.cpp
@@ -0,0 +1,108 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelIndexed_1_double_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelIndexed_1_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_8dfd6fd44211225f575bfd4fd9f8b4cc {
+static FUNC_PREFIX void reactionkernelindexed_1_double_precision_boundary_ReactionKernelIndexed_1_double_precision(uint8_t *RESTRICT _data_indexVector, double *RESTRICT _data_rho_0, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int32_t indexVectorSize, double order_0, double rate_coefficient, double stoech_0) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[12 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 8]));
+    const double local_rho_0 = _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z];
+    const double rate_factor = pow(local_rho_0, order_0) * rate_coefficient;
+    _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z] = local_rho_0 + rate_factor * stoech_0;
+  }
+}
+} // namespace internal_8dfd6fd44211225f575bfd4fd9f8b4cc
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void ReactionKernelIndexed_1_double_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+
+  auto &stoech_0 = stoech_0_;
+  auto &rate_coefficient = rate_coefficient_;
+  auto &order_0 = order_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  internal_8dfd6fd44211225f575bfd4fd9f8b4cc::reactionkernelindexed_1_double_precision_boundary_ReactionKernelIndexed_1_double_precision(_data_indexVector, _data_rho_0, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, indexVectorSize, order_0, rate_coefficient, stoech_0);
+}
+
+void ReactionKernelIndexed_1_double_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void ReactionKernelIndexed_1_double_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void ReactionKernelIndexed_1_double_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_double_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_double_precision.h
new file mode 100644
index 00000000000..9668fd82a35
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_double_precision.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2020-2023 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+/*
+ * Boundary class.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <core/debug/Debug.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/FlagField.h>
+#include <field/GhostLayerField.h>
+
+#include <functional>
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelIndexed_1_double_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z;
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  ReactionKernelIndexed_1_double_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID rho_0ID_,
+      double order_0, double rate_coefficient, double stoech_0)
+      : rho_0ID(rho_0ID_), order_0_(order_0),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_ReactionKernelIndexed_1_double_precision");
+  };
+
+  ReactionKernelIndexed_1_double_precision(BlockDataID indexVectorID_,
+                                           BlockDataID rho_0ID_, double order_0,
+                                           double rate_coefficient,
+                                           double stoech_0)
+      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), order_0_(order_0),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0){};
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
+                                     domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+
+public:
+  BlockDataID rho_0ID;
+  double order_0_;
+  double rate_coefficient_;
+  double stoech_0_;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_single_precision.cpp
new file mode 100644
index 00000000000..9cc8eb47ed7
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_single_precision.cpp
@@ -0,0 +1,108 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelIndexed_1_single_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelIndexed_1_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_6780d252234f1c174ca455eaed762815 {
+static FUNC_PREFIX void reactionkernelindexed_1_single_precision_boundary_ReactionKernelIndexed_1_single_precision(uint8_t *RESTRICT _data_indexVector, float *RESTRICT _data_rho_0, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int32_t indexVectorSize, float order_0, float rate_coefficient, float stoech_0) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[12 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 8]));
+    const float local_rho_0 = _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z];
+    const float rate_factor = rate_coefficient * powf(local_rho_0, order_0);
+    _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z] = local_rho_0 + rate_factor * stoech_0;
+  }
+}
+} // namespace internal_6780d252234f1c174ca455eaed762815
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void ReactionKernelIndexed_1_single_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+
+  auto &rate_coefficient = rate_coefficient_;
+  auto &stoech_0 = stoech_0_;
+  auto &order_0 = order_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  internal_6780d252234f1c174ca455eaed762815::reactionkernelindexed_1_single_precision_boundary_ReactionKernelIndexed_1_single_precision(_data_indexVector, _data_rho_0, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, indexVectorSize, order_0, rate_coefficient, stoech_0);
+}
+
+void ReactionKernelIndexed_1_single_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void ReactionKernelIndexed_1_single_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void ReactionKernelIndexed_1_single_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_single_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_single_precision.h
new file mode 100644
index 00000000000..6a74acc462f
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_1_single_precision.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2020-2023 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+/*
+ * Boundary class.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <core/debug/Debug.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/FlagField.h>
+#include <field/GhostLayerField.h>
+
+#include <functional>
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelIndexed_1_single_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z;
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  ReactionKernelIndexed_1_single_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID rho_0ID_,
+      float order_0, float rate_coefficient, float stoech_0)
+      : rho_0ID(rho_0ID_), order_0_(order_0),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_ReactionKernelIndexed_1_single_precision");
+  };
+
+  ReactionKernelIndexed_1_single_precision(BlockDataID indexVectorID_,
+                                           BlockDataID rho_0ID_, float order_0,
+                                           float rate_coefficient,
+                                           float stoech_0)
+      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), order_0_(order_0),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0){};
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
+                                     domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+
+public:
+  BlockDataID rho_0ID;
+  float order_0_;
+  float rate_coefficient_;
+  float stoech_0_;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_double_precision.cpp
new file mode 100644
index 00000000000..b94a96bf80a
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_double_precision.cpp
@@ -0,0 +1,118 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelIndexed_2_double_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelIndexed_2_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_1955c52e82759b23ee69574de97d7a03 {
+static FUNC_PREFIX void reactionkernelindexed_2_double_precision_boundary_ReactionKernelIndexed_2_double_precision(uint8_t *RESTRICT _data_indexVector, double *RESTRICT _data_rho_0, double *RESTRICT _data_rho_1, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int32_t indexVectorSize, double order_0, double order_1, double rate_coefficient, double stoech_0, double stoech_1) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[12 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 8]));
+    const double local_rho_0 = _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z];
+    const double local_rho_1 = _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z];
+    const double rate_factor = pow(local_rho_0, order_0) * pow(local_rho_1, order_1) * rate_coefficient;
+    _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z] = local_rho_0 + rate_factor * stoech_0;
+    _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z] = local_rho_1 + rate_factor * stoech_1;
+  }
+}
+} // namespace internal_1955c52e82759b23ee69574de97d7a03
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void ReactionKernelIndexed_2_double_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+
+  auto &stoech_0 = stoech_0_;
+  auto &stoech_1 = stoech_1_;
+  auto &order_1 = order_1_;
+  auto &order_0 = order_0_;
+  auto &rate_coefficient = rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  internal_1955c52e82759b23ee69574de97d7a03::reactionkernelindexed_2_double_precision_boundary_ReactionKernelIndexed_2_double_precision(_data_indexVector, _data_rho_0, _data_rho_1, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, indexVectorSize, order_0, order_1, rate_coefficient, stoech_0, stoech_1);
+}
+
+void ReactionKernelIndexed_2_double_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void ReactionKernelIndexed_2_double_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void ReactionKernelIndexed_2_double_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_double_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_double_precision.h
new file mode 100644
index 00000000000..9f3c1fa6b53
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_double_precision.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2020-2023 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+/*
+ * Boundary class.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <core/debug/Debug.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/FlagField.h>
+#include <field/GhostLayerField.h>
+
+#include <functional>
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelIndexed_2_double_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z;
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  ReactionKernelIndexed_2_double_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID rho_0ID_,
+      BlockDataID rho_1ID_, double order_0, double order_1,
+      double rate_coefficient, double stoech_0, double stoech_1)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), order_0_(order_0),
+        order_1_(order_1), rate_coefficient_(rate_coefficient),
+        stoech_0_(stoech_0), stoech_1_(stoech_1) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_ReactionKernelIndexed_2_double_precision");
+  };
+
+  ReactionKernelIndexed_2_double_precision(BlockDataID indexVectorID_,
+                                           BlockDataID rho_0ID_,
+                                           BlockDataID rho_1ID_, double order_0,
+                                           double order_1,
+                                           double rate_coefficient,
+                                           double stoech_0, double stoech_1)
+      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), rho_1ID(rho_1ID_),
+        order_0_(order_0), order_1_(order_1),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1){};
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
+                                     domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+
+public:
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  double order_0_;
+  double order_1_;
+  double rate_coefficient_;
+  double stoech_0_;
+  double stoech_1_;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_single_precision.cpp
new file mode 100644
index 00000000000..fcacc03e936
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_single_precision.cpp
@@ -0,0 +1,118 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelIndexed_2_single_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelIndexed_2_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_65f3f69877a34020919311605a374bf2 {
+static FUNC_PREFIX void reactionkernelindexed_2_single_precision_boundary_ReactionKernelIndexed_2_single_precision(uint8_t *RESTRICT _data_indexVector, float *RESTRICT _data_rho_0, float *RESTRICT _data_rho_1, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int32_t indexVectorSize, float order_0, float order_1, float rate_coefficient, float stoech_0, float stoech_1) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[12 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 8]));
+    const float local_rho_0 = _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z];
+    const float local_rho_1 = _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z];
+    const float rate_factor = rate_coefficient * powf(local_rho_0, order_0) * powf(local_rho_1, order_1);
+    _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z] = local_rho_0 + rate_factor * stoech_0;
+    _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z] = local_rho_1 + rate_factor * stoech_1;
+  }
+}
+} // namespace internal_65f3f69877a34020919311605a374bf2
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void ReactionKernelIndexed_2_single_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+
+  auto &order_1 = order_1_;
+  auto &order_0 = order_0_;
+  auto &stoech_0 = stoech_0_;
+  auto &stoech_1 = stoech_1_;
+  auto &rate_coefficient = rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  internal_65f3f69877a34020919311605a374bf2::reactionkernelindexed_2_single_precision_boundary_ReactionKernelIndexed_2_single_precision(_data_indexVector, _data_rho_0, _data_rho_1, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, indexVectorSize, order_0, order_1, rate_coefficient, stoech_0, stoech_1);
+}
+
+void ReactionKernelIndexed_2_single_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void ReactionKernelIndexed_2_single_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void ReactionKernelIndexed_2_single_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_single_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_single_precision.h
new file mode 100644
index 00000000000..881521227a3
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_2_single_precision.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2020-2023 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+/*
+ * Boundary class.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <core/debug/Debug.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/FlagField.h>
+#include <field/GhostLayerField.h>
+
+#include <functional>
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelIndexed_2_single_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z;
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  ReactionKernelIndexed_2_single_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID rho_0ID_,
+      BlockDataID rho_1ID_, float order_0, float order_1,
+      float rate_coefficient, float stoech_0, float stoech_1)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), order_0_(order_0),
+        order_1_(order_1), rate_coefficient_(rate_coefficient),
+        stoech_0_(stoech_0), stoech_1_(stoech_1) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_ReactionKernelIndexed_2_single_precision");
+  };
+
+  ReactionKernelIndexed_2_single_precision(BlockDataID indexVectorID_,
+                                           BlockDataID rho_0ID_,
+                                           BlockDataID rho_1ID_, float order_0,
+                                           float order_1,
+                                           float rate_coefficient,
+                                           float stoech_0, float stoech_1)
+      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), rho_1ID(rho_1ID_),
+        order_0_(order_0), order_1_(order_1),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1){};
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
+                                     domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+
+public:
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  float order_0_;
+  float order_1_;
+  float rate_coefficient_;
+  float stoech_0_;
+  float stoech_1_;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_double_precision.cpp
new file mode 100644
index 00000000000..335bf26fa53
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_double_precision.cpp
@@ -0,0 +1,128 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelIndexed_3_double_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelIndexed_3_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_8edace03832e2e7c5856c1019b5b9697 {
+static FUNC_PREFIX void reactionkernelindexed_3_double_precision_boundary_ReactionKernelIndexed_3_double_precision(uint8_t *RESTRICT _data_indexVector, double *RESTRICT _data_rho_0, double *RESTRICT _data_rho_1, double *RESTRICT _data_rho_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, int32_t indexVectorSize, double order_0, double order_1, double order_2, double rate_coefficient, double stoech_0, double stoech_1, double stoech_2) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[12 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 8]));
+    const double local_rho_0 = _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z];
+    const double local_rho_1 = _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z];
+    const double local_rho_2 = _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z];
+    const double rate_factor = pow(local_rho_0, order_0) * pow(local_rho_1, order_1) * pow(local_rho_2, order_2) * rate_coefficient;
+    _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z] = local_rho_0 + rate_factor * stoech_0;
+    _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z] = local_rho_1 + rate_factor * stoech_1;
+    _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z] = local_rho_2 + rate_factor * stoech_2;
+  }
+}
+} // namespace internal_8edace03832e2e7c5856c1019b5b9697
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void ReactionKernelIndexed_3_double_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+  auto rho_2 = block->getData<field::GhostLayerField<double, 1>>(rho_2ID);
+
+  auto &stoech_0 = stoech_0_;
+  auto &order_2 = order_2_;
+  auto &stoech_1 = stoech_1_;
+  auto &order_1 = order_1_;
+  auto &stoech_2 = stoech_2_;
+  auto &order_0 = order_0_;
+  auto &rate_coefficient = rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  internal_8edace03832e2e7c5856c1019b5b9697::reactionkernelindexed_3_double_precision_boundary_ReactionKernelIndexed_3_double_precision(_data_indexVector, _data_rho_0, _data_rho_1, _data_rho_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, indexVectorSize, order_0, order_1, order_2, rate_coefficient, stoech_0, stoech_1, stoech_2);
+}
+
+void ReactionKernelIndexed_3_double_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void ReactionKernelIndexed_3_double_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void ReactionKernelIndexed_3_double_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_double_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_double_precision.h
new file mode 100644
index 00000000000..41630c03c8b
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_double_precision.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2020-2023 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+/*
+ * Boundary class.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <core/debug/Debug.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/FlagField.h>
+#include <field/GhostLayerField.h>
+
+#include <functional>
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelIndexed_3_double_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z;
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  ReactionKernelIndexed_3_double_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID rho_0ID_,
+      BlockDataID rho_1ID_, BlockDataID rho_2ID_, double order_0,
+      double order_1, double order_2, double rate_coefficient, double stoech_0,
+      double stoech_1, double stoech_2)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        order_0_(order_0), order_1_(order_1), order_2_(order_2),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_ReactionKernelIndexed_3_double_precision");
+  };
+
+  ReactionKernelIndexed_3_double_precision(
+      BlockDataID indexVectorID_, BlockDataID rho_0ID_, BlockDataID rho_1ID_,
+      BlockDataID rho_2ID_, double order_0, double order_1, double order_2,
+      double rate_coefficient, double stoech_0, double stoech_1,
+      double stoech_2)
+      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), rho_1ID(rho_1ID_),
+        rho_2ID(rho_2ID_), order_0_(order_0), order_1_(order_1),
+        order_2_(order_2), rate_coefficient_(rate_coefficient),
+        stoech_0_(stoech_0), stoech_1_(stoech_1), stoech_2_(stoech_2){};
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
+                                     domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+
+public:
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  double order_0_;
+  double order_1_;
+  double order_2_;
+  double rate_coefficient_;
+  double stoech_0_;
+  double stoech_1_;
+  double stoech_2_;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_single_precision.cpp
new file mode 100644
index 00000000000..1e6b37c72c8
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_single_precision.cpp
@@ -0,0 +1,128 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelIndexed_3_single_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelIndexed_3_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_a841832271ac54f2c38b03966a739283 {
+static FUNC_PREFIX void reactionkernelindexed_3_single_precision_boundary_ReactionKernelIndexed_3_single_precision(uint8_t *RESTRICT _data_indexVector, float *RESTRICT _data_rho_0, float *RESTRICT _data_rho_1, float *RESTRICT _data_rho_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, int32_t indexVectorSize, float order_0, float order_1, float order_2, float rate_coefficient, float stoech_0, float stoech_1, float stoech_2) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[12 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 8]));
+    const float local_rho_0 = _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z];
+    const float local_rho_1 = _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z];
+    const float local_rho_2 = _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z];
+    const float rate_factor = rate_coefficient * powf(local_rho_0, order_0) * powf(local_rho_1, order_1) * powf(local_rho_2, order_2);
+    _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z] = local_rho_0 + rate_factor * stoech_0;
+    _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z] = local_rho_1 + rate_factor * stoech_1;
+    _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z] = local_rho_2 + rate_factor * stoech_2;
+  }
+}
+} // namespace internal_a841832271ac54f2c38b03966a739283
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void ReactionKernelIndexed_3_single_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<float, 1>>(rho_2ID);
+
+  auto &order_1 = order_1_;
+  auto &order_0 = order_0_;
+  auto &stoech_2 = stoech_2_;
+  auto &stoech_0 = stoech_0_;
+  auto &order_2 = order_2_;
+  auto &stoech_1 = stoech_1_;
+  auto &rate_coefficient = rate_coefficient_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  internal_a841832271ac54f2c38b03966a739283::reactionkernelindexed_3_single_precision_boundary_ReactionKernelIndexed_3_single_precision(_data_indexVector, _data_rho_0, _data_rho_1, _data_rho_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, indexVectorSize, order_0, order_1, order_2, rate_coefficient, stoech_0, stoech_1, stoech_2);
+}
+
+void ReactionKernelIndexed_3_single_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void ReactionKernelIndexed_3_single_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void ReactionKernelIndexed_3_single_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_single_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_single_precision.h
new file mode 100644
index 00000000000..eda8bf18168
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_3_single_precision.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2020-2023 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+/*
+ * Boundary class.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <core/debug/Debug.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/FlagField.h>
+#include <field/GhostLayerField.h>
+
+#include <functional>
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelIndexed_3_single_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z;
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  ReactionKernelIndexed_3_single_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID rho_0ID_,
+      BlockDataID rho_1ID_, BlockDataID rho_2ID_, float order_0, float order_1,
+      float order_2, float rate_coefficient, float stoech_0, float stoech_1,
+      float stoech_2)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        order_0_(order_0), order_1_(order_1), order_2_(order_2),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_ReactionKernelIndexed_3_single_precision");
+  };
+
+  ReactionKernelIndexed_3_single_precision(
+      BlockDataID indexVectorID_, BlockDataID rho_0ID_, BlockDataID rho_1ID_,
+      BlockDataID rho_2ID_, float order_0, float order_1, float order_2,
+      float rate_coefficient, float stoech_0, float stoech_1, float stoech_2)
+      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), rho_1ID(rho_1ID_),
+        rho_2ID(rho_2ID_), order_0_(order_0), order_1_(order_1),
+        order_2_(order_2), rate_coefficient_(rate_coefficient),
+        stoech_0_(stoech_0), stoech_1_(stoech_1), stoech_2_(stoech_2){};
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
+                                     domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+
+public:
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  float order_0_;
+  float order_1_;
+  float order_2_;
+  float rate_coefficient_;
+  float stoech_0_;
+  float stoech_1_;
+  float stoech_2_;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_double_precision.cpp
new file mode 100644
index 00000000000..27072122f68
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_double_precision.cpp
@@ -0,0 +1,138 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelIndexed_4_double_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelIndexed_4_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_7ded543b40ca402e5e3102728843f000 {
+static FUNC_PREFIX void reactionkernelindexed_4_double_precision_boundary_ReactionKernelIndexed_4_double_precision(uint8_t *RESTRICT _data_indexVector, double *RESTRICT _data_rho_0, double *RESTRICT _data_rho_1, double *RESTRICT _data_rho_2, double *RESTRICT _data_rho_3, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, int64_t const _stride_rho_3_0, int64_t const _stride_rho_3_1, int64_t const _stride_rho_3_2, int32_t indexVectorSize, double order_0, double order_1, double order_2, double order_3, double rate_coefficient, double stoech_0, double stoech_1, double stoech_2, double stoech_3) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[12 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 8]));
+    const double local_rho_0 = _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z];
+    const double local_rho_1 = _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z];
+    const double local_rho_2 = _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z];
+    const double local_rho_3 = _data_rho_3[_stride_rho_3_0 * x + _stride_rho_3_1 * y + _stride_rho_3_2 * z];
+    const double rate_factor = pow(local_rho_0, order_0) * pow(local_rho_1, order_1) * pow(local_rho_2, order_2) * pow(local_rho_3, order_3) * rate_coefficient;
+    _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z] = local_rho_0 + rate_factor * stoech_0;
+    _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z] = local_rho_1 + rate_factor * stoech_1;
+    _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z] = local_rho_2 + rate_factor * stoech_2;
+    _data_rho_3[_stride_rho_3_0 * x + _stride_rho_3_1 * y + _stride_rho_3_2 * z] = local_rho_3 + rate_factor * stoech_3;
+  }
+}
+} // namespace internal_7ded543b40ca402e5e3102728843f000
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void ReactionKernelIndexed_4_double_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+  auto rho_2 = block->getData<field::GhostLayerField<double, 1>>(rho_2ID);
+  auto rho_3 = block->getData<field::GhostLayerField<double, 1>>(rho_3ID);
+
+  auto &stoech_0 = stoech_0_;
+  auto &order_2 = order_2_;
+  auto &stoech_1 = stoech_1_;
+  auto &order_1 = order_1_;
+  auto &stoech_2 = stoech_2_;
+  auto &order_0 = order_0_;
+  auto &stoech_3 = stoech_3_;
+  auto &rate_coefficient = rate_coefficient_;
+  auto &order_3 = order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_3->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_3 = rho_3->dataAt(0, 0, 0, 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  internal_7ded543b40ca402e5e3102728843f000::reactionkernelindexed_4_double_precision_boundary_ReactionKernelIndexed_4_double_precision(_data_indexVector, _data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, indexVectorSize, order_0, order_1, order_2, order_3, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3);
+}
+
+void ReactionKernelIndexed_4_double_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void ReactionKernelIndexed_4_double_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void ReactionKernelIndexed_4_double_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_double_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_double_precision.h
new file mode 100644
index 00000000000..fd53c4c4690
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_double_precision.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2020-2023 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+/*
+ * Boundary class.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <core/debug/Debug.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/FlagField.h>
+#include <field/GhostLayerField.h>
+
+#include <functional>
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelIndexed_4_double_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z;
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  ReactionKernelIndexed_4_double_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID rho_0ID_,
+      BlockDataID rho_1ID_, BlockDataID rho_2ID_, BlockDataID rho_3ID_,
+      double order_0, double order_1, double order_2, double order_3,
+      double rate_coefficient, double stoech_0, double stoech_1,
+      double stoech_2, double stoech_3)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        rho_3ID(rho_3ID_), order_0_(order_0), order_1_(order_1),
+        order_2_(order_2), order_3_(order_3),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_ReactionKernelIndexed_4_double_precision");
+  };
+
+  ReactionKernelIndexed_4_double_precision(
+      BlockDataID indexVectorID_, BlockDataID rho_0ID_, BlockDataID rho_1ID_,
+      BlockDataID rho_2ID_, BlockDataID rho_3ID_, double order_0,
+      double order_1, double order_2, double order_3, double rate_coefficient,
+      double stoech_0, double stoech_1, double stoech_2, double stoech_3)
+      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), rho_1ID(rho_1ID_),
+        rho_2ID(rho_2ID_), rho_3ID(rho_3ID_), order_0_(order_0),
+        order_1_(order_1), order_2_(order_2), order_3_(order_3),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3){};
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
+                                     domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+
+public:
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  BlockDataID rho_3ID;
+  double order_0_;
+  double order_1_;
+  double order_2_;
+  double order_3_;
+  double rate_coefficient_;
+  double stoech_0_;
+  double stoech_1_;
+  double stoech_2_;
+  double stoech_3_;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_single_precision.cpp
new file mode 100644
index 00000000000..b1af396026e
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_single_precision.cpp
@@ -0,0 +1,138 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelIndexed_4_single_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelIndexed_4_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_62195c7636fc531694c87493561447d7 {
+static FUNC_PREFIX void reactionkernelindexed_4_single_precision_boundary_ReactionKernelIndexed_4_single_precision(uint8_t *RESTRICT _data_indexVector, float *RESTRICT _data_rho_0, float *RESTRICT _data_rho_1, float *RESTRICT _data_rho_2, float *RESTRICT _data_rho_3, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, int64_t const _stride_rho_3_0, int64_t const _stride_rho_3_1, int64_t const _stride_rho_3_2, int32_t indexVectorSize, float order_0, float order_1, float order_2, float order_3, float rate_coefficient, float stoech_0, float stoech_1, float stoech_2, float stoech_3) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[12 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 8]));
+    const float local_rho_0 = _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z];
+    const float local_rho_1 = _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z];
+    const float local_rho_2 = _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z];
+    const float local_rho_3 = _data_rho_3[_stride_rho_3_0 * x + _stride_rho_3_1 * y + _stride_rho_3_2 * z];
+    const float rate_factor = rate_coefficient * powf(local_rho_0, order_0) * powf(local_rho_1, order_1) * powf(local_rho_2, order_2) * powf(local_rho_3, order_3);
+    _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z] = local_rho_0 + rate_factor * stoech_0;
+    _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z] = local_rho_1 + rate_factor * stoech_1;
+    _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z] = local_rho_2 + rate_factor * stoech_2;
+    _data_rho_3[_stride_rho_3_0 * x + _stride_rho_3_1 * y + _stride_rho_3_2 * z] = local_rho_3 + rate_factor * stoech_3;
+  }
+}
+} // namespace internal_62195c7636fc531694c87493561447d7
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void ReactionKernelIndexed_4_single_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<float, 1>>(rho_2ID);
+  auto rho_3 = block->getData<field::GhostLayerField<float, 1>>(rho_3ID);
+
+  auto &order_1 = order_1_;
+  auto &order_0 = order_0_;
+  auto &stoech_3 = stoech_3_;
+  auto &stoech_2 = stoech_2_;
+  auto &stoech_0 = stoech_0_;
+  auto &order_2 = order_2_;
+  auto &stoech_1 = stoech_1_;
+  auto &rate_coefficient = rate_coefficient_;
+  auto &order_3 = order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_3->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_3 = rho_3->dataAt(0, 0, 0, 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  internal_62195c7636fc531694c87493561447d7::reactionkernelindexed_4_single_precision_boundary_ReactionKernelIndexed_4_single_precision(_data_indexVector, _data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, indexVectorSize, order_0, order_1, order_2, order_3, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3);
+}
+
+void ReactionKernelIndexed_4_single_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void ReactionKernelIndexed_4_single_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void ReactionKernelIndexed_4_single_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_single_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_single_precision.h
new file mode 100644
index 00000000000..0789eec7170
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_4_single_precision.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2020-2023 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+/*
+ * Boundary class.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <core/debug/Debug.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/FlagField.h>
+#include <field/GhostLayerField.h>
+
+#include <functional>
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelIndexed_4_single_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z;
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  ReactionKernelIndexed_4_single_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID rho_0ID_,
+      BlockDataID rho_1ID_, BlockDataID rho_2ID_, BlockDataID rho_3ID_,
+      float order_0, float order_1, float order_2, float order_3,
+      float rate_coefficient, float stoech_0, float stoech_1, float stoech_2,
+      float stoech_3)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        rho_3ID(rho_3ID_), order_0_(order_0), order_1_(order_1),
+        order_2_(order_2), order_3_(order_3),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_ReactionKernelIndexed_4_single_precision");
+  };
+
+  ReactionKernelIndexed_4_single_precision(
+      BlockDataID indexVectorID_, BlockDataID rho_0ID_, BlockDataID rho_1ID_,
+      BlockDataID rho_2ID_, BlockDataID rho_3ID_, float order_0, float order_1,
+      float order_2, float order_3, float rate_coefficient, float stoech_0,
+      float stoech_1, float stoech_2, float stoech_3)
+      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), rho_1ID(rho_1ID_),
+        rho_2ID(rho_2ID_), rho_3ID(rho_3ID_), order_0_(order_0),
+        order_1_(order_1), order_2_(order_2), order_3_(order_3),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3){};
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
+                                     domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+
+public:
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  BlockDataID rho_3ID;
+  float order_0_;
+  float order_1_;
+  float order_2_;
+  float order_3_;
+  float rate_coefficient_;
+  float stoech_0_;
+  float stoech_1_;
+  float stoech_2_;
+  float stoech_3_;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_double_precision.cpp
new file mode 100644
index 00000000000..d2d10cfacc7
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_double_precision.cpp
@@ -0,0 +1,148 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelIndexed_5_double_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelIndexed_5_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_714956d26e6eb81c5dec60e7ab7da8ab {
+static FUNC_PREFIX void reactionkernelindexed_5_double_precision_boundary_ReactionKernelIndexed_5_double_precision(uint8_t *RESTRICT _data_indexVector, double *RESTRICT _data_rho_0, double *RESTRICT _data_rho_1, double *RESTRICT _data_rho_2, double *RESTRICT _data_rho_3, double *RESTRICT _data_rho_4, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, int64_t const _stride_rho_3_0, int64_t const _stride_rho_3_1, int64_t const _stride_rho_3_2, int64_t const _stride_rho_4_0, int64_t const _stride_rho_4_1, int64_t const _stride_rho_4_2, int32_t indexVectorSize, double order_0, double order_1, double order_2, double order_3, double order_4, double rate_coefficient, double stoech_0, double stoech_1, double stoech_2, double stoech_3, double stoech_4) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[12 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 8]));
+    const double local_rho_0 = _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z];
+    const double local_rho_1 = _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z];
+    const double local_rho_2 = _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z];
+    const double local_rho_3 = _data_rho_3[_stride_rho_3_0 * x + _stride_rho_3_1 * y + _stride_rho_3_2 * z];
+    const double local_rho_4 = _data_rho_4[_stride_rho_4_0 * x + _stride_rho_4_1 * y + _stride_rho_4_2 * z];
+    const double rate_factor = pow(local_rho_0, order_0) * pow(local_rho_1, order_1) * pow(local_rho_2, order_2) * pow(local_rho_3, order_3) * pow(local_rho_4, order_4) * rate_coefficient;
+    _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z] = local_rho_0 + rate_factor * stoech_0;
+    _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z] = local_rho_1 + rate_factor * stoech_1;
+    _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z] = local_rho_2 + rate_factor * stoech_2;
+    _data_rho_3[_stride_rho_3_0 * x + _stride_rho_3_1 * y + _stride_rho_3_2 * z] = local_rho_3 + rate_factor * stoech_3;
+    _data_rho_4[_stride_rho_4_0 * x + _stride_rho_4_1 * y + _stride_rho_4_2 * z] = local_rho_4 + rate_factor * stoech_4;
+  }
+}
+} // namespace internal_714956d26e6eb81c5dec60e7ab7da8ab
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void ReactionKernelIndexed_5_double_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto rho_3 = block->getData<field::GhostLayerField<double, 1>>(rho_3ID);
+  auto rho_4 = block->getData<field::GhostLayerField<double, 1>>(rho_4ID);
+  auto rho_1 = block->getData<field::GhostLayerField<double, 1>>(rho_1ID);
+  auto rho_0 = block->getData<field::GhostLayerField<double, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<double, 1>>(rho_2ID);
+
+  auto &stoech_0 = stoech_0_;
+  auto &order_2 = order_2_;
+  auto &stoech_1 = stoech_1_;
+  auto &stoech_4 = stoech_4_;
+  auto &order_1 = order_1_;
+  auto &stoech_2 = stoech_2_;
+  auto &order_0 = order_0_;
+  auto &order_4 = order_4_;
+  auto &stoech_3 = stoech_3_;
+  auto &rate_coefficient = rate_coefficient_;
+  auto &order_3 = order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_3->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_3 = rho_3->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_4->nrOfGhostLayers()));
+  double *RESTRICT _data_rho_4 = rho_4->dataAt(0, 0, 0, 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  const int64_t _stride_rho_4_0 = int64_t(rho_4->xStride());
+  const int64_t _stride_rho_4_1 = int64_t(rho_4->yStride());
+  const int64_t _stride_rho_4_2 = int64_t(rho_4->zStride());
+  internal_714956d26e6eb81c5dec60e7ab7da8ab::reactionkernelindexed_5_double_precision_boundary_ReactionKernelIndexed_5_double_precision(_data_indexVector, _data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _data_rho_4, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, _stride_rho_4_0, _stride_rho_4_1, _stride_rho_4_2, indexVectorSize, order_0, order_1, order_2, order_3, order_4, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3, stoech_4);
+}
+
+void ReactionKernelIndexed_5_double_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void ReactionKernelIndexed_5_double_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void ReactionKernelIndexed_5_double_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_double_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_double_precision.h
new file mode 100644
index 00000000000..39fbd0a732d
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_double_precision.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2020-2023 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+/*
+ * Boundary class.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <core/debug/Debug.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/FlagField.h>
+#include <field/GhostLayerField.h>
+
+#include <functional>
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelIndexed_5_double_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z;
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  ReactionKernelIndexed_5_double_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID rho_0ID_,
+      BlockDataID rho_1ID_, BlockDataID rho_2ID_, BlockDataID rho_3ID_,
+      BlockDataID rho_4ID_, double order_0, double order_1, double order_2,
+      double order_3, double order_4, double rate_coefficient, double stoech_0,
+      double stoech_1, double stoech_2, double stoech_3, double stoech_4)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        rho_3ID(rho_3ID_), rho_4ID(rho_4ID_), order_0_(order_0),
+        order_1_(order_1), order_2_(order_2), order_3_(order_3),
+        order_4_(order_4), rate_coefficient_(rate_coefficient),
+        stoech_0_(stoech_0), stoech_1_(stoech_1), stoech_2_(stoech_2),
+        stoech_3_(stoech_3), stoech_4_(stoech_4) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_ReactionKernelIndexed_5_double_precision");
+  };
+
+  ReactionKernelIndexed_5_double_precision(
+      BlockDataID indexVectorID_, BlockDataID rho_0ID_, BlockDataID rho_1ID_,
+      BlockDataID rho_2ID_, BlockDataID rho_3ID_, BlockDataID rho_4ID_,
+      double order_0, double order_1, double order_2, double order_3,
+      double order_4, double rate_coefficient, double stoech_0, double stoech_1,
+      double stoech_2, double stoech_3, double stoech_4)
+      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), rho_1ID(rho_1ID_),
+        rho_2ID(rho_2ID_), rho_3ID(rho_3ID_), rho_4ID(rho_4ID_),
+        order_0_(order_0), order_1_(order_1), order_2_(order_2),
+        order_3_(order_3), order_4_(order_4),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3),
+        stoech_4_(stoech_4){};
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
+                                     domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+
+public:
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  BlockDataID rho_3ID;
+  BlockDataID rho_4ID;
+  double order_0_;
+  double order_1_;
+  double order_2_;
+  double order_3_;
+  double order_4_;
+  double rate_coefficient_;
+  double stoech_0_;
+  double stoech_1_;
+  double stoech_2_;
+  double stoech_3_;
+  double stoech_4_;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_single_precision.cpp
new file mode 100644
index 00000000000..1cfe0b8a559
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_single_precision.cpp
@@ -0,0 +1,148 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file ReactionKernelIndexed_5_single_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#include <cmath>
+
+#include "ReactionKernelIndexed_5_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_c656da8359b8f47f0007107280d91a58 {
+static FUNC_PREFIX void reactionkernelindexed_5_single_precision_boundary_ReactionKernelIndexed_5_single_precision(uint8_t *RESTRICT _data_indexVector, float *RESTRICT _data_rho_0, float *RESTRICT _data_rho_1, float *RESTRICT _data_rho_2, float *RESTRICT _data_rho_3, float *RESTRICT _data_rho_4, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, int64_t const _stride_rho_3_0, int64_t const _stride_rho_3_1, int64_t const _stride_rho_3_2, int64_t const _stride_rho_4_0, int64_t const _stride_rho_4_1, int64_t const _stride_rho_4_2, int32_t indexVectorSize, float order_0, float order_1, float order_2, float order_3, float order_4, float rate_coefficient, float stoech_0, float stoech_1, float stoech_2, float stoech_3, float stoech_4) {
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[12 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[12 * ctr_0 + 8]));
+    const float local_rho_0 = _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z];
+    const float local_rho_1 = _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z];
+    const float local_rho_2 = _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z];
+    const float local_rho_3 = _data_rho_3[_stride_rho_3_0 * x + _stride_rho_3_1 * y + _stride_rho_3_2 * z];
+    const float local_rho_4 = _data_rho_4[_stride_rho_4_0 * x + _stride_rho_4_1 * y + _stride_rho_4_2 * z];
+    const float rate_factor = rate_coefficient * powf(local_rho_0, order_0) * powf(local_rho_1, order_1) * powf(local_rho_2, order_2) * powf(local_rho_3, order_3) * powf(local_rho_4, order_4);
+    _data_rho_0[_stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z] = local_rho_0 + rate_factor * stoech_0;
+    _data_rho_1[_stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z] = local_rho_1 + rate_factor * stoech_1;
+    _data_rho_2[_stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z] = local_rho_2 + rate_factor * stoech_2;
+    _data_rho_3[_stride_rho_3_0 * x + _stride_rho_3_1 * y + _stride_rho_3_2 * z] = local_rho_3 + rate_factor * stoech_3;
+    _data_rho_4[_stride_rho_4_0 * x + _stride_rho_4_1 * y + _stride_rho_4_2 * z] = local_rho_4 + rate_factor * stoech_4;
+  }
+}
+} // namespace internal_c656da8359b8f47f0007107280d91a58
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void ReactionKernelIndexed_5_single_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto rho_0 = block->getData<field::GhostLayerField<float, 1>>(rho_0ID);
+  auto rho_2 = block->getData<field::GhostLayerField<float, 1>>(rho_2ID);
+  auto rho_3 = block->getData<field::GhostLayerField<float, 1>>(rho_3ID);
+  auto rho_4 = block->getData<field::GhostLayerField<float, 1>>(rho_4ID);
+  auto rho_1 = block->getData<field::GhostLayerField<float, 1>>(rho_1ID);
+
+  auto &order_1 = order_1_;
+  auto &order_4 = order_4_;
+  auto &order_0 = order_0_;
+  auto &stoech_3 = stoech_3_;
+  auto &stoech_4 = stoech_4_;
+  auto &stoech_2 = stoech_2_;
+  auto &stoech_0 = stoech_0_;
+  auto &order_2 = order_2_;
+  auto &stoech_1 = stoech_1_;
+  auto &rate_coefficient = rate_coefficient_;
+  auto &order_3 = order_3_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_3->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_3 = rho_3->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_4->nrOfGhostLayers()));
+  float *RESTRICT _data_rho_4 = rho_4->dataAt(0, 0, 0, 0);
+  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
+  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
+  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
+  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
+  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
+  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
+  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
+  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
+  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
+  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
+  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
+  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
+  const int64_t _stride_rho_4_0 = int64_t(rho_4->xStride());
+  const int64_t _stride_rho_4_1 = int64_t(rho_4->yStride());
+  const int64_t _stride_rho_4_2 = int64_t(rho_4->zStride());
+  internal_c656da8359b8f47f0007107280d91a58::reactionkernelindexed_5_single_precision_boundary_ReactionKernelIndexed_5_single_precision(_data_indexVector, _data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _data_rho_4, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, _stride_rho_4_0, _stride_rho_4_1, _stride_rho_4_2, indexVectorSize, order_0, order_1, order_2, order_3, order_4, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3, stoech_4);
+}
+
+void ReactionKernelIndexed_5_single_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void ReactionKernelIndexed_5_single_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void ReactionKernelIndexed_5_single_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_single_precision.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_single_precision.h
new file mode 100644
index 00000000000..8a32b7f620d
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_5_single_precision.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2020-2023 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+/*
+ * Boundary class.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <core/debug/Debug.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/FlagField.h>
+#include <field/GhostLayerField.h>
+
+#include <functional>
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class ReactionKernelIndexed_5_single_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z;
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  ReactionKernelIndexed_5_single_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID rho_0ID_,
+      BlockDataID rho_1ID_, BlockDataID rho_2ID_, BlockDataID rho_3ID_,
+      BlockDataID rho_4ID_, float order_0, float order_1, float order_2,
+      float order_3, float order_4, float rate_coefficient, float stoech_0,
+      float stoech_1, float stoech_2, float stoech_3, float stoech_4)
+      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
+        rho_3ID(rho_3ID_), rho_4ID(rho_4ID_), order_0_(order_0),
+        order_1_(order_1), order_2_(order_2), order_3_(order_3),
+        order_4_(order_4), rate_coefficient_(rate_coefficient),
+        stoech_0_(stoech_0), stoech_1_(stoech_1), stoech_2_(stoech_2),
+        stoech_3_(stoech_3), stoech_4_(stoech_4) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_ReactionKernelIndexed_5_single_precision");
+  };
+
+  ReactionKernelIndexed_5_single_precision(
+      BlockDataID indexVectorID_, BlockDataID rho_0ID_, BlockDataID rho_1ID_,
+      BlockDataID rho_2ID_, BlockDataID rho_3ID_, BlockDataID rho_4ID_,
+      float order_0, float order_1, float order_2, float order_3, float order_4,
+      float rate_coefficient, float stoech_0, float stoech_1, float stoech_2,
+      float stoech_3, float stoech_4)
+      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), rho_1ID(rho_1ID_),
+        rho_2ID(rho_2ID_), rho_3ID(rho_3ID_), rho_4ID(rho_4ID_),
+        order_0_(order_0), order_1_(order_1), order_2_(order_2),
+        order_3_(order_3), order_4_(order_4),
+        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
+        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3),
+        stoech_4_(stoech_4){};
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
+                                     domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
+                                   it.y() + cell_idx_c(0),
+                                   it.z() + cell_idx_c(0)) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+
+public:
+  BlockDataID rho_0ID;
+  BlockDataID rho_1ID;
+  BlockDataID rho_2ID;
+  BlockDataID rho_3ID;
+  BlockDataID rho_4ID;
+  float order_0_;
+  float order_1_;
+  float order_2_;
+  float order_3_;
+  float order_4_;
+  float rate_coefficient_;
+  float stoech_0_;
+  float stoech_1_;
+  float stoech_2_;
+  float stoech_3_;
+  float stoech_4_;
+};
+
+} // namespace pystencils
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_all.h b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_all.h
new file mode 100644
index 00000000000..c8bc475fc19
--- /dev/null
+++ b/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ReactionKernelIndexed_all.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2022-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
+// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
+
+#pragma once
+
+#include "ReactionKernelIndexed_1_double_precision.h"
+#include "ReactionKernelIndexed_1_single_precision.h"
+
+#include "ReactionKernelIndexed_2_double_precision.h"
+#include "ReactionKernelIndexed_2_single_precision.h"
+
+#include "ReactionKernelIndexed_3_double_precision.h"
+#include "ReactionKernelIndexed_3_single_precision.h"
+
+#include "ReactionKernelIndexed_4_double_precision.h"
+#include "ReactionKernelIndexed_4_single_precision.h"
+
+#include "ReactionKernelIndexed_5_double_precision.h"
+#include "ReactionKernelIndexed_5_single_precision.h"
+
+#include <domain_decomposition/BlockDataID.h>
+
+#include <cstddef>
+#include <memory>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+namespace walberla {
+namespace detail {
+namespace ReactionKernelIndexedSelector {
+
+template <typename FloatType = double, std::size_t N = 1> struct KernelTrait {
+  using ReactionKernelIndexed =
+      pystencils::ReactionKernelIndexed_1_double_precision;
+};
+
+template <> struct KernelTrait<double, 2> {
+  using ReactionKernelIndexed =
+      pystencils::ReactionKernelIndexed_2_double_precision;
+};
+
+template <> struct KernelTrait<double, 3> {
+  using ReactionKernelIndexed =
+      pystencils::ReactionKernelIndexed_3_double_precision;
+};
+
+template <> struct KernelTrait<double, 4> {
+  using ReactionKernelIndexed =
+      pystencils::ReactionKernelIndexed_4_double_precision;
+};
+
+template <> struct KernelTrait<double, 5> {
+  using ReactionKernelIndexed =
+      pystencils::ReactionKernelIndexed_5_double_precision;
+};
+
+template <> struct KernelTrait<float, 1> {
+  using ReactionKernelIndexed =
+      pystencils::ReactionKernelIndexed_1_single_precision;
+};
+
+template <> struct KernelTrait<float, 2> {
+  using ReactionKernelIndexed =
+      pystencils::ReactionKernelIndexed_2_single_precision;
+};
+
+template <> struct KernelTrait<float, 3> {
+  using ReactionKernelIndexed =
+      pystencils::ReactionKernelIndexed_3_single_precision;
+};
+
+template <> struct KernelTrait<float, 4> {
+  using ReactionKernelIndexed =
+      pystencils::ReactionKernelIndexed_4_single_precision;
+};
+
+template <> struct KernelTrait<float, 5> {
+  using ReactionKernelIndexed =
+      pystencils::ReactionKernelIndexed_5_single_precision;
+};
+
+template <typename FloatType, class Reactant, std::size_t... ints>
+auto get_kernel_impl(const std::vector<std::shared_ptr<Reactant>> &reactants,
+                     const double coefficient, const BlockDataID &indexFieldID,
+                     std::index_sequence<ints...> int_seq) {
+  auto kernel = std::make_shared<
+      typename KernelTrait<FloatType, int_seq.size()>::ReactionKernelIndexed>(
+      indexFieldID,
+      walberla::BlockDataID(
+          reactants[ints]->get_species()->get_density_id())...,
+      numeric_cast<FloatType>(reactants[ints]->get_order())...,
+      numeric_cast<FloatType>(coefficient),
+      numeric_cast<FloatType>(reactants[ints]->get_stoech_coeff())...);
+
+  std::function<void(IBlock *)> sweep = [kernel](IBlock *b) { kernel->run(b); };
+  return sweep;
+}
+
+template <typename FloatType, class Reactant, class... Args>
+auto get_kernel_impl(const std::vector<std::shared_ptr<Reactant>> &reactants,
+                     Args... args) {
+  switch (reactants.size()) {
+
+  case 1:
+    return get_kernel_impl<FloatType>(reactants, args...,
+                                      std::make_index_sequence<1>{});
+
+  case 2:
+    return get_kernel_impl<FloatType>(reactants, args...,
+                                      std::make_index_sequence<2>{});
+
+  case 3:
+    return get_kernel_impl<FloatType>(reactants, args...,
+                                      std::make_index_sequence<3>{});
+
+  case 4:
+    return get_kernel_impl<FloatType>(reactants, args...,
+                                      std::make_index_sequence<4>{});
+
+  case 5:
+    return get_kernel_impl<FloatType>(reactants, args...,
+                                      std::make_index_sequence<5>{});
+
+  default:
+    throw std::runtime_error("reactions of this size are not implemented!");
+  }
+}
+
+template <class Reactant, class... Args>
+auto get_kernel(const std::vector<std::shared_ptr<Reactant>> &reactants,
+                Args... args) {
+
+  const auto is_double_precision =
+      reactants[0]->get_species()->is_double_precision();
+
+  if (is_double_precision) {
+    return get_kernel_impl<double>(reactants, args...);
+  }
+
+  return get_kernel_impl<float>(reactants, args...);
+}
+
+} // namespace ReactionKernelIndexedSelector
+} // namespace detail
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/CMakeLists.txt b/src/walberla_bridge/src/lattice_boltzmann/CMakeLists.txt
new file mode 100644
index 00000000000..3a2c214c478
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/CMakeLists.txt
@@ -0,0 +1,28 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+add_subdirectory(generated_kernels)
+
+target_sources(espresso_walberla
+               PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/lb_walberla_init.cpp)
+
+if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA)
+  target_sources(espresso_walberla_cuda
+                 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/lb_walberla_init.cu)
+endif()
diff --git a/src/walberla_bridge/src/lattice_boltzmann/InterpolateAndShiftAtBoundary.hpp b/src/walberla_bridge/src/lattice_boltzmann/InterpolateAndShiftAtBoundary.hpp
new file mode 100644
index 00000000000..72b5c326972
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/InterpolateAndShiftAtBoundary.hpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/lattice_boltzmann/LeesEdwardsPack.hpp>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <domain_decomposition/all.h>
+#include <stencil/D3Q19.h>
+
+#include <cmath>
+#include <functional>
+#include <memory>
+#include <stdexcept>
+#include <utility>
+
+namespace walberla {
+
+/**
+ * Lees-Edwards sweep.
+ * @todo Currently only works for 1 MPI rank! It should work in parallel if the
+ * MPI domain decomposition for the structured block forest doesn't partition
+ * along the shear direction. For example if the shear direction goes along
+ * the z-axis, it should be possible to run on 4 MPI ranks with [2, 2, 1].
+ * At the moment, ESPResSo requires system.cell_system.node_grid to be in
+ * decreasing order, therefore parallelization requires a shear direction
+ * along the z-axis and a MPI node_grid of [x, y, 1] with x >= y. This
+ * restriction on the ordering of the node_grid may be lifted in the
+ * distant future, when our FFT algorithm is replaced by a new one.
+ */
+template <class FieldType, typename FloatType>
+class InterpolateAndShiftAtBoundary {
+public:
+  InterpolateAndShiftAtBoundary(
+      std::shared_ptr<StructuredBlockForest> blocks, BlockDataID field_id,
+      BlockDataID tmp_field_id, unsigned int n_ghost_layers,
+      unsigned int shear_direction, unsigned int shear_plane_normal,
+      std::function<double()> get_pos_offset,
+      std::function<double()> get_shift = []() { return 0.0; })
+      : m_blocks(std::move(blocks)), m_field_id(field_id),
+        m_tmp_field_id(tmp_field_id), m_n_ghost_layers(uint_c(n_ghost_layers)),
+        m_shear_direction(uint_c(shear_direction)),
+        m_shear_plane_normal(uint_c(shear_plane_normal)),
+        m_get_pos_offset(std::move(get_pos_offset)),
+        m_get_shift(std::move(get_shift)) {
+    if (m_n_ghost_layers != 1u) {
+      throw std::domain_error("The Lees-Edwards sweep is implemented "
+                              "for a ghost layer of thickness 1");
+    }
+    if (m_shear_plane_normal == 0u) {
+      m_slab_min = stencil::W;
+      m_slab_max = stencil::E;
+    } else if (m_shear_plane_normal == 1u) {
+      m_slab_min = stencil::S;
+      m_slab_max = stencil::N;
+    } else if (m_shear_plane_normal == 2u) {
+      m_slab_min = stencil::B;
+      m_slab_max = stencil::T;
+    }
+  }
+
+  FloatType get_pos_offset() const {
+    return numeric_cast<FloatType>(m_get_pos_offset());
+  }
+
+  FloatType get_shift() const { return numeric_cast<FloatType>(m_get_shift()); }
+
+  void operator()(IBlock *block) {
+    kernel(block, m_slab_min);
+    kernel(block, m_slab_max);
+  }
+
+private:
+  void kernel(IBlock *block, stencil::Direction slab_dir) {
+    // setup lengths
+    assert(m_blocks->getNumberOfCells(*block, m_shear_plane_normal) >= 2u);
+    auto const dir = m_shear_direction;
+    auto const dim = cell_idx_c(m_blocks->getNumberOfCells(*block, dir));
+    auto const length = numeric_cast<FloatType>(dim);
+    auto const weight =
+        std::abs(std::fmod(get_pos_offset() + length, FloatType{1}));
+
+    // setup slab
+    auto field = block->template getData<FieldType>(m_field_id);
+    auto tmp_field = block->template getData<FieldType>(m_tmp_field_id);
+
+    CellInterval ci;
+    field->getGhostRegion(slab_dir, ci, cell_idx_t{1}, true);
+
+    // shift
+    auto const shift = get_shift();
+    // Note that the offset is applied to the interpolation source rather than
+    // the target
+    auto const prefactor =
+        ((slab_dir == m_slab_max) ? FloatType{-1} : FloatType{1});
+    auto const offset = get_pos_offset() * prefactor;
+    for (auto const &&cell : ci) {
+      Cell source1 = cell;
+      Cell source2 = cell;
+      source1[dir] = cell_idx_c(std::floor(
+                         static_cast<FloatType>(source1[dir]) + offset)) %
+                     dim;
+      source1[dir] = cell_idx_c(static_cast<FloatType>(source1[dir]) + length);
+      source1[dir] = cell_idx_c(source1[dir] % dim);
+
+      source2[dir] =
+          cell_idx_c(std::ceil(static_cast<FloatType>(source2[dir]) + offset)) %
+          dim;
+      source2[dir] = cell_idx_c(static_cast<FloatType>(source2[dir]) + length);
+      source2[dir] = cell_idx_c(source2[dir] % dim);
+
+      for (uint_t f = 0; f < FieldType::F_SIZE; ++f) {
+        tmp_field->get(cell, f) = field->get(source1, f) * (1 - weight) +
+                                  field->get(source2, f) * weight;
+      }
+      tmp_field->get(cell, m_shear_direction) -= prefactor * shift;
+    }
+
+    // swap
+    for (auto const &&cell : ci) {
+      for (uint_t f = 0; f < FieldType::F_SIZE; ++f) {
+        field->get(cell, f) = tmp_field->get(cell, f);
+      }
+    }
+  }
+
+private:
+  std::shared_ptr<StructuredBlockForest> m_blocks;
+  BlockDataID m_field_id;
+  BlockDataID m_tmp_field_id;
+  uint_t m_n_ghost_layers;
+  uint_t m_shear_direction;
+  uint_t m_shear_plane_normal;
+  std::function<double()> m_get_pos_offset;
+  std::function<double()> m_get_shift;
+  stencil::Direction m_slab_min;
+  stencil::Direction m_slab_max;
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
new file mode 100644
index 00000000000..6c230b15854
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
@@ -0,0 +1,1330 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+/**
+ * @file
+ * @ref walberla::LBWalberlaImpl implements the interface of the LB
+ * waLBerla bridge using sweeps generated by lbmpy
+ * (see <tt>maintainer/walberla_kernels</tt>).
+ */
+
+#include <blockforest/Initialization.h>
+#include <blockforest/StructuredBlockForest.h>
+#include <blockforest/communication/UniformBufferedScheme.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/GhostLayerField.h>
+#include <field/vtk/FlagFieldCellFilter.h>
+#include <field/vtk/VTKWriter.h>
+
+#include <field/AddToStorage.h>
+#include <field/FlagField.h>
+#include <field/communication/PackInfo.h>
+#include <lbm/communication/PdfFieldPackInfo.h>
+#include <lbm/field/AddToStorage.h>
+#include <lbm/field/PdfField.h>
+
+#include <stencil/D3Q19.h>
+#include <stencil/D3Q27.h>
+
+#include "../BoundaryHandling.hpp"
+#include "InterpolateAndShiftAtBoundary.hpp"
+#include "ResetForce.hpp"
+#include "lb_kernels.hpp"
+
+#include <walberla_bridge/Architecture.hpp>
+#include <walberla_bridge/BlockAndCell.hpp>
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+#include <walberla_bridge/lattice_boltzmann/LeesEdwardsPack.hpp>
+#include <walberla_bridge/utils/boundary_utils.hpp>
+#include <walberla_bridge/utils/walberla_utils.hpp>
+
+#include <utils/Vector.hpp>
+#include <utils/interpolation/bspline_3d.hpp>
+#include <utils/math/make_lin_space.hpp>
+
+#include <boost/optional.hpp>
+#include <boost/variant.hpp>
+
+#include <array>
+#include <cmath>
+#include <cstddef>
+#include <initializer_list>
+#include <limits>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace walberla {
+
+/** @brief Class that runs and controls the LB on waLBerla. */
+template <typename FloatType, lbmpy::Arch Architecture>
+class LBWalberlaImpl : public LBWalberlaBase {
+protected:
+  using CollisionModelLeesEdwards =
+      typename detail::KernelTrait<FloatType,
+                                   Architecture>::CollisionModelLeesEdwards;
+  using CollisionModelThermalized =
+      typename detail::KernelTrait<FloatType,
+                                   Architecture>::CollisionModelThermalized;
+  using StreamSweep =
+      typename detail::KernelTrait<FloatType, Architecture>::StreamSweep;
+  using InitialPDFsSetter =
+      typename detail::KernelTrait<FloatType, Architecture>::InitialPDFsSetter;
+  using BoundaryModel =
+      BoundaryHandling<Vector3<FloatType>,
+                       typename detail::BoundaryHandlingTrait<
+                           FloatType, Architecture>::Dynamic_UBB>;
+  using CollisionModel =
+      boost::variant<CollisionModelThermalized, CollisionModelLeesEdwards>;
+
+public:
+  // Type definitions
+  using Stencil = stencil::D3Q19;
+  using Lattice_T = LatticeWalberla::Lattice_T;
+
+protected:
+  template <typename FT, lbmpy::Arch AT = lbmpy::Arch::CPU> struct FieldTrait {
+    using PdfField = GhostLayerField<FT, Stencil::Size>;
+    using VectorField = GhostLayerField<FT, uint_t{3u}>;
+    template <class Field>
+    using PackInfo = field::communication::PackInfo<Field>;
+  };
+
+public:
+  using PdfField = typename FieldTrait<FloatType, Architecture>::PdfField;
+  using VectorField = typename FieldTrait<FloatType, Architecture>::VectorField;
+  using FlagField = typename BoundaryModel::FlagField;
+
+public:
+  template <typename T> FloatType FloatType_c(T t) const {
+    return numeric_cast<FloatType>(t);
+  }
+
+  [[nodiscard]] std::size_t stencil_size() const noexcept override {
+    return static_cast<std::size_t>(Stencil::Size);
+  }
+
+  [[nodiscard]] virtual bool is_double_precision() const noexcept override {
+    return std::is_same<FloatType, double>::value;
+  }
+
+private:
+  class : public boost::static_visitor<> {
+  public:
+    void operator()(CollisionModelThermalized &cm, IBlock *b) { cm(b); }
+
+    void operator()(CollisionModelLeesEdwards &cm, IBlock *b) {
+      cm.v_s_ = static_cast<decltype(cm.v_s_)>(
+          m_lees_edwards_callbacks->get_shear_velocity());
+      cm(b);
+    }
+    void register_lees_edwards_callbacks(
+        std::shared_ptr<LeesEdwardsPack> lees_edwards_callbacks) {
+      m_lees_edwards_callbacks = std::move(lees_edwards_callbacks);
+    }
+
+  private:
+    std::shared_ptr<LeesEdwardsPack> m_lees_edwards_callbacks;
+
+  } run_collide_sweep;
+
+  FloatType shear_mode_relaxation_rate() const {
+    return FloatType{2} / (FloatType{6} * m_viscosity + FloatType{1});
+  }
+
+  FloatType odd_mode_relaxation_rate(
+      FloatType shear_relaxation,
+      FloatType magic_number = FloatType{3} / FloatType{16}) const {
+    return (FloatType{4} - FloatType{2} * shear_relaxation) /
+           (FloatType{4} * magic_number * shear_relaxation + FloatType{2} -
+            shear_relaxation);
+  }
+
+  void reset_boundary_handling() {
+    auto const &blocks = get_lattice().get_blocks();
+    m_boundary = std::make_shared<BoundaryModel>(blocks, m_pdf_field_id,
+                                                 m_flag_field_id);
+  }
+
+  FloatType pressure_tensor_correction_factor() const {
+    return m_viscosity / (m_viscosity + FloatType{1} / FloatType{6});
+  }
+
+  void pressure_tensor_correction(Matrix3<FloatType> &tensor) const {
+    auto const revert_factor = pressure_tensor_correction_factor();
+    for (auto const i : {1u, 2u, 3u, 5u, 6u, 7u}) {
+      tensor[i] *= revert_factor;
+    }
+  }
+
+  class interpolation_illegal_access : public std::runtime_error {
+  public:
+    explicit interpolation_illegal_access(std::string const &field,
+                                          Utils::Vector3d const &pos,
+                                          std::array<int, 3> const &node,
+                                          double weight)
+        : std::runtime_error("Access to LB " + field + " field failed") {
+      std::cerr << "pos [" << pos << "], "
+                << "node [" << Utils::Vector3i(node) << "], "
+                << "weight " << weight << "\n";
+    }
+  };
+
+  class vtk_runtime_error : public std::runtime_error {
+  public:
+    explicit vtk_runtime_error(std::string const &vtk_uid,
+                               std::string const &reason)
+        : std::runtime_error("VTKOutput object '" + vtk_uid + "' " + reason) {}
+  };
+
+protected:
+  // Member variables
+  FloatType m_viscosity; /// kinematic viscosity
+  FloatType m_density;
+  FloatType m_kT;
+
+  // Block data access handles
+  BlockDataID m_pdf_field_id;
+  BlockDataID m_pdf_tmp_field_id;
+  BlockDataID m_flag_field_id;
+
+  BlockDataID m_last_applied_force_field_id;
+  BlockDataID m_force_to_be_applied_id;
+
+  BlockDataID m_velocity_field_id;
+  BlockDataID m_vec_tmp_field_id;
+
+  /**
+   * @brief Full communicator.
+   * We use the D3Q27 directions to update cells along the diagonals during
+   * a full ghost communication. This is needed to properly update the corners
+   * of the ghost layer when setting cell velocities or populations.
+   */
+  using FullCommunicator = blockforest::communication::UniformBufferedScheme<
+      typename stencil::D3Q27>;
+  /**
+   * @brief Regular communicator.
+   * We use the same directions as the stencil during integration.
+   */
+  using PDFStreamingCommunicator =
+      blockforest::communication::UniformBufferedScheme<Stencil>;
+  template <class Field>
+  using PackInfo =
+      typename FieldTrait<FloatType, Architecture>::template PackInfo<Field>;
+
+  // communicators
+  std::shared_ptr<FullCommunicator> m_full_communication;
+  std::shared_ptr<PDFStreamingCommunicator> m_pdf_streaming_communication;
+
+  // ResetForce sweep + external force handling
+  std::shared_ptr<ResetForce<PdfField, VectorField>> m_reset_force;
+
+  // Stream sweep
+  std::shared_ptr<StreamSweep> m_stream;
+
+  // Lees Edwards boundary interpolation
+  std::shared_ptr<LeesEdwardsPack> m_lees_edwards_callbacks;
+  std::shared_ptr<InterpolateAndShiftAtBoundary<PdfField, FloatType>>
+      m_lees_edwards_pdf_interpol_sweep;
+  std::shared_ptr<InterpolateAndShiftAtBoundary<VectorField, FloatType>>
+      m_lees_edwards_vel_interpol_sweep;
+  std::shared_ptr<InterpolateAndShiftAtBoundary<VectorField, FloatType>>
+      m_lees_edwards_last_applied_force_interpol_sweep;
+
+  // Collision sweep
+  std::shared_ptr<CollisionModel> m_collision_model;
+
+  // boundaries
+  std::shared_ptr<BoundaryModel> m_boundary;
+
+  // lattice
+  std::shared_ptr<LatticeWalberla> m_lattice;
+
+  [[nodiscard]] boost::optional<CellInterval>
+  get_interval(Utils::Vector3i const &lower_corner,
+               Utils::Vector3i const &upper_corner) const {
+    auto const &lattice = get_lattice();
+    auto const &cell_min = lower_corner;
+    auto const cell_max = upper_corner - Utils::Vector3i::broadcast(1);
+    auto const lower_bc = get_block_and_cell(lattice, cell_min, true);
+    auto const upper_bc = get_block_and_cell(lattice, cell_max, true);
+    if (not lower_bc or not upper_bc) {
+      return {};
+    }
+    assert(&(*(lower_bc->block)) == &(*(upper_bc->block)));
+    return {CellInterval(lower_bc->cell, upper_bc->cell)};
+  }
+
+  /**
+   * @brief Convenience function to add a field with a custom allocator.
+   *
+   * When vectorization is off, let waLBerla decide which memory allocator
+   * to use. When vectorization is on, the aligned memory allocator is
+   * required, otherwise <tt>cpu_vectorize_info["assume_aligned"]</tt> will
+   * trigger assertions. That is because for single-precision kernels the
+   * waLBerla heuristic in <tt>src/field/allocation/FieldAllocator.h</tt>
+   * will fall back to @c StdFieldAlloc, yet @c AllocateAligned is needed
+   * for intrinsics to work.
+   */
+  template <typename Field> auto add_to_storage(std::string const tag) {
+    auto const &blocks = m_lattice->get_blocks();
+    auto const n_ghost_layers = m_lattice->get_ghost_layers();
+    if constexpr (Architecture == lbmpy::Arch::CPU) {
+#ifdef ESPRESSO_BUILD_WITH_AVX_KERNELS
+#if defined(__AVX512F__)
+      constexpr uint_t alignment = 64;
+#elif defined(__AVX__)
+      constexpr uint_t alignment = 32;
+#elif defined(__SSE__)
+      constexpr uint_t alignment = 16;
+#else
+#error "Unsupported arch, check walberla src/field/allocation/FieldAllocator.h"
+#endif
+      using value_type = typename Field::value_type;
+      using Allocator = field::AllocateAligned<value_type, alignment>;
+      auto const allocator = std::make_shared<Allocator>();
+      auto const empty_set = Set<SUID>::emptySet();
+      return field::addToStorage<Field>(
+          blocks, tag, field::internal::defaultSize, FloatType{0}, field::fzyx,
+          n_ghost_layers, false, {}, empty_set, empty_set, allocator);
+#else  // ESPRESSO_BUILD_WITH_AVX_KERNELS
+      return field::addToStorage<Field>(blocks, tag, FloatType{0}, field::fzyx,
+                                        n_ghost_layers);
+#endif // ESPRESSO_BUILD_WITH_AVX_KERNELS
+    }
+  }
+
+public:
+  LBWalberlaImpl(std::shared_ptr<LatticeWalberla> lattice, double viscosity,
+                 double density)
+      : m_viscosity(FloatType_c(viscosity)), m_density(FloatType_c(density)),
+        m_kT(FloatType{0}), m_lattice(std::move(lattice)) {
+
+    auto const &blocks = m_lattice->get_blocks();
+    auto const n_ghost_layers = m_lattice->get_ghost_layers();
+    if (n_ghost_layers == 0u)
+      throw std::runtime_error("At least one ghost layer must be used");
+
+    // Initialize and register fields
+    m_pdf_field_id = add_to_storage<PdfField>("pdfs");
+    m_pdf_tmp_field_id = add_to_storage<PdfField>("pdfs_tmp");
+    m_last_applied_force_field_id = add_to_storage<VectorField>("force field");
+    m_force_to_be_applied_id = add_to_storage<VectorField>("force field");
+    m_velocity_field_id = add_to_storage<VectorField>("velocity field");
+    m_vec_tmp_field_id = add_to_storage<VectorField>("velocity_tmp field");
+
+    // Initialize and register pdf field
+    auto pdf_setter =
+        InitialPDFsSetter(m_force_to_be_applied_id, m_pdf_field_id,
+                          m_velocity_field_id, m_density);
+    for (auto b = blocks->begin(); b != blocks->end(); ++b) {
+      pdf_setter(&*b);
+    }
+
+    // Initialize and register flag field (fluid/boundary)
+    m_flag_field_id = field::addFlagFieldToStorage<FlagField>(
+        blocks, "flag field", n_ghost_layers);
+    // Initialize boundary sweep
+    reset_boundary_handling();
+
+    // Set up the communication and register fields
+    m_pdf_streaming_communication =
+        std::make_shared<PDFStreamingCommunicator>(blocks);
+    m_pdf_streaming_communication->addPackInfo(
+        std::make_shared<PackInfo<PdfField>>(m_pdf_field_id, n_ghost_layers));
+    m_pdf_streaming_communication->addPackInfo(
+        std::make_shared<PackInfo<VectorField>>(m_last_applied_force_field_id,
+                                                n_ghost_layers));
+    m_pdf_streaming_communication->addPackInfo(
+        std::make_shared<field::communication::PackInfo<FlagField>>(
+            m_flag_field_id, n_ghost_layers));
+
+    m_full_communication = std::make_shared<FullCommunicator>(blocks);
+    m_full_communication->addPackInfo(
+        std::make_shared<PackInfo<PdfField>>(m_pdf_field_id, n_ghost_layers));
+    m_full_communication->addPackInfo(std::make_shared<PackInfo<VectorField>>(
+        m_last_applied_force_field_id, n_ghost_layers));
+    m_full_communication->addPackInfo(std::make_shared<PackInfo<VectorField>>(
+        m_velocity_field_id, n_ghost_layers));
+    m_full_communication->addPackInfo(
+        std::make_shared<field::communication::PackInfo<FlagField>>(
+            m_flag_field_id, n_ghost_layers));
+
+    // Instantiate the sweep responsible for force double buffering and
+    // external forces
+    m_reset_force = std::make_shared<ResetForce<PdfField, VectorField>>(
+        m_last_applied_force_field_id, m_force_to_be_applied_id);
+
+    // Prepare LB sweeps
+    // Note: For now, combined collide-stream sweeps cannot be used,
+    // because the collide-push variant is not supported by lbmpy.
+    // The following functors are individual in-place collide and stream steps
+    m_stream = std::make_shared<StreamSweep>(
+        m_last_applied_force_field_id, m_pdf_field_id, m_velocity_field_id);
+  }
+
+private:
+  void integrate_stream(std::shared_ptr<Lattice_T> const &blocks) {
+    for (auto b = blocks->begin(); b != blocks->end(); ++b)
+      (*m_stream)(&*b);
+  }
+
+  void integrate_collide(std::shared_ptr<Lattice_T> const &blocks) {
+    for (auto b = blocks->begin(); b != blocks->end(); ++b)
+      boost::apply_visitor(run_collide_sweep, *m_collision_model,
+                           boost::variant<IBlock *>(&*b));
+    if (auto *cm = boost::get<CollisionModelThermalized>(&*m_collision_model)) {
+      cm->time_step_++;
+    }
+  }
+
+  auto has_lees_edwards_bc() const {
+    return boost::get<CollisionModelLeesEdwards>(&*m_collision_model) !=
+           nullptr;
+  }
+
+  void apply_lees_edwards_pdf_interpolation(
+      std::shared_ptr<Lattice_T> const &blocks) {
+    for (auto b = blocks->begin(); b != blocks->end(); ++b)
+      (*m_lees_edwards_pdf_interpol_sweep)(&*b);
+  }
+
+  void apply_lees_edwards_vel_interpolation_and_shift(
+      std::shared_ptr<Lattice_T> const &blocks) {
+    for (auto b = blocks->begin(); b != blocks->end(); ++b)
+      (*m_lees_edwards_vel_interpol_sweep)(&*b);
+  }
+
+  void apply_lees_edwards_last_applied_force_interpolation(
+      std::shared_ptr<Lattice_T> const &blocks) {
+    for (auto b = blocks->begin(); b != blocks->end(); ++b)
+      (*m_lees_edwards_last_applied_force_interpol_sweep)(&*b);
+  }
+
+  void integrate_reset_force(std::shared_ptr<Lattice_T> const &blocks) {
+    for (auto b = blocks->begin(); b != blocks->end(); ++b)
+      (*m_reset_force)(&*b);
+  }
+
+  void integrate_boundaries(std::shared_ptr<Lattice_T> const &blocks) {
+    for (auto b = blocks->begin(); b != blocks->end(); ++b)
+      (*m_boundary)(&*b);
+  }
+
+  void integrate_push_scheme() {
+    auto const &blocks = get_lattice().get_blocks();
+    // Reset force fields
+    integrate_reset_force(blocks);
+    // LB collide
+    integrate_collide(blocks);
+    m_pdf_streaming_communication->communicate();
+    // Handle boundaries
+    integrate_boundaries(blocks);
+    // LB stream
+    integrate_stream(blocks);
+    // Refresh ghost layers
+    m_full_communication->communicate();
+  }
+
+  void integrate_pull_scheme() {
+    auto const &blocks = get_lattice().get_blocks();
+    integrate_reset_force(blocks);
+    // Handle boundaries
+    integrate_boundaries(blocks);
+    // LB stream
+    integrate_stream(blocks);
+    // LB collide
+    integrate_collide(blocks);
+    // Refresh ghost layers
+    ghost_communication();
+  }
+
+protected:
+  void integrate_vtk_writers() override {
+    for (auto const &it : m_vtk_auto) {
+      auto &vtk_handle = it.second;
+      if (vtk_handle->enabled) {
+        vtk::writeFiles(vtk_handle->ptr)();
+        vtk_handle->execution_count++;
+      }
+    }
+  }
+
+public:
+  void integrate() override {
+    reallocate_ubb_field();
+    if (has_lees_edwards_bc()) {
+      integrate_pull_scheme();
+    } else {
+      integrate_push_scheme();
+    }
+    // Handle VTK writers
+    integrate_vtk_writers();
+  }
+
+  void ghost_communication() override {
+    m_full_communication->communicate();
+    if (has_lees_edwards_bc()) {
+      auto const &blocks = get_lattice().get_blocks();
+      apply_lees_edwards_pdf_interpolation(blocks);
+      apply_lees_edwards_vel_interpolation_and_shift(blocks);
+      apply_lees_edwards_last_applied_force_interpolation(blocks);
+    }
+  }
+
+  void set_collision_model(double kT, unsigned int seed) override {
+    auto const omega = shear_mode_relaxation_rate();
+    auto const omega_odd = odd_mode_relaxation_rate(omega);
+    m_kT = FloatType_c(kT);
+    auto obj = CollisionModelThermalized(
+        m_last_applied_force_field_id, m_pdf_field_id, uint32_t{0u},
+        uint32_t{0u}, uint32_t{0u}, m_kT, omega, omega, omega_odd, omega, seed,
+        uint32_t{0u});
+    obj.block_offset_generator =
+        [this](IBlock *const block, uint32_t &block_offset_0,
+               uint32_t &block_offset_1, uint32_t &block_offset_2) {
+          auto const &blocks = get_lattice().get_blocks();
+          auto const &ci = blocks->getBlockCellBB(*block);
+          block_offset_0 = static_cast<uint32_t>(ci.xMin());
+          block_offset_1 = static_cast<uint32_t>(ci.yMin());
+          block_offset_2 = static_cast<uint32_t>(ci.zMin());
+        };
+    m_collision_model = std::make_shared<CollisionModel>(std::move(obj));
+  }
+
+  void set_collision_model(
+      std::unique_ptr<LeesEdwardsPack> &&lees_edwards_pack) override {
+    assert(m_kT == 0.);
+    auto const shear_direction = lees_edwards_pack->shear_direction;
+    auto const shear_plane_normal = lees_edwards_pack->shear_plane_normal;
+    auto const shear_vel = FloatType_c(lees_edwards_pack->get_shear_velocity());
+    auto const omega = shear_mode_relaxation_rate();
+    if (shear_plane_normal != 1u) {
+      throw std::domain_error(
+          "Lees-Edwards LB only supports shear_plane_normal=\"y\"");
+    }
+    auto const &lattice = get_lattice();
+    auto const n_ghost_layers = lattice.get_ghost_layers();
+    auto const blocks = lattice.get_blocks();
+    auto const agrid =
+        FloatType_c(lattice.get_grid_dimensions()[shear_plane_normal]);
+    auto obj = CollisionModelLeesEdwards(
+        m_last_applied_force_field_id, m_pdf_field_id, agrid, omega, shear_vel);
+    m_collision_model = std::make_shared<CollisionModel>(std::move(obj));
+    m_lees_edwards_callbacks = std::move(lees_edwards_pack);
+    run_collide_sweep.register_lees_edwards_callbacks(m_lees_edwards_callbacks);
+    m_lees_edwards_pdf_interpol_sweep =
+        std::make_shared<InterpolateAndShiftAtBoundary<PdfField, FloatType>>(
+            blocks, m_pdf_field_id, m_pdf_tmp_field_id, n_ghost_layers,
+            shear_direction, shear_plane_normal,
+            m_lees_edwards_callbacks->get_pos_offset);
+    m_lees_edwards_vel_interpol_sweep =
+        std::make_shared<InterpolateAndShiftAtBoundary<VectorField, FloatType>>(
+            blocks, m_velocity_field_id, m_vec_tmp_field_id, n_ghost_layers,
+            shear_direction, shear_plane_normal,
+            m_lees_edwards_callbacks->get_pos_offset,
+            m_lees_edwards_callbacks->get_shear_velocity);
+    m_lees_edwards_last_applied_force_interpol_sweep =
+        std::make_shared<InterpolateAndShiftAtBoundary<VectorField, FloatType>>(
+            blocks, m_last_applied_force_field_id, m_vec_tmp_field_id,
+            n_ghost_layers, shear_direction, shear_plane_normal,
+            m_lees_edwards_callbacks->get_pos_offset);
+  }
+
+  void check_lebc(unsigned int shear_direction,
+                  unsigned int shear_plane_normal) const override {
+    if (m_lees_edwards_callbacks) {
+      if (m_lees_edwards_callbacks->shear_direction != shear_direction or
+          m_lees_edwards_callbacks->shear_plane_normal != shear_plane_normal) {
+        throw std::runtime_error(
+            "MD and LB Lees-Edwards boundary conditions disagree");
+      }
+    }
+  }
+
+  void set_viscosity(double viscosity) override {
+    m_viscosity = FloatType_c(viscosity);
+  }
+
+  [[nodiscard]] double get_viscosity() const noexcept override {
+    return numeric_cast<double>(m_viscosity);
+  }
+
+  [[nodiscard]] double get_density() const noexcept override {
+    return numeric_cast<double>(m_density);
+  }
+
+  // Velocity
+  boost::optional<Utils::Vector3d>
+  get_node_velocity(Utils::Vector3i const &node,
+                    bool consider_ghosts = false) const override {
+    auto const is_boundary = get_node_is_boundary(node, consider_ghosts);
+    if (is_boundary)    // is info available locally
+      if (*is_boundary) // is the node a boundary
+        return get_node_velocity_at_boundary(node, consider_ghosts);
+    auto const bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+    if (!bc)
+      return {};
+
+    auto field = bc->block->template getData<VectorField>(m_velocity_field_id);
+    auto const vec = lbm::accessor::Vector::get(field, bc->cell);
+    return to_vector3d(vec);
+  }
+
+  bool set_node_velocity(Utils::Vector3i const &node,
+                         Utils::Vector3d const &v) override {
+    auto bc = get_block_and_cell(get_lattice(), node, false);
+    if (!bc)
+      return false;
+
+    // We have to set both, the pdf and the stored velocity field
+    auto pdf_field = bc->block->template getData<PdfField>(m_pdf_field_id);
+    auto vel_field =
+        bc->block->template getData<VectorField>(m_velocity_field_id);
+    auto force_field =
+        bc->block->template getData<VectorField>(m_last_applied_force_field_id);
+    auto const vel = to_vector3<FloatType>(v);
+    lbm::accessor::Velocity::set(pdf_field, force_field, vel, bc->cell);
+    lbm::accessor::Vector::set(vel_field, vel, bc->cell);
+
+    return true;
+  }
+
+  std::vector<double>
+  get_slice_velocity(Utils::Vector3i const &lower_corner,
+                     Utils::Vector3i const &upper_corner) const override {
+    std::vector<double> out;
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const &block = *(lattice.get_blocks()->begin());
+      auto const field =
+          block.template getData<VectorField>(m_velocity_field_id);
+      auto const local_offset = std::get<0>(lattice.get_local_grid_range());
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      out.reserve(ci->numCells());
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const node = local_offset + Utils::Vector3i{{x, y, z}};
+            if (m_boundary->node_is_boundary(node)) {
+              auto const vec = m_boundary->get_node_value_at_boundary(node);
+              for (uint_t f = 0u; f < 3u; ++f) {
+                out.emplace_back(double_c(vec[f]));
+              }
+            } else {
+              auto const vec = lbm::accessor::Vector::get(field, Cell{x, y, z});
+              for (uint_t f = 0u; f < 3u; ++f) {
+                out.emplace_back(double_c(vec[f]));
+              }
+            }
+          }
+        }
+      }
+    }
+    return out;
+  }
+
+  void set_slice_velocity(Utils::Vector3i const &lower_corner,
+                          Utils::Vector3i const &upper_corner,
+                          std::vector<double> const &velocity) override {
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto &block = *(lattice.get_blocks()->begin());
+      // We have to set both, the pdf and the stored velocity field
+      auto pdf_field = block.template getData<PdfField>(m_pdf_field_id);
+      auto vel_field = block.template getData<VectorField>(m_velocity_field_id);
+      auto force_field =
+          block.template getData<VectorField>(m_last_applied_force_field_id);
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto it = velocity.begin();
+      assert(velocity.size() == 3u * ci->numCells());
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const cell = Cell{x, y, z};
+            Vector3<FloatType> vec;
+            for (uint_t f = 0u; f < 3u; ++f) {
+              vec[f] = FloatType_c(*it);
+              ++it;
+            }
+            lbm::accessor::Velocity::set(pdf_field, force_field, vec, cell);
+            lbm::accessor::Vector::set(vel_field, vec, cell);
+          }
+        }
+      }
+    }
+  }
+
+  boost::optional<Utils::Vector3d>
+  get_velocity_at_pos(Utils::Vector3d const &pos,
+                      bool consider_points_in_halo = false) const override {
+    if (!consider_points_in_halo and !m_lattice->pos_in_local_domain(pos))
+      return {};
+    if (consider_points_in_halo and !m_lattice->pos_in_local_halo(pos))
+      return {};
+    Utils::Vector3d v{0.0, 0.0, 0.0};
+    interpolate_bspline_at_pos(
+        pos, [this, &v, pos](std::array<int, 3> const node, double weight) {
+          // Nodes with zero weight might not be accessible, because they can be
+          // outside ghost layers
+          if (weight != 0) {
+            auto const res = get_node_velocity(Utils::Vector3i(node), true);
+            if (!res) {
+              throw interpolation_illegal_access("velocity", pos, node, weight);
+            }
+            v += *res * weight;
+          }
+        });
+    return {std::move(v)};
+  }
+
+  boost::optional<double> get_interpolated_density_at_pos(
+      Utils::Vector3d const &pos,
+      bool consider_points_in_halo = false) const override {
+    if (!consider_points_in_halo and !m_lattice->pos_in_local_domain(pos))
+      return {};
+    if (consider_points_in_halo and !m_lattice->pos_in_local_halo(pos))
+      return {};
+    double dens = 0.0;
+    interpolate_bspline_at_pos(
+        pos, [this, &dens, pos](std::array<int, 3> const node, double weight) {
+          // Nodes with zero weight might not be accessible, because they can be
+          // outside ghost layers
+          if (weight != 0) {
+            auto const res = get_node_density(Utils::Vector3i(node), true);
+            if (!res) {
+              throw interpolation_illegal_access("density", pos, node, weight);
+            }
+            dens += *res * weight;
+          }
+        });
+    return {std::move(dens)};
+  }
+
+  // Local force
+  bool add_force_at_pos(Utils::Vector3d const &pos,
+                        Utils::Vector3d const &force) override {
+    if (!m_lattice->pos_in_local_halo(pos))
+      return false;
+    auto const force_at_node = [this, force](std::array<int, 3> const node,
+                                             double weight) {
+      auto const bc =
+          get_block_and_cell(get_lattice(), Utils::Vector3i(node), true);
+      if (bc) {
+        auto const weighted_force = to_vector3<FloatType>(weight * force);
+        auto force_field =
+            bc->block->template uncheckedFastGetData<VectorField>(
+                m_force_to_be_applied_id);
+        lbm::accessor::Vector::add(force_field, weighted_force, bc->cell);
+      }
+    };
+    interpolate_bspline_at_pos(pos, force_at_node);
+    return true;
+  }
+
+  boost::optional<Utils::Vector3d>
+  get_node_force_to_be_applied(Utils::Vector3i const &node) const override {
+    auto const bc = get_block_and_cell(get_lattice(), node, true);
+    if (!bc)
+      return {};
+
+    auto field =
+        bc->block->template getData<VectorField>(m_force_to_be_applied_id);
+    auto const vec = lbm::accessor::Vector::get(field, bc->cell);
+    return to_vector3d(vec);
+  }
+
+  boost::optional<Utils::Vector3d>
+  get_node_last_applied_force(Utils::Vector3i const &node,
+                              bool consider_ghosts = false) const override {
+    auto const bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+    if (!bc)
+      return {};
+
+    auto const field =
+        bc->block->template getData<VectorField>(m_last_applied_force_field_id);
+    auto const vec = lbm::accessor::Vector::get(field, bc->cell);
+    return to_vector3d(vec);
+  }
+
+  bool set_node_last_applied_force(Utils::Vector3i const &node,
+                                   Utils::Vector3d const &force) override {
+    auto bc = get_block_and_cell(get_lattice(), node, false);
+    if (!bc)
+      return false;
+
+    auto field =
+        bc->block->template getData<VectorField>(m_last_applied_force_field_id);
+    auto const vec = to_vector3<FloatType>(force);
+    lbm::accessor::Vector::set(field, vec, bc->cell);
+
+    return true;
+  }
+
+  std::vector<double> get_slice_last_applied_force(
+      Utils::Vector3i const &lower_corner,
+      Utils::Vector3i const &upper_corner) const override {
+    std::vector<double> out;
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const &block = *(lattice.get_blocks()->begin());
+      auto const field =
+          block.template getData<VectorField>(m_last_applied_force_field_id);
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto const n_values = 3u * ci->numCells();
+      out.reserve(n_values);
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const vec = lbm::accessor::Vector::get(field, Cell{x, y, z});
+            for (uint_t f = 0u; f < 3u; ++f) {
+              out.emplace_back(double_c(vec[f]));
+            }
+          }
+        }
+      }
+      assert(out.size() == n_values);
+    }
+    return out;
+  }
+
+  void set_slice_last_applied_force(Utils::Vector3i const &lower_corner,
+                                    Utils::Vector3i const &upper_corner,
+                                    std::vector<double> const &force) override {
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto &block = *(lattice.get_blocks()->begin());
+      auto field =
+          block.template getData<VectorField>(m_last_applied_force_field_id);
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto it = force.begin();
+      assert(force.size() == 3u * ci->numCells());
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            Vector3<FloatType> vec;
+            for (uint_t f = 0u; f < 3u; ++f) {
+              vec[f] = FloatType_c(*it);
+              ++it;
+            }
+            lbm::accessor::Vector::set(field, vec, Cell{x, y, z});
+          }
+        }
+      }
+    }
+  }
+
+  // Population
+  boost::optional<std::vector<double>>
+  get_node_population(Utils::Vector3i const &node,
+                      bool consider_ghosts = false) const override {
+    auto bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+    if (!bc)
+      return {boost::none};
+
+    auto pdf_field = bc->block->template getData<PdfField>(m_pdf_field_id);
+    auto const pop = lbm::accessor::Population::get(pdf_field, bc->cell);
+    std::vector<double> population(Stencil::Size);
+    for (uint_t f = 0u; f < Stencil::Size; ++f) {
+      population[f] = double_c(pop[f]);
+    }
+
+    return {std::move(population)};
+  }
+
+  bool set_node_population(Utils::Vector3i const &node,
+                           std::vector<double> const &population) override {
+    auto bc = get_block_and_cell(get_lattice(), node, false);
+    if (!bc)
+      return false;
+
+    auto pdf_field = bc->block->template getData<PdfField>(m_pdf_field_id);
+    std::array<FloatType, Stencil::Size> pop;
+    for (uint_t f = 0u; f < Stencil::Size; ++f) {
+      pop[f] = FloatType_c(population[f]);
+    }
+    lbm::accessor::Population::set(pdf_field, pop, bc->cell);
+
+    return true;
+  }
+
+  std::vector<double>
+  get_slice_population(Utils::Vector3i const &lower_corner,
+                       Utils::Vector3i const &upper_corner) const override {
+    std::vector<double> out;
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const &block = *(lattice.get_blocks()->begin());
+      auto const pdf_field = block.template getData<PdfField>(m_pdf_field_id);
+      auto const values = lbm::accessor::Population::get(pdf_field, *ci);
+      if constexpr (std::is_same_v<typename decltype(values)::value_type,
+                                   double>) {
+        out = std::move(values);
+      } else {
+        out = std::vector<double>(values.begin(), values.end());
+      }
+      assert(out.size() == stencil_size() * ci->numCells());
+    }
+    return out;
+  }
+
+  void set_slice_population(Utils::Vector3i const &lower_corner,
+                            Utils::Vector3i const &upper_corner,
+                            std::vector<double> const &population) override {
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto &block = *(lattice.get_blocks()->begin());
+      auto pdf_field = block.template getData<PdfField>(m_pdf_field_id);
+      assert(population.size() == stencil_size() * ci->numCells());
+      std::vector<FloatType> const values(population.begin(), population.end());
+      lbm::accessor::Population::set(pdf_field, values, *ci);
+    }
+  }
+
+  // Density
+  boost::optional<double>
+  get_node_density(Utils::Vector3i const &node,
+                   bool consider_ghosts = false) const override {
+    auto bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+    if (!bc)
+      return {boost::none};
+
+    auto pdf_field = bc->block->template getData<PdfField>(m_pdf_field_id);
+    auto const density = lbm::accessor::Density::get(pdf_field, bc->cell);
+    return {double_c(density)};
+  }
+
+  bool set_node_density(Utils::Vector3i const &node, double density) override {
+    auto bc = get_block_and_cell(get_lattice(), node, false);
+    if (!bc)
+      return false;
+
+    auto pdf_field = bc->block->template getData<PdfField>(m_pdf_field_id);
+    lbm::accessor::Density::set(pdf_field, FloatType_c(density), bc->cell);
+
+    return true;
+  }
+
+  std::vector<double>
+  get_slice_density(Utils::Vector3i const &lower_corner,
+                    Utils::Vector3i const &upper_corner) const override {
+    std::vector<double> out;
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const &block = *(lattice.get_blocks()->begin());
+      auto const pdf_field = block.template getData<PdfField>(m_pdf_field_id);
+      auto const values = lbm::accessor::Density::get(pdf_field, *ci);
+      if constexpr (std::is_same_v<typename decltype(values)::value_type,
+                                   double>) {
+        out = std::move(values);
+      } else {
+        out = std::vector<double>(values.begin(), values.end());
+      }
+      assert(out.size() == ci->numCells());
+    }
+    return out;
+  }
+
+  void set_slice_density(Utils::Vector3i const &lower_corner,
+                         Utils::Vector3i const &upper_corner,
+                         std::vector<double> const &density) override {
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto &block = *(lattice.get_blocks()->begin());
+      auto pdf_field = block.template getData<PdfField>(m_pdf_field_id);
+      assert(density.size() == ci->numCells());
+      std::vector<FloatType> const values(density.begin(), density.end());
+      lbm::accessor::Density::set(pdf_field, values, *ci);
+    }
+  }
+
+  boost::optional<Utils::Vector3d>
+  get_node_velocity_at_boundary(Utils::Vector3i const &node,
+                                bool consider_ghosts = false) const override {
+    auto const bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+    if (!bc or !m_boundary->node_is_boundary(node))
+      return {boost::none};
+
+    return {m_boundary->get_node_value_at_boundary(node)};
+  }
+
+  bool set_node_velocity_at_boundary(Utils::Vector3i const &node,
+                                     Utils::Vector3d const &velocity) override {
+    auto bc = get_block_and_cell(get_lattice(), node, true);
+    if (!bc)
+      return false;
+
+    m_boundary->set_node_value_at_boundary(node, velocity, *bc);
+
+    return true;
+  }
+
+  std::vector<boost::optional<Utils::Vector3d>> get_slice_velocity_at_boundary(
+      Utils::Vector3i const &lower_corner,
+      Utils::Vector3i const &upper_corner) const override {
+    std::vector<boost::optional<Utils::Vector3d>> out;
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const local_offset = std::get<0>(lattice.get_local_grid_range());
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto const n_values = ci->numCells();
+      out.reserve(n_values);
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const node = local_offset + Utils::Vector3i{{x, y, z}};
+            if (m_boundary->node_is_boundary(node)) {
+              out.emplace_back(m_boundary->get_node_value_at_boundary(node));
+            } else {
+              out.emplace_back(boost::none);
+            }
+          }
+        }
+      }
+      assert(out.size() == n_values);
+    }
+    return out;
+  }
+
+  void set_slice_velocity_at_boundary(
+      Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner,
+      std::vector<boost::optional<Utils::Vector3d>> const &velocity) override {
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const local_offset = std::get<0>(lattice.get_local_grid_range());
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto it = velocity.begin();
+      assert(velocity.size() == ci->numCells());
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const node = local_offset + Utils::Vector3i{{x, y, z}};
+            auto const bc = get_block_and_cell(lattice, node, false);
+            auto const &opt = *it;
+            if (opt) {
+              m_boundary->set_node_value_at_boundary(node, *opt, *bc);
+            } else {
+              m_boundary->remove_node_from_boundary(node, *bc);
+            }
+            ++it;
+          }
+        }
+      }
+    }
+  }
+
+  boost::optional<Utils::Vector3d>
+  get_node_boundary_force(Utils::Vector3i const &node) const override {
+    auto const bc = get_block_and_cell(get_lattice(), node, true);
+    if (!bc or !m_boundary->node_is_boundary(node))
+      return {boost::none};
+
+    return get_node_last_applied_force(node, true);
+  }
+
+  bool remove_node_from_boundary(Utils::Vector3i const &node) override {
+    auto bc = get_block_and_cell(get_lattice(), node, true);
+    if (!bc)
+      return false;
+
+    m_boundary->remove_node_from_boundary(node, *bc);
+
+    return true;
+  }
+
+  boost::optional<bool>
+  get_node_is_boundary(Utils::Vector3i const &node,
+                       bool consider_ghosts = false) const override {
+    auto const bc = get_block_and_cell(get_lattice(), node, consider_ghosts);
+    if (!bc)
+      return {boost::none};
+
+    return {m_boundary->node_is_boundary(node)};
+  }
+
+  std::vector<bool>
+  get_slice_is_boundary(Utils::Vector3i const &lower_corner,
+                        Utils::Vector3i const &upper_corner) const override {
+    std::vector<bool> out;
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const local_offset = std::get<0>(lattice.get_local_grid_range());
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto const n_values = ci->numCells();
+      out.reserve(n_values);
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const node = local_offset + Utils::Vector3i{x, y, z};
+            out.emplace_back(m_boundary->node_is_boundary(node));
+          }
+        }
+      }
+      assert(out.size() == n_values);
+    }
+    return out;
+  }
+
+  void reallocate_ubb_field() override { m_boundary->boundary_update(); }
+
+  void clear_boundaries() override { reset_boundary_handling(); }
+
+  void
+  update_boundary_from_shape(std::vector<int> const &raster_flat,
+                             std::vector<double> const &data_flat) override {
+    auto const grid_size = get_lattice().get_grid_dimensions();
+    auto const data = fill_3D_vector_array(data_flat, grid_size);
+    set_boundary_from_grid(*m_boundary, get_lattice(), raster_flat, data);
+  }
+
+  // Pressure tensor
+  boost::optional<Utils::VectorXd<9>>
+  get_node_pressure_tensor(Utils::Vector3i const &node) const override {
+    auto bc = get_block_and_cell(get_lattice(), node, false);
+    if (!bc)
+      return {boost::none};
+
+    auto pdf_field = bc->block->template getData<PdfField>(m_pdf_field_id);
+    auto tensor = lbm::accessor::PressureTensor::get(pdf_field, bc->cell);
+    pressure_tensor_correction(tensor);
+
+    return to_vector9d(tensor);
+  }
+
+  std::vector<double> get_slice_pressure_tensor(
+      Utils::Vector3i const &lower_corner,
+      Utils::Vector3i const &upper_corner) const override {
+    std::vector<double> out;
+    if (auto const ci = get_interval(lower_corner, upper_corner)) {
+      auto const &lattice = get_lattice();
+      auto const &block = *(lattice.get_blocks()->begin());
+      auto const pdf_field = block.template getData<PdfField>(m_pdf_field_id);
+      auto const lower_cell = ci->min();
+      auto const upper_cell = ci->max();
+      auto const n_values = 9u * ci->numCells();
+      out.reserve(n_values);
+      for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) {
+        for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) {
+          for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
+            auto const cell = Cell{x, y, z};
+            auto tensor = lbm::accessor::PressureTensor::get(pdf_field, cell);
+            pressure_tensor_correction(tensor);
+            for (auto i = 0u; i < 9u; ++i) {
+              out.emplace_back(tensor[i]);
+            }
+          }
+        }
+      }
+      assert(out.size() == n_values);
+    }
+    return out;
+  }
+
+  // Global pressure tensor
+  [[nodiscard]] Utils::VectorXd<9> get_pressure_tensor() const override {
+    auto const &blocks = get_lattice().get_blocks();
+    Matrix3<FloatType> tensor(FloatType{0});
+    for (auto block = blocks->begin(); block != blocks->end(); ++block) {
+      auto pdf_field = block->template getData<PdfField>(m_pdf_field_id);
+      WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, {
+        tensor += lbm::accessor::PressureTensor::get(pdf_field, Cell{x, y, z});
+      });
+    }
+    auto const grid_size = get_lattice().get_grid_dimensions();
+    auto const number_of_nodes = Utils::product(grid_size);
+    pressure_tensor_correction(tensor);
+    return to_vector9d(tensor) * (1. / static_cast<double>(number_of_nodes));
+  }
+
+  // Global momentum
+  [[nodiscard]] Utils::Vector3d get_momentum() const override {
+    auto const &blocks = get_lattice().get_blocks();
+    Vector3<FloatType> mom(FloatType{0});
+    for (auto block = blocks->begin(); block != blocks->end(); ++block) {
+      auto pdf_field = block->template getData<PdfField>(m_pdf_field_id);
+      auto force_field =
+          block->template getData<VectorField>(m_last_applied_force_field_id);
+      mom += lbm::accessor::MomentumDensity::reduce(pdf_field, force_field);
+    }
+    return to_vector3d(mom);
+  }
+
+  // Global external force
+  void set_external_force(Utils::Vector3d const &ext_force) override {
+    m_reset_force->set_ext_force(ext_force);
+  }
+
+  [[nodiscard]] Utils::Vector3d get_external_force() const noexcept override {
+    return m_reset_force->get_ext_force();
+  }
+
+  [[nodiscard]] double get_kT() const noexcept override {
+    return numeric_cast<double>(m_kT);
+  }
+
+  [[nodiscard]] boost::optional<uint64_t> get_rng_state() const override {
+    auto const *cm = boost::get<CollisionModelThermalized>(&*m_collision_model);
+    if (!cm or m_kT == 0.) {
+      return {boost::none};
+    }
+    return {static_cast<uint64_t>(cm->time_step_)};
+  }
+
+  void set_rng_state(uint64_t counter) override {
+    auto *cm = boost::get<CollisionModelThermalized>(&*m_collision_model);
+    if (!cm or m_kT == 0.) {
+      throw std::runtime_error("This LB instance is unthermalized");
+    }
+    assert(counter <=
+           static_cast<uint32_t>(std::numeric_limits<uint_t>::max()));
+    cm->time_step_ = static_cast<uint32_t>(counter);
+  }
+
+  [[nodiscard]] LatticeWalberla const &get_lattice() const noexcept override {
+    return *m_lattice;
+  }
+
+  [[nodiscard]] std::size_t get_velocity_field_id() const noexcept override {
+    return m_velocity_field_id;
+  }
+
+  [[nodiscard]] std::size_t get_force_field_id() const noexcept override {
+    return m_force_to_be_applied_id;
+  }
+
+  void register_vtk_field_filters(walberla::vtk::VTKOutput &vtk_obj) override {
+    field::FlagFieldCellFilter<FlagField> fluid_filter(m_flag_field_id);
+    fluid_filter.addFlag(Boundary_flag);
+    vtk_obj.addCellExclusionFilter(fluid_filter);
+  }
+
+protected:
+  template <typename Field_T, uint_t F_SIZE_ARG, typename OutputType>
+  class VTKWriter : public vtk::BlockCellDataWriter<OutputType, F_SIZE_ARG> {
+  public:
+    VTKWriter(ConstBlockDataID const &block_id, std::string const &id,
+              FloatType unit_conversion)
+        : vtk::BlockCellDataWriter<OutputType, F_SIZE_ARG>(id),
+          m_block_id(block_id), m_field(nullptr),
+          m_conversion(unit_conversion) {}
+
+  protected:
+    void configure() override {
+      WALBERLA_ASSERT_NOT_NULLPTR(this->block_);
+      m_field = this->block_->template getData<Field_T>(m_block_id);
+    }
+
+    ConstBlockDataID const m_block_id;
+    Field_T const *m_field;
+    FloatType const m_conversion;
+  };
+
+  template <typename OutputType = float>
+  class DensityVTKWriter : public VTKWriter<PdfField, 1u, OutputType> {
+  public:
+    using VTKWriter<PdfField, 1u, OutputType>::VTKWriter;
+
+  protected:
+    OutputType evaluate(cell_idx_t const x, cell_idx_t const y,
+                        cell_idx_t const z, cell_idx_t const) override {
+      WALBERLA_ASSERT_NOT_NULLPTR(this->m_field);
+      auto const density =
+          lbm::accessor::Density::get(this->m_field, {x, y, z});
+      return numeric_cast<OutputType>(this->m_conversion * density);
+    }
+  };
+
+  template <typename OutputType = float>
+  class VelocityVTKWriter : public VTKWriter<VectorField, 3u, OutputType> {
+  public:
+    using VTKWriter<VectorField, 3u, OutputType>::VTKWriter;
+
+  protected:
+    OutputType evaluate(cell_idx_t const x, cell_idx_t const y,
+                        cell_idx_t const z, cell_idx_t const f) override {
+      WALBERLA_ASSERT_NOT_NULLPTR(this->m_field);
+      auto const velocity =
+          lbm::accessor::Vector::get(this->m_field, {x, y, z});
+      return numeric_cast<OutputType>(this->m_conversion * velocity[uint_c(f)]);
+    }
+  };
+
+  template <typename OutputType = float>
+  class PressureTensorVTKWriter : public VTKWriter<PdfField, 9u, OutputType> {
+  public:
+    PressureTensorVTKWriter(ConstBlockDataID const &block_id,
+                            std::string const &id, FloatType unit_conversion,
+                            FloatType off_diag_factor)
+        : VTKWriter<PdfField, 9u, OutputType>::VTKWriter(block_id, id,
+                                                         unit_conversion),
+          m_off_diag_factor(off_diag_factor) {}
+
+  protected:
+    OutputType evaluate(cell_idx_t const x, cell_idx_t const y,
+                        cell_idx_t const z, cell_idx_t const f) override {
+      WALBERLA_ASSERT_NOT_NULLPTR(this->m_field);
+      auto const pressure =
+          lbm::accessor::PressureTensor::get(this->m_field, {x, y, z});
+      auto const revert_factor =
+          (f == 0 or f == 4 or f == 8) ? FloatType{1} : m_off_diag_factor;
+      return numeric_cast<OutputType>(this->m_conversion * revert_factor *
+                                      pressure[uint_c(f)]);
+    }
+    FloatType const m_off_diag_factor;
+  };
+
+public:
+  void register_vtk_field_writers(walberla::vtk::VTKOutput &vtk_obj,
+                                  LatticeModel::units_map const &units,
+                                  int flag_observables) override {
+    if (flag_observables & static_cast<int>(OutputVTK::density)) {
+      auto const unit_conversion = FloatType_c(units.at("density"));
+      vtk_obj.addCellDataWriter(make_shared<DensityVTKWriter<float>>(
+          m_pdf_field_id, "density", unit_conversion));
+    }
+    if (flag_observables & static_cast<int>(OutputVTK::velocity_vector)) {
+      auto const unit_conversion = FloatType_c(units.at("velocity"));
+      vtk_obj.addCellDataWriter(make_shared<VelocityVTKWriter<float>>(
+          m_velocity_field_id, "velocity_vector", unit_conversion));
+    }
+    if (flag_observables & static_cast<int>(OutputVTK::pressure_tensor)) {
+      auto const unit_conversion = FloatType_c(units.at("pressure"));
+      vtk_obj.addCellDataWriter(make_shared<PressureTensorVTKWriter<float>>(
+          m_pdf_field_id, "pressure_tensor", unit_conversion,
+          pressure_tensor_correction_factor()));
+    }
+  }
+
+  ~LBWalberlaImpl() override = default;
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp b/src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp
new file mode 100644
index 00000000000..1d0a154e5b6
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2020-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "generated_kernels/FieldAccessorsDoublePrecision.h"
+#include "generated_kernels/FieldAccessorsSinglePrecision.h"
+
+#include <walberla_bridge/utils/walberla_utils.hpp>
+
+#include <core/math/Vector3.h>
+#include <domain_decomposition/SharedSweep.h>
+#include <lbm/sweeps/CellwiseSweep.h>
+
+#include <utils/Vector.hpp>
+
+namespace walberla {
+
+/** Sweep that swaps @c force_to_be_applied and @c last_applied_force
+ *  and resets @c force_to_be_applied to the global external force.
+ */
+template <typename PdfField, typename ForceField> class ResetForce {
+  using FloatType = typename PdfField::value_type;
+
+public:
+  ResetForce(BlockDataID const &last_applied_force_field_id,
+             BlockDataID const &force_to_be_applied_id)
+      : m_last_applied_force_field_id(last_applied_force_field_id),
+        m_force_to_be_applied_id(force_to_be_applied_id),
+        m_ext_force(Vector3<FloatType>{0, 0, 0}) {}
+
+  void set_ext_force(Utils::Vector3d const &ext_force) {
+    m_ext_force = to_vector3<FloatType>(ext_force);
+  }
+
+  Utils::Vector3d get_ext_force() const { return to_vector3d(m_ext_force); }
+
+  void operator()(IBlock *block) {
+    auto force_field =
+        block->template getData<ForceField>(m_last_applied_force_field_id);
+    auto force_to_be_applied =
+        block->template getData<ForceField>(m_force_to_be_applied_id);
+
+    force_field->swapDataPointers(force_to_be_applied);
+
+    lbm::accessor::Vector::add_to_all(force_field, m_ext_force);
+    lbm::accessor::Vector::broadcast(force_to_be_applied,
+                                     Vector3<FloatType>{0});
+  }
+
+private:
+  const BlockDataID m_last_applied_force_field_id;
+  const BlockDataID m_force_to_be_applied_id;
+  Vector3<FloatType> m_ext_force;
+};
+
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt
new file mode 100644
index 00000000000..7756ccd63a7
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+target_sources(
+  espresso_walberla
+  PRIVATE StreamSweepSinglePrecision.cpp StreamSweepDoublePrecision.cpp
+          InitialPDFsSetterSinglePrecision.cpp
+          InitialPDFsSetterDoublePrecision.cpp Dynamic_UBB_single_precision.cpp
+          Dynamic_UBB_double_precision.cpp)
+if(ESPRESSO_BUILD_WITH_WALBERLA_AVX)
+  target_sources(
+    espresso_walberla
+    PRIVATE CollideSweepSinglePrecisionLeesEdwardsAVX.cpp
+            CollideSweepDoublePrecisionLeesEdwardsAVX.cpp
+            CollideSweepSinglePrecisionThermalizedAVX.cpp
+            CollideSweepDoublePrecisionThermalizedAVX.cpp)
+else()
+  target_sources(
+    espresso_walberla
+    PRIVATE CollideSweepSinglePrecisionLeesEdwards.cpp
+            CollideSweepDoublePrecisionLeesEdwards.cpp
+            CollideSweepSinglePrecisionThermalized.cpp
+            CollideSweepDoublePrecisionThermalized.cpp)
+endif()
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwards.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwards.cpp
new file mode 100644
index 00000000000..2814fff59d8
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwards.cpp
@@ -0,0 +1,290 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepDoublePrecisionLeesEdwards.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "CollideSweepDoublePrecisionLeesEdwards.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_607d8a5c7ac58c25acf09ad94bb82cf4 {
+static FUNC_PREFIX void collidesweepdoubleprecisionleesedwards_collidesweepdoubleprecisionleesedwards(double *RESTRICT const _data_force, double *RESTRICT _data_pdfs, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double grid_size, double omega_shear, double v_s) {
+  const double xi_0 = ((1.0) / (omega_shear * -0.25 + 2.0));
+  const double rr_0 = xi_0 * (omega_shear * -2.0 + 4.0);
+  for (int64_t ctr_2 = 0; ctr_2 < _size_force_2; ctr_2 += 1) {
+    double *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    double *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    double *RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    double *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    double *RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3;
+    double *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    double *RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_force_1; ctr_1 += 1) {
+      double *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      double *RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_36;
+      double *RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_315;
+      double *RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_310;
+      double *RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_312;
+      double *RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_318;
+      double *RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_39;
+      double *RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_31;
+      double *RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_37;
+      double *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      double *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      double *RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_316;
+      double *RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_38;
+      double *RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_313;
+      double *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      double *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      double *RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_314;
+      double *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      double *RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_317;
+      double *RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_311;
+      double *RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_32;
+      double *RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_35;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_force_0; ctr_0 += 1) {
+        const double xi_25 = _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0];
+        const double xi_26 = _data_pdfs_20_36_10[_stride_pdfs_0 * ctr_0];
+        const double xi_27 = _data_pdfs_20_315_10[_stride_pdfs_0 * ctr_0];
+        const double xi_28 = _data_pdfs_20_310_10[_stride_pdfs_0 * ctr_0];
+        const double xi_29 = _data_pdfs_20_312_10[_stride_pdfs_0 * ctr_0];
+        const double xi_30 = _data_pdfs_20_318_10[_stride_pdfs_0 * ctr_0];
+        const double xi_31 = _data_pdfs_20_39_10[_stride_pdfs_0 * ctr_0];
+        const double xi_32 = _data_pdfs_20_31_10[_stride_pdfs_0 * ctr_0];
+        const double xi_33 = _data_pdfs_20_37_10[_stride_pdfs_0 * ctr_0];
+        const double xi_34 = _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0];
+        const double xi_35 = _data_force_20_31_10[_stride_force_0 * ctr_0];
+        const double xi_36 = _data_pdfs_20_316_10[_stride_pdfs_0 * ctr_0];
+        const double xi_37 = _data_pdfs_20_38_10[_stride_pdfs_0 * ctr_0];
+        const double xi_38 = _data_pdfs_20_313_10[_stride_pdfs_0 * ctr_0];
+        const double xi_39 = _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0];
+        const double xi_40 = _data_force_20_32_10[_stride_force_0 * ctr_0];
+        const double xi_41 = _data_pdfs_20_314_10[_stride_pdfs_0 * ctr_0];
+        const double xi_42 = _data_force_20_30_10[_stride_force_0 * ctr_0];
+        const double xi_43 = _data_pdfs_20_317_10[_stride_pdfs_0 * ctr_0];
+        const double xi_44 = _data_pdfs_20_311_10[_stride_pdfs_0 * ctr_0];
+        const double xi_45 = _data_pdfs_20_32_10[_stride_pdfs_0 * ctr_0];
+        const double xi_46 = _data_pdfs_20_35_10[_stride_pdfs_0 * ctr_0];
+        const double xi_3 = xi_25;
+        const double xi_4 = xi_26;
+        const double xi_5 = xi_27;
+        const double xi_6 = xi_28;
+        const double xi_7 = xi_29;
+        const double xi_8 = xi_30;
+        const double xi_9 = xi_31;
+        const double xi_10 = xi_32;
+        const double xi_11 = xi_33;
+        const double xi_12 = xi_34;
+        const double xi_13 = xi_35;
+        const double xi_14 = xi_36;
+        const double xi_15 = xi_37;
+        const double xi_16 = xi_38;
+        const double xi_17 = xi_39;
+        const double xi_18 = xi_40;
+        const double xi_19 = xi_41;
+        const double xi_20 = xi_42;
+        const double xi_21 = xi_43;
+        const double xi_22 = xi_44;
+        const double xi_23 = xi_45;
+        const double xi_24 = xi_46;
+        const double vel0Term = xi_15 + xi_19 + xi_3 + xi_6 + xi_8;
+        const double vel1Term = xi_10 + xi_11 + xi_22 + xi_5;
+        const double vel2Term = xi_16 + xi_24 + xi_7;
+        const double rho = vel0Term + vel1Term + vel2Term + xi_12 + xi_14 + xi_17 + xi_21 + xi_23 + xi_4 + xi_9;
+        const double xi_1 = ((1.0) / (rho));
+        const double u_0 = xi_1 * xi_20 * 0.5 + xi_1 * (vel0Term + xi_11 * -1.0 + xi_16 * -1.0 + xi_17 * -1.0 + xi_21 * -1.0 + xi_9 * -1.0);
+        const double u_1 = xi_1 * xi_13 * 0.5 + xi_1 * (vel1Term + xi_14 * -1.0 + xi_15 + xi_23 * -1.0 + xi_6 * -1.0 + xi_7 * -1.0 + xi_9 * -1.0);
+        const double u_2 = xi_1 * xi_18 * 0.5 + xi_1 * (vel2Term + xi_14 * -1.0 + xi_19 + xi_21 * -1.0 + xi_22 + xi_4 * -1.0 + xi_5 * -1.0 + xi_8 * -1.0);
+        const double forceTerm_0 = omega_shear * u_0 * xi_20 * 0.5 + omega_shear * u_1 * xi_13 * 0.5 + omega_shear * u_2 * xi_18 * 0.5 + u_0 * xi_20 * -1.0 + u_1 * xi_13 * -1.0 + u_2 * xi_18 * -1.0;
+        const double forceTerm_1 = omega_shear * u_0 * xi_20 * 0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.16666666666666666 + omega_shear * u_2 * xi_18 * 0.083333333333333329 + rr_0 * xi_13 * -0.083333333333333329 + u_0 * xi_20 * -0.16666666666666666 + u_1 * xi_13 * 0.33333333333333331 + u_2 * xi_18 * -0.16666666666666666 + xi_13 * 0.16666666666666666;
+        const double forceTerm_2 = omega_shear * u_0 * xi_20 * 0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.16666666666666666 + omega_shear * u_2 * xi_18 * 0.083333333333333329 + rr_0 * xi_13 * 0.083333333333333329 + u_0 * xi_20 * -0.16666666666666666 + u_1 * xi_13 * 0.33333333333333331 + u_2 * xi_18 * -0.16666666666666666 + xi_13 * -0.16666666666666666;
+        const double forceTerm_3 = omega_shear * u_0 * xi_20 * -0.16666666666666666 + omega_shear * u_1 * xi_13 * 0.083333333333333329 + omega_shear * u_2 * xi_18 * 0.083333333333333329 + rr_0 * xi_20 * 0.083333333333333329 + u_0 * xi_20 * 0.33333333333333331 + u_1 * xi_13 * -0.16666666666666666 + u_2 * xi_18 * -0.16666666666666666 + xi_20 * -0.16666666666666666;
+        const double forceTerm_4 = omega_shear * u_0 * xi_20 * -0.16666666666666666 + omega_shear * u_1 * xi_13 * 0.083333333333333329 + omega_shear * u_2 * xi_18 * 0.083333333333333329 + rr_0 * xi_20 * -0.083333333333333329 + u_0 * xi_20 * 0.33333333333333331 + u_1 * xi_13 * -0.16666666666666666 + u_2 * xi_18 * -0.16666666666666666 + xi_20 * 0.16666666666666666;
+        const double forceTerm_5 = omega_shear * u_0 * xi_20 * 0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.083333333333333329 + omega_shear * u_2 * xi_18 * -0.16666666666666666 + rr_0 * xi_18 * -0.083333333333333329 + u_0 * xi_20 * -0.16666666666666666 + u_1 * xi_13 * -0.16666666666666666 + u_2 * xi_18 * 0.33333333333333331 + xi_18 * 0.16666666666666666;
+        const double forceTerm_6 = omega_shear * u_0 * xi_20 * 0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.083333333333333329 + omega_shear * u_2 * xi_18 * -0.16666666666666666 + rr_0 * xi_18 * 0.083333333333333329 + u_0 * xi_20 * -0.16666666666666666 + u_1 * xi_13 * -0.16666666666666666 + u_2 * xi_18 * 0.33333333333333331 + xi_18 * -0.16666666666666666;
+        const double forceTerm_7 = omega_shear * u_0 * xi_13 * 0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_20 * 0.125 + omega_shear * u_2 * xi_18 * 0.041666666666666664 + rr_0 * xi_13 * -0.041666666666666664 + rr_0 * xi_20 * 0.041666666666666664 + u_0 * xi_13 * -0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_20 * -0.25 + u_2 * xi_18 * -0.083333333333333329 + xi_13 * 0.083333333333333329 + xi_20 * -0.083333333333333329;
+        const double forceTerm_8 = omega_shear * u_0 * xi_13 * -0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_20 * -0.125 + omega_shear * u_2 * xi_18 * 0.041666666666666664 + rr_0 * xi_13 * -0.041666666666666664 + rr_0 * xi_20 * -0.041666666666666664 + u_0 * xi_13 * 0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_20 * 0.25 + u_2 * xi_18 * -0.083333333333333329 + xi_13 * 0.083333333333333329 + xi_20 * 0.083333333333333329;
+        const double forceTerm_9 = omega_shear * u_0 * xi_13 * -0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_20 * -0.125 + omega_shear * u_2 * xi_18 * 0.041666666666666664 + rr_0 * xi_13 * 0.041666666666666664 + rr_0 * xi_20 * 0.041666666666666664 + u_0 * xi_13 * 0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_20 * 0.25 + u_2 * xi_18 * -0.083333333333333329 + xi_13 * -0.083333333333333329 + xi_20 * -0.083333333333333329;
+        const double forceTerm_10 = omega_shear * u_0 * xi_13 * 0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_20 * 0.125 + omega_shear * u_2 * xi_18 * 0.041666666666666664 + rr_0 * xi_13 * 0.041666666666666664 + rr_0 * xi_20 * -0.041666666666666664 + u_0 * xi_13 * -0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_20 * -0.25 + u_2 * xi_18 * -0.083333333333333329 + xi_13 * -0.083333333333333329 + xi_20 * 0.083333333333333329;
+        const double forceTerm_11 = omega_shear * u_0 * xi_20 * 0.041666666666666664 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_18 * -0.125 + omega_shear * u_2 * xi_13 * -0.125 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + rr_0 * xi_13 * -0.041666666666666664 + rr_0 * xi_18 * -0.041666666666666664 + u_0 * xi_20 * -0.083333333333333329 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_18 * 0.25 + u_2 * xi_13 * 0.25 + u_2 * xi_18 * 0.16666666666666666 + xi_13 * 0.083333333333333329 + xi_18 * 0.083333333333333329;
+        const double forceTerm_12 = omega_shear * u_0 * xi_20 * 0.041666666666666664 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_18 * 0.125 + omega_shear * u_2 * xi_13 * 0.125 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + rr_0 * xi_13 * 0.041666666666666664 + rr_0 * xi_18 * -0.041666666666666664 + u_0 * xi_20 * -0.083333333333333329 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_18 * -0.25 + u_2 * xi_13 * -0.25 + u_2 * xi_18 * 0.16666666666666666 + xi_13 * -0.083333333333333329 + xi_18 * 0.083333333333333329;
+        const double forceTerm_13 = omega_shear * u_0 * xi_18 * 0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.041666666666666664 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + omega_shear * u_2 * xi_20 * 0.125 + rr_0 * xi_18 * -0.041666666666666664 + rr_0 * xi_20 * 0.041666666666666664 + u_0 * xi_18 * -0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * -0.083333333333333329 + u_2 * xi_18 * 0.16666666666666666 + u_2 * xi_20 * -0.25 + xi_18 * 0.083333333333333329 + xi_20 * -0.083333333333333329;
+        const double forceTerm_14 = omega_shear * u_0 * xi_18 * -0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.041666666666666664 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + omega_shear * u_2 * xi_20 * -0.125 + rr_0 * xi_18 * -0.041666666666666664 + rr_0 * xi_20 * -0.041666666666666664 + u_0 * xi_18 * 0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * -0.083333333333333329 + u_2 * xi_18 * 0.16666666666666666 + u_2 * xi_20 * 0.25 + xi_18 * 0.083333333333333329 + xi_20 * 0.083333333333333329;
+        const double forceTerm_15 = omega_shear * u_0 * xi_20 * 0.041666666666666664 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_18 * 0.125 + omega_shear * u_2 * xi_13 * 0.125 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + rr_0 * xi_13 * -0.041666666666666664 + rr_0 * xi_18 * 0.041666666666666664 + u_0 * xi_20 * -0.083333333333333329 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_18 * -0.25 + u_2 * xi_13 * -0.25 + u_2 * xi_18 * 0.16666666666666666 + xi_13 * 0.083333333333333329 + xi_18 * -0.083333333333333329;
+        const double forceTerm_16 = omega_shear * u_0 * xi_20 * 0.041666666666666664 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_18 * -0.125 + omega_shear * u_2 * xi_13 * -0.125 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + rr_0 * xi_13 * 0.041666666666666664 + rr_0 * xi_18 * 0.041666666666666664 + u_0 * xi_20 * -0.083333333333333329 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_18 * 0.25 + u_2 * xi_13 * 0.25 + u_2 * xi_18 * 0.16666666666666666 + xi_13 * -0.083333333333333329 + xi_18 * -0.083333333333333329;
+        const double forceTerm_17 = omega_shear * u_0 * xi_18 * -0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.041666666666666664 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + omega_shear * u_2 * xi_20 * -0.125 + rr_0 * xi_18 * 0.041666666666666664 + rr_0 * xi_20 * 0.041666666666666664 + u_0 * xi_18 * 0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * -0.083333333333333329 + u_2 * xi_18 * 0.16666666666666666 + u_2 * xi_20 * 0.25 + xi_18 * -0.083333333333333329 + xi_20 * -0.083333333333333329;
+        const double forceTerm_18 = omega_shear * u_0 * xi_18 * 0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.041666666666666664 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + omega_shear * u_2 * xi_20 * 0.125 + rr_0 * xi_18 * 0.041666666666666664 + rr_0 * xi_20 * -0.041666666666666664 + u_0 * xi_18 * -0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * -0.083333333333333329 + u_2 * xi_18 * 0.16666666666666666 + u_2 * xi_20 * -0.25 + xi_18 * -0.083333333333333329 + xi_20 * 0.083333333333333329;
+        const double u0Mu1 = u_0 + u_1 * -1.0;
+        const double u0Pu1 = u_0 + u_1;
+        const double u1Pu2 = u_1 + u_2;
+        const double u1Mu2 = u_1 + u_2 * -1.0;
+        const double u0Mu2 = u_0 + u_2 * -1.0;
+        const double u0Pu2 = u_0 + u_2;
+        const double f_eq_common = rho * -1.0 * (u_0 * u_0) + rho * -1.0 * (u_1 * u_1) + rho * -1.0 * (u_2 * u_2) + rho;
+        _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0] = forceTerm_0 + omega_shear * (f_eq_common * 0.33333333333333331 + xi_12 * -1.0) + xi_12;
+        _data_pdfs_20_31_10[_stride_pdfs_0 * ctr_0] = forceTerm_1 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * (u_1 * u_1)) + xi_10 * -0.5 + xi_23 * -0.5) + rr_0 * (rho * u_1 * 0.16666666666666666 + xi_10 * -0.5 + xi_23 * 0.5) + xi_10 + ((-1.0 <= grid_size * -1.0 + ((double)(ctr_1))) ? (rho * v_s * (u_0 * 2.0 + v_s) * 0.16666666666666666) : (0.0));
+        _data_pdfs_20_32_10[_stride_pdfs_0 * ctr_0] = forceTerm_2 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * (u_1 * u_1)) + xi_10 * -0.5 + xi_23 * -0.5) + rr_0 * (rho * u_1 * -0.16666666666666666 + xi_10 * 0.5 + xi_23 * -0.5) + xi_23 + ((0.0 >= ((double)(ctr_1))) ? (rho * v_s * (u_0 * -2.0 + v_s) * 0.16666666666666666) : (0.0));
+        _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0] = forceTerm_3 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * (u_0 * u_0)) + xi_17 * -0.5 + xi_3 * -0.5) + rr_0 * (rho * u_0 * -0.16666666666666666 + xi_17 * -0.5 + xi_3 * 0.5) + xi_17;
+        _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0] = forceTerm_4 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * (u_0 * u_0)) + xi_17 * -0.5 + xi_3 * -0.5) + rr_0 * (rho * u_0 * 0.16666666666666666 + xi_17 * 0.5 + xi_3 * -0.5) + xi_3;
+        _data_pdfs_20_35_10[_stride_pdfs_0 * ctr_0] = forceTerm_5 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * (u_2 * u_2)) + xi_24 * -0.5 + xi_4 * -0.5) + rr_0 * (rho * u_2 * 0.16666666666666666 + xi_24 * -0.5 + xi_4 * 0.5) + xi_24;
+        _data_pdfs_20_36_10[_stride_pdfs_0 * ctr_0] = forceTerm_6 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * (u_2 * u_2)) + xi_24 * -0.5 + xi_4 * -0.5) + rr_0 * (rho * u_2 * -0.16666666666666666 + xi_24 * 0.5 + xi_4 * -0.5) + xi_4;
+        _data_pdfs_20_37_10[_stride_pdfs_0 * ctr_0] = forceTerm_7 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_2 * u_2) + 0.125 * (u0Mu1 * u0Mu1)) + xi_11 * -0.5 + xi_6 * -0.5) + rr_0 * (rho * u0Mu1 * -0.083333333333333329 + xi_11 * -0.5 + xi_6 * 0.5) + xi_11 + ((-1.0 <= grid_size * -1.0 + ((double)(ctr_1))) ? (rho * v_s * (u_0 * -2.0 + u_1 * 3.0 + v_s * -1.0 + 1.0) * 0.083333333333333329) : (0.0));
+        _data_pdfs_20_38_10[_stride_pdfs_0 * ctr_0] = forceTerm_8 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_2 * u_2) + 0.125 * (u0Pu1 * u0Pu1)) + xi_15 * -0.5 + xi_9 * -0.5) + rr_0 * (rho * u0Pu1 * 0.083333333333333329 + xi_15 * -0.5 + xi_9 * 0.5) + xi_15 + ((-1.0 <= grid_size * -1.0 + ((double)(ctr_1))) ? (rho * v_s * (u_0 * 2.0 + u_1 * 3.0 + v_s + 1.0) * -0.083333333333333329) : (0.0));
+        _data_pdfs_20_39_10[_stride_pdfs_0 * ctr_0] = forceTerm_9 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_2 * u_2) + 0.125 * (u0Pu1 * u0Pu1)) + xi_15 * -0.5 + xi_9 * -0.5) + rr_0 * (rho * u0Pu1 * -0.083333333333333329 + xi_15 * 0.5 + xi_9 * -0.5) + xi_9 + ((0.0 >= ((double)(ctr_1))) ? (rho * v_s * (u_0 * 2.0 + u_1 * 3.0 + v_s * -1.0 - 1.0) * 0.083333333333333329) : (0.0));
+        _data_pdfs_20_310_10[_stride_pdfs_0 * ctr_0] = forceTerm_10 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_2 * u_2) + 0.125 * (u0Mu1 * u0Mu1)) + xi_11 * -0.5 + xi_6 * -0.5) + rr_0 * (rho * u0Mu1 * 0.083333333333333329 + xi_11 * 0.5 + xi_6 * -0.5) + xi_6 + ((0.0 >= ((double)(ctr_1))) ? (rho * v_s * (u_0 * 2.0 + u_1 * -3.0 + v_s * -1.0 + 1.0) * 0.083333333333333329) : (0.0));
+        _data_pdfs_20_311_10[_stride_pdfs_0 * ctr_0] = forceTerm_11 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_0 * u_0) + 0.125 * (u1Pu2 * u1Pu2)) + xi_14 * -0.5 + xi_22 * -0.5) + rr_0 * (rho * u1Pu2 * 0.083333333333333329 + xi_14 * 0.5 + xi_22 * -0.5) + xi_22;
+        _data_pdfs_20_312_10[_stride_pdfs_0 * ctr_0] = forceTerm_12 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_0 * u_0) + 0.125 * (u1Mu2 * u1Mu2)) + xi_5 * -0.5 + xi_7 * -0.5) + rr_0 * (rho * u1Mu2 * -0.083333333333333329 + xi_5 * 0.5 + xi_7 * -0.5) + xi_7;
+        _data_pdfs_20_313_10[_stride_pdfs_0 * ctr_0] = forceTerm_13 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_1 * u_1) + 0.125 * (u0Mu2 * u0Mu2)) + xi_16 * -0.5 + xi_8 * -0.5) + rr_0 * (rho * u0Mu2 * -0.083333333333333329 + xi_16 * -0.5 + xi_8 * 0.5) + xi_16;
+        _data_pdfs_20_314_10[_stride_pdfs_0 * ctr_0] = forceTerm_14 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_1 * u_1) + 0.125 * (u0Pu2 * u0Pu2)) + xi_19 * -0.5 + xi_21 * -0.5) + rr_0 * (rho * u0Pu2 * 0.083333333333333329 + xi_19 * -0.5 + xi_21 * 0.5) + xi_19;
+        _data_pdfs_20_315_10[_stride_pdfs_0 * ctr_0] = forceTerm_15 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_0 * u_0) + 0.125 * (u1Mu2 * u1Mu2)) + xi_5 * -0.5 + xi_7 * -0.5) + rr_0 * (rho * u1Mu2 * 0.083333333333333329 + xi_5 * -0.5 + xi_7 * 0.5) + xi_5;
+        _data_pdfs_20_316_10[_stride_pdfs_0 * ctr_0] = forceTerm_16 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_0 * u_0) + 0.125 * (u1Pu2 * u1Pu2)) + xi_14 * -0.5 + xi_22 * -0.5) + rr_0 * (rho * u1Pu2 * -0.083333333333333329 + xi_14 * -0.5 + xi_22 * 0.5) + xi_14;
+        _data_pdfs_20_317_10[_stride_pdfs_0 * ctr_0] = forceTerm_17 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_1 * u_1) + 0.125 * (u0Pu2 * u0Pu2)) + xi_19 * -0.5 + xi_21 * -0.5) + rr_0 * (rho * u0Pu2 * -0.083333333333333329 + xi_19 * 0.5 + xi_21 * -0.5) + xi_21;
+        _data_pdfs_20_318_10[_stride_pdfs_0 * ctr_0] = forceTerm_18 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * (u_1 * u_1) + 0.125 * (u0Mu2 * u0Mu2)) + xi_16 * -0.5 + xi_8 * -0.5) + rr_0 * (rho * u0Mu2 * 0.083333333333333329 + xi_16 * 0.5 + xi_8 * -0.5) + xi_8;
+      }
+    }
+  }
+}
+} // namespace internal_607d8a5c7ac58c25acf09ad94bb82cf4
+
+void CollideSweepDoublePrecisionLeesEdwards::run(IBlock *block) {
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+
+  auto &v_s = this->v_s_;
+  auto &omega_shear = this->omega_shear_;
+  auto &grid_size = this->grid_size_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_607d8a5c7ac58c25acf09ad94bb82cf4::collidesweepdoubleprecisionleesedwards_collidesweepdoubleprecisionleesedwards(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, grid_size, omega_shear, v_s);
+}
+
+void CollideSweepDoublePrecisionLeesEdwards::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+
+  auto &v_s = this->v_s_;
+  auto &omega_shear = this->omega_shear_;
+  auto &grid_size = this->grid_size_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_607d8a5c7ac58c25acf09ad94bb82cf4::collidesweepdoubleprecisionleesedwards_collidesweepdoubleprecisionleesedwards(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, grid_size, omega_shear, v_s);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwards.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwards.h
new file mode 100644
index 00000000000..aa33168b644
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwards.h
@@ -0,0 +1,108 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepDoublePrecisionLeesEdwards.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class CollideSweepDoublePrecisionLeesEdwards {
+public:
+  CollideSweepDoublePrecisionLeesEdwards(BlockDataID forceID_,
+                                         BlockDataID pdfsID_, double grid_size,
+                                         double omega_shear, double v_s)
+      : forceID(forceID_), pdfsID(pdfsID_), grid_size_(grid_size),
+        omega_shear_(omega_shear), v_s_(v_s){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<CollideSweepDoublePrecisionLeesEdwards> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<CollideSweepDoublePrecisionLeesEdwards> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  double grid_size_;
+  double omega_shear_;
+  double v_s_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.cpp
new file mode 100644
index 00000000000..5c339796387
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.cpp
@@ -0,0 +1,399 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepDoublePrecisionLeesEdwardsAVX.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "CollideSweepDoublePrecisionLeesEdwardsAVX.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#include <immintrin.h>
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_f11a519921c681cbc9d0b2f51454c920 {
+static FUNC_PREFIX void collidesweepdoubleprecisionleesedwardsavx_collidesweepdoubleprecisionleesedwardsavx(double *RESTRICT const _data_force, double *RESTRICT _data_pdfs, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double grid_size, double omega_shear, double v_s) {
+  const double xi_0 = ((1.0) / (omega_shear * -0.25 + 2.0));
+  const double rr_0 = xi_0 * (omega_shear * -2.0 + 4.0);
+  for (int64_t ctr_2 = 0; ctr_2 < _size_force_2; ctr_2 += 1) {
+    double *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    double *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    double *RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    double *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    double *RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3;
+    double *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    double *RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_force_1; ctr_1 += 1) {
+      double *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      double *RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_36;
+      double *RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_315;
+      double *RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_310;
+      double *RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_312;
+      double *RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_318;
+      double *RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_39;
+      double *RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_31;
+      double *RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_37;
+      double *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      double *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      double *RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_316;
+      double *RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_38;
+      double *RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_313;
+      double *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      double *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      double *RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_314;
+      double *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      double *RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_317;
+      double *RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_311;
+      double *RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_32;
+      double *RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_35;
+      {
+        for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((_size_force_0) / (4)) * (4); ctr_0 += 4) {
+          const __m256d xi_25 = _mm256_load_pd(&_data_pdfs_20_34_10[ctr_0]);
+          const __m256d xi_26 = _mm256_load_pd(&_data_pdfs_20_36_10[ctr_0]);
+          const __m256d xi_27 = _mm256_load_pd(&_data_pdfs_20_315_10[ctr_0]);
+          const __m256d xi_28 = _mm256_load_pd(&_data_pdfs_20_310_10[ctr_0]);
+          const __m256d xi_29 = _mm256_load_pd(&_data_pdfs_20_312_10[ctr_0]);
+          const __m256d xi_30 = _mm256_load_pd(&_data_pdfs_20_318_10[ctr_0]);
+          const __m256d xi_31 = _mm256_load_pd(&_data_pdfs_20_39_10[ctr_0]);
+          const __m256d xi_32 = _mm256_load_pd(&_data_pdfs_20_31_10[ctr_0]);
+          const __m256d xi_33 = _mm256_load_pd(&_data_pdfs_20_37_10[ctr_0]);
+          const __m256d xi_34 = _mm256_load_pd(&_data_pdfs_20_30_10[ctr_0]);
+          const __m256d xi_35 = _mm256_load_pd(&_data_force_20_31_10[ctr_0]);
+          const __m256d xi_36 = _mm256_load_pd(&_data_pdfs_20_316_10[ctr_0]);
+          const __m256d xi_37 = _mm256_load_pd(&_data_pdfs_20_38_10[ctr_0]);
+          const __m256d xi_38 = _mm256_load_pd(&_data_pdfs_20_313_10[ctr_0]);
+          const __m256d xi_39 = _mm256_load_pd(&_data_pdfs_20_33_10[ctr_0]);
+          const __m256d xi_40 = _mm256_load_pd(&_data_force_20_32_10[ctr_0]);
+          const __m256d xi_41 = _mm256_load_pd(&_data_pdfs_20_314_10[ctr_0]);
+          const __m256d xi_42 = _mm256_load_pd(&_data_force_20_30_10[ctr_0]);
+          const __m256d xi_43 = _mm256_load_pd(&_data_pdfs_20_317_10[ctr_0]);
+          const __m256d xi_44 = _mm256_load_pd(&_data_pdfs_20_311_10[ctr_0]);
+          const __m256d xi_45 = _mm256_load_pd(&_data_pdfs_20_32_10[ctr_0]);
+          const __m256d xi_46 = _mm256_load_pd(&_data_pdfs_20_35_10[ctr_0]);
+          const __m256d xi_3 = xi_25;
+          const __m256d xi_4 = xi_26;
+          const __m256d xi_5 = xi_27;
+          const __m256d xi_6 = xi_28;
+          const __m256d xi_7 = xi_29;
+          const __m256d xi_8 = xi_30;
+          const __m256d xi_9 = xi_31;
+          const __m256d xi_10 = xi_32;
+          const __m256d xi_11 = xi_33;
+          const __m256d xi_12 = xi_34;
+          const __m256d xi_13 = xi_35;
+          const __m256d xi_14 = xi_36;
+          const __m256d xi_15 = xi_37;
+          const __m256d xi_16 = xi_38;
+          const __m256d xi_17 = xi_39;
+          const __m256d xi_18 = xi_40;
+          const __m256d xi_19 = xi_41;
+          const __m256d xi_20 = xi_42;
+          const __m256d xi_21 = xi_43;
+          const __m256d xi_22 = xi_44;
+          const __m256d xi_23 = xi_45;
+          const __m256d xi_24 = xi_46;
+          const __m256d vel0Term = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_15, xi_19), xi_3), xi_6), xi_8);
+          const __m256d vel1Term = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_10, xi_11), xi_22), xi_5);
+          const __m256d vel2Term = _mm256_add_pd(_mm256_add_pd(xi_16, xi_24), xi_7);
+          const __m256d rho = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(vel0Term, vel1Term), vel2Term), xi_12), xi_14), xi_17), xi_21), xi_23), xi_4), xi_9);
+          const __m256d xi_1 = _mm256_div_pd(_mm256_set_pd(1.0, 1.0, 1.0, 1.0), rho);
+          const __m256d u_0 = _mm256_add_pd(_mm256_mul_pd(xi_1, _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_11, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_16, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_17, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_21, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_9, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), vel0Term)), _mm256_mul_pd(_mm256_mul_pd(xi_1, xi_20), _mm256_set_pd(0.5, 0.5, 0.5, 0.5)));
+          const __m256d u_1 = _mm256_add_pd(_mm256_mul_pd(xi_1, _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_14, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_23, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_6, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_7, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_9, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), vel1Term), xi_15)), _mm256_mul_pd(_mm256_mul_pd(xi_1, xi_13), _mm256_set_pd(0.5, 0.5, 0.5, 0.5)));
+          const __m256d u_2 = _mm256_add_pd(_mm256_mul_pd(xi_1, _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_14, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_21, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_4, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_5, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_8, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), vel2Term), xi_19), xi_22)), _mm256_mul_pd(_mm256_mul_pd(xi_1, xi_18), _mm256_set_pd(0.5, 0.5, 0.5, 0.5)));
+          const __m256d forceTerm_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666)), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331))), _mm256_mul_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666)), _mm256_mul_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666)), _mm256_mul_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666)), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331))), _mm256_mul_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666)), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331))), _mm256_mul_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_6 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666)), _mm256_mul_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_7 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_mul_pd(xi_20, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_13), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_20), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_13), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_20), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_8 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_mul_pd(xi_20, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_13), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_20), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_13), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_20), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_9 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_mul_pd(xi_20, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_13), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_20), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_13), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_20), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_10 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_mul_pd(xi_13, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_13), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_20), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_13), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_20), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_11 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_mul_pd(xi_18, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_18), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_13), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_18), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_13), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_mul_pd(xi_13, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_18), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_13), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_18), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_13), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_mul_pd(xi_20, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_18), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_20), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_18), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_20), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_mul_pd(xi_20, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_18), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_20), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_18), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_20), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_15 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_mul_pd(xi_18, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_18), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_13), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_18), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_13), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_16 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_mul_pd(xi_18, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_18), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_13), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(xi_13, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_18), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_13), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_17 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_mul_pd(xi_20, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_18), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_20), _mm256_set_pd(0.25, 0.25, 0.25, 0.25))), _mm256_mul_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_18), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_20), _mm256_set_pd(-0.125, -0.125, -0.125, -0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d forceTerm_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_mul_pd(xi_18, _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(xi_18, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_mul_pd(_mm256_mul_pd(u_0, xi_18), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(u_2, xi_20), _mm256_set_pd(-0.25, -0.25, -0.25, -0.25))), _mm256_mul_pd(_mm256_mul_pd(xi_20, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0))), _mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_18), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_20), _mm256_set_pd(0.125, 0.125, 0.125, 0.125)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_1, xi_13), _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_0, xi_20), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(u_2, xi_18), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256d u0Mu1 = _mm256_add_pd(_mm256_mul_pd(u_1, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), u_0);
+          const __m256d u0Pu1 = _mm256_add_pd(u_0, u_1);
+          const __m256d u1Pu2 = _mm256_add_pd(u_1, u_2);
+          const __m256d u1Mu2 = _mm256_add_pd(_mm256_mul_pd(u_2, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), u_1);
+          const __m256d u0Mu2 = _mm256_add_pd(_mm256_mul_pd(u_2, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), u_0);
+          const __m256d u0Pu2 = _mm256_add_pd(u_0, u_2);
+          const __m256d f_eq_common = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(u_0, u_0)), _mm256_mul_pd(_mm256_mul_pd(rho, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(u_1, u_1))), _mm256_mul_pd(_mm256_mul_pd(rho, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(u_2, u_2))), rho);
+          _mm256_store_pd(&_data_pdfs_20_30_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(xi_12, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(f_eq_common, _mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331))), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)), forceTerm_0), xi_12));
+          _mm256_store_pd(&_data_pdfs_20_31_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_23, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_10, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u_1), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666)), _mm256_mul_pd(xi_10, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_23, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331), _mm256_mul_pd(u_1, u_1)), _mm256_set_pd(-0.1111111111111111, -0.1111111111111111, -0.1111111111111111, -0.1111111111111111)))))), _mm256_blendv_pd(_mm256_set_pd(0.0, 0.0, 0.0, 0.0), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(rho, _mm256_add_pd(_mm256_mul_pd(u_0, _mm256_set_pd(2.0, 2.0, 2.0, 2.0)), _mm256_set_pd(v_s, v_s, v_s, v_s))), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666)), _mm256_set_pd(v_s, v_s, v_s, v_s)), _mm256_cmp_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(grid_size, grid_size, grid_size, grid_size)), _mm256_set_pd(((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1)))), _CMP_LE_OQ))), forceTerm_1), xi_10));
+          _mm256_store_pd(&_data_pdfs_20_32_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_10, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_23, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u_1), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666)), _mm256_mul_pd(xi_10, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_23, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331), _mm256_mul_pd(u_1, u_1)), _mm256_set_pd(-0.1111111111111111, -0.1111111111111111, -0.1111111111111111, -0.1111111111111111)))))), _mm256_blendv_pd(_mm256_set_pd(0.0, 0.0, 0.0, 0.0), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(rho, _mm256_add_pd(_mm256_mul_pd(u_0, _mm256_set_pd(-2.0, -2.0, -2.0, -2.0)), _mm256_set_pd(v_s, v_s, v_s, v_s))), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666)), _mm256_set_pd(v_s, v_s, v_s, v_s)), _mm256_cmp_pd(_mm256_set_pd(0.0, 0.0, 0.0, 0.0), _mm256_set_pd(((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1))), _CMP_GE_OQ))), forceTerm_2), xi_23));
+          _mm256_store_pd(&_data_pdfs_20_33_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_3, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_17, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u_0), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666)), _mm256_mul_pd(xi_17, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_3, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331), _mm256_mul_pd(u_0, u_0)), _mm256_set_pd(-0.1111111111111111, -0.1111111111111111, -0.1111111111111111, -0.1111111111111111)))))), forceTerm_3), xi_17));
+          _mm256_store_pd(&_data_pdfs_20_34_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_17, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_3, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u_0), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666)), _mm256_mul_pd(xi_17, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_3, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331), _mm256_mul_pd(u_0, u_0)), _mm256_set_pd(-0.1111111111111111, -0.1111111111111111, -0.1111111111111111, -0.1111111111111111)))))), forceTerm_4), xi_3));
+          _mm256_store_pd(&_data_pdfs_20_35_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_4, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_24, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u_2), _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666)), _mm256_mul_pd(xi_24, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_4, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331), _mm256_mul_pd(u_2, u_2)), _mm256_set_pd(-0.1111111111111111, -0.1111111111111111, -0.1111111111111111, -0.1111111111111111)))))), forceTerm_5), xi_24));
+          _mm256_store_pd(&_data_pdfs_20_36_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_24, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_4, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u_2), _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666)), _mm256_mul_pd(xi_24, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_4, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331), _mm256_mul_pd(u_2, u_2)), _mm256_set_pd(-0.1111111111111111, -0.1111111111111111, -0.1111111111111111, -0.1111111111111111)))))), forceTerm_6), xi_4));
+          _mm256_store_pd(&_data_pdfs_20_37_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_6, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_11, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u0Mu1), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_11, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_6, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u0Mu1, u0Mu1)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_2, u_2))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), _mm256_blendv_pd(_mm256_set_pd(0.0, 0.0, 0.0, 0.0), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(v_s, v_s, v_s, v_s)), _mm256_mul_pd(u_1, _mm256_set_pd(3.0, 3.0, 3.0, 3.0))), _mm256_mul_pd(u_0, _mm256_set_pd(-2.0, -2.0, -2.0, -2.0))), _mm256_set_pd(1.0, 1.0, 1.0, 1.0))), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(v_s, v_s, v_s, v_s)), _mm256_cmp_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(grid_size, grid_size, grid_size, grid_size)), _mm256_set_pd(((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1)))), _CMP_LE_OQ))), forceTerm_7), xi_11));
+          _mm256_store_pd(&_data_pdfs_20_38_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_9, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_15, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u0Pu1), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_15, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_9, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u0Pu1, u0Pu1)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_2, u_2))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), _mm256_blendv_pd(_mm256_set_pd(0.0, 0.0, 0.0, 0.0), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(u_0, _mm256_set_pd(2.0, 2.0, 2.0, 2.0)), _mm256_mul_pd(u_1, _mm256_set_pd(3.0, 3.0, 3.0, 3.0))), _mm256_set_pd(1.0, 1.0, 1.0, 1.0)), _mm256_set_pd(v_s, v_s, v_s, v_s))), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329)), _mm256_set_pd(v_s, v_s, v_s, v_s)), _mm256_cmp_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(grid_size, grid_size, grid_size, grid_size)), _mm256_set_pd(((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1)))), _CMP_LE_OQ))), forceTerm_8), xi_15));
+          _mm256_store_pd(&_data_pdfs_20_39_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_15, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_9, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u0Pu1), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_15, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_9, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u0Pu1, u0Pu1)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_2, u_2))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), _mm256_blendv_pd(_mm256_set_pd(0.0, 0.0, 0.0, 0.0), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(v_s, v_s, v_s, v_s)), _mm256_mul_pd(u_0, _mm256_set_pd(2.0, 2.0, 2.0, 2.0))), _mm256_mul_pd(u_1, _mm256_set_pd(3.0, 3.0, 3.0, 3.0))), _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(v_s, v_s, v_s, v_s)), _mm256_cmp_pd(_mm256_set_pd(0.0, 0.0, 0.0, 0.0), _mm256_set_pd(((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1))), _CMP_GE_OQ))), forceTerm_9), xi_9));
+          _mm256_store_pd(&_data_pdfs_20_310_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_11, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_6, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u0Mu1), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_11, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_6, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u0Mu1, u0Mu1)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_2, u_2))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), _mm256_blendv_pd(_mm256_set_pd(0.0, 0.0, 0.0, 0.0), _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(v_s, v_s, v_s, v_s)), _mm256_mul_pd(u_0, _mm256_set_pd(2.0, 2.0, 2.0, 2.0))), _mm256_mul_pd(u_1, _mm256_set_pd(-3.0, -3.0, -3.0, -3.0))), _mm256_set_pd(1.0, 1.0, 1.0, 1.0))), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_set_pd(v_s, v_s, v_s, v_s)), _mm256_cmp_pd(_mm256_set_pd(0.0, 0.0, 0.0, 0.0), _mm256_set_pd(((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1)), ((double)(ctr_1))), _CMP_GE_OQ))), forceTerm_10), xi_6));
+          _mm256_store_pd(&_data_pdfs_20_311_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_14, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_22, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u1Pu2), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_14, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_22, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u1Pu2, u1Pu2)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_0, u_0))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), forceTerm_11), xi_22));
+          _mm256_store_pd(&_data_pdfs_20_312_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_5, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_7, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u1Mu2), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_5, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_7, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u1Mu2, u1Mu2)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_0, u_0))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), forceTerm_12), xi_7));
+          _mm256_store_pd(&_data_pdfs_20_313_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_8, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_16, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u0Mu2), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_16, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_8, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u0Mu2, u0Mu2)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_1, u_1))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), forceTerm_13), xi_16));
+          _mm256_store_pd(&_data_pdfs_20_314_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_21, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_19, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u0Pu2), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_19, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_21, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u0Pu2, u0Pu2)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_1, u_1))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), forceTerm_14), xi_19));
+          _mm256_store_pd(&_data_pdfs_20_315_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_7, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_5, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u1Mu2), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_5, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_7, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u1Mu2, u1Mu2)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_0, u_0))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), forceTerm_15), xi_5));
+          _mm256_store_pd(&_data_pdfs_20_316_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_22, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_14, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u1Pu2), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_14, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_22, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u1Pu2, u1Pu2)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_0, u_0))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), forceTerm_16), xi_14));
+          _mm256_store_pd(&_data_pdfs_20_317_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_19, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_21, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u0Pu2), _mm256_set_pd(-0.083333333333333329, -0.083333333333333329, -0.083333333333333329, -0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_19, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_21, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u0Pu2, u0Pu2)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_1, u_1))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), forceTerm_17), xi_21));
+          _mm256_store_pd(&_data_pdfs_20_318_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_16, _mm256_set_pd(0.5, 0.5, 0.5, 0.5)), _mm256_mul_pd(xi_8, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(_mm256_mul_pd(rho, u0Mu2), _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329))), _mm256_set_pd(rr_0, rr_0, rr_0, rr_0)), _mm256_mul_pd(_mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(f_eq_common, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_16, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(xi_8, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), _mm256_mul_pd(rho, _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(0.125, 0.125, 0.125, 0.125), _mm256_mul_pd(u0Mu2, u0Mu2)), _mm256_mul_pd(_mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664), _mm256_mul_pd(u_1, u_1))), _mm256_set_pd(-0.013888888888888888, -0.013888888888888888, -0.013888888888888888, -0.013888888888888888)))))), forceTerm_18), xi_8));
+        }
+        for (int64_t ctr_0 = (int64_t)((_size_force_0) / (4)) * (4); ctr_0 < _size_force_0; ctr_0 += 1) {
+          const double xi_25 = _data_pdfs_20_34_10[ctr_0];
+          const double xi_26 = _data_pdfs_20_36_10[ctr_0];
+          const double xi_27 = _data_pdfs_20_315_10[ctr_0];
+          const double xi_28 = _data_pdfs_20_310_10[ctr_0];
+          const double xi_29 = _data_pdfs_20_312_10[ctr_0];
+          const double xi_30 = _data_pdfs_20_318_10[ctr_0];
+          const double xi_31 = _data_pdfs_20_39_10[ctr_0];
+          const double xi_32 = _data_pdfs_20_31_10[ctr_0];
+          const double xi_33 = _data_pdfs_20_37_10[ctr_0];
+          const double xi_34 = _data_pdfs_20_30_10[ctr_0];
+          const double xi_35 = _data_force_20_31_10[ctr_0];
+          const double xi_36 = _data_pdfs_20_316_10[ctr_0];
+          const double xi_37 = _data_pdfs_20_38_10[ctr_0];
+          const double xi_38 = _data_pdfs_20_313_10[ctr_0];
+          const double xi_39 = _data_pdfs_20_33_10[ctr_0];
+          const double xi_40 = _data_force_20_32_10[ctr_0];
+          const double xi_41 = _data_pdfs_20_314_10[ctr_0];
+          const double xi_42 = _data_force_20_30_10[ctr_0];
+          const double xi_43 = _data_pdfs_20_317_10[ctr_0];
+          const double xi_44 = _data_pdfs_20_311_10[ctr_0];
+          const double xi_45 = _data_pdfs_20_32_10[ctr_0];
+          const double xi_46 = _data_pdfs_20_35_10[ctr_0];
+          const double xi_3 = xi_25;
+          const double xi_4 = xi_26;
+          const double xi_5 = xi_27;
+          const double xi_6 = xi_28;
+          const double xi_7 = xi_29;
+          const double xi_8 = xi_30;
+          const double xi_9 = xi_31;
+          const double xi_10 = xi_32;
+          const double xi_11 = xi_33;
+          const double xi_12 = xi_34;
+          const double xi_13 = xi_35;
+          const double xi_14 = xi_36;
+          const double xi_15 = xi_37;
+          const double xi_16 = xi_38;
+          const double xi_17 = xi_39;
+          const double xi_18 = xi_40;
+          const double xi_19 = xi_41;
+          const double xi_20 = xi_42;
+          const double xi_21 = xi_43;
+          const double xi_22 = xi_44;
+          const double xi_23 = xi_45;
+          const double xi_24 = xi_46;
+          const double vel0Term = xi_15 + xi_19 + xi_3 + xi_6 + xi_8;
+          const double vel1Term = xi_10 + xi_11 + xi_22 + xi_5;
+          const double vel2Term = xi_16 + xi_24 + xi_7;
+          const double rho = vel0Term + vel1Term + vel2Term + xi_12 + xi_14 + xi_17 + xi_21 + xi_23 + xi_4 + xi_9;
+          const double xi_1 = ((1.0) / (rho));
+          const double u_0 = xi_1 * xi_20 * 0.5 + xi_1 * (vel0Term + xi_11 * -1.0 + xi_16 * -1.0 + xi_17 * -1.0 + xi_21 * -1.0 + xi_9 * -1.0);
+          const double u_1 = xi_1 * xi_13 * 0.5 + xi_1 * (vel1Term + xi_14 * -1.0 + xi_15 + xi_23 * -1.0 + xi_6 * -1.0 + xi_7 * -1.0 + xi_9 * -1.0);
+          const double u_2 = xi_1 * xi_18 * 0.5 + xi_1 * (vel2Term + xi_14 * -1.0 + xi_19 + xi_21 * -1.0 + xi_22 + xi_4 * -1.0 + xi_5 * -1.0 + xi_8 * -1.0);
+          const double forceTerm_0 = omega_shear * u_0 * xi_20 * 0.5 + omega_shear * u_1 * xi_13 * 0.5 + omega_shear * u_2 * xi_18 * 0.5 + u_0 * xi_20 * -1.0 + u_1 * xi_13 * -1.0 + u_2 * xi_18 * -1.0;
+          const double forceTerm_1 = omega_shear * u_0 * xi_20 * 0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.16666666666666666 + omega_shear * u_2 * xi_18 * 0.083333333333333329 + rr_0 * xi_13 * -0.083333333333333329 + u_0 * xi_20 * -0.16666666666666666 + u_1 * xi_13 * 0.33333333333333331 + u_2 * xi_18 * -0.16666666666666666 + xi_13 * 0.16666666666666666;
+          const double forceTerm_2 = omega_shear * u_0 * xi_20 * 0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.16666666666666666 + omega_shear * u_2 * xi_18 * 0.083333333333333329 + rr_0 * xi_13 * 0.083333333333333329 + u_0 * xi_20 * -0.16666666666666666 + u_1 * xi_13 * 0.33333333333333331 + u_2 * xi_18 * -0.16666666666666666 + xi_13 * -0.16666666666666666;
+          const double forceTerm_3 = omega_shear * u_0 * xi_20 * -0.16666666666666666 + omega_shear * u_1 * xi_13 * 0.083333333333333329 + omega_shear * u_2 * xi_18 * 0.083333333333333329 + rr_0 * xi_20 * 0.083333333333333329 + u_0 * xi_20 * 0.33333333333333331 + u_1 * xi_13 * -0.16666666666666666 + u_2 * xi_18 * -0.16666666666666666 + xi_20 * -0.16666666666666666;
+          const double forceTerm_4 = omega_shear * u_0 * xi_20 * -0.16666666666666666 + omega_shear * u_1 * xi_13 * 0.083333333333333329 + omega_shear * u_2 * xi_18 * 0.083333333333333329 + rr_0 * xi_20 * -0.083333333333333329 + u_0 * xi_20 * 0.33333333333333331 + u_1 * xi_13 * -0.16666666666666666 + u_2 * xi_18 * -0.16666666666666666 + xi_20 * 0.16666666666666666;
+          const double forceTerm_5 = omega_shear * u_0 * xi_20 * 0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.083333333333333329 + omega_shear * u_2 * xi_18 * -0.16666666666666666 + rr_0 * xi_18 * -0.083333333333333329 + u_0 * xi_20 * -0.16666666666666666 + u_1 * xi_13 * -0.16666666666666666 + u_2 * xi_18 * 0.33333333333333331 + xi_18 * 0.16666666666666666;
+          const double forceTerm_6 = omega_shear * u_0 * xi_20 * 0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.083333333333333329 + omega_shear * u_2 * xi_18 * -0.16666666666666666 + rr_0 * xi_18 * 0.083333333333333329 + u_0 * xi_20 * -0.16666666666666666 + u_1 * xi_13 * -0.16666666666666666 + u_2 * xi_18 * 0.33333333333333331 + xi_18 * -0.16666666666666666;
+          const double forceTerm_7 = omega_shear * u_0 * xi_13 * 0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_20 * 0.125 + omega_shear * u_2 * xi_18 * 0.041666666666666664 + rr_0 * xi_13 * -0.041666666666666664 + rr_0 * xi_20 * 0.041666666666666664 + u_0 * xi_13 * -0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_20 * -0.25 + u_2 * xi_18 * -0.083333333333333329 + xi_13 * 0.083333333333333329 + xi_20 * -0.083333333333333329;
+          const double forceTerm_8 = omega_shear * u_0 * xi_13 * -0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_20 * -0.125 + omega_shear * u_2 * xi_18 * 0.041666666666666664 + rr_0 * xi_13 * -0.041666666666666664 + rr_0 * xi_20 * -0.041666666666666664 + u_0 * xi_13 * 0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_20 * 0.25 + u_2 * xi_18 * -0.083333333333333329 + xi_13 * 0.083333333333333329 + xi_20 * 0.083333333333333329;
+          const double forceTerm_9 = omega_shear * u_0 * xi_13 * -0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_20 * -0.125 + omega_shear * u_2 * xi_18 * 0.041666666666666664 + rr_0 * xi_13 * 0.041666666666666664 + rr_0 * xi_20 * 0.041666666666666664 + u_0 * xi_13 * 0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_20 * 0.25 + u_2 * xi_18 * -0.083333333333333329 + xi_13 * -0.083333333333333329 + xi_20 * -0.083333333333333329;
+          const double forceTerm_10 = omega_shear * u_0 * xi_13 * 0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_20 * 0.125 + omega_shear * u_2 * xi_18 * 0.041666666666666664 + rr_0 * xi_13 * 0.041666666666666664 + rr_0 * xi_20 * -0.041666666666666664 + u_0 * xi_13 * -0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_20 * -0.25 + u_2 * xi_18 * -0.083333333333333329 + xi_13 * -0.083333333333333329 + xi_20 * 0.083333333333333329;
+          const double forceTerm_11 = omega_shear * u_0 * xi_20 * 0.041666666666666664 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_18 * -0.125 + omega_shear * u_2 * xi_13 * -0.125 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + rr_0 * xi_13 * -0.041666666666666664 + rr_0 * xi_18 * -0.041666666666666664 + u_0 * xi_20 * -0.083333333333333329 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_18 * 0.25 + u_2 * xi_13 * 0.25 + u_2 * xi_18 * 0.16666666666666666 + xi_13 * 0.083333333333333329 + xi_18 * 0.083333333333333329;
+          const double forceTerm_12 = omega_shear * u_0 * xi_20 * 0.041666666666666664 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_18 * 0.125 + omega_shear * u_2 * xi_13 * 0.125 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + rr_0 * xi_13 * 0.041666666666666664 + rr_0 * xi_18 * -0.041666666666666664 + u_0 * xi_20 * -0.083333333333333329 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_18 * -0.25 + u_2 * xi_13 * -0.25 + u_2 * xi_18 * 0.16666666666666666 + xi_13 * -0.083333333333333329 + xi_18 * 0.083333333333333329;
+          const double forceTerm_13 = omega_shear * u_0 * xi_18 * 0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.041666666666666664 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + omega_shear * u_2 * xi_20 * 0.125 + rr_0 * xi_18 * -0.041666666666666664 + rr_0 * xi_20 * 0.041666666666666664 + u_0 * xi_18 * -0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * -0.083333333333333329 + u_2 * xi_18 * 0.16666666666666666 + u_2 * xi_20 * -0.25 + xi_18 * 0.083333333333333329 + xi_20 * -0.083333333333333329;
+          const double forceTerm_14 = omega_shear * u_0 * xi_18 * -0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.041666666666666664 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + omega_shear * u_2 * xi_20 * -0.125 + rr_0 * xi_18 * -0.041666666666666664 + rr_0 * xi_20 * -0.041666666666666664 + u_0 * xi_18 * 0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * -0.083333333333333329 + u_2 * xi_18 * 0.16666666666666666 + u_2 * xi_20 * 0.25 + xi_18 * 0.083333333333333329 + xi_20 * 0.083333333333333329;
+          const double forceTerm_15 = omega_shear * u_0 * xi_20 * 0.041666666666666664 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_18 * 0.125 + omega_shear * u_2 * xi_13 * 0.125 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + rr_0 * xi_13 * -0.041666666666666664 + rr_0 * xi_18 * 0.041666666666666664 + u_0 * xi_20 * -0.083333333333333329 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_18 * -0.25 + u_2 * xi_13 * -0.25 + u_2 * xi_18 * 0.16666666666666666 + xi_13 * 0.083333333333333329 + xi_18 * -0.083333333333333329;
+          const double forceTerm_16 = omega_shear * u_0 * xi_20 * 0.041666666666666664 + omega_shear * u_1 * xi_13 * -0.083333333333333329 + omega_shear * u_1 * xi_18 * -0.125 + omega_shear * u_2 * xi_13 * -0.125 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + rr_0 * xi_13 * 0.041666666666666664 + rr_0 * xi_18 * 0.041666666666666664 + u_0 * xi_20 * -0.083333333333333329 + u_1 * xi_13 * 0.16666666666666666 + u_1 * xi_18 * 0.25 + u_2 * xi_13 * 0.25 + u_2 * xi_18 * 0.16666666666666666 + xi_13 * -0.083333333333333329 + xi_18 * -0.083333333333333329;
+          const double forceTerm_17 = omega_shear * u_0 * xi_18 * -0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.041666666666666664 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + omega_shear * u_2 * xi_20 * -0.125 + rr_0 * xi_18 * 0.041666666666666664 + rr_0 * xi_20 * 0.041666666666666664 + u_0 * xi_18 * 0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * -0.083333333333333329 + u_2 * xi_18 * 0.16666666666666666 + u_2 * xi_20 * 0.25 + xi_18 * -0.083333333333333329 + xi_20 * -0.083333333333333329;
+          const double forceTerm_18 = omega_shear * u_0 * xi_18 * 0.125 + omega_shear * u_0 * xi_20 * -0.083333333333333329 + omega_shear * u_1 * xi_13 * 0.041666666666666664 + omega_shear * u_2 * xi_18 * -0.083333333333333329 + omega_shear * u_2 * xi_20 * 0.125 + rr_0 * xi_18 * 0.041666666666666664 + rr_0 * xi_20 * -0.041666666666666664 + u_0 * xi_18 * -0.25 + u_0 * xi_20 * 0.16666666666666666 + u_1 * xi_13 * -0.083333333333333329 + u_2 * xi_18 * 0.16666666666666666 + u_2 * xi_20 * -0.25 + xi_18 * -0.083333333333333329 + xi_20 * 0.083333333333333329;
+          const double u0Mu1 = u_0 + u_1 * -1.0;
+          const double u0Pu1 = u_0 + u_1;
+          const double u1Pu2 = u_1 + u_2;
+          const double u1Mu2 = u_1 + u_2 * -1.0;
+          const double u0Mu2 = u_0 + u_2 * -1.0;
+          const double u0Pu2 = u_0 + u_2;
+          const double f_eq_common = rho * -1.0 * u_0 * u_0 + rho * -1.0 * u_1 * u_1 + rho * -1.0 * u_2 * u_2 + rho;
+          _data_pdfs_20_30_10[ctr_0] = forceTerm_0 + omega_shear * (f_eq_common * 0.33333333333333331 + xi_12 * -1.0) + xi_12;
+          _data_pdfs_20_31_10[ctr_0] = forceTerm_1 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * u_1 * u_1) + xi_10 * -0.5 + xi_23 * -0.5) + rr_0 * (rho * u_1 * 0.16666666666666666 + xi_10 * -0.5 + xi_23 * 0.5) + xi_10 + ((-1.0 <= grid_size * -1.0 + ((double)(ctr_1))) ? (rho * v_s * (u_0 * 2.0 + v_s) * 0.16666666666666666) : (0.0));
+          _data_pdfs_20_32_10[ctr_0] = forceTerm_2 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * u_1 * u_1) + xi_10 * -0.5 + xi_23 * -0.5) + rr_0 * (rho * u_1 * -0.16666666666666666 + xi_10 * 0.5 + xi_23 * -0.5) + xi_23 + ((0.0 >= ((double)(ctr_1))) ? (rho * v_s * (u_0 * -2.0 + v_s) * 0.16666666666666666) : (0.0));
+          _data_pdfs_20_33_10[ctr_0] = forceTerm_3 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * u_0 * u_0) + xi_17 * -0.5 + xi_3 * -0.5) + rr_0 * (rho * u_0 * -0.16666666666666666 + xi_17 * -0.5 + xi_3 * 0.5) + xi_17;
+          _data_pdfs_20_34_10[ctr_0] = forceTerm_4 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * u_0 * u_0) + xi_17 * -0.5 + xi_3 * -0.5) + rr_0 * (rho * u_0 * 0.16666666666666666 + xi_17 * 0.5 + xi_3 * -0.5) + xi_3;
+          _data_pdfs_20_35_10[ctr_0] = forceTerm_5 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * u_2 * u_2) + xi_24 * -0.5 + xi_4 * -0.5) + rr_0 * (rho * u_2 * 0.16666666666666666 + xi_24 * -0.5 + xi_4 * 0.5) + xi_24;
+          _data_pdfs_20_36_10[ctr_0] = forceTerm_6 + omega_shear * (f_eq_common * 0.16666666666666666 + rho * (-0.1111111111111111 + 0.33333333333333331 * u_2 * u_2) + xi_24 * -0.5 + xi_4 * -0.5) + rr_0 * (rho * u_2 * -0.16666666666666666 + xi_24 * 0.5 + xi_4 * -0.5) + xi_4;
+          _data_pdfs_20_37_10[ctr_0] = forceTerm_7 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_2 * u_2 + 0.125 * u0Mu1 * u0Mu1) + xi_11 * -0.5 + xi_6 * -0.5) + rr_0 * (rho * u0Mu1 * -0.083333333333333329 + xi_11 * -0.5 + xi_6 * 0.5) + xi_11 + ((-1.0 <= grid_size * -1.0 + ((double)(ctr_1))) ? (rho * v_s * (u_0 * -2.0 + u_1 * 3.0 + v_s * -1.0 + 1.0) * 0.083333333333333329) : (0.0));
+          _data_pdfs_20_38_10[ctr_0] = forceTerm_8 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_2 * u_2 + 0.125 * u0Pu1 * u0Pu1) + xi_15 * -0.5 + xi_9 * -0.5) + rr_0 * (rho * u0Pu1 * 0.083333333333333329 + xi_15 * -0.5 + xi_9 * 0.5) + xi_15 + ((-1.0 <= grid_size * -1.0 + ((double)(ctr_1))) ? (rho * v_s * (u_0 * 2.0 + u_1 * 3.0 + v_s + 1.0) * -0.083333333333333329) : (0.0));
+          _data_pdfs_20_39_10[ctr_0] = forceTerm_9 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_2 * u_2 + 0.125 * u0Pu1 * u0Pu1) + xi_15 * -0.5 + xi_9 * -0.5) + rr_0 * (rho * u0Pu1 * -0.083333333333333329 + xi_15 * 0.5 + xi_9 * -0.5) + xi_9 + ((0.0 >= ((double)(ctr_1))) ? (rho * v_s * (u_0 * 2.0 + u_1 * 3.0 + v_s * -1.0 - 1.0) * 0.083333333333333329) : (0.0));
+          _data_pdfs_20_310_10[ctr_0] = forceTerm_10 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_2 * u_2 + 0.125 * u0Mu1 * u0Mu1) + xi_11 * -0.5 + xi_6 * -0.5) + rr_0 * (rho * u0Mu1 * 0.083333333333333329 + xi_11 * 0.5 + xi_6 * -0.5) + xi_6 + ((0.0 >= ((double)(ctr_1))) ? (rho * v_s * (u_0 * 2.0 + u_1 * -3.0 + v_s * -1.0 + 1.0) * 0.083333333333333329) : (0.0));
+          _data_pdfs_20_311_10[ctr_0] = forceTerm_11 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_0 * u_0 + 0.125 * u1Pu2 * u1Pu2) + xi_14 * -0.5 + xi_22 * -0.5) + rr_0 * (rho * u1Pu2 * 0.083333333333333329 + xi_14 * 0.5 + xi_22 * -0.5) + xi_22;
+          _data_pdfs_20_312_10[ctr_0] = forceTerm_12 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_0 * u_0 + 0.125 * u1Mu2 * u1Mu2) + xi_5 * -0.5 + xi_7 * -0.5) + rr_0 * (rho * u1Mu2 * -0.083333333333333329 + xi_5 * 0.5 + xi_7 * -0.5) + xi_7;
+          _data_pdfs_20_313_10[ctr_0] = forceTerm_13 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_1 * u_1 + 0.125 * u0Mu2 * u0Mu2) + xi_16 * -0.5 + xi_8 * -0.5) + rr_0 * (rho * u0Mu2 * -0.083333333333333329 + xi_16 * -0.5 + xi_8 * 0.5) + xi_16;
+          _data_pdfs_20_314_10[ctr_0] = forceTerm_14 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_1 * u_1 + 0.125 * u0Pu2 * u0Pu2) + xi_19 * -0.5 + xi_21 * -0.5) + rr_0 * (rho * u0Pu2 * 0.083333333333333329 + xi_19 * -0.5 + xi_21 * 0.5) + xi_19;
+          _data_pdfs_20_315_10[ctr_0] = forceTerm_15 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_0 * u_0 + 0.125 * u1Mu2 * u1Mu2) + xi_5 * -0.5 + xi_7 * -0.5) + rr_0 * (rho * u1Mu2 * 0.083333333333333329 + xi_5 * -0.5 + xi_7 * 0.5) + xi_5;
+          _data_pdfs_20_316_10[ctr_0] = forceTerm_16 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_0 * u_0 + 0.125 * u1Pu2 * u1Pu2) + xi_14 * -0.5 + xi_22 * -0.5) + rr_0 * (rho * u1Pu2 * -0.083333333333333329 + xi_14 * -0.5 + xi_22 * 0.5) + xi_14;
+          _data_pdfs_20_317_10[ctr_0] = forceTerm_17 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_1 * u_1 + 0.125 * u0Pu2 * u0Pu2) + xi_19 * -0.5 + xi_21 * -0.5) + rr_0 * (rho * u0Pu2 * -0.083333333333333329 + xi_19 * 0.5 + xi_21 * -0.5) + xi_21;
+          _data_pdfs_20_318_10[ctr_0] = forceTerm_18 + omega_shear * (f_eq_common * 0.041666666666666664 + rho * (-0.013888888888888888 + 0.041666666666666664 * u_1 * u_1 + 0.125 * u0Mu2 * u0Mu2) + xi_16 * -0.5 + xi_8 * -0.5) + rr_0 * (rho * u0Mu2 * 0.083333333333333329 + xi_16 * 0.5 + xi_8 * -0.5) + xi_8;
+        }
+      }
+    }
+  }
+}
+} // namespace internal_f11a519921c681cbc9d0b2f51454c920
+
+void CollideSweepDoublePrecisionLeesEdwardsAVX::run(IBlock *block) {
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+
+  auto &v_s = this->v_s_;
+  auto &omega_shear = this->omega_shear_;
+  auto &grid_size = this->grid_size_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_f11a519921c681cbc9d0b2f51454c920::collidesweepdoubleprecisionleesedwardsavx_collidesweepdoubleprecisionleesedwardsavx(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, grid_size, omega_shear, v_s);
+}
+
+void CollideSweepDoublePrecisionLeesEdwardsAVX::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+
+  auto &v_s = this->v_s_;
+  auto &omega_shear = this->omega_shear_;
+  auto &grid_size = this->grid_size_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_f11a519921c681cbc9d0b2f51454c920::collidesweepdoubleprecisionleesedwardsavx_collidesweepdoubleprecisionleesedwardsavx(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, grid_size, omega_shear, v_s);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.h
new file mode 100644
index 00000000000..272d0555606
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.h
@@ -0,0 +1,109 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepDoublePrecisionLeesEdwardsAVX.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class CollideSweepDoublePrecisionLeesEdwardsAVX {
+public:
+  CollideSweepDoublePrecisionLeesEdwardsAVX(BlockDataID forceID_,
+                                            BlockDataID pdfsID_,
+                                            double grid_size,
+                                            double omega_shear, double v_s)
+      : forceID(forceID_), pdfsID(pdfsID_), grid_size_(grid_size),
+        omega_shear_(omega_shear), v_s_(v_s){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)> getSweep(
+      const shared_ptr<CollideSweepDoublePrecisionLeesEdwardsAVX> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<CollideSweepDoublePrecisionLeesEdwardsAVX> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  double grid_size_;
+  double omega_shear_;
+  double v_s_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalized.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalized.cpp
new file mode 100644
index 00000000000..94629fa8141
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalized.cpp
@@ -0,0 +1,568 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepDoublePrecisionThermalized.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "CollideSweepDoublePrecisionThermalized.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#include "philox_rand.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_0d943397135d13b4628c5752888935d7 {
+static FUNC_PREFIX void collidesweepdoubleprecisionthermalized_collidesweepdoubleprecisionthermalized(double *RESTRICT const _data_force, double *RESTRICT _data_pdfs, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, uint32_t block_offset_0, uint32_t block_offset_1, uint32_t block_offset_2, double kT, double omega_bulk, double omega_even, double omega_odd, double omega_shear, uint32_t seed, uint32_t time_step) {
+  const double xi_28 = omega_bulk * 0.5;
+  const double xi_55 = omega_shear * 0.041666666666666664;
+  const double xi_60 = omega_bulk * 0.041666666666666664;
+  const double xi_71 = omega_shear * 0.125;
+  const double xi_109 = 2.4494897427831779;
+  const double xi_134 = omega_odd * 0.25;
+  const double xi_145 = omega_odd * 0.083333333333333329;
+  const double xi_198 = omega_shear * 0.25;
+  const double xi_211 = omega_odd * 0.041666666666666664;
+  const double xi_213 = omega_odd * 0.125;
+  const double rr_0 = 0.0;
+  const double xi_53 = rr_0 * 0.041666666666666664;
+  for (int64_t ctr_2 = 0; ctr_2 < _size_force_2; ctr_2 += 1) {
+    double *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    double *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    double *RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    double *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    double *RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3;
+    double *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    double *RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_force_1; ctr_1 += 1) {
+      double *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      double *RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_36;
+      double *RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_315;
+      double *RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_310;
+      double *RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_312;
+      double *RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_318;
+      double *RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_39;
+      double *RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_31;
+      double *RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_37;
+      double *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      double *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      double *RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_316;
+      double *RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_313;
+      double *RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_38;
+      double *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      double *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      double *RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_314;
+      double *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      double *RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_317;
+      double *RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_311;
+      double *RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_32;
+      double *RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_35;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_force_0; ctr_0 += 1) {
+        const double xi_244 = _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0];
+        const double xi_245 = _data_pdfs_20_36_10[_stride_pdfs_0 * ctr_0];
+        const double xi_246 = _data_pdfs_20_315_10[_stride_pdfs_0 * ctr_0];
+        const double xi_247 = _data_pdfs_20_310_10[_stride_pdfs_0 * ctr_0];
+        const double xi_248 = _data_pdfs_20_312_10[_stride_pdfs_0 * ctr_0];
+        const double xi_249 = _data_pdfs_20_318_10[_stride_pdfs_0 * ctr_0];
+        const double xi_250 = _data_pdfs_20_39_10[_stride_pdfs_0 * ctr_0];
+        const double xi_251 = _data_pdfs_20_31_10[_stride_pdfs_0 * ctr_0];
+        const double xi_252 = _data_pdfs_20_37_10[_stride_pdfs_0 * ctr_0];
+        const double xi_253 = _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0];
+        const double xi_254 = _data_force_20_31_10[_stride_force_0 * ctr_0];
+        const double xi_255 = _data_pdfs_20_316_10[_stride_pdfs_0 * ctr_0];
+        const double xi_256 = _data_pdfs_20_313_10[_stride_pdfs_0 * ctr_0];
+        const double xi_257 = _data_pdfs_20_38_10[_stride_pdfs_0 * ctr_0];
+        const double xi_258 = _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0];
+        const double xi_259 = _data_force_20_32_10[_stride_force_0 * ctr_0];
+        const double xi_260 = _data_pdfs_20_314_10[_stride_pdfs_0 * ctr_0];
+        const double xi_261 = _data_force_20_30_10[_stride_force_0 * ctr_0];
+        const double xi_262 = _data_pdfs_20_317_10[_stride_pdfs_0 * ctr_0];
+        const double xi_263 = _data_pdfs_20_311_10[_stride_pdfs_0 * ctr_0];
+        const double xi_264 = _data_pdfs_20_32_10[_stride_pdfs_0 * ctr_0];
+        const double xi_265 = _data_pdfs_20_35_10[_stride_pdfs_0 * ctr_0];
+
+        double random_7_0{};
+        double random_7_1{};
+        if (kT > 0.) {
+          philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 7, seed, random_7_0, random_7_1);
+        }
+
+        double random_6_0{};
+        double random_6_1{};
+        if (kT > 0.) {
+          philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 6, seed, random_6_0, random_6_1);
+        }
+
+        double random_5_0{};
+        double random_5_1{};
+        if (kT > 0.) {
+          philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 5, seed, random_5_0, random_5_1);
+        }
+
+        double random_4_0{};
+        double random_4_1{};
+        if (kT > 0.) {
+          philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 4, seed, random_4_0, random_4_1);
+        }
+
+        double random_3_0{};
+        double random_3_1{};
+        if (kT > 0.) {
+          philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 3, seed, random_3_0, random_3_1);
+        }
+
+        double random_2_0{};
+        double random_2_1{};
+        if (kT > 0.) {
+          philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 2, seed, random_2_0, random_2_1);
+        }
+
+        double random_1_0{};
+        double random_1_1{};
+        if (kT > 0.) {
+          philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 1, seed, random_1_0, random_1_1);
+        }
+
+        double random_0_0{};
+        double random_0_1{};
+        if (kT > 0.) {
+          philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 0, seed, random_0_0, random_0_1);
+        }
+        const double xi_2 = xi_249 + xi_260;
+        const double xi_3 = xi_2 + xi_244;
+        const double xi_4 = xi_246 + xi_251 + xi_263;
+        const double xi_5 = xi_248 + xi_265;
+        const double xi_6 = xi_245 + xi_262;
+        const double xi_8 = xi_250 * -1.0;
+        const double xi_9 = xi_252 * -1.0;
+        const double xi_10 = xi_262 * -1.0;
+        const double xi_11 = xi_256 * -1.0;
+        const double xi_12 = xi_258 * -1.0;
+        const double xi_13 = xi_10 + xi_11 + xi_12;
+        const double xi_14 = xi_264 * -1.0;
+        const double xi_15 = xi_247 * -1.0;
+        const double xi_16 = xi_14 + xi_15;
+        const double xi_17 = xi_255 * -1.0;
+        const double xi_18 = xi_248 * -1.0;
+        const double xi_19 = xi_17 + xi_18;
+        const double xi_20 = xi_249 * -1.0;
+        const double xi_21 = xi_10 + xi_20;
+        const double xi_22 = xi_246 * -1.0;
+        const double xi_23 = xi_245 * -1.0;
+        const double xi_24 = xi_17 + xi_22 + xi_23 + xi_263;
+        const double xi_29 = xi_254 * 0.16666666666666666;
+        const double xi_30 = xi_254 * 0.083333333333333329;
+        const double xi_42 = xi_261 * 0.16666666666666666;
+        const double xi_43 = xi_261 * 0.083333333333333329;
+        const double xi_49 = xi_259 * 0.16666666666666666;
+        const double xi_50 = xi_259 * 0.083333333333333329;
+        const double xi_67 = xi_254 * 0.25;
+        const double xi_72 = xi_254 * xi_71;
+        const double xi_114 = xi_253 * -1.0;
+        const double xi_118 = xi_263 * -1.0;
+        const double xi_119 = xi_118 + xi_18;
+        const double xi_120 = xi_257 * -1.0 + xi_8;
+        const double xi_122 = xi_260 * -1.0;
+        const double xi_123 = xi_11 + xi_122 + xi_15 + xi_21;
+        const double xi_125 = xi_246 * 2.0 + xi_248 * 2.0 + xi_255 * 2.0 + xi_263 * 2.0;
+        const double xi_126 = xi_125 + xi_244 * 5.0 + xi_258 * 5.0;
+        const double xi_128 = xi_256 * 2.0;
+        const double xi_129 = xi_260 * 2.0;
+        const double xi_130 = xi_249 * 2.0 + xi_262 * 2.0;
+        const double xi_132 = xi_118 + xi_248;
+        const double xi_133 = xi_132 + xi_14 + xi_22 + xi_251 + xi_255;
+        const double xi_135 = xi_133 * xi_134;
+        const double xi_136 = random_5_1 - 0.5;
+        const double xi_141 = xi_252 * 2.0;
+        const double xi_142 = xi_247 * 2.0;
+        const double xi_143 = xi_250 * 2.0 + xi_257 * -2.0;
+        const double xi_144 = xi_14 + xi_141 * -1.0 + xi_142 + xi_143 + xi_19 + xi_4;
+        const double xi_146 = xi_144 * xi_145;
+        const double xi_147 = random_3_0 - 0.5;
+        const double xi_152 = random_0_1 - 0.5;
+        const double xi_166 = xi_122 + xi_256;
+        const double xi_167 = xi_12 + xi_166 + xi_20 + xi_244 + xi_262;
+        const double xi_168 = xi_134 * xi_167;
+        const double xi_169 = random_4_1 - 0.5;
+        const double xi_171 = xi_13 + xi_141 + xi_142 * -1.0 + xi_143 + xi_3;
+        const double xi_172 = xi_145 * xi_171;
+        const double xi_173 = random_4_0 - 0.5;
+        const double xi_178 = xi_119 + xi_23 + xi_246 + xi_255 + xi_265;
+        const double xi_179 = xi_134 * xi_178;
+        const double xi_180 = random_5_0 - 0.5;
+        const double xi_182 = xi_128 * -1.0 + xi_129 * -1.0 + xi_130 + xi_24 + xi_5;
+        const double xi_183 = xi_145 * xi_182;
+        const double xi_184 = random_3_1 - 0.5;
+        const double xi_212 = xi_182 * xi_211;
+        const double xi_214 = xi_178 * xi_213;
+        const double xi_220 = xi_144 * xi_211;
+        const double xi_221 = xi_133 * xi_213;
+        const double xi_235 = xi_167 * xi_213;
+        const double xi_236 = xi_171 * xi_211;
+        const double xi_31 = rr_0 * xi_30;
+        const double xi_44 = rr_0 * xi_43;
+        const double xi_51 = rr_0 * xi_50;
+        const double xi_54 = xi_261 * xi_53;
+        const double xi_59 = xi_254 * xi_53;
+        const double xi_81 = xi_259 * xi_53;
+        const double vel0Term = xi_247 + xi_257 + xi_3;
+        const double vel1Term = xi_252 + xi_4;
+        const double vel2Term = xi_256 + xi_5;
+        const double rho = vel0Term + vel1Term + vel2Term + xi_250 + xi_253 + xi_255 + xi_258 + xi_264 + xi_6;
+        const double xi_105 = kT * rho;
+        const double xi_106 = pow(xi_105 * (-1.0 * ((omega_even * -1.0 + 1.0) * (omega_even * -1.0 + 1.0)) + 1.0), 0.5);
+        const double xi_107 = xi_106 * (random_6_0 - 0.5) * 3.7416573867739413;
+        const double xi_108 = xi_106 * (random_7_0 - 0.5) * 5.4772255750516612;
+        const double xi_110 = xi_109 * (random_2_1 - 0.5) * pow(xi_105 * (-1.0 * ((omega_bulk * -1.0 + 1.0) * (omega_bulk * -1.0 + 1.0)) + 1.0), 0.5);
+        const double xi_111 = xi_106 * (random_6_1 - 0.5) * 8.3666002653407556;
+        const double xi_137 = pow(xi_105 * (-1.0 * ((omega_odd * -1.0 + 1.0) * (omega_odd * -1.0 + 1.0)) + 1.0), 0.5);
+        const double xi_138 = xi_137 * 1.4142135623730951;
+        const double xi_139 = xi_138 * 0.5;
+        const double xi_140 = xi_136 * xi_139;
+        const double xi_148 = xi_109 * xi_137;
+        const double xi_149 = xi_148 * 0.16666666666666666;
+        const double xi_150 = xi_147 * xi_149;
+        const double xi_151 = xi_146 * -1.0 + xi_150 * -1.0;
+        const double xi_153 = pow(xi_105 * (-1.0 * ((omega_shear * -1.0 + 1.0) * (omega_shear * -1.0 + 1.0)) + 1.0), 0.5);
+        const double xi_154 = xi_153 * 0.5;
+        const double xi_155 = xi_152 * xi_154;
+        const double xi_161 = xi_153 * (random_0_0 - 0.5) * 1.7320508075688772;
+        const double xi_165 = xi_146 + xi_150;
+        const double xi_170 = xi_139 * xi_169;
+        const double xi_174 = xi_149 * xi_173;
+        const double xi_175 = xi_172 + xi_174;
+        const double xi_177 = xi_172 * -1.0 + xi_174 * -1.0;
+        const double xi_181 = xi_139 * xi_180;
+        const double xi_185 = xi_149 * xi_184;
+        const double xi_186 = xi_183 * -1.0 + xi_185 * -1.0;
+        const double xi_188 = xi_183 + xi_185;
+        const double xi_189 = xi_152 * xi_153 * 0.25;
+        const double xi_192 = xi_107 * 0.083333333333333329;
+        const double xi_196 = xi_154 * (random_1_0 - 0.5);
+        const double xi_203 = xi_154 * (random_2_0 - 0.5);
+        const double xi_207 = xi_111 * -0.014285714285714285;
+        const double xi_208 = xi_108 * 0.050000000000000003;
+        const double xi_215 = xi_148 * 0.083333333333333329;
+        const double xi_216 = xi_184 * xi_215;
+        const double xi_217 = xi_138 * 0.25;
+        const double xi_218 = xi_180 * xi_217;
+        const double xi_219 = xi_212 * -1.0 + xi_214 + xi_216 * -1.0 + xi_218;
+        const double xi_222 = xi_147 * xi_215;
+        const double xi_223 = xi_136 * xi_217;
+        const double xi_224 = xi_220 * -1.0 + xi_221 + xi_222 * -1.0 + xi_223;
+        const double xi_225 = xi_220 + xi_221 * -1.0 + xi_222 + xi_223 * -1.0;
+        const double xi_227 = xi_189 * -1.0;
+        const double xi_230 = xi_111 * 0.035714285714285712;
+        const double xi_232 = xi_154 * (random_1_1 - 0.5);
+        const double xi_237 = xi_169 * xi_217;
+        const double xi_238 = xi_173 * xi_215;
+        const double xi_239 = xi_235 * -1.0 + xi_236 + xi_237 * -1.0 + xi_238;
+        const double xi_241 = xi_235 + xi_236 * -1.0 + xi_237 + xi_238 * -1.0;
+        const double xi_242 = xi_212 + xi_214 * -1.0 + xi_216 + xi_218 * -1.0;
+        const double xi_0 = ((1.0) / (rho));
+        const double xi_7 = xi_0 * 0.5;
+        const double u_0 = xi_0 * (vel0Term + xi_13 + xi_8 + xi_9) + xi_261 * xi_7;
+        const double xi_25 = u_0 * xi_261;
+        const double xi_37 = xi_25 * 0.16666666666666666;
+        const double xi_38 = xi_25 * 0.083333333333333329;
+        const double xi_39 = omega_shear * xi_38;
+        const double xi_40 = xi_37 * -1.0 + xi_39;
+        const double xi_56 = xi_25 * xi_55 * -1.0 + xi_37;
+        const double xi_57 = xi_43 * -1.0 + xi_54 + xi_56;
+        const double xi_61 = xi_25 * xi_60 * -1.0;
+        const double xi_68 = u_0 * xi_67;
+        const double xi_73 = u_0 * xi_72;
+        const double xi_77 = xi_43 + xi_54 * -1.0 + xi_56;
+        const double xi_84 = xi_38 * -1.0;
+        const double xi_95 = u_0 * xi_259;
+        const double xi_96 = xi_95 * 0.25;
+        const double xi_99 = xi_71 * xi_95;
+        const double xi_113 = rho * (u_0 * u_0);
+        const double u_1 = xi_0 * (vel1Term + xi_16 + xi_19 + xi_257 + xi_8) + xi_254 * xi_7;
+        const double xi_26 = u_1 * xi_254;
+        const double xi_32 = xi_26 * 0.16666666666666666;
+        const double xi_45 = xi_26 * 0.083333333333333329;
+        const double xi_46 = omega_shear * xi_45;
+        const double xi_47 = xi_32 * -1.0 + xi_46;
+        const double xi_62 = xi_26 * xi_60 * -1.0;
+        const double xi_69 = u_1 * 0.25;
+        const double xi_70 = xi_261 * xi_69;
+        const double xi_74 = u_1 * xi_71;
+        const double xi_75 = xi_261 * xi_74;
+        const double xi_76 = xi_68 * -1.0 + xi_70 * -1.0 + xi_73 + xi_75;
+        const double xi_78 = xi_68 + xi_70 + xi_73 * -1.0 + xi_75 * -1.0;
+        const double xi_86 = xi_259 * xi_69;
+        const double xi_88 = xi_259 * xi_74;
+        const double xi_93 = xi_45 * -1.0;
+        const double xi_112 = rho * (u_1 * u_1);
+        const double xi_121 = xi_112 + xi_120 + xi_9;
+        const double xi_197 = rho * u_1;
+        const double xi_199 = xi_198 * (u_0 * xi_197 + xi_120 + xi_247 + xi_252);
+        const double xi_200 = xi_196 * -1.0 + xi_199 * -1.0;
+        const double xi_201 = xi_196 + xi_199;
+        const double u_2 = xi_0 * (vel2Term + xi_21 + xi_24 + xi_260) + xi_259 * xi_7;
+        const double xi_27 = u_2 * xi_259;
+        const double xi_33 = xi_27 * 0.16666666666666666;
+        const double xi_34 = xi_27 * 0.083333333333333329;
+        const double xi_35 = omega_shear * xi_34;
+        const double xi_36 = xi_33 * -1.0 + xi_35;
+        const double xi_41 = omega_shear * xi_32 * -1.0 + xi_26 * 0.33333333333333331 + xi_36 + xi_40;
+        const double xi_48 = omega_shear * xi_37 * -1.0 + xi_25 * 0.33333333333333331 + xi_36 + xi_47;
+        const double xi_52 = omega_shear * xi_33 * -1.0 + xi_27 * 0.33333333333333331 + xi_40 + xi_47;
+        const double xi_58 = xi_34 * -1.0;
+        const double xi_63 = xi_27 * xi_60 * -1.0;
+        const double xi_64 = xi_26 * xi_55 * -1.0 + xi_32 + xi_61 + xi_62 + xi_63;
+        const double xi_65 = xi_30 + xi_59 * -1.0 + xi_64;
+        const double xi_66 = xi_35 + xi_58 + xi_65;
+        const double xi_79 = xi_30 * -1.0 + xi_59 + xi_64;
+        const double xi_80 = xi_35 + xi_58 + xi_79;
+        const double xi_82 = xi_27 * xi_55 * -1.0 + xi_33;
+        const double xi_83 = xi_50 + xi_81 * -1.0 + xi_82;
+        const double xi_85 = xi_39 + xi_65 + xi_84;
+        const double xi_87 = u_2 * xi_67;
+        const double xi_89 = u_2 * xi_72;
+        const double xi_90 = xi_86 + xi_87 + xi_88 * -1.0 + xi_89 * -1.0;
+        const double xi_91 = xi_39 + xi_79 + xi_84;
+        const double xi_92 = xi_86 * -1.0 + xi_87 * -1.0 + xi_88 + xi_89;
+        const double xi_94 = xi_46 + xi_61 + xi_62 + xi_63 + xi_83 + xi_93;
+        const double xi_97 = u_2 * xi_261;
+        const double xi_98 = xi_97 * 0.25;
+        const double xi_100 = xi_71 * xi_97;
+        const double xi_101 = xi_100 + xi_96 * -1.0 + xi_98 * -1.0 + xi_99;
+        const double xi_102 = xi_100 * -1.0 + xi_96 + xi_98 + xi_99 * -1.0;
+        const double xi_103 = xi_50 * -1.0 + xi_81 + xi_82;
+        const double xi_104 = xi_103 + xi_46 + xi_61 + xi_62 + xi_63 + xi_93;
+        const double xi_115 = rho * (u_2 * u_2);
+        const double xi_116 = xi_114 + xi_115 * 0.66666666666666663 + xi_245 * 3.0 + xi_265 * 3.0;
+        const double xi_117 = omega_even * (xi_112 * 0.66666666666666663 + xi_113 * 1.6666666666666667 + xi_116 + xi_246 * -3.0 + xi_248 * -3.0 + xi_251 * 3.0 + xi_255 * -3.0 + xi_263 * -3.0 + xi_264 * 3.0);
+        const double xi_124 = omega_bulk * (xi_113 + xi_115 + xi_119 + xi_121 + xi_123 + xi_17 + xi_22 + xi_253);
+        const double xi_127 = omega_even * (xi_112 * 2.3333333333333335 + xi_116 + xi_126 + xi_249 * -5.0 + xi_251 * -2.0 + xi_256 * -5.0 + xi_260 * -5.0 + xi_262 * -5.0 + xi_264 * -2.0);
+        const double xi_131 = omega_even * (xi_114 + xi_115 * 3.0 + xi_126 + xi_128 + xi_129 + xi_130 + xi_245 * -4.0 + xi_247 * -7.0 + xi_250 * -7.0 + xi_251 * 5.0 + xi_252 * -7.0 + xi_257 * -7.0 + xi_264 * 5.0 + xi_265 * -4.0);
+        const double xi_156 = xi_115 * -1.0 + xi_265;
+        const double xi_157 = omega_shear * (xi_121 + xi_156 + xi_16 + xi_2 + xi_251 * -1.0 + xi_256 + xi_6);
+        const double xi_158 = xi_157 * 0.125;
+        const double xi_159 = xi_107 * -0.11904761904761904 + xi_131 * -0.01984126984126984;
+        const double xi_160 = omega_shear * (xi_112 * -1.0 + xi_113 * 2.0 + xi_120 + xi_123 + xi_125 + xi_156 + xi_244 * -2.0 + xi_245 + xi_251 + xi_258 * -2.0 + xi_264 + xi_9);
+        const double xi_162 = xi_160 * -0.041666666666666664 + xi_161 * -0.16666666666666666;
+        const double xi_163 = xi_108 * -0.10000000000000001 + xi_117 * -0.050000000000000003 + xi_162;
+        const double xi_164 = xi_111 * 0.028571428571428571 + xi_127 * 0.014285714285714285 + xi_155 + xi_158 + xi_159 + xi_163;
+        const double xi_176 = xi_111 * -0.071428571428571425 + xi_127 * -0.035714285714285712 + xi_159 + xi_160 * 0.083333333333333329 + xi_161 * 0.33333333333333331;
+        const double xi_187 = xi_107 * 0.095238095238095233 + xi_111 * -0.042857142857142858 + xi_127 * -0.021428571428571429 + xi_131 * 0.015873015873015872 + xi_155 * -1.0 + xi_158 * -1.0 + xi_163;
+        const double xi_190 = xi_157 * 0.0625;
+        const double xi_191 = xi_131 * 0.013888888888888888;
+        const double xi_193 = xi_110 * 0.083333333333333329 + xi_124 * 0.041666666666666664;
+        const double xi_194 = xi_160 * 0.020833333333333332 + xi_161 * 0.083333333333333329 + xi_193;
+        const double xi_195 = xi_165 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
+        const double xi_202 = xi_151 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
+        const double xi_204 = xi_127 * -0.0071428571428571426;
+        const double xi_205 = xi_198 * (u_2 * xi_197 + xi_132 + xi_17 + xi_246);
+        const double xi_206 = xi_117 * 0.025000000000000001;
+        const double xi_209 = xi_107 * -0.023809523809523808 + xi_131 * -0.003968253968253968;
+        const double xi_210 = xi_162 + xi_193 + xi_203 + xi_204 + xi_205 + xi_206 + xi_207 + xi_208 + xi_209;
+        const double xi_226 = xi_162 + xi_193 + xi_203 * -1.0 + xi_204 + xi_205 * -1.0 + xi_206 + xi_207 + xi_208 + xi_209;
+        const double xi_228 = xi_190 * -1.0;
+        const double xi_229 = xi_127 * 0.017857142857142856;
+        const double xi_231 = xi_188 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
+        const double xi_233 = xi_198 * (rho * u_0 * u_2 + xi_10 + xi_166 + xi_249);
+        const double xi_234 = xi_232 * -1.0 + xi_233 * -1.0;
+        const double xi_240 = xi_232 + xi_233;
+        const double xi_243 = xi_186 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
+        const double forceTerm_0 = xi_25 * xi_28 + xi_25 * -1.0 + xi_26 * xi_28 + xi_26 * -1.0 + xi_27 * xi_28 + xi_27 * -1.0;
+        const double forceTerm_1 = xi_29 + xi_31 * -1.0 + xi_41;
+        const double forceTerm_2 = xi_29 * -1.0 + xi_31 + xi_41;
+        const double forceTerm_3 = xi_42 * -1.0 + xi_44 + xi_48;
+        const double forceTerm_4 = xi_42 + xi_44 * -1.0 + xi_48;
+        const double forceTerm_5 = xi_49 + xi_51 * -1.0 + xi_52;
+        const double forceTerm_6 = xi_49 * -1.0 + xi_51 + xi_52;
+        const double forceTerm_7 = xi_57 + xi_66 + xi_76;
+        const double forceTerm_8 = xi_66 + xi_77 + xi_78;
+        const double forceTerm_9 = xi_57 + xi_78 + xi_80;
+        const double forceTerm_10 = xi_76 + xi_77 + xi_80;
+        const double forceTerm_11 = xi_83 + xi_85 + xi_90;
+        const double forceTerm_12 = xi_83 + xi_91 + xi_92;
+        const double forceTerm_13 = xi_101 + xi_57 + xi_94;
+        const double forceTerm_14 = xi_102 + xi_77 + xi_94;
+        const double forceTerm_15 = xi_103 + xi_85 + xi_92;
+        const double forceTerm_16 = xi_103 + xi_90 + xi_91;
+        const double forceTerm_17 = xi_102 + xi_104 + xi_57;
+        const double forceTerm_18 = xi_101 + xi_104 + xi_77;
+        _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0] = forceTerm_0 + xi_107 * 0.14285714285714285 + xi_108 * 0.20000000000000001 + xi_110 * -1.0 + xi_111 * 0.085714285714285715 + xi_117 * 0.10000000000000001 + xi_124 * -0.5 + xi_127 * 0.042857142857142858 + xi_131 * 0.023809523809523808 + xi_253;
+        _data_pdfs_20_31_10[_stride_pdfs_0 * ctr_0] = forceTerm_1 + xi_135 * -1.0 + xi_140 * -1.0 + xi_151 + xi_164 + xi_251;
+        _data_pdfs_20_32_10[_stride_pdfs_0 * ctr_0] = forceTerm_2 + xi_135 + xi_140 + xi_164 + xi_165 + xi_264;
+        _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0] = forceTerm_3 + xi_168 + xi_170 + xi_175 + xi_176 + xi_258;
+        _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0] = forceTerm_4 + xi_168 * -1.0 + xi_170 * -1.0 + xi_176 + xi_177 + xi_244;
+        _data_pdfs_20_35_10[_stride_pdfs_0 * ctr_0] = forceTerm_5 + xi_179 * -1.0 + xi_181 * -1.0 + xi_186 + xi_187 + xi_265;
+        _data_pdfs_20_36_10[_stride_pdfs_0 * ctr_0] = forceTerm_6 + xi_179 + xi_181 + xi_187 + xi_188 + xi_245;
+        _data_pdfs_20_37_10[_stride_pdfs_0 * ctr_0] = forceTerm_7 + xi_177 + xi_195 + xi_200 + xi_252;
+        _data_pdfs_20_38_10[_stride_pdfs_0 * ctr_0] = forceTerm_8 + xi_175 + xi_195 + xi_201 + xi_257;
+        _data_pdfs_20_39_10[_stride_pdfs_0 * ctr_0] = forceTerm_9 + xi_177 + xi_201 + xi_202 + xi_250;
+        _data_pdfs_20_310_10[_stride_pdfs_0 * ctr_0] = forceTerm_10 + xi_175 + xi_200 + xi_202 + xi_247;
+        _data_pdfs_20_311_10[_stride_pdfs_0 * ctr_0] = forceTerm_11 + xi_210 + xi_219 + xi_224 + xi_263;
+        _data_pdfs_20_312_10[_stride_pdfs_0 * ctr_0] = forceTerm_12 + xi_219 + xi_225 + xi_226 + xi_248;
+        _data_pdfs_20_313_10[_stride_pdfs_0 * ctr_0] = forceTerm_13 + xi_231 + xi_234 + xi_239 + xi_256;
+        _data_pdfs_20_314_10[_stride_pdfs_0 * ctr_0] = forceTerm_14 + xi_231 + xi_240 + xi_241 + xi_260;
+        _data_pdfs_20_315_10[_stride_pdfs_0 * ctr_0] = forceTerm_15 + xi_224 + xi_226 + xi_242 + xi_246;
+        _data_pdfs_20_316_10[_stride_pdfs_0 * ctr_0] = forceTerm_16 + xi_210 + xi_225 + xi_242 + xi_255;
+        _data_pdfs_20_317_10[_stride_pdfs_0 * ctr_0] = forceTerm_17 + xi_239 + xi_240 + xi_243 + xi_262;
+        _data_pdfs_20_318_10[_stride_pdfs_0 * ctr_0] = forceTerm_18 + xi_234 + xi_241 + xi_243 + xi_249;
+      }
+    }
+  }
+}
+} // namespace internal_0d943397135d13b4628c5752888935d7
+
+void CollideSweepDoublePrecisionThermalized::run(IBlock *block) {
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+
+  auto &omega_bulk = this->omega_bulk_;
+  auto block_offset_1 = this->block_offset_1_;
+  auto &seed = this->seed_;
+  auto &omega_even = this->omega_even_;
+  auto &kT = this->kT_;
+  auto &omega_odd = this->omega_odd_;
+  auto block_offset_2 = this->block_offset_2_;
+  auto &time_step = this->time_step_;
+  auto block_offset_0 = this->block_offset_0_;
+  auto &omega_shear = this->omega_shear_;
+  block_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_0d943397135d13b4628c5752888935d7::collidesweepdoubleprecisionthermalized_collidesweepdoubleprecisionthermalized(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
+}
+
+void CollideSweepDoublePrecisionThermalized::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+
+  auto &omega_bulk = this->omega_bulk_;
+  auto block_offset_1 = this->block_offset_1_;
+  auto &seed = this->seed_;
+  auto &omega_even = this->omega_even_;
+  auto &kT = this->kT_;
+  auto &omega_odd = this->omega_odd_;
+  auto block_offset_2 = this->block_offset_2_;
+  auto &time_step = this->time_step_;
+  auto block_offset_0 = this->block_offset_0_;
+  auto &omega_shear = this->omega_shear_;
+  block_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_0d943397135d13b4628c5752888935d7::collidesweepdoubleprecisionthermalized_collidesweepdoubleprecisionthermalized(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalized.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalized.h
new file mode 100644
index 00000000000..d5b207b7b3d
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalized.h
@@ -0,0 +1,123 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepDoublePrecisionThermalized.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class CollideSweepDoublePrecisionThermalized {
+public:
+  CollideSweepDoublePrecisionThermalized(
+      BlockDataID forceID_, BlockDataID pdfsID_, uint32_t block_offset_0,
+      uint32_t block_offset_1, uint32_t block_offset_2, double kT,
+      double omega_bulk, double omega_even, double omega_odd,
+      double omega_shear, uint32_t seed, uint32_t time_step)
+      : forceID(forceID_), pdfsID(pdfsID_), block_offset_0_(block_offset_0),
+        block_offset_1_(block_offset_1), block_offset_2_(block_offset_2),
+        kT_(kT), omega_bulk_(omega_bulk), omega_even_(omega_even),
+        omega_odd_(omega_odd), omega_shear_(omega_shear), seed_(seed),
+        time_step_(time_step){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<CollideSweepDoublePrecisionThermalized> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<CollideSweepDoublePrecisionThermalized> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  uint32_t block_offset_0_;
+  uint32_t block_offset_1_;
+  uint32_t block_offset_2_;
+  double kT_;
+  double omega_bulk_;
+  double omega_even_;
+  double omega_odd_;
+  double omega_shear_;
+  uint32_t seed_;
+  uint32_t time_step_;
+  std::function<void(IBlock *, uint32_t &, uint32_t &, uint32_t &)>
+      block_offset_generator =
+          [](IBlock *const, uint32_t &, uint32_t &, uint32_t &) {};
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalizedAVX.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalizedAVX.cpp
new file mode 100644
index 00000000000..18b00c96698
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalizedAVX.cpp
@@ -0,0 +1,927 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepDoublePrecisionThermalizedAVX.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "CollideSweepDoublePrecisionThermalizedAVX.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#include <immintrin.h>
+
+#include "philox_rand.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_25bc51f30ec2c20f3ee9796f7dcb65c6 {
+static FUNC_PREFIX void collidesweepdoubleprecisionthermalizedavx_collidesweepdoubleprecisionthermalizedavx(double *RESTRICT const _data_force, double *RESTRICT _data_pdfs, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, uint32_t block_offset_0, uint32_t block_offset_1, uint32_t block_offset_2, double kT, double omega_bulk, double omega_even, double omega_odd, double omega_shear, uint32_t seed, uint32_t time_step) {
+  const double xi_28 = omega_bulk * 0.5;
+  const double xi_55 = omega_shear * 0.041666666666666664;
+  const double xi_60 = omega_bulk * 0.041666666666666664;
+  const double xi_71 = omega_shear * 0.125;
+  const double xi_109 = 2.4494897427831779;
+  const double xi_134 = omega_odd * 0.25;
+  const double xi_145 = omega_odd * 0.083333333333333329;
+  const double xi_198 = omega_shear * 0.25;
+  const double xi_211 = omega_odd * 0.041666666666666664;
+  const double xi_213 = omega_odd * 0.125;
+  const double rr_0 = 0.0;
+  const double xi_53 = rr_0 * 0.041666666666666664;
+  for (int64_t ctr_2 = 0; ctr_2 < _size_force_2; ctr_2 += 1) {
+    double *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    double *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    double *RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    double *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    double *RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3;
+    double *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    double *RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_force_1; ctr_1 += 1) {
+      double *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      double *RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_36;
+      double *RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_315;
+      double *RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_310;
+      double *RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_312;
+      double *RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_318;
+      double *RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_39;
+      double *RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_31;
+      double *RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_37;
+      double *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      double *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      double *RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_316;
+      double *RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_313;
+      double *RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_38;
+      double *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      double *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      double *RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_314;
+      double *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      double *RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_317;
+      double *RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_311;
+      double *RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_32;
+      double *RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_35;
+      {
+        for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((_size_force_0) / (4)) * (4); ctr_0 += 4) {
+          const __m256d xi_244 = _mm256_load_pd(&_data_pdfs_20_34_10[ctr_0]);
+          const __m256d xi_245 = _mm256_load_pd(&_data_pdfs_20_36_10[ctr_0]);
+          const __m256d xi_246 = _mm256_load_pd(&_data_pdfs_20_315_10[ctr_0]);
+          const __m256d xi_247 = _mm256_load_pd(&_data_pdfs_20_310_10[ctr_0]);
+          const __m256d xi_248 = _mm256_load_pd(&_data_pdfs_20_312_10[ctr_0]);
+          const __m256d xi_249 = _mm256_load_pd(&_data_pdfs_20_318_10[ctr_0]);
+          const __m256d xi_250 = _mm256_load_pd(&_data_pdfs_20_39_10[ctr_0]);
+          const __m256d xi_251 = _mm256_load_pd(&_data_pdfs_20_31_10[ctr_0]);
+          const __m256d xi_252 = _mm256_load_pd(&_data_pdfs_20_37_10[ctr_0]);
+          const __m256d xi_253 = _mm256_load_pd(&_data_pdfs_20_30_10[ctr_0]);
+          const __m256d xi_254 = _mm256_load_pd(&_data_force_20_31_10[ctr_0]);
+          const __m256d xi_255 = _mm256_load_pd(&_data_pdfs_20_316_10[ctr_0]);
+          const __m256d xi_256 = _mm256_load_pd(&_data_pdfs_20_313_10[ctr_0]);
+          const __m256d xi_257 = _mm256_load_pd(&_data_pdfs_20_38_10[ctr_0]);
+          const __m256d xi_258 = _mm256_load_pd(&_data_pdfs_20_33_10[ctr_0]);
+          const __m256d xi_259 = _mm256_load_pd(&_data_force_20_32_10[ctr_0]);
+          const __m256d xi_260 = _mm256_load_pd(&_data_pdfs_20_314_10[ctr_0]);
+          const __m256d xi_261 = _mm256_load_pd(&_data_force_20_30_10[ctr_0]);
+          const __m256d xi_262 = _mm256_load_pd(&_data_pdfs_20_317_10[ctr_0]);
+          const __m256d xi_263 = _mm256_load_pd(&_data_pdfs_20_311_10[ctr_0]);
+          const __m256d xi_264 = _mm256_load_pd(&_data_pdfs_20_32_10[ctr_0]);
+          const __m256d xi_265 = _mm256_load_pd(&_data_pdfs_20_35_10[ctr_0]);
+
+          __m256d random_7_0{};
+          __m256d random_7_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 7, seed, random_7_0, random_7_1);
+          }
+
+          __m256d random_6_0{};
+          __m256d random_6_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 6, seed, random_6_0, random_6_1);
+          }
+
+          __m256d random_5_0{};
+          __m256d random_5_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 5, seed, random_5_0, random_5_1);
+          }
+
+          __m256d random_4_0{};
+          __m256d random_4_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 4, seed, random_4_0, random_4_1);
+          }
+
+          __m256d random_3_0{};
+          __m256d random_3_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 3, seed, random_3_0, random_3_1);
+          }
+
+          __m256d random_2_0{};
+          __m256d random_2_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 2, seed, random_2_0, random_2_1);
+          }
+
+          __m256d random_1_0{};
+          __m256d random_1_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 1, seed, random_1_0, random_1_1);
+          }
+
+          __m256d random_0_0{};
+          __m256d random_0_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 0, seed, random_0_0, random_0_1);
+          }
+          const __m256d xi_2 = _mm256_add_pd(xi_249, xi_260);
+          const __m256d xi_3 = _mm256_add_pd(xi_2, xi_244);
+          const __m256d xi_4 = _mm256_add_pd(_mm256_add_pd(xi_246, xi_251), xi_263);
+          const __m256d xi_5 = _mm256_add_pd(xi_248, xi_265);
+          const __m256d xi_6 = _mm256_add_pd(xi_245, xi_262);
+          const __m256d xi_8 = _mm256_mul_pd(xi_250, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_9 = _mm256_mul_pd(xi_252, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_10 = _mm256_mul_pd(xi_262, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_11 = _mm256_mul_pd(xi_256, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_12 = _mm256_mul_pd(xi_258, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_13 = _mm256_add_pd(_mm256_add_pd(xi_10, xi_11), xi_12);
+          const __m256d xi_14 = _mm256_mul_pd(xi_264, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_15 = _mm256_mul_pd(xi_247, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_16 = _mm256_add_pd(xi_14, xi_15);
+          const __m256d xi_17 = _mm256_mul_pd(xi_255, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_18 = _mm256_mul_pd(xi_248, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_19 = _mm256_add_pd(xi_17, xi_18);
+          const __m256d xi_20 = _mm256_mul_pd(xi_249, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_21 = _mm256_add_pd(xi_10, xi_20);
+          const __m256d xi_22 = _mm256_mul_pd(xi_246, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_23 = _mm256_mul_pd(xi_245, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_24 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_17, xi_22), xi_23), xi_263);
+          const __m256d xi_29 = _mm256_mul_pd(xi_254, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666));
+          const __m256d xi_30 = _mm256_mul_pd(xi_254, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329));
+          const __m256d xi_42 = _mm256_mul_pd(xi_261, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666));
+          const __m256d xi_43 = _mm256_mul_pd(xi_261, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329));
+          const __m256d xi_49 = _mm256_mul_pd(xi_259, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666));
+          const __m256d xi_50 = _mm256_mul_pd(xi_259, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329));
+          const __m256d xi_67 = _mm256_mul_pd(xi_254, _mm256_set_pd(0.25, 0.25, 0.25, 0.25));
+          const __m256d xi_72 = _mm256_mul_pd(xi_254, _mm256_set_pd(xi_71, xi_71, xi_71, xi_71));
+          const __m256d xi_114 = _mm256_mul_pd(xi_253, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_118 = _mm256_mul_pd(xi_263, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_119 = _mm256_add_pd(xi_118, xi_18);
+          const __m256d xi_120 = _mm256_add_pd(_mm256_mul_pd(xi_257, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_8);
+          const __m256d xi_122 = _mm256_mul_pd(xi_260, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_123 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_11, xi_122), xi_15), xi_21);
+          const __m256d xi_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_246, _mm256_set_pd(2.0, 2.0, 2.0, 2.0)), _mm256_mul_pd(xi_248, _mm256_set_pd(2.0, 2.0, 2.0, 2.0))), _mm256_mul_pd(xi_255, _mm256_set_pd(2.0, 2.0, 2.0, 2.0))), _mm256_mul_pd(xi_263, _mm256_set_pd(2.0, 2.0, 2.0, 2.0)));
+          const __m256d xi_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_244, _mm256_set_pd(5.0, 5.0, 5.0, 5.0)), _mm256_mul_pd(xi_258, _mm256_set_pd(5.0, 5.0, 5.0, 5.0))), xi_125);
+          const __m256d xi_128 = _mm256_mul_pd(xi_256, _mm256_set_pd(2.0, 2.0, 2.0, 2.0));
+          const __m256d xi_129 = _mm256_mul_pd(xi_260, _mm256_set_pd(2.0, 2.0, 2.0, 2.0));
+          const __m256d xi_130 = _mm256_add_pd(_mm256_mul_pd(xi_249, _mm256_set_pd(2.0, 2.0, 2.0, 2.0)), _mm256_mul_pd(xi_262, _mm256_set_pd(2.0, 2.0, 2.0, 2.0)));
+          const __m256d xi_132 = _mm256_add_pd(xi_118, xi_248);
+          const __m256d xi_133 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_132, xi_14), xi_22), xi_251), xi_255);
+          const __m256d xi_135 = _mm256_mul_pd(xi_133, _mm256_set_pd(xi_134, xi_134, xi_134, xi_134));
+          const __m256d xi_136 = _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_5_1);
+          const __m256d xi_141 = _mm256_mul_pd(xi_252, _mm256_set_pd(2.0, 2.0, 2.0, 2.0));
+          const __m256d xi_142 = _mm256_mul_pd(xi_247, _mm256_set_pd(2.0, 2.0, 2.0, 2.0));
+          const __m256d xi_143 = _mm256_add_pd(_mm256_mul_pd(xi_250, _mm256_set_pd(2.0, 2.0, 2.0, 2.0)), _mm256_mul_pd(xi_257, _mm256_set_pd(-2.0, -2.0, -2.0, -2.0)));
+          const __m256d xi_144 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_141, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_14), xi_142), xi_143), xi_19), xi_4);
+          const __m256d xi_146 = _mm256_mul_pd(xi_144, _mm256_set_pd(xi_145, xi_145, xi_145, xi_145));
+          const __m256d xi_147 = _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_3_0);
+          const __m256d xi_152 = _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_0_1);
+          const __m256d xi_166 = _mm256_add_pd(xi_122, xi_256);
+          const __m256d xi_167 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_12, xi_166), xi_20), xi_244), xi_262);
+          const __m256d xi_168 = _mm256_mul_pd(xi_167, _mm256_set_pd(xi_134, xi_134, xi_134, xi_134));
+          const __m256d xi_169 = _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_4_1);
+          const __m256d xi_171 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_142, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_13), xi_141), xi_143), xi_3);
+          const __m256d xi_172 = _mm256_mul_pd(xi_171, _mm256_set_pd(xi_145, xi_145, xi_145, xi_145));
+          const __m256d xi_173 = _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_4_0);
+          const __m256d xi_178 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_119, xi_23), xi_246), xi_255), xi_265);
+          const __m256d xi_179 = _mm256_mul_pd(xi_178, _mm256_set_pd(xi_134, xi_134, xi_134, xi_134));
+          const __m256d xi_180 = _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_5_0);
+          const __m256d xi_182 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_128, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_129, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_130), xi_24), xi_5);
+          const __m256d xi_183 = _mm256_mul_pd(xi_182, _mm256_set_pd(xi_145, xi_145, xi_145, xi_145));
+          const __m256d xi_184 = _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_3_1);
+          const __m256d xi_212 = _mm256_mul_pd(xi_182, _mm256_set_pd(xi_211, xi_211, xi_211, xi_211));
+          const __m256d xi_214 = _mm256_mul_pd(xi_178, _mm256_set_pd(xi_213, xi_213, xi_213, xi_213));
+          const __m256d xi_220 = _mm256_mul_pd(xi_144, _mm256_set_pd(xi_211, xi_211, xi_211, xi_211));
+          const __m256d xi_221 = _mm256_mul_pd(xi_133, _mm256_set_pd(xi_213, xi_213, xi_213, xi_213));
+          const __m256d xi_235 = _mm256_mul_pd(xi_167, _mm256_set_pd(xi_213, xi_213, xi_213, xi_213));
+          const __m256d xi_236 = _mm256_mul_pd(xi_171, _mm256_set_pd(xi_211, xi_211, xi_211, xi_211));
+          const __m256d xi_31 = _mm256_mul_pd(xi_30, _mm256_set_pd(rr_0, rr_0, rr_0, rr_0));
+          const __m256d xi_44 = _mm256_mul_pd(xi_43, _mm256_set_pd(rr_0, rr_0, rr_0, rr_0));
+          const __m256d xi_51 = _mm256_mul_pd(xi_50, _mm256_set_pd(rr_0, rr_0, rr_0, rr_0));
+          const __m256d xi_54 = _mm256_mul_pd(xi_261, _mm256_set_pd(xi_53, xi_53, xi_53, xi_53));
+          const __m256d xi_59 = _mm256_mul_pd(xi_254, _mm256_set_pd(xi_53, xi_53, xi_53, xi_53));
+          const __m256d xi_81 = _mm256_mul_pd(xi_259, _mm256_set_pd(xi_53, xi_53, xi_53, xi_53));
+          const __m256d vel0Term = _mm256_add_pd(_mm256_add_pd(xi_247, xi_257), xi_3);
+          const __m256d vel1Term = _mm256_add_pd(xi_252, xi_4);
+          const __m256d vel2Term = _mm256_add_pd(xi_256, xi_5);
+          const __m256d rho = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(vel0Term, vel1Term), vel2Term), xi_250), xi_253), xi_255), xi_258), xi_264), xi_6);
+          const __m256d xi_105 = _mm256_mul_pd(rho, _mm256_set_pd(kT, kT, kT, kT));
+          const __m256d xi_106 = _mm256_sqrt_pd(_mm256_mul_pd(xi_105, _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(omega_even, omega_even, omega_even, omega_even)), _mm256_set_pd(1.0, 1.0, 1.0, 1.0)), _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(omega_even, omega_even, omega_even, omega_even)), _mm256_set_pd(1.0, 1.0, 1.0, 1.0)))), _mm256_set_pd(1.0, 1.0, 1.0, 1.0))));
+          const __m256d xi_107 = _mm256_mul_pd(_mm256_mul_pd(xi_106, _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_6_0)), _mm256_set_pd(3.7416573867739413, 3.7416573867739413, 3.7416573867739413, 3.7416573867739413));
+          const __m256d xi_108 = _mm256_mul_pd(_mm256_mul_pd(xi_106, _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_7_0)), _mm256_set_pd(5.4772255750516612, 5.4772255750516612, 5.4772255750516612, 5.4772255750516612));
+          const __m256d xi_110 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_2_1), _mm256_set_pd(xi_109, xi_109, xi_109, xi_109)), _mm256_sqrt_pd(_mm256_mul_pd(xi_105, _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(omega_bulk, omega_bulk, omega_bulk, omega_bulk)), _mm256_set_pd(1.0, 1.0, 1.0, 1.0)), _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(omega_bulk, omega_bulk, omega_bulk, omega_bulk)), _mm256_set_pd(1.0, 1.0, 1.0, 1.0)))), _mm256_set_pd(1.0, 1.0, 1.0, 1.0)))));
+          const __m256d xi_111 = _mm256_mul_pd(_mm256_mul_pd(xi_106, _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_6_1)), _mm256_set_pd(8.3666002653407556, 8.3666002653407556, 8.3666002653407556, 8.3666002653407556));
+          const __m256d xi_137 = _mm256_sqrt_pd(_mm256_mul_pd(xi_105, _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(omega_odd, omega_odd, omega_odd, omega_odd)), _mm256_set_pd(1.0, 1.0, 1.0, 1.0)), _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(omega_odd, omega_odd, omega_odd, omega_odd)), _mm256_set_pd(1.0, 1.0, 1.0, 1.0)))), _mm256_set_pd(1.0, 1.0, 1.0, 1.0))));
+          const __m256d xi_138 = _mm256_mul_pd(xi_137, _mm256_set_pd(1.4142135623730951, 1.4142135623730951, 1.4142135623730951, 1.4142135623730951));
+          const __m256d xi_139 = _mm256_mul_pd(xi_138, _mm256_set_pd(0.5, 0.5, 0.5, 0.5));
+          const __m256d xi_140 = _mm256_mul_pd(xi_136, xi_139);
+          const __m256d xi_148 = _mm256_mul_pd(xi_137, _mm256_set_pd(xi_109, xi_109, xi_109, xi_109));
+          const __m256d xi_149 = _mm256_mul_pd(xi_148, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666));
+          const __m256d xi_150 = _mm256_mul_pd(xi_147, xi_149);
+          const __m256d xi_151 = _mm256_add_pd(_mm256_mul_pd(xi_146, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_150, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)));
+          const __m256d xi_153 = _mm256_sqrt_pd(_mm256_mul_pd(xi_105, _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)), _mm256_set_pd(1.0, 1.0, 1.0, 1.0)), _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0, -1.0, -1.0, -1.0), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear)), _mm256_set_pd(1.0, 1.0, 1.0, 1.0)))), _mm256_set_pd(1.0, 1.0, 1.0, 1.0))));
+          const __m256d xi_154 = _mm256_mul_pd(xi_153, _mm256_set_pd(0.5, 0.5, 0.5, 0.5));
+          const __m256d xi_155 = _mm256_mul_pd(xi_152, xi_154);
+          const __m256d xi_161 = _mm256_mul_pd(_mm256_mul_pd(xi_153, _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_0_0)), _mm256_set_pd(1.7320508075688772, 1.7320508075688772, 1.7320508075688772, 1.7320508075688772));
+          const __m256d xi_165 = _mm256_add_pd(xi_146, xi_150);
+          const __m256d xi_170 = _mm256_mul_pd(xi_139, xi_169);
+          const __m256d xi_174 = _mm256_mul_pd(xi_149, xi_173);
+          const __m256d xi_175 = _mm256_add_pd(xi_172, xi_174);
+          const __m256d xi_177 = _mm256_add_pd(_mm256_mul_pd(xi_172, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_174, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)));
+          const __m256d xi_181 = _mm256_mul_pd(xi_139, xi_180);
+          const __m256d xi_185 = _mm256_mul_pd(xi_149, xi_184);
+          const __m256d xi_186 = _mm256_add_pd(_mm256_mul_pd(xi_183, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_185, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)));
+          const __m256d xi_188 = _mm256_add_pd(xi_183, xi_185);
+          const __m256d xi_189 = _mm256_mul_pd(_mm256_mul_pd(xi_152, xi_153), _mm256_set_pd(0.25, 0.25, 0.25, 0.25));
+          const __m256d xi_192 = _mm256_mul_pd(xi_107, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329));
+          const __m256d xi_196 = _mm256_mul_pd(xi_154, _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_1_0));
+          const __m256d xi_203 = _mm256_mul_pd(xi_154, _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_2_0));
+          const __m256d xi_207 = _mm256_mul_pd(xi_111, _mm256_set_pd(-0.014285714285714285, -0.014285714285714285, -0.014285714285714285, -0.014285714285714285));
+          const __m256d xi_208 = _mm256_mul_pd(xi_108, _mm256_set_pd(0.050000000000000003, 0.050000000000000003, 0.050000000000000003, 0.050000000000000003));
+          const __m256d xi_215 = _mm256_mul_pd(xi_148, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329));
+          const __m256d xi_216 = _mm256_mul_pd(xi_184, xi_215);
+          const __m256d xi_217 = _mm256_mul_pd(xi_138, _mm256_set_pd(0.25, 0.25, 0.25, 0.25));
+          const __m256d xi_218 = _mm256_mul_pd(xi_180, xi_217);
+          const __m256d xi_219 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_212, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_216, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_214), xi_218);
+          const __m256d xi_222 = _mm256_mul_pd(xi_147, xi_215);
+          const __m256d xi_223 = _mm256_mul_pd(xi_136, xi_217);
+          const __m256d xi_224 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_220, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_222, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_221), xi_223);
+          const __m256d xi_225 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_221, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_223, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_220), xi_222);
+          const __m256d xi_227 = _mm256_mul_pd(xi_189, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_230 = _mm256_mul_pd(xi_111, _mm256_set_pd(0.035714285714285712, 0.035714285714285712, 0.035714285714285712, 0.035714285714285712));
+          const __m256d xi_232 = _mm256_mul_pd(xi_154, _mm256_add_pd(_mm256_set_pd(-0.5, -0.5, -0.5, -0.5), random_1_1));
+          const __m256d xi_237 = _mm256_mul_pd(xi_169, xi_217);
+          const __m256d xi_238 = _mm256_mul_pd(xi_173, xi_215);
+          const __m256d xi_239 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_235, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_237, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_236), xi_238);
+          const __m256d xi_241 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_236, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_238, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_235), xi_237);
+          const __m256d xi_242 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_214, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_218, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_212), xi_216);
+          const __m256d xi_0 = _mm256_div_pd(_mm256_set_pd(1.0, 1.0, 1.0, 1.0), rho);
+          const __m256d xi_7 = _mm256_mul_pd(xi_0, _mm256_set_pd(0.5, 0.5, 0.5, 0.5));
+          const __m256d u_0 = _mm256_add_pd(_mm256_mul_pd(xi_0, _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(vel0Term, xi_13), xi_8), xi_9)), _mm256_mul_pd(xi_261, xi_7));
+          const __m256d xi_25 = _mm256_mul_pd(u_0, xi_261);
+          const __m256d xi_37 = _mm256_mul_pd(xi_25, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666));
+          const __m256d xi_38 = _mm256_mul_pd(xi_25, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329));
+          const __m256d xi_39 = _mm256_mul_pd(xi_38, _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear));
+          const __m256d xi_40 = _mm256_add_pd(_mm256_mul_pd(xi_37, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_39);
+          const __m256d xi_56 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(xi_25, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_set_pd(xi_55, xi_55, xi_55, xi_55)), xi_37);
+          const __m256d xi_57 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_43, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_54), xi_56);
+          const __m256d xi_61 = _mm256_mul_pd(_mm256_mul_pd(xi_25, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_set_pd(xi_60, xi_60, xi_60, xi_60));
+          const __m256d xi_68 = _mm256_mul_pd(u_0, xi_67);
+          const __m256d xi_73 = _mm256_mul_pd(u_0, xi_72);
+          const __m256d xi_77 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_54, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_43), xi_56);
+          const __m256d xi_84 = _mm256_mul_pd(xi_38, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_95 = _mm256_mul_pd(u_0, xi_259);
+          const __m256d xi_96 = _mm256_mul_pd(xi_95, _mm256_set_pd(0.25, 0.25, 0.25, 0.25));
+          const __m256d xi_99 = _mm256_mul_pd(xi_95, _mm256_set_pd(xi_71, xi_71, xi_71, xi_71));
+          const __m256d xi_113 = _mm256_mul_pd(rho, _mm256_mul_pd(u_0, u_0));
+          const __m256d u_1 = _mm256_add_pd(_mm256_mul_pd(xi_0, _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(vel1Term, xi_16), xi_19), xi_257), xi_8)), _mm256_mul_pd(xi_254, xi_7));
+          const __m256d xi_26 = _mm256_mul_pd(u_1, xi_254);
+          const __m256d xi_32 = _mm256_mul_pd(xi_26, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666));
+          const __m256d xi_45 = _mm256_mul_pd(xi_26, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329));
+          const __m256d xi_46 = _mm256_mul_pd(xi_45, _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear));
+          const __m256d xi_47 = _mm256_add_pd(_mm256_mul_pd(xi_32, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_46);
+          const __m256d xi_62 = _mm256_mul_pd(_mm256_mul_pd(xi_26, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_set_pd(xi_60, xi_60, xi_60, xi_60));
+          const __m256d xi_69 = _mm256_mul_pd(u_1, _mm256_set_pd(0.25, 0.25, 0.25, 0.25));
+          const __m256d xi_70 = _mm256_mul_pd(xi_261, xi_69);
+          const __m256d xi_74 = _mm256_mul_pd(u_1, _mm256_set_pd(xi_71, xi_71, xi_71, xi_71));
+          const __m256d xi_75 = _mm256_mul_pd(xi_261, xi_74);
+          const __m256d xi_76 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_68, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_70, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_73), xi_75);
+          const __m256d xi_78 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_73, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_75, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_68), xi_70);
+          const __m256d xi_86 = _mm256_mul_pd(xi_259, xi_69);
+          const __m256d xi_88 = _mm256_mul_pd(xi_259, xi_74);
+          const __m256d xi_93 = _mm256_mul_pd(xi_45, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_112 = _mm256_mul_pd(rho, _mm256_mul_pd(u_1, u_1));
+          const __m256d xi_121 = _mm256_add_pd(_mm256_add_pd(xi_112, xi_120), xi_9);
+          const __m256d xi_197 = _mm256_mul_pd(rho, u_1);
+          const __m256d xi_199 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(u_0, xi_197), xi_120), xi_247), xi_252), _mm256_set_pd(xi_198, xi_198, xi_198, xi_198));
+          const __m256d xi_200 = _mm256_add_pd(_mm256_mul_pd(xi_196, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_199, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)));
+          const __m256d xi_201 = _mm256_add_pd(xi_196, xi_199);
+          const __m256d u_2 = _mm256_add_pd(_mm256_mul_pd(xi_0, _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(vel2Term, xi_21), xi_24), xi_260)), _mm256_mul_pd(xi_259, xi_7));
+          const __m256d xi_27 = _mm256_mul_pd(u_2, xi_259);
+          const __m256d xi_33 = _mm256_mul_pd(xi_27, _mm256_set_pd(0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666));
+          const __m256d xi_34 = _mm256_mul_pd(xi_27, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329));
+          const __m256d xi_35 = _mm256_mul_pd(xi_34, _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear));
+          const __m256d xi_36 = _mm256_add_pd(_mm256_mul_pd(xi_33, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_35);
+          const __m256d xi_41 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_26, _mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331)), _mm256_mul_pd(_mm256_mul_pd(xi_32, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), xi_36), xi_40);
+          const __m256d xi_48 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_25, _mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331)), _mm256_mul_pd(_mm256_mul_pd(xi_37, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), xi_36), xi_47);
+          const __m256d xi_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_27, _mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331)), _mm256_mul_pd(_mm256_mul_pd(xi_33, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear))), xi_40), xi_47);
+          const __m256d xi_58 = _mm256_mul_pd(xi_34, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_63 = _mm256_mul_pd(_mm256_mul_pd(xi_27, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_set_pd(xi_60, xi_60, xi_60, xi_60));
+          const __m256d xi_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(xi_26, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_set_pd(xi_55, xi_55, xi_55, xi_55)), xi_32), xi_61), xi_62), xi_63);
+          const __m256d xi_65 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_59, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_30), xi_64);
+          const __m256d xi_66 = _mm256_add_pd(_mm256_add_pd(xi_35, xi_58), xi_65);
+          const __m256d xi_79 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_30, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_59), xi_64);
+          const __m256d xi_80 = _mm256_add_pd(_mm256_add_pd(xi_35, xi_58), xi_79);
+          const __m256d xi_82 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(xi_27, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_set_pd(xi_55, xi_55, xi_55, xi_55)), xi_33);
+          const __m256d xi_83 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_81, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_50), xi_82);
+          const __m256d xi_85 = _mm256_add_pd(_mm256_add_pd(xi_39, xi_65), xi_84);
+          const __m256d xi_87 = _mm256_mul_pd(u_2, xi_67);
+          const __m256d xi_89 = _mm256_mul_pd(u_2, xi_72);
+          const __m256d xi_90 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_88, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_89, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_86), xi_87);
+          const __m256d xi_91 = _mm256_add_pd(_mm256_add_pd(xi_39, xi_79), xi_84);
+          const __m256d xi_92 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_86, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_87, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_88), xi_89);
+          const __m256d xi_94 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_46, xi_61), xi_62), xi_63), xi_83), xi_93);
+          const __m256d xi_97 = _mm256_mul_pd(u_2, xi_261);
+          const __m256d xi_98 = _mm256_mul_pd(xi_97, _mm256_set_pd(0.25, 0.25, 0.25, 0.25));
+          const __m256d xi_100 = _mm256_mul_pd(xi_97, _mm256_set_pd(xi_71, xi_71, xi_71, xi_71));
+          const __m256d xi_101 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_96, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_98, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_100), xi_99);
+          const __m256d xi_102 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_100, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_99, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_96), xi_98);
+          const __m256d xi_103 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_50, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_81), xi_82);
+          const __m256d xi_104 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_103, xi_46), xi_61), xi_62), xi_63), xi_93);
+          const __m256d xi_115 = _mm256_mul_pd(rho, _mm256_mul_pd(u_2, u_2));
+          const __m256d xi_116 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_245, _mm256_set_pd(3.0, 3.0, 3.0, 3.0)), _mm256_mul_pd(xi_265, _mm256_set_pd(3.0, 3.0, 3.0, 3.0))), _mm256_mul_pd(xi_115, _mm256_set_pd(0.66666666666666663, 0.66666666666666663, 0.66666666666666663, 0.66666666666666663))), xi_114);
+          const __m256d xi_117 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_251, _mm256_set_pd(3.0, 3.0, 3.0, 3.0)), _mm256_mul_pd(xi_264, _mm256_set_pd(3.0, 3.0, 3.0, 3.0))), _mm256_mul_pd(xi_112, _mm256_set_pd(0.66666666666666663, 0.66666666666666663, 0.66666666666666663, 0.66666666666666663))), _mm256_mul_pd(xi_113, _mm256_set_pd(1.6666666666666667, 1.6666666666666667, 1.6666666666666667, 1.6666666666666667))), _mm256_mul_pd(xi_246, _mm256_set_pd(-3.0, -3.0, -3.0, -3.0))), _mm256_mul_pd(xi_248, _mm256_set_pd(-3.0, -3.0, -3.0, -3.0))), _mm256_mul_pd(xi_255, _mm256_set_pd(-3.0, -3.0, -3.0, -3.0))), _mm256_mul_pd(xi_263, _mm256_set_pd(-3.0, -3.0, -3.0, -3.0))), xi_116), _mm256_set_pd(omega_even, omega_even, omega_even, omega_even));
+          const __m256d xi_124 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_113, xi_115), xi_119), xi_121), xi_123), xi_17), xi_22), xi_253), _mm256_set_pd(omega_bulk, omega_bulk, omega_bulk, omega_bulk));
+          const __m256d xi_127 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_112, _mm256_set_pd(2.3333333333333335, 2.3333333333333335, 2.3333333333333335, 2.3333333333333335)), _mm256_mul_pd(xi_251, _mm256_set_pd(-2.0, -2.0, -2.0, -2.0))), _mm256_mul_pd(xi_264, _mm256_set_pd(-2.0, -2.0, -2.0, -2.0))), _mm256_mul_pd(xi_249, _mm256_set_pd(-5.0, -5.0, -5.0, -5.0))), _mm256_mul_pd(xi_256, _mm256_set_pd(-5.0, -5.0, -5.0, -5.0))), _mm256_mul_pd(xi_260, _mm256_set_pd(-5.0, -5.0, -5.0, -5.0))), _mm256_mul_pd(xi_262, _mm256_set_pd(-5.0, -5.0, -5.0, -5.0))), xi_116), xi_126), _mm256_set_pd(omega_even, omega_even, omega_even, omega_even));
+          const __m256d xi_131 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_115, _mm256_set_pd(3.0, 3.0, 3.0, 3.0)), _mm256_mul_pd(xi_251, _mm256_set_pd(5.0, 5.0, 5.0, 5.0))), _mm256_mul_pd(xi_264, _mm256_set_pd(5.0, 5.0, 5.0, 5.0))), _mm256_mul_pd(xi_245, _mm256_set_pd(-4.0, -4.0, -4.0, -4.0))), _mm256_mul_pd(xi_265, _mm256_set_pd(-4.0, -4.0, -4.0, -4.0))), _mm256_mul_pd(xi_247, _mm256_set_pd(-7.0, -7.0, -7.0, -7.0))), _mm256_mul_pd(xi_250, _mm256_set_pd(-7.0, -7.0, -7.0, -7.0))), _mm256_mul_pd(xi_252, _mm256_set_pd(-7.0, -7.0, -7.0, -7.0))), _mm256_mul_pd(xi_257, _mm256_set_pd(-7.0, -7.0, -7.0, -7.0))), xi_114), xi_126), xi_128), xi_129), xi_130), _mm256_set_pd(omega_even, omega_even, omega_even, omega_even));
+          const __m256d xi_156 = _mm256_add_pd(_mm256_mul_pd(xi_115, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_265);
+          const __m256d xi_157 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_251, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_121), xi_156), xi_16), xi_2), xi_256), xi_6), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear));
+          const __m256d xi_158 = _mm256_mul_pd(xi_157, _mm256_set_pd(0.125, 0.125, 0.125, 0.125));
+          const __m256d xi_159 = _mm256_add_pd(_mm256_mul_pd(xi_131, _mm256_set_pd(-0.01984126984126984, -0.01984126984126984, -0.01984126984126984, -0.01984126984126984)), _mm256_mul_pd(xi_107, _mm256_set_pd(-0.11904761904761904, -0.11904761904761904, -0.11904761904761904, -0.11904761904761904)));
+          const __m256d xi_160 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_112, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_113, _mm256_set_pd(2.0, 2.0, 2.0, 2.0))), _mm256_mul_pd(xi_244, _mm256_set_pd(-2.0, -2.0, -2.0, -2.0))), _mm256_mul_pd(xi_258, _mm256_set_pd(-2.0, -2.0, -2.0, -2.0))), xi_120), xi_123), xi_125), xi_156), xi_245), xi_251), xi_264), xi_9), _mm256_set_pd(omega_shear, omega_shear, omega_shear, omega_shear));
+          const __m256d xi_162 = _mm256_add_pd(_mm256_mul_pd(xi_160, _mm256_set_pd(-0.041666666666666664, -0.041666666666666664, -0.041666666666666664, -0.041666666666666664)), _mm256_mul_pd(xi_161, _mm256_set_pd(-0.16666666666666666, -0.16666666666666666, -0.16666666666666666, -0.16666666666666666)));
+          const __m256d xi_163 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_117, _mm256_set_pd(-0.050000000000000003, -0.050000000000000003, -0.050000000000000003, -0.050000000000000003)), _mm256_mul_pd(xi_108, _mm256_set_pd(-0.10000000000000001, -0.10000000000000001, -0.10000000000000001, -0.10000000000000001))), xi_162);
+          const __m256d xi_164 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_127, _mm256_set_pd(0.014285714285714285, 0.014285714285714285, 0.014285714285714285, 0.014285714285714285)), _mm256_mul_pd(xi_111, _mm256_set_pd(0.028571428571428571, 0.028571428571428571, 0.028571428571428571, 0.028571428571428571))), xi_155), xi_158), xi_159), xi_163);
+          const __m256d xi_176 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_160, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)), _mm256_mul_pd(xi_161, _mm256_set_pd(0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331))), _mm256_mul_pd(xi_127, _mm256_set_pd(-0.035714285714285712, -0.035714285714285712, -0.035714285714285712, -0.035714285714285712))), _mm256_mul_pd(xi_111, _mm256_set_pd(-0.071428571428571425, -0.071428571428571425, -0.071428571428571425, -0.071428571428571425))), xi_159);
+          const __m256d xi_187 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_155, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_158, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_131, _mm256_set_pd(0.015873015873015872, 0.015873015873015872, 0.015873015873015872, 0.015873015873015872))), _mm256_mul_pd(xi_107, _mm256_set_pd(0.095238095238095233, 0.095238095238095233, 0.095238095238095233, 0.095238095238095233))), _mm256_mul_pd(xi_127, _mm256_set_pd(-0.021428571428571429, -0.021428571428571429, -0.021428571428571429, -0.021428571428571429))), _mm256_mul_pd(xi_111, _mm256_set_pd(-0.042857142857142858, -0.042857142857142858, -0.042857142857142858, -0.042857142857142858))), xi_163);
+          const __m256d xi_190 = _mm256_mul_pd(xi_157, _mm256_set_pd(0.0625, 0.0625, 0.0625, 0.0625));
+          const __m256d xi_191 = _mm256_mul_pd(xi_131, _mm256_set_pd(0.013888888888888888, 0.013888888888888888, 0.013888888888888888, 0.013888888888888888));
+          const __m256d xi_193 = _mm256_add_pd(_mm256_mul_pd(xi_124, _mm256_set_pd(0.041666666666666664, 0.041666666666666664, 0.041666666666666664, 0.041666666666666664)), _mm256_mul_pd(xi_110, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329)));
+          const __m256d xi_194 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_160, _mm256_set_pd(0.020833333333333332, 0.020833333333333332, 0.020833333333333332, 0.020833333333333332)), _mm256_mul_pd(xi_161, _mm256_set_pd(0.083333333333333329, 0.083333333333333329, 0.083333333333333329, 0.083333333333333329))), xi_193);
+          const __m256d xi_195 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_165, xi_189), xi_190), xi_191), xi_192), xi_194);
+          const __m256d xi_202 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_151, xi_189), xi_190), xi_191), xi_192), xi_194);
+          const __m256d xi_204 = _mm256_mul_pd(xi_127, _mm256_set_pd(-0.0071428571428571426, -0.0071428571428571426, -0.0071428571428571426, -0.0071428571428571426));
+          const __m256d xi_205 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(u_2, xi_197), xi_132), xi_17), xi_246), _mm256_set_pd(xi_198, xi_198, xi_198, xi_198));
+          const __m256d xi_206 = _mm256_mul_pd(xi_117, _mm256_set_pd(0.025000000000000001, 0.025000000000000001, 0.025000000000000001, 0.025000000000000001));
+          const __m256d xi_209 = _mm256_add_pd(_mm256_mul_pd(xi_131, _mm256_set_pd(-0.003968253968253968, -0.003968253968253968, -0.003968253968253968, -0.003968253968253968)), _mm256_mul_pd(xi_107, _mm256_set_pd(-0.023809523809523808, -0.023809523809523808, -0.023809523809523808, -0.023809523809523808)));
+          const __m256d xi_210 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_162, xi_193), xi_203), xi_204), xi_205), xi_206), xi_207), xi_208), xi_209);
+          const __m256d xi_226 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_203, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_205, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), xi_162), xi_193), xi_204), xi_206), xi_207), xi_208), xi_209);
+          const __m256d xi_228 = _mm256_mul_pd(xi_190, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0));
+          const __m256d xi_229 = _mm256_mul_pd(xi_127, _mm256_set_pd(0.017857142857142856, 0.017857142857142856, 0.017857142857142856, 0.017857142857142856));
+          const __m256d xi_231 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_188, xi_194), xi_209), xi_227), xi_228), xi_229), xi_230);
+          const __m256d xi_233 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho, u_0), u_2), xi_10), xi_166), xi_249), _mm256_set_pd(xi_198, xi_198, xi_198, xi_198));
+          const __m256d xi_234 = _mm256_add_pd(_mm256_mul_pd(xi_232, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_233, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)));
+          const __m256d xi_240 = _mm256_add_pd(xi_232, xi_233);
+          const __m256d xi_243 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(xi_186, xi_194), xi_209), xi_227), xi_228), xi_229), xi_230);
+          const __m256d forceTerm_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_25, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_26, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_27, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(xi_25, _mm256_set_pd(xi_28, xi_28, xi_28, xi_28))), _mm256_mul_pd(xi_26, _mm256_set_pd(xi_28, xi_28, xi_28, xi_28))), _mm256_mul_pd(xi_27, _mm256_set_pd(xi_28, xi_28, xi_28, xi_28)));
+          const __m256d forceTerm_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_31, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_29), xi_41);
+          const __m256d forceTerm_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_29, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_31), xi_41);
+          const __m256d forceTerm_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_42, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_44), xi_48);
+          const __m256d forceTerm_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_44, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_42), xi_48);
+          const __m256d forceTerm_5 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_51, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_49), xi_52);
+          const __m256d forceTerm_6 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_49, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), xi_51), xi_52);
+          const __m256d forceTerm_7 = _mm256_add_pd(_mm256_add_pd(xi_57, xi_66), xi_76);
+          const __m256d forceTerm_8 = _mm256_add_pd(_mm256_add_pd(xi_66, xi_77), xi_78);
+          const __m256d forceTerm_9 = _mm256_add_pd(_mm256_add_pd(xi_57, xi_78), xi_80);
+          const __m256d forceTerm_10 = _mm256_add_pd(_mm256_add_pd(xi_76, xi_77), xi_80);
+          const __m256d forceTerm_11 = _mm256_add_pd(_mm256_add_pd(xi_83, xi_85), xi_90);
+          const __m256d forceTerm_12 = _mm256_add_pd(_mm256_add_pd(xi_83, xi_91), xi_92);
+          const __m256d forceTerm_13 = _mm256_add_pd(_mm256_add_pd(xi_101, xi_57), xi_94);
+          const __m256d forceTerm_14 = _mm256_add_pd(_mm256_add_pd(xi_102, xi_77), xi_94);
+          const __m256d forceTerm_15 = _mm256_add_pd(_mm256_add_pd(xi_103, xi_85), xi_92);
+          const __m256d forceTerm_16 = _mm256_add_pd(_mm256_add_pd(xi_103, xi_90), xi_91);
+          const __m256d forceTerm_17 = _mm256_add_pd(_mm256_add_pd(xi_102, xi_104), xi_57);
+          const __m256d forceTerm_18 = _mm256_add_pd(_mm256_add_pd(xi_101, xi_104), xi_77);
+          _mm256_store_pd(&_data_pdfs_20_30_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_110, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_131, _mm256_set_pd(0.023809523809523808, 0.023809523809523808, 0.023809523809523808, 0.023809523809523808))), _mm256_mul_pd(xi_107, _mm256_set_pd(0.14285714285714285, 0.14285714285714285, 0.14285714285714285, 0.14285714285714285))), _mm256_mul_pd(xi_127, _mm256_set_pd(0.042857142857142858, 0.042857142857142858, 0.042857142857142858, 0.042857142857142858))), _mm256_mul_pd(xi_111, _mm256_set_pd(0.085714285714285715, 0.085714285714285715, 0.085714285714285715, 0.085714285714285715))), _mm256_mul_pd(xi_117, _mm256_set_pd(0.10000000000000001, 0.10000000000000001, 0.10000000000000001, 0.10000000000000001))), _mm256_mul_pd(xi_108, _mm256_set_pd(0.20000000000000001, 0.20000000000000001, 0.20000000000000001, 0.20000000000000001))), _mm256_mul_pd(xi_124, _mm256_set_pd(-0.5, -0.5, -0.5, -0.5))), forceTerm_0), xi_253));
+          _mm256_store_pd(&_data_pdfs_20_31_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_135, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_140, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), forceTerm_1), xi_151), xi_164), xi_251));
+          _mm256_store_pd(&_data_pdfs_20_32_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_2, xi_135), xi_140), xi_164), xi_165), xi_264));
+          _mm256_store_pd(&_data_pdfs_20_33_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_3, xi_168), xi_170), xi_175), xi_176), xi_258));
+          _mm256_store_pd(&_data_pdfs_20_34_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_168, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_170, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), forceTerm_4), xi_176), xi_177), xi_244));
+          _mm256_store_pd(&_data_pdfs_20_35_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(xi_179, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(xi_181, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), forceTerm_5), xi_186), xi_187), xi_265));
+          _mm256_store_pd(&_data_pdfs_20_36_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_6, xi_179), xi_181), xi_187), xi_188), xi_245));
+          _mm256_store_pd(&_data_pdfs_20_37_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_7, xi_177), xi_195), xi_200), xi_252));
+          _mm256_store_pd(&_data_pdfs_20_38_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_8, xi_175), xi_195), xi_201), xi_257));
+          _mm256_store_pd(&_data_pdfs_20_39_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_9, xi_177), xi_201), xi_202), xi_250));
+          _mm256_store_pd(&_data_pdfs_20_310_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_10, xi_175), xi_200), xi_202), xi_247));
+          _mm256_store_pd(&_data_pdfs_20_311_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_11, xi_210), xi_219), xi_224), xi_263));
+          _mm256_store_pd(&_data_pdfs_20_312_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_12, xi_219), xi_225), xi_226), xi_248));
+          _mm256_store_pd(&_data_pdfs_20_313_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_13, xi_231), xi_234), xi_239), xi_256));
+          _mm256_store_pd(&_data_pdfs_20_314_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_14, xi_231), xi_240), xi_241), xi_260));
+          _mm256_store_pd(&_data_pdfs_20_315_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_15, xi_224), xi_226), xi_242), xi_246));
+          _mm256_store_pd(&_data_pdfs_20_316_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_16, xi_210), xi_225), xi_242), xi_255));
+          _mm256_store_pd(&_data_pdfs_20_317_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_17, xi_239), xi_240), xi_243), xi_262));
+          _mm256_store_pd(&_data_pdfs_20_318_10[ctr_0], _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(forceTerm_18, xi_234), xi_241), xi_243), xi_249));
+        }
+        for (int64_t ctr_0 = (int64_t)((_size_force_0) / (4)) * (4); ctr_0 < _size_force_0; ctr_0 += 1) {
+          const double xi_244 = _data_pdfs_20_34_10[ctr_0];
+          const double xi_245 = _data_pdfs_20_36_10[ctr_0];
+          const double xi_246 = _data_pdfs_20_315_10[ctr_0];
+          const double xi_247 = _data_pdfs_20_310_10[ctr_0];
+          const double xi_248 = _data_pdfs_20_312_10[ctr_0];
+          const double xi_249 = _data_pdfs_20_318_10[ctr_0];
+          const double xi_250 = _data_pdfs_20_39_10[ctr_0];
+          const double xi_251 = _data_pdfs_20_31_10[ctr_0];
+          const double xi_252 = _data_pdfs_20_37_10[ctr_0];
+          const double xi_253 = _data_pdfs_20_30_10[ctr_0];
+          const double xi_254 = _data_force_20_31_10[ctr_0];
+          const double xi_255 = _data_pdfs_20_316_10[ctr_0];
+          const double xi_256 = _data_pdfs_20_313_10[ctr_0];
+          const double xi_257 = _data_pdfs_20_38_10[ctr_0];
+          const double xi_258 = _data_pdfs_20_33_10[ctr_0];
+          const double xi_259 = _data_force_20_32_10[ctr_0];
+          const double xi_260 = _data_pdfs_20_314_10[ctr_0];
+          const double xi_261 = _data_force_20_30_10[ctr_0];
+          const double xi_262 = _data_pdfs_20_317_10[ctr_0];
+          const double xi_263 = _data_pdfs_20_311_10[ctr_0];
+          const double xi_264 = _data_pdfs_20_32_10[ctr_0];
+          const double xi_265 = _data_pdfs_20_35_10[ctr_0];
+
+          double random_7_0{};
+          double random_7_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 7, seed, random_7_0, random_7_1);
+          }
+
+          double random_6_0{};
+          double random_6_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 6, seed, random_6_0, random_6_1);
+          }
+
+          double random_5_0{};
+          double random_5_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 5, seed, random_5_0, random_5_1);
+          }
+
+          double random_4_0{};
+          double random_4_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 4, seed, random_4_0, random_4_1);
+          }
+
+          double random_3_0{};
+          double random_3_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 3, seed, random_3_0, random_3_1);
+          }
+
+          double random_2_0{};
+          double random_2_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 2, seed, random_2_0, random_2_1);
+          }
+
+          double random_1_0{};
+          double random_1_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 1, seed, random_1_0, random_1_1);
+          }
+
+          double random_0_0{};
+          double random_0_1{};
+          if (kT > 0.) {
+            philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 0, seed, random_0_0, random_0_1);
+          }
+          const double xi_2 = xi_249 + xi_260;
+          const double xi_3 = xi_2 + xi_244;
+          const double xi_4 = xi_246 + xi_251 + xi_263;
+          const double xi_5 = xi_248 + xi_265;
+          const double xi_6 = xi_245 + xi_262;
+          const double xi_8 = xi_250 * -1.0;
+          const double xi_9 = xi_252 * -1.0;
+          const double xi_10 = xi_262 * -1.0;
+          const double xi_11 = xi_256 * -1.0;
+          const double xi_12 = xi_258 * -1.0;
+          const double xi_13 = xi_10 + xi_11 + xi_12;
+          const double xi_14 = xi_264 * -1.0;
+          const double xi_15 = xi_247 * -1.0;
+          const double xi_16 = xi_14 + xi_15;
+          const double xi_17 = xi_255 * -1.0;
+          const double xi_18 = xi_248 * -1.0;
+          const double xi_19 = xi_17 + xi_18;
+          const double xi_20 = xi_249 * -1.0;
+          const double xi_21 = xi_10 + xi_20;
+          const double xi_22 = xi_246 * -1.0;
+          const double xi_23 = xi_245 * -1.0;
+          const double xi_24 = xi_17 + xi_22 + xi_23 + xi_263;
+          const double xi_29 = xi_254 * 0.16666666666666666;
+          const double xi_30 = xi_254 * 0.083333333333333329;
+          const double xi_42 = xi_261 * 0.16666666666666666;
+          const double xi_43 = xi_261 * 0.083333333333333329;
+          const double xi_49 = xi_259 * 0.16666666666666666;
+          const double xi_50 = xi_259 * 0.083333333333333329;
+          const double xi_67 = xi_254 * 0.25;
+          const double xi_72 = xi_254 * xi_71;
+          const double xi_114 = xi_253 * -1.0;
+          const double xi_118 = xi_263 * -1.0;
+          const double xi_119 = xi_118 + xi_18;
+          const double xi_120 = xi_257 * -1.0 + xi_8;
+          const double xi_122 = xi_260 * -1.0;
+          const double xi_123 = xi_11 + xi_122 + xi_15 + xi_21;
+          const double xi_125 = xi_246 * 2.0 + xi_248 * 2.0 + xi_255 * 2.0 + xi_263 * 2.0;
+          const double xi_126 = xi_125 + xi_244 * 5.0 + xi_258 * 5.0;
+          const double xi_128 = xi_256 * 2.0;
+          const double xi_129 = xi_260 * 2.0;
+          const double xi_130 = xi_249 * 2.0 + xi_262 * 2.0;
+          const double xi_132 = xi_118 + xi_248;
+          const double xi_133 = xi_132 + xi_14 + xi_22 + xi_251 + xi_255;
+          const double xi_135 = xi_133 * xi_134;
+          const double xi_136 = random_5_1 - 0.5;
+          const double xi_141 = xi_252 * 2.0;
+          const double xi_142 = xi_247 * 2.0;
+          const double xi_143 = xi_250 * 2.0 + xi_257 * -2.0;
+          const double xi_144 = xi_14 + xi_141 * -1.0 + xi_142 + xi_143 + xi_19 + xi_4;
+          const double xi_146 = xi_144 * xi_145;
+          const double xi_147 = random_3_0 - 0.5;
+          const double xi_152 = random_0_1 - 0.5;
+          const double xi_166 = xi_122 + xi_256;
+          const double xi_167 = xi_12 + xi_166 + xi_20 + xi_244 + xi_262;
+          const double xi_168 = xi_134 * xi_167;
+          const double xi_169 = random_4_1 - 0.5;
+          const double xi_171 = xi_13 + xi_141 + xi_142 * -1.0 + xi_143 + xi_3;
+          const double xi_172 = xi_145 * xi_171;
+          const double xi_173 = random_4_0 - 0.5;
+          const double xi_178 = xi_119 + xi_23 + xi_246 + xi_255 + xi_265;
+          const double xi_179 = xi_134 * xi_178;
+          const double xi_180 = random_5_0 - 0.5;
+          const double xi_182 = xi_128 * -1.0 + xi_129 * -1.0 + xi_130 + xi_24 + xi_5;
+          const double xi_183 = xi_145 * xi_182;
+          const double xi_184 = random_3_1 - 0.5;
+          const double xi_212 = xi_182 * xi_211;
+          const double xi_214 = xi_178 * xi_213;
+          const double xi_220 = xi_144 * xi_211;
+          const double xi_221 = xi_133 * xi_213;
+          const double xi_235 = xi_167 * xi_213;
+          const double xi_236 = xi_171 * xi_211;
+          const double xi_31 = rr_0 * xi_30;
+          const double xi_44 = rr_0 * xi_43;
+          const double xi_51 = rr_0 * xi_50;
+          const double xi_54 = xi_261 * xi_53;
+          const double xi_59 = xi_254 * xi_53;
+          const double xi_81 = xi_259 * xi_53;
+          const double vel0Term = xi_247 + xi_257 + xi_3;
+          const double vel1Term = xi_252 + xi_4;
+          const double vel2Term = xi_256 + xi_5;
+          const double rho = vel0Term + vel1Term + vel2Term + xi_250 + xi_253 + xi_255 + xi_258 + xi_264 + xi_6;
+          const double xi_105 = kT * rho;
+          const double xi_106 = pow(xi_105 * (-1.0 * (omega_even * -1.0 + 1.0) * (omega_even * -1.0 + 1.0) + 1.0), 0.5);
+          const double xi_107 = xi_106 * (random_6_0 - 0.5) * 3.7416573867739413;
+          const double xi_108 = xi_106 * (random_7_0 - 0.5) * 5.4772255750516612;
+          const double xi_110 = xi_109 * (random_2_1 - 0.5) * pow(xi_105 * (-1.0 * (omega_bulk * -1.0 + 1.0) * (omega_bulk * -1.0 + 1.0) + 1.0), 0.5);
+          const double xi_111 = xi_106 * (random_6_1 - 0.5) * 8.3666002653407556;
+          const double xi_137 = pow(xi_105 * (-1.0 * (omega_odd * -1.0 + 1.0) * (omega_odd * -1.0 + 1.0) + 1.0), 0.5);
+          const double xi_138 = xi_137 * 1.4142135623730951;
+          const double xi_139 = xi_138 * 0.5;
+          const double xi_140 = xi_136 * xi_139;
+          const double xi_148 = xi_109 * xi_137;
+          const double xi_149 = xi_148 * 0.16666666666666666;
+          const double xi_150 = xi_147 * xi_149;
+          const double xi_151 = xi_146 * -1.0 + xi_150 * -1.0;
+          const double xi_153 = pow(xi_105 * (-1.0 * (omega_shear * -1.0 + 1.0) * (omega_shear * -1.0 + 1.0) + 1.0), 0.5);
+          const double xi_154 = xi_153 * 0.5;
+          const double xi_155 = xi_152 * xi_154;
+          const double xi_161 = xi_153 * (random_0_0 - 0.5) * 1.7320508075688772;
+          const double xi_165 = xi_146 + xi_150;
+          const double xi_170 = xi_139 * xi_169;
+          const double xi_174 = xi_149 * xi_173;
+          const double xi_175 = xi_172 + xi_174;
+          const double xi_177 = xi_172 * -1.0 + xi_174 * -1.0;
+          const double xi_181 = xi_139 * xi_180;
+          const double xi_185 = xi_149 * xi_184;
+          const double xi_186 = xi_183 * -1.0 + xi_185 * -1.0;
+          const double xi_188 = xi_183 + xi_185;
+          const double xi_189 = xi_152 * xi_153 * 0.25;
+          const double xi_192 = xi_107 * 0.083333333333333329;
+          const double xi_196 = xi_154 * (random_1_0 - 0.5);
+          const double xi_203 = xi_154 * (random_2_0 - 0.5);
+          const double xi_207 = xi_111 * -0.014285714285714285;
+          const double xi_208 = xi_108 * 0.050000000000000003;
+          const double xi_215 = xi_148 * 0.083333333333333329;
+          const double xi_216 = xi_184 * xi_215;
+          const double xi_217 = xi_138 * 0.25;
+          const double xi_218 = xi_180 * xi_217;
+          const double xi_219 = xi_212 * -1.0 + xi_214 + xi_216 * -1.0 + xi_218;
+          const double xi_222 = xi_147 * xi_215;
+          const double xi_223 = xi_136 * xi_217;
+          const double xi_224 = xi_220 * -1.0 + xi_221 + xi_222 * -1.0 + xi_223;
+          const double xi_225 = xi_220 + xi_221 * -1.0 + xi_222 + xi_223 * -1.0;
+          const double xi_227 = xi_189 * -1.0;
+          const double xi_230 = xi_111 * 0.035714285714285712;
+          const double xi_232 = xi_154 * (random_1_1 - 0.5);
+          const double xi_237 = xi_169 * xi_217;
+          const double xi_238 = xi_173 * xi_215;
+          const double xi_239 = xi_235 * -1.0 + xi_236 + xi_237 * -1.0 + xi_238;
+          const double xi_241 = xi_235 + xi_236 * -1.0 + xi_237 + xi_238 * -1.0;
+          const double xi_242 = xi_212 + xi_214 * -1.0 + xi_216 + xi_218 * -1.0;
+          const double xi_0 = ((1.0) / (rho));
+          const double xi_7 = xi_0 * 0.5;
+          const double u_0 = xi_0 * (vel0Term + xi_13 + xi_8 + xi_9) + xi_261 * xi_7;
+          const double xi_25 = u_0 * xi_261;
+          const double xi_37 = xi_25 * 0.16666666666666666;
+          const double xi_38 = xi_25 * 0.083333333333333329;
+          const double xi_39 = omega_shear * xi_38;
+          const double xi_40 = xi_37 * -1.0 + xi_39;
+          const double xi_56 = xi_25 * xi_55 * -1.0 + xi_37;
+          const double xi_57 = xi_43 * -1.0 + xi_54 + xi_56;
+          const double xi_61 = xi_25 * xi_60 * -1.0;
+          const double xi_68 = u_0 * xi_67;
+          const double xi_73 = u_0 * xi_72;
+          const double xi_77 = xi_43 + xi_54 * -1.0 + xi_56;
+          const double xi_84 = xi_38 * -1.0;
+          const double xi_95 = u_0 * xi_259;
+          const double xi_96 = xi_95 * 0.25;
+          const double xi_99 = xi_71 * xi_95;
+          const double xi_113 = rho * u_0 * u_0;
+          const double u_1 = xi_0 * (vel1Term + xi_16 + xi_19 + xi_257 + xi_8) + xi_254 * xi_7;
+          const double xi_26 = u_1 * xi_254;
+          const double xi_32 = xi_26 * 0.16666666666666666;
+          const double xi_45 = xi_26 * 0.083333333333333329;
+          const double xi_46 = omega_shear * xi_45;
+          const double xi_47 = xi_32 * -1.0 + xi_46;
+          const double xi_62 = xi_26 * xi_60 * -1.0;
+          const double xi_69 = u_1 * 0.25;
+          const double xi_70 = xi_261 * xi_69;
+          const double xi_74 = u_1 * xi_71;
+          const double xi_75 = xi_261 * xi_74;
+          const double xi_76 = xi_68 * -1.0 + xi_70 * -1.0 + xi_73 + xi_75;
+          const double xi_78 = xi_68 + xi_70 + xi_73 * -1.0 + xi_75 * -1.0;
+          const double xi_86 = xi_259 * xi_69;
+          const double xi_88 = xi_259 * xi_74;
+          const double xi_93 = xi_45 * -1.0;
+          const double xi_112 = rho * u_1 * u_1;
+          const double xi_121 = xi_112 + xi_120 + xi_9;
+          const double xi_197 = rho * u_1;
+          const double xi_199 = xi_198 * (u_0 * xi_197 + xi_120 + xi_247 + xi_252);
+          const double xi_200 = xi_196 * -1.0 + xi_199 * -1.0;
+          const double xi_201 = xi_196 + xi_199;
+          const double u_2 = xi_0 * (vel2Term + xi_21 + xi_24 + xi_260) + xi_259 * xi_7;
+          const double xi_27 = u_2 * xi_259;
+          const double xi_33 = xi_27 * 0.16666666666666666;
+          const double xi_34 = xi_27 * 0.083333333333333329;
+          const double xi_35 = omega_shear * xi_34;
+          const double xi_36 = xi_33 * -1.0 + xi_35;
+          const double xi_41 = omega_shear * xi_32 * -1.0 + xi_26 * 0.33333333333333331 + xi_36 + xi_40;
+          const double xi_48 = omega_shear * xi_37 * -1.0 + xi_25 * 0.33333333333333331 + xi_36 + xi_47;
+          const double xi_52 = omega_shear * xi_33 * -1.0 + xi_27 * 0.33333333333333331 + xi_40 + xi_47;
+          const double xi_58 = xi_34 * -1.0;
+          const double xi_63 = xi_27 * xi_60 * -1.0;
+          const double xi_64 = xi_26 * xi_55 * -1.0 + xi_32 + xi_61 + xi_62 + xi_63;
+          const double xi_65 = xi_30 + xi_59 * -1.0 + xi_64;
+          const double xi_66 = xi_35 + xi_58 + xi_65;
+          const double xi_79 = xi_30 * -1.0 + xi_59 + xi_64;
+          const double xi_80 = xi_35 + xi_58 + xi_79;
+          const double xi_82 = xi_27 * xi_55 * -1.0 + xi_33;
+          const double xi_83 = xi_50 + xi_81 * -1.0 + xi_82;
+          const double xi_85 = xi_39 + xi_65 + xi_84;
+          const double xi_87 = u_2 * xi_67;
+          const double xi_89 = u_2 * xi_72;
+          const double xi_90 = xi_86 + xi_87 + xi_88 * -1.0 + xi_89 * -1.0;
+          const double xi_91 = xi_39 + xi_79 + xi_84;
+          const double xi_92 = xi_86 * -1.0 + xi_87 * -1.0 + xi_88 + xi_89;
+          const double xi_94 = xi_46 + xi_61 + xi_62 + xi_63 + xi_83 + xi_93;
+          const double xi_97 = u_2 * xi_261;
+          const double xi_98 = xi_97 * 0.25;
+          const double xi_100 = xi_71 * xi_97;
+          const double xi_101 = xi_100 + xi_96 * -1.0 + xi_98 * -1.0 + xi_99;
+          const double xi_102 = xi_100 * -1.0 + xi_96 + xi_98 + xi_99 * -1.0;
+          const double xi_103 = xi_50 * -1.0 + xi_81 + xi_82;
+          const double xi_104 = xi_103 + xi_46 + xi_61 + xi_62 + xi_63 + xi_93;
+          const double xi_115 = rho * u_2 * u_2;
+          const double xi_116 = xi_114 + xi_115 * 0.66666666666666663 + xi_245 * 3.0 + xi_265 * 3.0;
+          const double xi_117 = omega_even * (xi_112 * 0.66666666666666663 + xi_113 * 1.6666666666666667 + xi_116 + xi_246 * -3.0 + xi_248 * -3.0 + xi_251 * 3.0 + xi_255 * -3.0 + xi_263 * -3.0 + xi_264 * 3.0);
+          const double xi_124 = omega_bulk * (xi_113 + xi_115 + xi_119 + xi_121 + xi_123 + xi_17 + xi_22 + xi_253);
+          const double xi_127 = omega_even * (xi_112 * 2.3333333333333335 + xi_116 + xi_126 + xi_249 * -5.0 + xi_251 * -2.0 + xi_256 * -5.0 + xi_260 * -5.0 + xi_262 * -5.0 + xi_264 * -2.0);
+          const double xi_131 = omega_even * (xi_114 + xi_115 * 3.0 + xi_126 + xi_128 + xi_129 + xi_130 + xi_245 * -4.0 + xi_247 * -7.0 + xi_250 * -7.0 + xi_251 * 5.0 + xi_252 * -7.0 + xi_257 * -7.0 + xi_264 * 5.0 + xi_265 * -4.0);
+          const double xi_156 = xi_115 * -1.0 + xi_265;
+          const double xi_157 = omega_shear * (xi_121 + xi_156 + xi_16 + xi_2 + xi_251 * -1.0 + xi_256 + xi_6);
+          const double xi_158 = xi_157 * 0.125;
+          const double xi_159 = xi_107 * -0.11904761904761904 + xi_131 * -0.01984126984126984;
+          const double xi_160 = omega_shear * (xi_112 * -1.0 + xi_113 * 2.0 + xi_120 + xi_123 + xi_125 + xi_156 + xi_244 * -2.0 + xi_245 + xi_251 + xi_258 * -2.0 + xi_264 + xi_9);
+          const double xi_162 = xi_160 * -0.041666666666666664 + xi_161 * -0.16666666666666666;
+          const double xi_163 = xi_108 * -0.10000000000000001 + xi_117 * -0.050000000000000003 + xi_162;
+          const double xi_164 = xi_111 * 0.028571428571428571 + xi_127 * 0.014285714285714285 + xi_155 + xi_158 + xi_159 + xi_163;
+          const double xi_176 = xi_111 * -0.071428571428571425 + xi_127 * -0.035714285714285712 + xi_159 + xi_160 * 0.083333333333333329 + xi_161 * 0.33333333333333331;
+          const double xi_187 = xi_107 * 0.095238095238095233 + xi_111 * -0.042857142857142858 + xi_127 * -0.021428571428571429 + xi_131 * 0.015873015873015872 + xi_155 * -1.0 + xi_158 * -1.0 + xi_163;
+          const double xi_190 = xi_157 * 0.0625;
+          const double xi_191 = xi_131 * 0.013888888888888888;
+          const double xi_193 = xi_110 * 0.083333333333333329 + xi_124 * 0.041666666666666664;
+          const double xi_194 = xi_160 * 0.020833333333333332 + xi_161 * 0.083333333333333329 + xi_193;
+          const double xi_195 = xi_165 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
+          const double xi_202 = xi_151 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
+          const double xi_204 = xi_127 * -0.0071428571428571426;
+          const double xi_205 = xi_198 * (u_2 * xi_197 + xi_132 + xi_17 + xi_246);
+          const double xi_206 = xi_117 * 0.025000000000000001;
+          const double xi_209 = xi_107 * -0.023809523809523808 + xi_131 * -0.003968253968253968;
+          const double xi_210 = xi_162 + xi_193 + xi_203 + xi_204 + xi_205 + xi_206 + xi_207 + xi_208 + xi_209;
+          const double xi_226 = xi_162 + xi_193 + xi_203 * -1.0 + xi_204 + xi_205 * -1.0 + xi_206 + xi_207 + xi_208 + xi_209;
+          const double xi_228 = xi_190 * -1.0;
+          const double xi_229 = xi_127 * 0.017857142857142856;
+          const double xi_231 = xi_188 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
+          const double xi_233 = xi_198 * (rho * u_0 * u_2 + xi_10 + xi_166 + xi_249);
+          const double xi_234 = xi_232 * -1.0 + xi_233 * -1.0;
+          const double xi_240 = xi_232 + xi_233;
+          const double xi_243 = xi_186 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
+          const double forceTerm_0 = xi_25 * xi_28 + xi_25 * -1.0 + xi_26 * xi_28 + xi_26 * -1.0 + xi_27 * xi_28 + xi_27 * -1.0;
+          const double forceTerm_1 = xi_29 + xi_31 * -1.0 + xi_41;
+          const double forceTerm_2 = xi_29 * -1.0 + xi_31 + xi_41;
+          const double forceTerm_3 = xi_42 * -1.0 + xi_44 + xi_48;
+          const double forceTerm_4 = xi_42 + xi_44 * -1.0 + xi_48;
+          const double forceTerm_5 = xi_49 + xi_51 * -1.0 + xi_52;
+          const double forceTerm_6 = xi_49 * -1.0 + xi_51 + xi_52;
+          const double forceTerm_7 = xi_57 + xi_66 + xi_76;
+          const double forceTerm_8 = xi_66 + xi_77 + xi_78;
+          const double forceTerm_9 = xi_57 + xi_78 + xi_80;
+          const double forceTerm_10 = xi_76 + xi_77 + xi_80;
+          const double forceTerm_11 = xi_83 + xi_85 + xi_90;
+          const double forceTerm_12 = xi_83 + xi_91 + xi_92;
+          const double forceTerm_13 = xi_101 + xi_57 + xi_94;
+          const double forceTerm_14 = xi_102 + xi_77 + xi_94;
+          const double forceTerm_15 = xi_103 + xi_85 + xi_92;
+          const double forceTerm_16 = xi_103 + xi_90 + xi_91;
+          const double forceTerm_17 = xi_102 + xi_104 + xi_57;
+          const double forceTerm_18 = xi_101 + xi_104 + xi_77;
+          _data_pdfs_20_30_10[ctr_0] = forceTerm_0 + xi_107 * 0.14285714285714285 + xi_108 * 0.20000000000000001 + xi_110 * -1.0 + xi_111 * 0.085714285714285715 + xi_117 * 0.10000000000000001 + xi_124 * -0.5 + xi_127 * 0.042857142857142858 + xi_131 * 0.023809523809523808 + xi_253;
+          _data_pdfs_20_31_10[ctr_0] = forceTerm_1 + xi_135 * -1.0 + xi_140 * -1.0 + xi_151 + xi_164 + xi_251;
+          _data_pdfs_20_32_10[ctr_0] = forceTerm_2 + xi_135 + xi_140 + xi_164 + xi_165 + xi_264;
+          _data_pdfs_20_33_10[ctr_0] = forceTerm_3 + xi_168 + xi_170 + xi_175 + xi_176 + xi_258;
+          _data_pdfs_20_34_10[ctr_0] = forceTerm_4 + xi_168 * -1.0 + xi_170 * -1.0 + xi_176 + xi_177 + xi_244;
+          _data_pdfs_20_35_10[ctr_0] = forceTerm_5 + xi_179 * -1.0 + xi_181 * -1.0 + xi_186 + xi_187 + xi_265;
+          _data_pdfs_20_36_10[ctr_0] = forceTerm_6 + xi_179 + xi_181 + xi_187 + xi_188 + xi_245;
+          _data_pdfs_20_37_10[ctr_0] = forceTerm_7 + xi_177 + xi_195 + xi_200 + xi_252;
+          _data_pdfs_20_38_10[ctr_0] = forceTerm_8 + xi_175 + xi_195 + xi_201 + xi_257;
+          _data_pdfs_20_39_10[ctr_0] = forceTerm_9 + xi_177 + xi_201 + xi_202 + xi_250;
+          _data_pdfs_20_310_10[ctr_0] = forceTerm_10 + xi_175 + xi_200 + xi_202 + xi_247;
+          _data_pdfs_20_311_10[ctr_0] = forceTerm_11 + xi_210 + xi_219 + xi_224 + xi_263;
+          _data_pdfs_20_312_10[ctr_0] = forceTerm_12 + xi_219 + xi_225 + xi_226 + xi_248;
+          _data_pdfs_20_313_10[ctr_0] = forceTerm_13 + xi_231 + xi_234 + xi_239 + xi_256;
+          _data_pdfs_20_314_10[ctr_0] = forceTerm_14 + xi_231 + xi_240 + xi_241 + xi_260;
+          _data_pdfs_20_315_10[ctr_0] = forceTerm_15 + xi_224 + xi_226 + xi_242 + xi_246;
+          _data_pdfs_20_316_10[ctr_0] = forceTerm_16 + xi_210 + xi_225 + xi_242 + xi_255;
+          _data_pdfs_20_317_10[ctr_0] = forceTerm_17 + xi_239 + xi_240 + xi_243 + xi_262;
+          _data_pdfs_20_318_10[ctr_0] = forceTerm_18 + xi_234 + xi_241 + xi_243 + xi_249;
+        }
+      }
+    }
+  }
+}
+} // namespace internal_25bc51f30ec2c20f3ee9796f7dcb65c6
+
+void CollideSweepDoublePrecisionThermalizedAVX::run(IBlock *block) {
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+
+  auto &omega_bulk = this->omega_bulk_;
+  auto block_offset_1 = this->block_offset_1_;
+  auto &seed = this->seed_;
+  auto &omega_even = this->omega_even_;
+  auto &kT = this->kT_;
+  auto &omega_odd = this->omega_odd_;
+  auto block_offset_2 = this->block_offset_2_;
+  auto &time_step = this->time_step_;
+  auto block_offset_0 = this->block_offset_0_;
+  auto &omega_shear = this->omega_shear_;
+  block_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_25bc51f30ec2c20f3ee9796f7dcb65c6::collidesweepdoubleprecisionthermalizedavx_collidesweepdoubleprecisionthermalizedavx(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
+}
+
+void CollideSweepDoublePrecisionThermalizedAVX::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+
+  auto &omega_bulk = this->omega_bulk_;
+  auto block_offset_1 = this->block_offset_1_;
+  auto &seed = this->seed_;
+  auto &omega_even = this->omega_even_;
+  auto &kT = this->kT_;
+  auto &omega_odd = this->omega_odd_;
+  auto block_offset_2 = this->block_offset_2_;
+  auto &time_step = this->time_step_;
+  auto block_offset_0 = this->block_offset_0_;
+  auto &omega_shear = this->omega_shear_;
+  block_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_25bc51f30ec2c20f3ee9796f7dcb65c6::collidesweepdoubleprecisionthermalizedavx_collidesweepdoubleprecisionthermalizedavx(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalizedAVX.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalizedAVX.h
new file mode 100644
index 00000000000..f822ebf2955
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepDoublePrecisionThermalizedAVX.h
@@ -0,0 +1,123 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepDoublePrecisionThermalizedAVX.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class CollideSweepDoublePrecisionThermalizedAVX {
+public:
+  CollideSweepDoublePrecisionThermalizedAVX(
+      BlockDataID forceID_, BlockDataID pdfsID_, uint32_t block_offset_0,
+      uint32_t block_offset_1, uint32_t block_offset_2, double kT,
+      double omega_bulk, double omega_even, double omega_odd,
+      double omega_shear, uint32_t seed, uint32_t time_step)
+      : forceID(forceID_), pdfsID(pdfsID_), block_offset_0_(block_offset_0),
+        block_offset_1_(block_offset_1), block_offset_2_(block_offset_2),
+        kT_(kT), omega_bulk_(omega_bulk), omega_even_(omega_even),
+        omega_odd_(omega_odd), omega_shear_(omega_shear), seed_(seed),
+        time_step_(time_step){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)> getSweep(
+      const shared_ptr<CollideSweepDoublePrecisionThermalizedAVX> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<CollideSweepDoublePrecisionThermalizedAVX> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  uint32_t block_offset_0_;
+  uint32_t block_offset_1_;
+  uint32_t block_offset_2_;
+  double kT_;
+  double omega_bulk_;
+  double omega_even_;
+  double omega_odd_;
+  double omega_shear_;
+  uint32_t seed_;
+  uint32_t time_step_;
+  std::function<void(IBlock *, uint32_t &, uint32_t &, uint32_t &)>
+      block_offset_generator =
+          [](IBlock *const, uint32_t &, uint32_t &, uint32_t &) {};
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwards.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwards.cpp
new file mode 100644
index 00000000000..560fbac086b
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwards.cpp
@@ -0,0 +1,290 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepSinglePrecisionLeesEdwards.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "CollideSweepSinglePrecisionLeesEdwards.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_ab1f3bc3368574afb482da84ccb58898 {
+static FUNC_PREFIX void collidesweepsingleprecisionleesedwards_collidesweepsingleprecisionleesedwards(float *RESTRICT const _data_force, float *RESTRICT _data_pdfs, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, float grid_size, float omega_shear, float v_s) {
+  const float xi_0 = ((1.0f) / (omega_shear * -0.25f + 2.0f));
+  const float rr_0 = xi_0 * (omega_shear * -2.0f + 4.0f);
+  for (int64_t ctr_2 = 0; ctr_2 < _size_force_2; ctr_2 += 1) {
+    float *RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    float *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    float *RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    float *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    float *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_force_1; ctr_1 += 1) {
+      float *RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_36;
+      float *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      float *RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_31;
+      float *RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_32;
+      float *RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_311;
+      float *RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_318;
+      float *RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_313;
+      float *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      float *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      float *RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_317;
+      float *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      float *RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_35;
+      float *RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_314;
+      float *RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_38;
+      float *RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_312;
+      float *RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_316;
+      float *RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_39;
+      float *RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_315;
+      float *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      float *RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_310;
+      float *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      float *RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_37;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_force_0; ctr_0 += 1) {
+        const float xi_25 = _data_pdfs_20_36_10[_stride_pdfs_0 * ctr_0];
+        const float xi_26 = _data_force_20_32_10[_stride_force_0 * ctr_0];
+        const float xi_27 = _data_pdfs_20_31_10[_stride_pdfs_0 * ctr_0];
+        const float xi_28 = _data_pdfs_20_32_10[_stride_pdfs_0 * ctr_0];
+        const float xi_29 = _data_pdfs_20_311_10[_stride_pdfs_0 * ctr_0];
+        const float xi_30 = _data_pdfs_20_318_10[_stride_pdfs_0 * ctr_0];
+        const float xi_31 = _data_pdfs_20_313_10[_stride_pdfs_0 * ctr_0];
+        const float xi_32 = _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0];
+        const float xi_33 = _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0];
+        const float xi_34 = _data_pdfs_20_317_10[_stride_pdfs_0 * ctr_0];
+        const float xi_35 = _data_force_20_30_10[_stride_force_0 * ctr_0];
+        const float xi_36 = _data_pdfs_20_35_10[_stride_pdfs_0 * ctr_0];
+        const float xi_37 = _data_pdfs_20_314_10[_stride_pdfs_0 * ctr_0];
+        const float xi_38 = _data_pdfs_20_38_10[_stride_pdfs_0 * ctr_0];
+        const float xi_39 = _data_pdfs_20_312_10[_stride_pdfs_0 * ctr_0];
+        const float xi_40 = _data_pdfs_20_316_10[_stride_pdfs_0 * ctr_0];
+        const float xi_41 = _data_pdfs_20_39_10[_stride_pdfs_0 * ctr_0];
+        const float xi_42 = _data_pdfs_20_315_10[_stride_pdfs_0 * ctr_0];
+        const float xi_43 = _data_force_20_31_10[_stride_force_0 * ctr_0];
+        const float xi_44 = _data_pdfs_20_310_10[_stride_pdfs_0 * ctr_0];
+        const float xi_45 = _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0];
+        const float xi_46 = _data_pdfs_20_37_10[_stride_pdfs_0 * ctr_0];
+        const float xi_3 = xi_25;
+        const float xi_4 = xi_26;
+        const float xi_5 = xi_27;
+        const float xi_6 = xi_28;
+        const float xi_7 = xi_29;
+        const float xi_8 = xi_30;
+        const float xi_9 = xi_31;
+        const float xi_10 = xi_45;
+        const float xi_11 = xi_32;
+        const float xi_12 = xi_33;
+        const float xi_13 = xi_34;
+        const float xi_14 = xi_35;
+        const float xi_15 = xi_36;
+        const float xi_16 = xi_37;
+        const float xi_17 = xi_38;
+        const float xi_18 = xi_39;
+        const float xi_19 = xi_40;
+        const float xi_20 = xi_42;
+        const float xi_21 = xi_43;
+        const float xi_22 = xi_44;
+        const float xi_23 = xi_41;
+        const float xi_24 = xi_46;
+        const float vel0Term = xi_11 + xi_16 + xi_17 + xi_22 + xi_8;
+        const float vel1Term = xi_20 + xi_24 + xi_5 + xi_7;
+        const float vel2Term = xi_15 + xi_18 + xi_9;
+        const float rho = vel0Term + vel1Term + vel2Term + xi_10 + xi_12 + xi_13 + xi_19 + xi_23 + xi_3 + xi_6;
+        const float xi_1 = ((1.0f) / (rho));
+        const float u_0 = xi_1 * xi_14 * 0.5f + xi_1 * (vel0Term + xi_12 * -1.0f + xi_13 * -1.0f + xi_23 * -1.0f + xi_24 * -1.0f + xi_9 * -1.0f);
+        const float u_1 = xi_1 * xi_21 * 0.5f + xi_1 * (vel1Term + xi_17 + xi_18 * -1.0f + xi_19 * -1.0f + xi_22 * -1.0f + xi_23 * -1.0f + xi_6 * -1.0f);
+        const float u_2 = xi_1 * xi_4 * 0.5f + xi_1 * (vel2Term + xi_13 * -1.0f + xi_16 + xi_19 * -1.0f + xi_20 * -1.0f + xi_3 * -1.0f + xi_7 + xi_8 * -1.0f);
+        const float forceTerm_0 = omega_shear * u_0 * xi_14 * 0.5f + omega_shear * u_1 * xi_21 * 0.5f + omega_shear * u_2 * xi_4 * 0.5f + u_0 * xi_14 * -1.0f + u_1 * xi_21 * -1.0f + u_2 * xi_4 * -1.0f;
+        const float forceTerm_1 = omega_shear * u_0 * xi_14 * 0.083333333333333329f + omega_shear * u_1 * xi_21 * -0.16666666666666666f + omega_shear * u_2 * xi_4 * 0.083333333333333329f + rr_0 * xi_21 * -0.083333333333333329f + u_0 * xi_14 * -0.16666666666666666f + u_1 * xi_21 * 0.33333333333333331f + u_2 * xi_4 * -0.16666666666666666f + xi_21 * 0.16666666666666666f;
+        const float forceTerm_2 = omega_shear * u_0 * xi_14 * 0.083333333333333329f + omega_shear * u_1 * xi_21 * -0.16666666666666666f + omega_shear * u_2 * xi_4 * 0.083333333333333329f + rr_0 * xi_21 * 0.083333333333333329f + u_0 * xi_14 * -0.16666666666666666f + u_1 * xi_21 * 0.33333333333333331f + u_2 * xi_4 * -0.16666666666666666f + xi_21 * -0.16666666666666666f;
+        const float forceTerm_3 = omega_shear * u_0 * xi_14 * -0.16666666666666666f + omega_shear * u_1 * xi_21 * 0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.083333333333333329f + rr_0 * xi_14 * 0.083333333333333329f + u_0 * xi_14 * 0.33333333333333331f + u_1 * xi_21 * -0.16666666666666666f + u_2 * xi_4 * -0.16666666666666666f + xi_14 * -0.16666666666666666f;
+        const float forceTerm_4 = omega_shear * u_0 * xi_14 * -0.16666666666666666f + omega_shear * u_1 * xi_21 * 0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.083333333333333329f + rr_0 * xi_14 * -0.083333333333333329f + u_0 * xi_14 * 0.33333333333333331f + u_1 * xi_21 * -0.16666666666666666f + u_2 * xi_4 * -0.16666666666666666f + xi_14 * 0.16666666666666666f;
+        const float forceTerm_5 = omega_shear * u_0 * xi_14 * 0.083333333333333329f + omega_shear * u_1 * xi_21 * 0.083333333333333329f + omega_shear * u_2 * xi_4 * -0.16666666666666666f + rr_0 * xi_4 * -0.083333333333333329f + u_0 * xi_14 * -0.16666666666666666f + u_1 * xi_21 * -0.16666666666666666f + u_2 * xi_4 * 0.33333333333333331f + xi_4 * 0.16666666666666666f;
+        const float forceTerm_6 = omega_shear * u_0 * xi_14 * 0.083333333333333329f + omega_shear * u_1 * xi_21 * 0.083333333333333329f + omega_shear * u_2 * xi_4 * -0.16666666666666666f + rr_0 * xi_4 * 0.083333333333333329f + u_0 * xi_14 * -0.16666666666666666f + u_1 * xi_21 * -0.16666666666666666f + u_2 * xi_4 * 0.33333333333333331f + xi_4 * -0.16666666666666666f;
+        const float forceTerm_7 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_21 * 0.125f + omega_shear * u_1 * xi_14 * 0.125f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.041666666666666664f + rr_0 * xi_14 * 0.041666666666666664f + rr_0 * xi_21 * -0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_21 * -0.25f + u_1 * xi_14 * -0.25f + u_1 * xi_21 * 0.16666666666666666f + u_2 * xi_4 * -0.083333333333333329f + xi_14 * -0.083333333333333329f + xi_21 * 0.083333333333333329f;
+        const float forceTerm_8 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_21 * -0.125f + omega_shear * u_1 * xi_14 * -0.125f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.041666666666666664f + rr_0 * xi_14 * -0.041666666666666664f + rr_0 * xi_21 * -0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_21 * 0.25f + u_1 * xi_14 * 0.25f + u_1 * xi_21 * 0.16666666666666666f + u_2 * xi_4 * -0.083333333333333329f + xi_14 * 0.083333333333333329f + xi_21 * 0.083333333333333329f;
+        const float forceTerm_9 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_21 * -0.125f + omega_shear * u_1 * xi_14 * -0.125f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.041666666666666664f + rr_0 * xi_14 * 0.041666666666666664f + rr_0 * xi_21 * 0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_21 * 0.25f + u_1 * xi_14 * 0.25f + u_1 * xi_21 * 0.16666666666666666f + u_2 * xi_4 * -0.083333333333333329f + xi_14 * -0.083333333333333329f + xi_21 * -0.083333333333333329f;
+        const float forceTerm_10 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_21 * 0.125f + omega_shear * u_1 * xi_14 * 0.125f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.041666666666666664f + rr_0 * xi_14 * -0.041666666666666664f + rr_0 * xi_21 * 0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_21 * -0.25f + u_1 * xi_14 * -0.25f + u_1 * xi_21 * 0.16666666666666666f + u_2 * xi_4 * -0.083333333333333329f + xi_14 * 0.083333333333333329f + xi_21 * -0.083333333333333329f;
+        const float forceTerm_11 = omega_shear * u_0 * xi_14 * 0.041666666666666664f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_1 * xi_4 * -0.125f + omega_shear * u_2 * xi_21 * -0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_21 * -0.041666666666666664f + rr_0 * xi_4 * -0.041666666666666664f + u_0 * xi_14 * -0.083333333333333329f + u_1 * xi_21 * 0.16666666666666666f + u_1 * xi_4 * 0.25f + u_2 * xi_21 * 0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_21 * 0.083333333333333329f + xi_4 * 0.083333333333333329f;
+        const float forceTerm_12 = omega_shear * u_0 * xi_14 * 0.041666666666666664f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_1 * xi_4 * 0.125f + omega_shear * u_2 * xi_21 * 0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_21 * 0.041666666666666664f + rr_0 * xi_4 * -0.041666666666666664f + u_0 * xi_14 * -0.083333333333333329f + u_1 * xi_21 * 0.16666666666666666f + u_1 * xi_4 * -0.25f + u_2 * xi_21 * -0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_21 * -0.083333333333333329f + xi_4 * 0.083333333333333329f;
+        const float forceTerm_13 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_4 * 0.125f + omega_shear * u_1 * xi_21 * 0.041666666666666664f + omega_shear * u_2 * xi_14 * 0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_14 * 0.041666666666666664f + rr_0 * xi_4 * -0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_4 * -0.25f + u_1 * xi_21 * -0.083333333333333329f + u_2 * xi_14 * -0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_14 * -0.083333333333333329f + xi_4 * 0.083333333333333329f;
+        const float forceTerm_14 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_4 * -0.125f + omega_shear * u_1 * xi_21 * 0.041666666666666664f + omega_shear * u_2 * xi_14 * -0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_14 * -0.041666666666666664f + rr_0 * xi_4 * -0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_4 * 0.25f + u_1 * xi_21 * -0.083333333333333329f + u_2 * xi_14 * 0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_14 * 0.083333333333333329f + xi_4 * 0.083333333333333329f;
+        const float forceTerm_15 = omega_shear * u_0 * xi_14 * 0.041666666666666664f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_1 * xi_4 * 0.125f + omega_shear * u_2 * xi_21 * 0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_21 * -0.041666666666666664f + rr_0 * xi_4 * 0.041666666666666664f + u_0 * xi_14 * -0.083333333333333329f + u_1 * xi_21 * 0.16666666666666666f + u_1 * xi_4 * -0.25f + u_2 * xi_21 * -0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_21 * 0.083333333333333329f + xi_4 * -0.083333333333333329f;
+        const float forceTerm_16 = omega_shear * u_0 * xi_14 * 0.041666666666666664f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_1 * xi_4 * -0.125f + omega_shear * u_2 * xi_21 * -0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_21 * 0.041666666666666664f + rr_0 * xi_4 * 0.041666666666666664f + u_0 * xi_14 * -0.083333333333333329f + u_1 * xi_21 * 0.16666666666666666f + u_1 * xi_4 * 0.25f + u_2 * xi_21 * 0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_21 * -0.083333333333333329f + xi_4 * -0.083333333333333329f;
+        const float forceTerm_17 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_4 * -0.125f + omega_shear * u_1 * xi_21 * 0.041666666666666664f + omega_shear * u_2 * xi_14 * -0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_14 * 0.041666666666666664f + rr_0 * xi_4 * 0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_4 * 0.25f + u_1 * xi_21 * -0.083333333333333329f + u_2 * xi_14 * 0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_14 * -0.083333333333333329f + xi_4 * -0.083333333333333329f;
+        const float forceTerm_18 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_4 * 0.125f + omega_shear * u_1 * xi_21 * 0.041666666666666664f + omega_shear * u_2 * xi_14 * 0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_14 * -0.041666666666666664f + rr_0 * xi_4 * 0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_4 * -0.25f + u_1 * xi_21 * -0.083333333333333329f + u_2 * xi_14 * -0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_14 * 0.083333333333333329f + xi_4 * -0.083333333333333329f;
+        const float u0Mu1 = u_0 + u_1 * -1.0f;
+        const float u0Pu1 = u_0 + u_1;
+        const float u1Pu2 = u_1 + u_2;
+        const float u1Mu2 = u_1 + u_2 * -1.0f;
+        const float u0Mu2 = u_0 + u_2 * -1.0f;
+        const float u0Pu2 = u_0 + u_2;
+        const float f_eq_common = rho * -1.0f * (u_0 * u_0) + rho * -1.0f * (u_1 * u_1) + rho * -1.0f * (u_2 * u_2) + rho;
+        _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0] = forceTerm_0 + omega_shear * (f_eq_common * 0.33333333333333331f + xi_10 * -1.0f) + xi_10;
+        _data_pdfs_20_31_10[_stride_pdfs_0 * ctr_0] = forceTerm_1 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * (u_1 * u_1)) + xi_5 * -0.5f + xi_6 * -0.5f) + rr_0 * (rho * u_1 * 0.16666666666666666f + xi_5 * -0.5f + xi_6 * 0.5f) + xi_5 + ((-1.0f <= grid_size * -1.0f + ((float)(ctr_1))) ? (rho * v_s * (u_0 * 2.0f + v_s) * 0.16666666666666666f) : (0.0f));
+        _data_pdfs_20_32_10[_stride_pdfs_0 * ctr_0] = forceTerm_2 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * (u_1 * u_1)) + xi_5 * -0.5f + xi_6 * -0.5f) + rr_0 * (rho * u_1 * -0.16666666666666666f + xi_5 * 0.5f + xi_6 * -0.5f) + xi_6 + ((0.0f >= ((float)(ctr_1))) ? (rho * v_s * (u_0 * -2.0f + v_s) * 0.16666666666666666f) : (0.0f));
+        _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0] = forceTerm_3 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * (u_0 * u_0)) + xi_11 * -0.5f + xi_12 * -0.5f) + rr_0 * (rho * u_0 * -0.16666666666666666f + xi_11 * 0.5f + xi_12 * -0.5f) + xi_12;
+        _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0] = forceTerm_4 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * (u_0 * u_0)) + xi_11 * -0.5f + xi_12 * -0.5f) + rr_0 * (rho * u_0 * 0.16666666666666666f + xi_11 * -0.5f + xi_12 * 0.5f) + xi_11;
+        _data_pdfs_20_35_10[_stride_pdfs_0 * ctr_0] = forceTerm_5 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * (u_2 * u_2)) + xi_15 * -0.5f + xi_3 * -0.5f) + rr_0 * (rho * u_2 * 0.16666666666666666f + xi_15 * -0.5f + xi_3 * 0.5f) + xi_15;
+        _data_pdfs_20_36_10[_stride_pdfs_0 * ctr_0] = forceTerm_6 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * (u_2 * u_2)) + xi_15 * -0.5f + xi_3 * -0.5f) + rr_0 * (rho * u_2 * -0.16666666666666666f + xi_15 * 0.5f + xi_3 * -0.5f) + xi_3;
+        _data_pdfs_20_37_10[_stride_pdfs_0 * ctr_0] = forceTerm_7 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_2 * u_2) + 0.125f * (u0Mu1 * u0Mu1)) + xi_22 * -0.5f + xi_24 * -0.5f) + rr_0 * (rho * u0Mu1 * -0.083333333333333329f + xi_22 * 0.5f + xi_24 * -0.5f) + xi_24 + ((-1.0f <= grid_size * -1.0f + ((float)(ctr_1))) ? (rho * v_s * (u_0 * -2.0f + u_1 * 3.0f + v_s * -1.0f + 1.0f) * 0.083333333333333329f) : (0.0f));
+        _data_pdfs_20_38_10[_stride_pdfs_0 * ctr_0] = forceTerm_8 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_2 * u_2) + 0.125f * (u0Pu1 * u0Pu1)) + xi_17 * -0.5f + xi_23 * -0.5f) + rr_0 * (rho * u0Pu1 * 0.083333333333333329f + xi_17 * -0.5f + xi_23 * 0.5f) + xi_17 + ((-1.0f <= grid_size * -1.0f + ((float)(ctr_1))) ? (rho * v_s * (u_0 * 2.0f + u_1 * 3.0f + v_s + 1.0f) * -0.083333333333333329f) : (0.0f));
+        _data_pdfs_20_39_10[_stride_pdfs_0 * ctr_0] = forceTerm_9 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_2 * u_2) + 0.125f * (u0Pu1 * u0Pu1)) + xi_17 * -0.5f + xi_23 * -0.5f) + rr_0 * (rho * u0Pu1 * -0.083333333333333329f + xi_17 * 0.5f + xi_23 * -0.5f) + xi_23 + ((0.0f >= ((float)(ctr_1))) ? (rho * v_s * (u_0 * 2.0f + u_1 * 3.0f + v_s * -1.0f - 1.0f) * 0.083333333333333329f) : (0.0f));
+        _data_pdfs_20_310_10[_stride_pdfs_0 * ctr_0] = forceTerm_10 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_2 * u_2) + 0.125f * (u0Mu1 * u0Mu1)) + xi_22 * -0.5f + xi_24 * -0.5f) + rr_0 * (rho * u0Mu1 * 0.083333333333333329f + xi_22 * -0.5f + xi_24 * 0.5f) + xi_22 + ((0.0f >= ((float)(ctr_1))) ? (rho * v_s * (u_0 * 2.0f + u_1 * -3.0f + v_s * -1.0f + 1.0f) * 0.083333333333333329f) : (0.0f));
+        _data_pdfs_20_311_10[_stride_pdfs_0 * ctr_0] = forceTerm_11 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_0 * u_0) + 0.125f * (u1Pu2 * u1Pu2)) + xi_19 * -0.5f + xi_7 * -0.5f) + rr_0 * (rho * u1Pu2 * 0.083333333333333329f + xi_19 * 0.5f + xi_7 * -0.5f) + xi_7;
+        _data_pdfs_20_312_10[_stride_pdfs_0 * ctr_0] = forceTerm_12 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_0 * u_0) + 0.125f * (u1Mu2 * u1Mu2)) + xi_18 * -0.5f + xi_20 * -0.5f) + rr_0 * (rho * u1Mu2 * -0.083333333333333329f + xi_18 * -0.5f + xi_20 * 0.5f) + xi_18;
+        _data_pdfs_20_313_10[_stride_pdfs_0 * ctr_0] = forceTerm_13 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_1 * u_1) + 0.125f * (u0Mu2 * u0Mu2)) + xi_8 * -0.5f + xi_9 * -0.5f) + rr_0 * (rho * u0Mu2 * -0.083333333333333329f + xi_8 * 0.5f + xi_9 * -0.5f) + xi_9;
+        _data_pdfs_20_314_10[_stride_pdfs_0 * ctr_0] = forceTerm_14 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_1 * u_1) + 0.125f * (u0Pu2 * u0Pu2)) + xi_13 * -0.5f + xi_16 * -0.5f) + rr_0 * (rho * u0Pu2 * 0.083333333333333329f + xi_13 * 0.5f + xi_16 * -0.5f) + xi_16;
+        _data_pdfs_20_315_10[_stride_pdfs_0 * ctr_0] = forceTerm_15 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_0 * u_0) + 0.125f * (u1Mu2 * u1Mu2)) + xi_18 * -0.5f + xi_20 * -0.5f) + rr_0 * (rho * u1Mu2 * 0.083333333333333329f + xi_18 * 0.5f + xi_20 * -0.5f) + xi_20;
+        _data_pdfs_20_316_10[_stride_pdfs_0 * ctr_0] = forceTerm_16 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_0 * u_0) + 0.125f * (u1Pu2 * u1Pu2)) + xi_19 * -0.5f + xi_7 * -0.5f) + rr_0 * (rho * u1Pu2 * -0.083333333333333329f + xi_19 * -0.5f + xi_7 * 0.5f) + xi_19;
+        _data_pdfs_20_317_10[_stride_pdfs_0 * ctr_0] = forceTerm_17 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_1 * u_1) + 0.125f * (u0Pu2 * u0Pu2)) + xi_13 * -0.5f + xi_16 * -0.5f) + rr_0 * (rho * u0Pu2 * -0.083333333333333329f + xi_13 * -0.5f + xi_16 * 0.5f) + xi_13;
+        _data_pdfs_20_318_10[_stride_pdfs_0 * ctr_0] = forceTerm_18 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * (u_1 * u_1) + 0.125f * (u0Mu2 * u0Mu2)) + xi_8 * -0.5f + xi_9 * -0.5f) + rr_0 * (rho * u0Mu2 * 0.083333333333333329f + xi_8 * -0.5f + xi_9 * 0.5f) + xi_8;
+      }
+    }
+  }
+}
+} // namespace internal_ab1f3bc3368574afb482da84ccb58898
+
+void CollideSweepSinglePrecisionLeesEdwards::run(IBlock *block) {
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+
+  auto &omega_shear = this->omega_shear_;
+  auto &v_s = this->v_s_;
+  auto &grid_size = this->grid_size_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_ab1f3bc3368574afb482da84ccb58898::collidesweepsingleprecisionleesedwards_collidesweepsingleprecisionleesedwards(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, grid_size, omega_shear, v_s);
+}
+
+void CollideSweepSinglePrecisionLeesEdwards::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+
+  auto &omega_shear = this->omega_shear_;
+  auto &v_s = this->v_s_;
+  auto &grid_size = this->grid_size_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_ab1f3bc3368574afb482da84ccb58898::collidesweepsingleprecisionleesedwards_collidesweepsingleprecisionleesedwards(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, grid_size, omega_shear, v_s);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwards.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwards.h
new file mode 100644
index 00000000000..d65e0a1b32f
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwards.h
@@ -0,0 +1,108 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepSinglePrecisionLeesEdwards.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class CollideSweepSinglePrecisionLeesEdwards {
+public:
+  CollideSweepSinglePrecisionLeesEdwards(BlockDataID forceID_,
+                                         BlockDataID pdfsID_, float grid_size,
+                                         float omega_shear, float v_s)
+      : forceID(forceID_), pdfsID(pdfsID_), grid_size_(grid_size),
+        omega_shear_(omega_shear), v_s_(v_s){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<CollideSweepSinglePrecisionLeesEdwards> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<CollideSweepSinglePrecisionLeesEdwards> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  float grid_size_;
+  float omega_shear_;
+  float v_s_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwardsAVX.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwardsAVX.cpp
new file mode 100644
index 00000000000..7885aed8d9c
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwardsAVX.cpp
@@ -0,0 +1,399 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepSinglePrecisionLeesEdwardsAVX.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "CollideSweepSinglePrecisionLeesEdwardsAVX.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#include <immintrin.h>
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_9a18f2f4073cdcc5365cdfddb752069e {
+static FUNC_PREFIX void collidesweepsingleprecisionleesedwardsavx_collidesweepsingleprecisionleesedwardsavx(float *RESTRICT const _data_force, float *RESTRICT _data_pdfs, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, float grid_size, float omega_shear, float v_s) {
+  const float xi_0 = ((1.0f) / (omega_shear * -0.25f + 2.0f));
+  const float rr_0 = xi_0 * (omega_shear * -2.0f + 4.0f);
+  for (int64_t ctr_2 = 0; ctr_2 < _size_force_2; ctr_2 += 1) {
+    float *RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    float *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    float *RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    float *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    float *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_force_1; ctr_1 += 1) {
+      float *RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_36;
+      float *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      float *RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_31;
+      float *RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_32;
+      float *RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_311;
+      float *RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_318;
+      float *RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_313;
+      float *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      float *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      float *RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_317;
+      float *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      float *RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_35;
+      float *RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_314;
+      float *RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_38;
+      float *RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_312;
+      float *RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_316;
+      float *RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_39;
+      float *RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_315;
+      float *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      float *RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_310;
+      float *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      float *RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_37;
+      {
+        for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((_size_force_0) / (8)) * (8); ctr_0 += 8) {
+          const __m256 xi_25 = _mm256_load_ps(&_data_pdfs_20_36_10[ctr_0]);
+          const __m256 xi_26 = _mm256_load_ps(&_data_force_20_32_10[ctr_0]);
+          const __m256 xi_27 = _mm256_load_ps(&_data_pdfs_20_31_10[ctr_0]);
+          const __m256 xi_28 = _mm256_load_ps(&_data_pdfs_20_32_10[ctr_0]);
+          const __m256 xi_29 = _mm256_load_ps(&_data_pdfs_20_311_10[ctr_0]);
+          const __m256 xi_30 = _mm256_load_ps(&_data_pdfs_20_318_10[ctr_0]);
+          const __m256 xi_31 = _mm256_load_ps(&_data_pdfs_20_313_10[ctr_0]);
+          const __m256 xi_32 = _mm256_load_ps(&_data_pdfs_20_34_10[ctr_0]);
+          const __m256 xi_33 = _mm256_load_ps(&_data_pdfs_20_33_10[ctr_0]);
+          const __m256 xi_34 = _mm256_load_ps(&_data_pdfs_20_317_10[ctr_0]);
+          const __m256 xi_35 = _mm256_load_ps(&_data_force_20_30_10[ctr_0]);
+          const __m256 xi_36 = _mm256_load_ps(&_data_pdfs_20_35_10[ctr_0]);
+          const __m256 xi_37 = _mm256_load_ps(&_data_pdfs_20_314_10[ctr_0]);
+          const __m256 xi_38 = _mm256_load_ps(&_data_pdfs_20_38_10[ctr_0]);
+          const __m256 xi_39 = _mm256_load_ps(&_data_pdfs_20_312_10[ctr_0]);
+          const __m256 xi_40 = _mm256_load_ps(&_data_pdfs_20_316_10[ctr_0]);
+          const __m256 xi_41 = _mm256_load_ps(&_data_pdfs_20_39_10[ctr_0]);
+          const __m256 xi_42 = _mm256_load_ps(&_data_pdfs_20_315_10[ctr_0]);
+          const __m256 xi_43 = _mm256_load_ps(&_data_force_20_31_10[ctr_0]);
+          const __m256 xi_44 = _mm256_load_ps(&_data_pdfs_20_310_10[ctr_0]);
+          const __m256 xi_45 = _mm256_load_ps(&_data_pdfs_20_30_10[ctr_0]);
+          const __m256 xi_46 = _mm256_load_ps(&_data_pdfs_20_37_10[ctr_0]);
+          const __m256 xi_3 = xi_25;
+          const __m256 xi_4 = xi_26;
+          const __m256 xi_5 = xi_27;
+          const __m256 xi_6 = xi_28;
+          const __m256 xi_7 = xi_29;
+          const __m256 xi_8 = xi_30;
+          const __m256 xi_9 = xi_31;
+          const __m256 xi_10 = xi_45;
+          const __m256 xi_11 = xi_32;
+          const __m256 xi_12 = xi_33;
+          const __m256 xi_13 = xi_34;
+          const __m256 xi_14 = xi_35;
+          const __m256 xi_15 = xi_36;
+          const __m256 xi_16 = xi_37;
+          const __m256 xi_17 = xi_38;
+          const __m256 xi_18 = xi_39;
+          const __m256 xi_19 = xi_40;
+          const __m256 xi_20 = xi_42;
+          const __m256 xi_21 = xi_43;
+          const __m256 xi_22 = xi_44;
+          const __m256 xi_23 = xi_41;
+          const __m256 xi_24 = xi_46;
+          const __m256 vel0Term = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_11, xi_16), xi_17), xi_22), xi_8);
+          const __m256 vel1Term = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_20, xi_24), xi_5), xi_7);
+          const __m256 vel2Term = _mm256_add_ps(_mm256_add_ps(xi_15, xi_18), xi_9);
+          const __m256 rho = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(vel0Term, vel1Term), vel2Term), xi_10), xi_12), xi_13), xi_19), xi_23), xi_3), xi_6);
+          const __m256 xi_1 = _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho);
+          const __m256 u_0 = _mm256_add_ps(_mm256_mul_ps(xi_1, _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_12, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_13, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_23, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_24, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_9, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), vel0Term)), _mm256_mul_ps(_mm256_mul_ps(xi_1, xi_14), _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)));
+          const __m256 u_1 = _mm256_add_ps(_mm256_mul_ps(xi_1, _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_18, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_19, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_22, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_23, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_6, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), vel1Term), xi_17)), _mm256_mul_ps(_mm256_mul_ps(xi_1, xi_21), _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)));
+          const __m256 u_2 = _mm256_add_ps(_mm256_mul_ps(xi_1, _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_13, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_19, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_20, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_3, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_8, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), vel2Term), xi_16), xi_7)), _mm256_mul_ps(_mm256_mul_ps(xi_1, xi_4), _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)));
+          const __m256 forceTerm_0 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_1 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f)), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f))), _mm256_mul_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_2 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f)), _mm256_mul_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_3 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f)), _mm256_mul_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_4 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f)), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f))), _mm256_mul_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_5 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f)), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f))), _mm256_mul_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_6 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f)), _mm256_mul_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_7 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_mul_ps(xi_14, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_21), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_14), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_21), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_14), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_8 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_mul_ps(xi_21, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_21), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_14), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_21), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_14), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_9 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_mul_ps(xi_21, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_21), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_14), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_21), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_14), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_10 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_mul_ps(xi_21, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_21), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_14), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_21), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_14), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_11 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_mul_ps(xi_4, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_4), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_21), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_4), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_21), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_12 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_mul_ps(xi_21, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_4), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_21), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_4), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_21), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_13 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_mul_ps(xi_14, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_4), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_14), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_4), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_14), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_14 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_mul_ps(xi_4, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_4), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_14), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_4), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_14), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_15 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_mul_ps(xi_4, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_4), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_21), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_4), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_21), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_16 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_mul_ps(xi_4, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_4), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_21), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(xi_21, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_4), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_21), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_17 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_mul_ps(xi_4, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_4), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_14), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f))), _mm256_mul_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_4), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_14), _mm256_set_ps(-0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f, -0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 forceTerm_18 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_mul_ps(xi_4, _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(xi_4, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_mul_ps(_mm256_mul_ps(u_0, xi_4), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(u_2, xi_14), _mm256_set_ps(-0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f, -0.25f))), _mm256_mul_ps(_mm256_mul_ps(xi_14, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0))), _mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_4), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_14), _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_1, xi_21), _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_0, xi_14), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(u_2, xi_4), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)));
+          const __m256 u0Mu1 = _mm256_add_ps(_mm256_mul_ps(u_1, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), u_0);
+          const __m256 u0Pu1 = _mm256_add_ps(u_0, u_1);
+          const __m256 u1Pu2 = _mm256_add_ps(u_1, u_2);
+          const __m256 u1Mu2 = _mm256_add_ps(_mm256_mul_ps(u_2, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), u_1);
+          const __m256 u0Mu2 = _mm256_add_ps(_mm256_mul_ps(u_2, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), u_0);
+          const __m256 u0Pu2 = _mm256_add_ps(u_0, u_2);
+          const __m256 f_eq_common = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_mul_ps(rho, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(u_0, u_0)), _mm256_mul_ps(_mm256_mul_ps(rho, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(u_1, u_1))), _mm256_mul_ps(_mm256_mul_ps(rho, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(u_2, u_2))), rho);
+          _mm256_store_ps(&_data_pdfs_20_30_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_mul_ps(xi_10, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(f_eq_common, _mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f))), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)), forceTerm_0), xi_10));
+          _mm256_store_ps(&_data_pdfs_20_31_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_6, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_5, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u_1), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f)), _mm256_mul_ps(xi_5, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_6, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f), _mm256_mul_ps(u_1, u_1)), _mm256_set_ps(-0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f)))))), _mm256_blendv_ps(_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(rho, _mm256_add_ps(_mm256_mul_ps(u_0, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f)), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s))), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f)), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s)), _mm256_cmp_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(grid_size, grid_size, grid_size, grid_size, grid_size, grid_size, grid_size, grid_size)), _mm256_set_ps(((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)))), _CMP_LE_OQ))), forceTerm_1), xi_5));
+          _mm256_store_ps(&_data_pdfs_20_32_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_5, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_6, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u_1), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f)), _mm256_mul_ps(xi_5, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_6, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f), _mm256_mul_ps(u_1, u_1)), _mm256_set_ps(-0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f)))))), _mm256_blendv_ps(_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(rho, _mm256_add_ps(_mm256_mul_ps(u_0, _mm256_set_ps(-2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f)), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s))), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f)), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s)), _mm256_cmp_ps(_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), _mm256_set_ps(((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1))), _CMP_GE_OQ))), forceTerm_2), xi_6));
+          _mm256_store_ps(&_data_pdfs_20_33_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_11, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_12, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u_0), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f)), _mm256_mul_ps(xi_11, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_12, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f), _mm256_mul_ps(u_0, u_0)), _mm256_set_ps(-0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f)))))), forceTerm_3), xi_12));
+          _mm256_store_ps(&_data_pdfs_20_34_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_12, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_11, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u_0), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f)), _mm256_mul_ps(xi_11, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_12, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f), _mm256_mul_ps(u_0, u_0)), _mm256_set_ps(-0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f)))))), forceTerm_4), xi_11));
+          _mm256_store_ps(&_data_pdfs_20_35_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_3, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_15, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u_2), _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f)), _mm256_mul_ps(xi_15, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_3, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f), _mm256_mul_ps(u_2, u_2)), _mm256_set_ps(-0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f)))))), forceTerm_5), xi_15));
+          _mm256_store_ps(&_data_pdfs_20_36_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_15, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_3, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u_2), _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f)), _mm256_mul_ps(xi_15, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_3, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f), _mm256_mul_ps(u_2, u_2)), _mm256_set_ps(-0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f, -0.1111111111111111f)))))), forceTerm_6), xi_3));
+          _mm256_store_ps(&_data_pdfs_20_37_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_22, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_24, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u0Mu1), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_22, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_24, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u0Mu1, u0Mu1)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_2, u_2))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), _mm256_blendv_ps(_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s)), _mm256_mul_ps(u_1, _mm256_set_ps(3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f))), _mm256_mul_ps(u_0, _mm256_set_ps(-2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f))), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f))), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s)), _mm256_cmp_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(grid_size, grid_size, grid_size, grid_size, grid_size, grid_size, grid_size, grid_size)), _mm256_set_ps(((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)))), _CMP_LE_OQ))), forceTerm_7), xi_24));
+          _mm256_store_ps(&_data_pdfs_20_38_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_23, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_17, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u0Pu1), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_17, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_23, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u0Pu1, u0Pu1)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_2, u_2))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), _mm256_blendv_ps(_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(u_0, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f)), _mm256_mul_ps(u_1, _mm256_set_ps(3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f))), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f)), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s))), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f)), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s)), _mm256_cmp_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(grid_size, grid_size, grid_size, grid_size, grid_size, grid_size, grid_size, grid_size)), _mm256_set_ps(((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)))), _CMP_LE_OQ))), forceTerm_8), xi_17));
+          _mm256_store_ps(&_data_pdfs_20_39_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_17, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_23, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u0Pu1), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_17, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_23, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u0Pu1, u0Pu1)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_2, u_2))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), _mm256_blendv_ps(_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s)), _mm256_mul_ps(u_0, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f))), _mm256_mul_ps(u_1, _mm256_set_ps(3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f))), _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s)), _mm256_cmp_ps(_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), _mm256_set_ps(((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1))), _CMP_GE_OQ))), forceTerm_9), xi_23));
+          _mm256_store_ps(&_data_pdfs_20_310_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_24, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_22, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u0Mu1), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_22, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_24, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u0Mu1, u0Mu1)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_2, u_2))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), _mm256_blendv_ps(_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), _mm256_mul_ps(_mm256_mul_ps(_mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s)), _mm256_mul_ps(u_0, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f))), _mm256_mul_ps(u_1, _mm256_set_ps(-3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f))), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f))), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_set_ps(v_s, v_s, v_s, v_s, v_s, v_s, v_s, v_s)), _mm256_cmp_ps(_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), _mm256_set_ps(((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1)), ((float)(ctr_1))), _CMP_GE_OQ))), forceTerm_10), xi_22));
+          _mm256_store_ps(&_data_pdfs_20_311_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_19, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_7, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u1Pu2), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_19, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_7, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u1Pu2, u1Pu2)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_0, u_0))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), forceTerm_11), xi_7));
+          _mm256_store_ps(&_data_pdfs_20_312_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_20, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_18, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u1Mu2), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_18, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_20, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u1Mu2, u1Mu2)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_0, u_0))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), forceTerm_12), xi_18));
+          _mm256_store_ps(&_data_pdfs_20_313_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_8, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_9, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u0Mu2), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_8, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_9, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u0Mu2, u0Mu2)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_1, u_1))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), forceTerm_13), xi_9));
+          _mm256_store_ps(&_data_pdfs_20_314_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_13, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_16, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u0Pu2), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_13, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_16, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u0Pu2, u0Pu2)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_1, u_1))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), forceTerm_14), xi_16));
+          _mm256_store_ps(&_data_pdfs_20_315_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_18, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_20, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u1Mu2), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_18, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_20, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u1Mu2, u1Mu2)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_0, u_0))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), forceTerm_15), xi_20));
+          _mm256_store_ps(&_data_pdfs_20_316_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_7, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_19, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u1Pu2), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_19, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_7, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u1Pu2, u1Pu2)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_0, u_0))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), forceTerm_16), xi_19));
+          _mm256_store_ps(&_data_pdfs_20_317_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_16, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_13, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u0Pu2), _mm256_set_ps(-0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f, -0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_13, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_16, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u0Pu2, u0Pu2)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_1, u_1))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), forceTerm_17), xi_13));
+          _mm256_store_ps(&_data_pdfs_20_318_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_9, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f)), _mm256_mul_ps(xi_8, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(_mm256_mul_ps(rho, u0Mu2), _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f))), _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0)), _mm256_mul_ps(_mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear), _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(f_eq_common, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_8, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(xi_9, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), _mm256_mul_ps(rho, _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f), _mm256_mul_ps(u0Mu2, u0Mu2)), _mm256_mul_ps(_mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f), _mm256_mul_ps(u_1, u_1))), _mm256_set_ps(-0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f, -0.013888888888888888f)))))), forceTerm_18), xi_8));
+        }
+        for (int64_t ctr_0 = (int64_t)((_size_force_0) / (8)) * (8); ctr_0 < _size_force_0; ctr_0 += 1) {
+          const float xi_25 = _data_pdfs_20_36_10[ctr_0];
+          const float xi_26 = _data_force_20_32_10[ctr_0];
+          const float xi_27 = _data_pdfs_20_31_10[ctr_0];
+          const float xi_28 = _data_pdfs_20_32_10[ctr_0];
+          const float xi_29 = _data_pdfs_20_311_10[ctr_0];
+          const float xi_30 = _data_pdfs_20_318_10[ctr_0];
+          const float xi_31 = _data_pdfs_20_313_10[ctr_0];
+          const float xi_32 = _data_pdfs_20_34_10[ctr_0];
+          const float xi_33 = _data_pdfs_20_33_10[ctr_0];
+          const float xi_34 = _data_pdfs_20_317_10[ctr_0];
+          const float xi_35 = _data_force_20_30_10[ctr_0];
+          const float xi_36 = _data_pdfs_20_35_10[ctr_0];
+          const float xi_37 = _data_pdfs_20_314_10[ctr_0];
+          const float xi_38 = _data_pdfs_20_38_10[ctr_0];
+          const float xi_39 = _data_pdfs_20_312_10[ctr_0];
+          const float xi_40 = _data_pdfs_20_316_10[ctr_0];
+          const float xi_41 = _data_pdfs_20_39_10[ctr_0];
+          const float xi_42 = _data_pdfs_20_315_10[ctr_0];
+          const float xi_43 = _data_force_20_31_10[ctr_0];
+          const float xi_44 = _data_pdfs_20_310_10[ctr_0];
+          const float xi_45 = _data_pdfs_20_30_10[ctr_0];
+          const float xi_46 = _data_pdfs_20_37_10[ctr_0];
+          const float xi_3 = xi_25;
+          const float xi_4 = xi_26;
+          const float xi_5 = xi_27;
+          const float xi_6 = xi_28;
+          const float xi_7 = xi_29;
+          const float xi_8 = xi_30;
+          const float xi_9 = xi_31;
+          const float xi_10 = xi_45;
+          const float xi_11 = xi_32;
+          const float xi_12 = xi_33;
+          const float xi_13 = xi_34;
+          const float xi_14 = xi_35;
+          const float xi_15 = xi_36;
+          const float xi_16 = xi_37;
+          const float xi_17 = xi_38;
+          const float xi_18 = xi_39;
+          const float xi_19 = xi_40;
+          const float xi_20 = xi_42;
+          const float xi_21 = xi_43;
+          const float xi_22 = xi_44;
+          const float xi_23 = xi_41;
+          const float xi_24 = xi_46;
+          const float vel0Term = xi_11 + xi_16 + xi_17 + xi_22 + xi_8;
+          const float vel1Term = xi_20 + xi_24 + xi_5 + xi_7;
+          const float vel2Term = xi_15 + xi_18 + xi_9;
+          const float rho = vel0Term + vel1Term + vel2Term + xi_10 + xi_12 + xi_13 + xi_19 + xi_23 + xi_3 + xi_6;
+          const float xi_1 = ((1.0f) / (rho));
+          const float u_0 = xi_1 * xi_14 * 0.5f + xi_1 * (vel0Term + xi_12 * -1.0f + xi_13 * -1.0f + xi_23 * -1.0f + xi_24 * -1.0f + xi_9 * -1.0f);
+          const float u_1 = xi_1 * xi_21 * 0.5f + xi_1 * (vel1Term + xi_17 + xi_18 * -1.0f + xi_19 * -1.0f + xi_22 * -1.0f + xi_23 * -1.0f + xi_6 * -1.0f);
+          const float u_2 = xi_1 * xi_4 * 0.5f + xi_1 * (vel2Term + xi_13 * -1.0f + xi_16 + xi_19 * -1.0f + xi_20 * -1.0f + xi_3 * -1.0f + xi_7 + xi_8 * -1.0f);
+          const float forceTerm_0 = omega_shear * u_0 * xi_14 * 0.5f + omega_shear * u_1 * xi_21 * 0.5f + omega_shear * u_2 * xi_4 * 0.5f + u_0 * xi_14 * -1.0f + u_1 * xi_21 * -1.0f + u_2 * xi_4 * -1.0f;
+          const float forceTerm_1 = omega_shear * u_0 * xi_14 * 0.083333333333333329f + omega_shear * u_1 * xi_21 * -0.16666666666666666f + omega_shear * u_2 * xi_4 * 0.083333333333333329f + rr_0 * xi_21 * -0.083333333333333329f + u_0 * xi_14 * -0.16666666666666666f + u_1 * xi_21 * 0.33333333333333331f + u_2 * xi_4 * -0.16666666666666666f + xi_21 * 0.16666666666666666f;
+          const float forceTerm_2 = omega_shear * u_0 * xi_14 * 0.083333333333333329f + omega_shear * u_1 * xi_21 * -0.16666666666666666f + omega_shear * u_2 * xi_4 * 0.083333333333333329f + rr_0 * xi_21 * 0.083333333333333329f + u_0 * xi_14 * -0.16666666666666666f + u_1 * xi_21 * 0.33333333333333331f + u_2 * xi_4 * -0.16666666666666666f + xi_21 * -0.16666666666666666f;
+          const float forceTerm_3 = omega_shear * u_0 * xi_14 * -0.16666666666666666f + omega_shear * u_1 * xi_21 * 0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.083333333333333329f + rr_0 * xi_14 * 0.083333333333333329f + u_0 * xi_14 * 0.33333333333333331f + u_1 * xi_21 * -0.16666666666666666f + u_2 * xi_4 * -0.16666666666666666f + xi_14 * -0.16666666666666666f;
+          const float forceTerm_4 = omega_shear * u_0 * xi_14 * -0.16666666666666666f + omega_shear * u_1 * xi_21 * 0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.083333333333333329f + rr_0 * xi_14 * -0.083333333333333329f + u_0 * xi_14 * 0.33333333333333331f + u_1 * xi_21 * -0.16666666666666666f + u_2 * xi_4 * -0.16666666666666666f + xi_14 * 0.16666666666666666f;
+          const float forceTerm_5 = omega_shear * u_0 * xi_14 * 0.083333333333333329f + omega_shear * u_1 * xi_21 * 0.083333333333333329f + omega_shear * u_2 * xi_4 * -0.16666666666666666f + rr_0 * xi_4 * -0.083333333333333329f + u_0 * xi_14 * -0.16666666666666666f + u_1 * xi_21 * -0.16666666666666666f + u_2 * xi_4 * 0.33333333333333331f + xi_4 * 0.16666666666666666f;
+          const float forceTerm_6 = omega_shear * u_0 * xi_14 * 0.083333333333333329f + omega_shear * u_1 * xi_21 * 0.083333333333333329f + omega_shear * u_2 * xi_4 * -0.16666666666666666f + rr_0 * xi_4 * 0.083333333333333329f + u_0 * xi_14 * -0.16666666666666666f + u_1 * xi_21 * -0.16666666666666666f + u_2 * xi_4 * 0.33333333333333331f + xi_4 * -0.16666666666666666f;
+          const float forceTerm_7 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_21 * 0.125f + omega_shear * u_1 * xi_14 * 0.125f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.041666666666666664f + rr_0 * xi_14 * 0.041666666666666664f + rr_0 * xi_21 * -0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_21 * -0.25f + u_1 * xi_14 * -0.25f + u_1 * xi_21 * 0.16666666666666666f + u_2 * xi_4 * -0.083333333333333329f + xi_14 * -0.083333333333333329f + xi_21 * 0.083333333333333329f;
+          const float forceTerm_8 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_21 * -0.125f + omega_shear * u_1 * xi_14 * -0.125f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.041666666666666664f + rr_0 * xi_14 * -0.041666666666666664f + rr_0 * xi_21 * -0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_21 * 0.25f + u_1 * xi_14 * 0.25f + u_1 * xi_21 * 0.16666666666666666f + u_2 * xi_4 * -0.083333333333333329f + xi_14 * 0.083333333333333329f + xi_21 * 0.083333333333333329f;
+          const float forceTerm_9 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_21 * -0.125f + omega_shear * u_1 * xi_14 * -0.125f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.041666666666666664f + rr_0 * xi_14 * 0.041666666666666664f + rr_0 * xi_21 * 0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_21 * 0.25f + u_1 * xi_14 * 0.25f + u_1 * xi_21 * 0.16666666666666666f + u_2 * xi_4 * -0.083333333333333329f + xi_14 * -0.083333333333333329f + xi_21 * -0.083333333333333329f;
+          const float forceTerm_10 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_21 * 0.125f + omega_shear * u_1 * xi_14 * 0.125f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_2 * xi_4 * 0.041666666666666664f + rr_0 * xi_14 * -0.041666666666666664f + rr_0 * xi_21 * 0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_21 * -0.25f + u_1 * xi_14 * -0.25f + u_1 * xi_21 * 0.16666666666666666f + u_2 * xi_4 * -0.083333333333333329f + xi_14 * 0.083333333333333329f + xi_21 * -0.083333333333333329f;
+          const float forceTerm_11 = omega_shear * u_0 * xi_14 * 0.041666666666666664f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_1 * xi_4 * -0.125f + omega_shear * u_2 * xi_21 * -0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_21 * -0.041666666666666664f + rr_0 * xi_4 * -0.041666666666666664f + u_0 * xi_14 * -0.083333333333333329f + u_1 * xi_21 * 0.16666666666666666f + u_1 * xi_4 * 0.25f + u_2 * xi_21 * 0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_21 * 0.083333333333333329f + xi_4 * 0.083333333333333329f;
+          const float forceTerm_12 = omega_shear * u_0 * xi_14 * 0.041666666666666664f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_1 * xi_4 * 0.125f + omega_shear * u_2 * xi_21 * 0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_21 * 0.041666666666666664f + rr_0 * xi_4 * -0.041666666666666664f + u_0 * xi_14 * -0.083333333333333329f + u_1 * xi_21 * 0.16666666666666666f + u_1 * xi_4 * -0.25f + u_2 * xi_21 * -0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_21 * -0.083333333333333329f + xi_4 * 0.083333333333333329f;
+          const float forceTerm_13 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_4 * 0.125f + omega_shear * u_1 * xi_21 * 0.041666666666666664f + omega_shear * u_2 * xi_14 * 0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_14 * 0.041666666666666664f + rr_0 * xi_4 * -0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_4 * -0.25f + u_1 * xi_21 * -0.083333333333333329f + u_2 * xi_14 * -0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_14 * -0.083333333333333329f + xi_4 * 0.083333333333333329f;
+          const float forceTerm_14 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_4 * -0.125f + omega_shear * u_1 * xi_21 * 0.041666666666666664f + omega_shear * u_2 * xi_14 * -0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_14 * -0.041666666666666664f + rr_0 * xi_4 * -0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_4 * 0.25f + u_1 * xi_21 * -0.083333333333333329f + u_2 * xi_14 * 0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_14 * 0.083333333333333329f + xi_4 * 0.083333333333333329f;
+          const float forceTerm_15 = omega_shear * u_0 * xi_14 * 0.041666666666666664f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_1 * xi_4 * 0.125f + omega_shear * u_2 * xi_21 * 0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_21 * -0.041666666666666664f + rr_0 * xi_4 * 0.041666666666666664f + u_0 * xi_14 * -0.083333333333333329f + u_1 * xi_21 * 0.16666666666666666f + u_1 * xi_4 * -0.25f + u_2 * xi_21 * -0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_21 * 0.083333333333333329f + xi_4 * -0.083333333333333329f;
+          const float forceTerm_16 = omega_shear * u_0 * xi_14 * 0.041666666666666664f + omega_shear * u_1 * xi_21 * -0.083333333333333329f + omega_shear * u_1 * xi_4 * -0.125f + omega_shear * u_2 * xi_21 * -0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_21 * 0.041666666666666664f + rr_0 * xi_4 * 0.041666666666666664f + u_0 * xi_14 * -0.083333333333333329f + u_1 * xi_21 * 0.16666666666666666f + u_1 * xi_4 * 0.25f + u_2 * xi_21 * 0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_21 * -0.083333333333333329f + xi_4 * -0.083333333333333329f;
+          const float forceTerm_17 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_4 * -0.125f + omega_shear * u_1 * xi_21 * 0.041666666666666664f + omega_shear * u_2 * xi_14 * -0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_14 * 0.041666666666666664f + rr_0 * xi_4 * 0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_4 * 0.25f + u_1 * xi_21 * -0.083333333333333329f + u_2 * xi_14 * 0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_14 * -0.083333333333333329f + xi_4 * -0.083333333333333329f;
+          const float forceTerm_18 = omega_shear * u_0 * xi_14 * -0.083333333333333329f + omega_shear * u_0 * xi_4 * 0.125f + omega_shear * u_1 * xi_21 * 0.041666666666666664f + omega_shear * u_2 * xi_14 * 0.125f + omega_shear * u_2 * xi_4 * -0.083333333333333329f + rr_0 * xi_14 * -0.041666666666666664f + rr_0 * xi_4 * 0.041666666666666664f + u_0 * xi_14 * 0.16666666666666666f + u_0 * xi_4 * -0.25f + u_1 * xi_21 * -0.083333333333333329f + u_2 * xi_14 * -0.25f + u_2 * xi_4 * 0.16666666666666666f + xi_14 * 0.083333333333333329f + xi_4 * -0.083333333333333329f;
+          const float u0Mu1 = u_0 + u_1 * -1.0f;
+          const float u0Pu1 = u_0 + u_1;
+          const float u1Pu2 = u_1 + u_2;
+          const float u1Mu2 = u_1 + u_2 * -1.0f;
+          const float u0Mu2 = u_0 + u_2 * -1.0f;
+          const float u0Pu2 = u_0 + u_2;
+          const float f_eq_common = rho * -1.0f * u_0 * u_0 + rho * -1.0f * u_1 * u_1 + rho * -1.0f * u_2 * u_2 + rho;
+          _data_pdfs_20_30_10[ctr_0] = forceTerm_0 + omega_shear * (f_eq_common * 0.33333333333333331f + xi_10 * -1.0f) + xi_10;
+          _data_pdfs_20_31_10[ctr_0] = forceTerm_1 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * u_1 * u_1) + xi_5 * -0.5f + xi_6 * -0.5f) + rr_0 * (rho * u_1 * 0.16666666666666666f + xi_5 * -0.5f + xi_6 * 0.5f) + xi_5 + ((-1.0f <= grid_size * -1.0f + ((float)(ctr_1))) ? (rho * v_s * (u_0 * 2.0f + v_s) * 0.16666666666666666f) : (0.0f));
+          _data_pdfs_20_32_10[ctr_0] = forceTerm_2 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * u_1 * u_1) + xi_5 * -0.5f + xi_6 * -0.5f) + rr_0 * (rho * u_1 * -0.16666666666666666f + xi_5 * 0.5f + xi_6 * -0.5f) + xi_6 + ((0.0f >= ((float)(ctr_1))) ? (rho * v_s * (u_0 * -2.0f + v_s) * 0.16666666666666666f) : (0.0f));
+          _data_pdfs_20_33_10[ctr_0] = forceTerm_3 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * u_0 * u_0) + xi_11 * -0.5f + xi_12 * -0.5f) + rr_0 * (rho * u_0 * -0.16666666666666666f + xi_11 * 0.5f + xi_12 * -0.5f) + xi_12;
+          _data_pdfs_20_34_10[ctr_0] = forceTerm_4 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * u_0 * u_0) + xi_11 * -0.5f + xi_12 * -0.5f) + rr_0 * (rho * u_0 * 0.16666666666666666f + xi_11 * -0.5f + xi_12 * 0.5f) + xi_11;
+          _data_pdfs_20_35_10[ctr_0] = forceTerm_5 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * u_2 * u_2) + xi_15 * -0.5f + xi_3 * -0.5f) + rr_0 * (rho * u_2 * 0.16666666666666666f + xi_15 * -0.5f + xi_3 * 0.5f) + xi_15;
+          _data_pdfs_20_36_10[ctr_0] = forceTerm_6 + omega_shear * (f_eq_common * 0.16666666666666666f + rho * (-0.1111111111111111f + 0.33333333333333331f * u_2 * u_2) + xi_15 * -0.5f + xi_3 * -0.5f) + rr_0 * (rho * u_2 * -0.16666666666666666f + xi_15 * 0.5f + xi_3 * -0.5f) + xi_3;
+          _data_pdfs_20_37_10[ctr_0] = forceTerm_7 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_2 * u_2 + 0.125f * u0Mu1 * u0Mu1) + xi_22 * -0.5f + xi_24 * -0.5f) + rr_0 * (rho * u0Mu1 * -0.083333333333333329f + xi_22 * 0.5f + xi_24 * -0.5f) + xi_24 + ((-1.0f <= grid_size * -1.0f + ((float)(ctr_1))) ? (rho * v_s * (u_0 * -2.0f + u_1 * 3.0f + v_s * -1.0f + 1.0f) * 0.083333333333333329f) : (0.0f));
+          _data_pdfs_20_38_10[ctr_0] = forceTerm_8 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_2 * u_2 + 0.125f * u0Pu1 * u0Pu1) + xi_17 * -0.5f + xi_23 * -0.5f) + rr_0 * (rho * u0Pu1 * 0.083333333333333329f + xi_17 * -0.5f + xi_23 * 0.5f) + xi_17 + ((-1.0f <= grid_size * -1.0f + ((float)(ctr_1))) ? (rho * v_s * (u_0 * 2.0f + u_1 * 3.0f + v_s + 1.0f) * -0.083333333333333329f) : (0.0f));
+          _data_pdfs_20_39_10[ctr_0] = forceTerm_9 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_2 * u_2 + 0.125f * u0Pu1 * u0Pu1) + xi_17 * -0.5f + xi_23 * -0.5f) + rr_0 * (rho * u0Pu1 * -0.083333333333333329f + xi_17 * 0.5f + xi_23 * -0.5f) + xi_23 + ((0.0f >= ((float)(ctr_1))) ? (rho * v_s * (u_0 * 2.0f + u_1 * 3.0f + v_s * -1.0f - 1.0f) * 0.083333333333333329f) : (0.0f));
+          _data_pdfs_20_310_10[ctr_0] = forceTerm_10 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_2 * u_2 + 0.125f * u0Mu1 * u0Mu1) + xi_22 * -0.5f + xi_24 * -0.5f) + rr_0 * (rho * u0Mu1 * 0.083333333333333329f + xi_22 * -0.5f + xi_24 * 0.5f) + xi_22 + ((0.0f >= ((float)(ctr_1))) ? (rho * v_s * (u_0 * 2.0f + u_1 * -3.0f + v_s * -1.0f + 1.0f) * 0.083333333333333329f) : (0.0f));
+          _data_pdfs_20_311_10[ctr_0] = forceTerm_11 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_0 * u_0 + 0.125f * u1Pu2 * u1Pu2) + xi_19 * -0.5f + xi_7 * -0.5f) + rr_0 * (rho * u1Pu2 * 0.083333333333333329f + xi_19 * 0.5f + xi_7 * -0.5f) + xi_7;
+          _data_pdfs_20_312_10[ctr_0] = forceTerm_12 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_0 * u_0 + 0.125f * u1Mu2 * u1Mu2) + xi_18 * -0.5f + xi_20 * -0.5f) + rr_0 * (rho * u1Mu2 * -0.083333333333333329f + xi_18 * -0.5f + xi_20 * 0.5f) + xi_18;
+          _data_pdfs_20_313_10[ctr_0] = forceTerm_13 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_1 * u_1 + 0.125f * u0Mu2 * u0Mu2) + xi_8 * -0.5f + xi_9 * -0.5f) + rr_0 * (rho * u0Mu2 * -0.083333333333333329f + xi_8 * 0.5f + xi_9 * -0.5f) + xi_9;
+          _data_pdfs_20_314_10[ctr_0] = forceTerm_14 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_1 * u_1 + 0.125f * u0Pu2 * u0Pu2) + xi_13 * -0.5f + xi_16 * -0.5f) + rr_0 * (rho * u0Pu2 * 0.083333333333333329f + xi_13 * 0.5f + xi_16 * -0.5f) + xi_16;
+          _data_pdfs_20_315_10[ctr_0] = forceTerm_15 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_0 * u_0 + 0.125f * u1Mu2 * u1Mu2) + xi_18 * -0.5f + xi_20 * -0.5f) + rr_0 * (rho * u1Mu2 * 0.083333333333333329f + xi_18 * 0.5f + xi_20 * -0.5f) + xi_20;
+          _data_pdfs_20_316_10[ctr_0] = forceTerm_16 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_0 * u_0 + 0.125f * u1Pu2 * u1Pu2) + xi_19 * -0.5f + xi_7 * -0.5f) + rr_0 * (rho * u1Pu2 * -0.083333333333333329f + xi_19 * -0.5f + xi_7 * 0.5f) + xi_19;
+          _data_pdfs_20_317_10[ctr_0] = forceTerm_17 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_1 * u_1 + 0.125f * u0Pu2 * u0Pu2) + xi_13 * -0.5f + xi_16 * -0.5f) + rr_0 * (rho * u0Pu2 * -0.083333333333333329f + xi_13 * -0.5f + xi_16 * 0.5f) + xi_13;
+          _data_pdfs_20_318_10[ctr_0] = forceTerm_18 + omega_shear * (f_eq_common * 0.041666666666666664f + rho * (-0.013888888888888888f + 0.041666666666666664f * u_1 * u_1 + 0.125f * u0Mu2 * u0Mu2) + xi_8 * -0.5f + xi_9 * -0.5f) + rr_0 * (rho * u0Mu2 * 0.083333333333333329f + xi_8 * -0.5f + xi_9 * 0.5f) + xi_8;
+        }
+      }
+    }
+  }
+}
+} // namespace internal_9a18f2f4073cdcc5365cdfddb752069e
+
+void CollideSweepSinglePrecisionLeesEdwardsAVX::run(IBlock *block) {
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+
+  auto &omega_shear = this->omega_shear_;
+  auto &v_s = this->v_s_;
+  auto &grid_size = this->grid_size_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_9a18f2f4073cdcc5365cdfddb752069e::collidesweepsingleprecisionleesedwardsavx_collidesweepsingleprecisionleesedwardsavx(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, grid_size, omega_shear, v_s);
+}
+
+void CollideSweepSinglePrecisionLeesEdwardsAVX::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+
+  auto &omega_shear = this->omega_shear_;
+  auto &v_s = this->v_s_;
+  auto &grid_size = this->grid_size_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_9a18f2f4073cdcc5365cdfddb752069e::collidesweepsingleprecisionleesedwardsavx_collidesweepsingleprecisionleesedwardsavx(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, grid_size, omega_shear, v_s);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwardsAVX.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwardsAVX.h
new file mode 100644
index 00000000000..1885949a8e9
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionLeesEdwardsAVX.h
@@ -0,0 +1,109 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepSinglePrecisionLeesEdwardsAVX.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class CollideSweepSinglePrecisionLeesEdwardsAVX {
+public:
+  CollideSweepSinglePrecisionLeesEdwardsAVX(BlockDataID forceID_,
+                                            BlockDataID pdfsID_,
+                                            float grid_size, float omega_shear,
+                                            float v_s)
+      : forceID(forceID_), pdfsID(pdfsID_), grid_size_(grid_size),
+        omega_shear_(omega_shear), v_s_(v_s){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)> getSweep(
+      const shared_ptr<CollideSweepSinglePrecisionLeesEdwardsAVX> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<CollideSweepSinglePrecisionLeesEdwardsAVX> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  float grid_size_;
+  float omega_shear_;
+  float v_s_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalized.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalized.cpp
new file mode 100644
index 00000000000..bcdd45ddad4
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalized.cpp
@@ -0,0 +1,552 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepSinglePrecisionThermalized.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "CollideSweepSinglePrecisionThermalized.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#include "philox_rand.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_69764eed2d0964e29e3b97d1054b4693 {
+static FUNC_PREFIX void collidesweepsingleprecisionthermalized_collidesweepsingleprecisionthermalized(float *RESTRICT const _data_force, float *RESTRICT _data_pdfs, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, uint32_t block_offset_0, uint32_t block_offset_1, uint32_t block_offset_2, float kT, float omega_bulk, float omega_even, float omega_odd, float omega_shear, uint32_t seed, uint32_t time_step) {
+  const float xi_28 = omega_bulk * 0.5f;
+  const float xi_55 = omega_shear * 0.041666666666666664f;
+  const float xi_60 = omega_bulk * 0.041666666666666664f;
+  const float xi_71 = omega_shear * 0.125f;
+  const float xi_109 = 2.4494897427831779f;
+  const float xi_134 = omega_odd * 0.25f;
+  const float xi_145 = omega_odd * 0.083333333333333329f;
+  const float xi_198 = omega_shear * 0.25f;
+  const float xi_211 = omega_odd * 0.041666666666666664f;
+  const float xi_213 = omega_odd * 0.125f;
+  const float rr_0 = 0.0f;
+  const float xi_53 = rr_0 * 0.041666666666666664f;
+  for (int64_t ctr_2 = 0; ctr_2 < _size_force_2; ctr_2 += 1) {
+    float *RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    float *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    float *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    float *RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    float *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_force_1; ctr_1 += 1) {
+      float *RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_36;
+      float *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      float *RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_31;
+      float *RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_32;
+      float *RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_311;
+      float *RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_318;
+      float *RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_313;
+      float *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      float *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      float *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      float *RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_317;
+      float *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      float *RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_35;
+      float *RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_314;
+      float *RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_312;
+      float *RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_316;
+      float *RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_38;
+      float *RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_315;
+      float *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      float *RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_310;
+      float *RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_39;
+      float *RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_37;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_force_0; ctr_0 += 1) {
+        const float xi_244 = _data_pdfs_20_36_10[_stride_pdfs_0 * ctr_0];
+        const float xi_245 = _data_force_20_32_10[_stride_force_0 * ctr_0];
+        const float xi_246 = _data_pdfs_20_31_10[_stride_pdfs_0 * ctr_0];
+        const float xi_247 = _data_pdfs_20_32_10[_stride_pdfs_0 * ctr_0];
+        const float xi_248 = _data_pdfs_20_311_10[_stride_pdfs_0 * ctr_0];
+        const float xi_249 = _data_pdfs_20_318_10[_stride_pdfs_0 * ctr_0];
+        const float xi_250 = _data_pdfs_20_313_10[_stride_pdfs_0 * ctr_0];
+        const float xi_251 = _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0];
+        const float xi_252 = _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0];
+        const float xi_253 = _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0];
+        const float xi_254 = _data_pdfs_20_317_10[_stride_pdfs_0 * ctr_0];
+        const float xi_255 = _data_force_20_30_10[_stride_force_0 * ctr_0];
+        const float xi_256 = _data_pdfs_20_35_10[_stride_pdfs_0 * ctr_0];
+        const float xi_257 = _data_pdfs_20_314_10[_stride_pdfs_0 * ctr_0];
+        const float xi_258 = _data_pdfs_20_312_10[_stride_pdfs_0 * ctr_0];
+        const float xi_259 = _data_pdfs_20_316_10[_stride_pdfs_0 * ctr_0];
+        const float xi_260 = _data_pdfs_20_38_10[_stride_pdfs_0 * ctr_0];
+        const float xi_261 = _data_pdfs_20_315_10[_stride_pdfs_0 * ctr_0];
+        const float xi_262 = _data_force_20_31_10[_stride_force_0 * ctr_0];
+        const float xi_263 = _data_pdfs_20_310_10[_stride_pdfs_0 * ctr_0];
+        const float xi_264 = _data_pdfs_20_39_10[_stride_pdfs_0 * ctr_0];
+        const float xi_265 = _data_pdfs_20_37_10[_stride_pdfs_0 * ctr_0];
+
+        float random_3_0{};
+        float random_3_1{};
+        float random_3_2{};
+        float random_3_3{};
+        if (kT > 0.) {
+          philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 3, seed, random_3_0, random_3_1, random_3_2, random_3_3);
+        }
+
+        float random_2_0{};
+        float random_2_1{};
+        float random_2_2{};
+        float random_2_3{};
+        if (kT > 0.) {
+          philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 2, seed, random_2_0, random_2_1, random_2_2, random_2_3);
+        }
+
+        float random_1_0{};
+        float random_1_1{};
+        float random_1_2{};
+        float random_1_3{};
+        if (kT > 0.) {
+          philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 1, seed, random_1_0, random_1_1, random_1_2, random_1_3);
+        }
+
+        float random_0_0{};
+        float random_0_1{};
+        float random_0_2{};
+        float random_0_3{};
+        if (kT > 0.) {
+          philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 0, seed, random_0_0, random_0_1, random_0_2, random_0_3);
+        }
+        const float xi_2 = xi_249 + xi_257;
+        const float xi_3 = xi_2 + xi_252;
+        const float xi_4 = xi_246 + xi_248 + xi_261;
+        const float xi_5 = xi_256 + xi_258;
+        const float xi_6 = xi_244 + xi_254;
+        const float xi_8 = xi_264 * -1.0f;
+        const float xi_9 = xi_265 * -1.0f;
+        const float xi_10 = xi_254 * -1.0f;
+        const float xi_11 = xi_250 * -1.0f;
+        const float xi_12 = xi_253 * -1.0f;
+        const float xi_13 = xi_10 + xi_11 + xi_12;
+        const float xi_14 = xi_247 * -1.0f;
+        const float xi_15 = xi_263 * -1.0f;
+        const float xi_16 = xi_14 + xi_15;
+        const float xi_17 = xi_259 * -1.0f;
+        const float xi_18 = xi_258 * -1.0f;
+        const float xi_19 = xi_17 + xi_18;
+        const float xi_20 = xi_249 * -1.0f;
+        const float xi_21 = xi_10 + xi_20;
+        const float xi_22 = xi_261 * -1.0f;
+        const float xi_23 = xi_244 * -1.0f;
+        const float xi_24 = xi_17 + xi_22 + xi_23 + xi_248;
+        const float xi_29 = xi_262 * 0.16666666666666666f;
+        const float xi_30 = xi_262 * 0.083333333333333329f;
+        const float xi_42 = xi_255 * 0.16666666666666666f;
+        const float xi_43 = xi_255 * 0.083333333333333329f;
+        const float xi_49 = xi_245 * 0.16666666666666666f;
+        const float xi_50 = xi_245 * 0.083333333333333329f;
+        const float xi_67 = xi_262 * 0.25f;
+        const float xi_72 = xi_262 * xi_71;
+        const float xi_114 = xi_251 * -1.0f;
+        const float xi_118 = xi_248 * -1.0f;
+        const float xi_119 = xi_118 + xi_18;
+        const float xi_120 = xi_260 * -1.0f + xi_8;
+        const float xi_122 = xi_257 * -1.0f;
+        const float xi_123 = xi_11 + xi_122 + xi_15 + xi_21;
+        const float xi_125 = xi_248 * 2.0f + xi_258 * 2.0f + xi_259 * 2.0f + xi_261 * 2.0f;
+        const float xi_126 = xi_125 + xi_252 * 5.0f + xi_253 * 5.0f;
+        const float xi_128 = xi_250 * 2.0f;
+        const float xi_129 = xi_257 * 2.0f;
+        const float xi_130 = xi_249 * 2.0f + xi_254 * 2.0f;
+        const float xi_132 = xi_118 + xi_258;
+        const float xi_133 = xi_132 + xi_14 + xi_22 + xi_246 + xi_259;
+        const float xi_135 = xi_133 * xi_134;
+        const float xi_136 = random_2_3 - 0.5f;
+        const float xi_141 = xi_265 * 2.0f;
+        const float xi_142 = xi_263 * 2.0f;
+        const float xi_143 = xi_260 * -2.0f + xi_264 * 2.0f;
+        const float xi_144 = xi_14 + xi_141 * -1.0f + xi_142 + xi_143 + xi_19 + xi_4;
+        const float xi_146 = xi_144 * xi_145;
+        const float xi_147 = random_1_2 - 0.5f;
+        const float xi_152 = random_0_1 - 0.5f;
+        const float xi_166 = xi_122 + xi_250;
+        const float xi_167 = xi_12 + xi_166 + xi_20 + xi_252 + xi_254;
+        const float xi_168 = xi_134 * xi_167;
+        const float xi_169 = random_2_1 - 0.5f;
+        const float xi_171 = xi_13 + xi_141 + xi_142 * -1.0f + xi_143 + xi_3;
+        const float xi_172 = xi_145 * xi_171;
+        const float xi_173 = random_2_0 - 0.5f;
+        const float xi_178 = xi_119 + xi_23 + xi_256 + xi_259 + xi_261;
+        const float xi_179 = xi_134 * xi_178;
+        const float xi_180 = random_2_2 - 0.5f;
+        const float xi_182 = xi_128 * -1.0f + xi_129 * -1.0f + xi_130 + xi_24 + xi_5;
+        const float xi_183 = xi_145 * xi_182;
+        const float xi_184 = random_1_3 - 0.5f;
+        const float xi_212 = xi_182 * xi_211;
+        const float xi_214 = xi_178 * xi_213;
+        const float xi_220 = xi_144 * xi_211;
+        const float xi_221 = xi_133 * xi_213;
+        const float xi_235 = xi_167 * xi_213;
+        const float xi_236 = xi_171 * xi_211;
+        const float xi_31 = rr_0 * xi_30;
+        const float xi_44 = rr_0 * xi_43;
+        const float xi_51 = rr_0 * xi_50;
+        const float xi_54 = xi_255 * xi_53;
+        const float xi_59 = xi_262 * xi_53;
+        const float xi_81 = xi_245 * xi_53;
+        const float vel0Term = xi_260 + xi_263 + xi_3;
+        const float vel1Term = xi_265 + xi_4;
+        const float vel2Term = xi_250 + xi_5;
+        const float rho = vel0Term + vel1Term + vel2Term + xi_247 + xi_251 + xi_253 + xi_259 + xi_264 + xi_6;
+        const float xi_105 = kT * rho;
+        const float xi_106 = powf(xi_105 * (-1.0f * ((omega_even * -1.0f + 1.0f) * (omega_even * -1.0f + 1.0f)) + 1.0f), 0.5f);
+        const float xi_107 = xi_106 * (random_3_0 - 0.5f) * 3.7416573867739413f;
+        const float xi_108 = xi_106 * (random_3_2 - 0.5f) * 5.4772255750516612f;
+        const float xi_110 = xi_109 * (random_1_1 - 0.5f) * powf(xi_105 * (-1.0f * ((omega_bulk * -1.0f + 1.0f) * (omega_bulk * -1.0f + 1.0f)) + 1.0f), 0.5f);
+        const float xi_111 = xi_106 * (random_3_1 - 0.5f) * 8.3666002653407556f;
+        const float xi_137 = powf(xi_105 * (-1.0f * ((omega_odd * -1.0f + 1.0f) * (omega_odd * -1.0f + 1.0f)) + 1.0f), 0.5f);
+        const float xi_138 = xi_137 * 1.4142135623730951f;
+        const float xi_139 = xi_138 * 0.5f;
+        const float xi_140 = xi_136 * xi_139;
+        const float xi_148 = xi_109 * xi_137;
+        const float xi_149 = xi_148 * 0.16666666666666666f;
+        const float xi_150 = xi_147 * xi_149;
+        const float xi_151 = xi_146 * -1.0f + xi_150 * -1.0f;
+        const float xi_153 = powf(xi_105 * (-1.0f * ((omega_shear * -1.0f + 1.0f) * (omega_shear * -1.0f + 1.0f)) + 1.0f), 0.5f);
+        const float xi_154 = xi_153 * 0.5f;
+        const float xi_155 = xi_152 * xi_154;
+        const float xi_161 = xi_153 * (random_0_0 - 0.5f) * 1.7320508075688772f;
+        const float xi_165 = xi_146 + xi_150;
+        const float xi_170 = xi_139 * xi_169;
+        const float xi_174 = xi_149 * xi_173;
+        const float xi_175 = xi_172 + xi_174;
+        const float xi_177 = xi_172 * -1.0f + xi_174 * -1.0f;
+        const float xi_181 = xi_139 * xi_180;
+        const float xi_185 = xi_149 * xi_184;
+        const float xi_186 = xi_183 * -1.0f + xi_185 * -1.0f;
+        const float xi_188 = xi_183 + xi_185;
+        const float xi_189 = xi_152 * xi_153 * 0.25f;
+        const float xi_192 = xi_107 * 0.083333333333333329f;
+        const float xi_196 = xi_154 * (random_0_2 - 0.5f);
+        const float xi_203 = xi_154 * (random_1_0 - 0.5f);
+        const float xi_207 = xi_111 * -0.014285714285714285f;
+        const float xi_208 = xi_108 * 0.050000000000000003f;
+        const float xi_215 = xi_148 * 0.083333333333333329f;
+        const float xi_216 = xi_184 * xi_215;
+        const float xi_217 = xi_138 * 0.25f;
+        const float xi_218 = xi_180 * xi_217;
+        const float xi_219 = xi_212 * -1.0f + xi_214 + xi_216 * -1.0f + xi_218;
+        const float xi_222 = xi_147 * xi_215;
+        const float xi_223 = xi_136 * xi_217;
+        const float xi_224 = xi_220 * -1.0f + xi_221 + xi_222 * -1.0f + xi_223;
+        const float xi_225 = xi_220 + xi_221 * -1.0f + xi_222 + xi_223 * -1.0f;
+        const float xi_227 = xi_189 * -1.0f;
+        const float xi_230 = xi_111 * 0.035714285714285712f;
+        const float xi_232 = xi_154 * (random_0_3 - 0.5f);
+        const float xi_237 = xi_169 * xi_217;
+        const float xi_238 = xi_173 * xi_215;
+        const float xi_239 = xi_235 * -1.0f + xi_236 + xi_237 * -1.0f + xi_238;
+        const float xi_241 = xi_235 + xi_236 * -1.0f + xi_237 + xi_238 * -1.0f;
+        const float xi_242 = xi_212 + xi_214 * -1.0f + xi_216 + xi_218 * -1.0f;
+        const float xi_0 = ((1.0f) / (rho));
+        const float xi_7 = xi_0 * 0.5f;
+        const float u_0 = xi_0 * (vel0Term + xi_13 + xi_8 + xi_9) + xi_255 * xi_7;
+        const float xi_25 = u_0 * xi_255;
+        const float xi_37 = xi_25 * 0.16666666666666666f;
+        const float xi_38 = xi_25 * 0.083333333333333329f;
+        const float xi_39 = omega_shear * xi_38;
+        const float xi_40 = xi_37 * -1.0f + xi_39;
+        const float xi_56 = xi_25 * xi_55 * -1.0f + xi_37;
+        const float xi_57 = xi_43 * -1.0f + xi_54 + xi_56;
+        const float xi_61 = xi_25 * xi_60 * -1.0f;
+        const float xi_68 = u_0 * xi_67;
+        const float xi_73 = u_0 * xi_72;
+        const float xi_77 = xi_43 + xi_54 * -1.0f + xi_56;
+        const float xi_84 = xi_38 * -1.0f;
+        const float xi_95 = u_0 * xi_245;
+        const float xi_96 = xi_95 * 0.25f;
+        const float xi_99 = xi_71 * xi_95;
+        const float xi_113 = rho * (u_0 * u_0);
+        const float u_1 = xi_0 * (vel1Term + xi_16 + xi_19 + xi_260 + xi_8) + xi_262 * xi_7;
+        const float xi_26 = u_1 * xi_262;
+        const float xi_32 = xi_26 * 0.16666666666666666f;
+        const float xi_45 = xi_26 * 0.083333333333333329f;
+        const float xi_46 = omega_shear * xi_45;
+        const float xi_47 = xi_32 * -1.0f + xi_46;
+        const float xi_62 = xi_26 * xi_60 * -1.0f;
+        const float xi_69 = u_1 * 0.25f;
+        const float xi_70 = xi_255 * xi_69;
+        const float xi_74 = u_1 * xi_71;
+        const float xi_75 = xi_255 * xi_74;
+        const float xi_76 = xi_68 * -1.0f + xi_70 * -1.0f + xi_73 + xi_75;
+        const float xi_78 = xi_68 + xi_70 + xi_73 * -1.0f + xi_75 * -1.0f;
+        const float xi_86 = xi_245 * xi_69;
+        const float xi_88 = xi_245 * xi_74;
+        const float xi_93 = xi_45 * -1.0f;
+        const float xi_112 = rho * (u_1 * u_1);
+        const float xi_121 = xi_112 + xi_120 + xi_9;
+        const float xi_197 = rho * u_1;
+        const float xi_199 = xi_198 * (u_0 * xi_197 + xi_120 + xi_263 + xi_265);
+        const float xi_200 = xi_196 * -1.0f + xi_199 * -1.0f;
+        const float xi_201 = xi_196 + xi_199;
+        const float u_2 = xi_0 * (vel2Term + xi_21 + xi_24 + xi_257) + xi_245 * xi_7;
+        const float xi_27 = u_2 * xi_245;
+        const float xi_33 = xi_27 * 0.16666666666666666f;
+        const float xi_34 = xi_27 * 0.083333333333333329f;
+        const float xi_35 = omega_shear * xi_34;
+        const float xi_36 = xi_33 * -1.0f + xi_35;
+        const float xi_41 = omega_shear * xi_32 * -1.0f + xi_26 * 0.33333333333333331f + xi_36 + xi_40;
+        const float xi_48 = omega_shear * xi_37 * -1.0f + xi_25 * 0.33333333333333331f + xi_36 + xi_47;
+        const float xi_52 = omega_shear * xi_33 * -1.0f + xi_27 * 0.33333333333333331f + xi_40 + xi_47;
+        const float xi_58 = xi_34 * -1.0f;
+        const float xi_63 = xi_27 * xi_60 * -1.0f;
+        const float xi_64 = xi_26 * xi_55 * -1.0f + xi_32 + xi_61 + xi_62 + xi_63;
+        const float xi_65 = xi_30 + xi_59 * -1.0f + xi_64;
+        const float xi_66 = xi_35 + xi_58 + xi_65;
+        const float xi_79 = xi_30 * -1.0f + xi_59 + xi_64;
+        const float xi_80 = xi_35 + xi_58 + xi_79;
+        const float xi_82 = xi_27 * xi_55 * -1.0f + xi_33;
+        const float xi_83 = xi_50 + xi_81 * -1.0f + xi_82;
+        const float xi_85 = xi_39 + xi_65 + xi_84;
+        const float xi_87 = u_2 * xi_67;
+        const float xi_89 = u_2 * xi_72;
+        const float xi_90 = xi_86 + xi_87 + xi_88 * -1.0f + xi_89 * -1.0f;
+        const float xi_91 = xi_39 + xi_79 + xi_84;
+        const float xi_92 = xi_86 * -1.0f + xi_87 * -1.0f + xi_88 + xi_89;
+        const float xi_94 = xi_46 + xi_61 + xi_62 + xi_63 + xi_83 + xi_93;
+        const float xi_97 = u_2 * xi_255;
+        const float xi_98 = xi_97 * 0.25f;
+        const float xi_100 = xi_71 * xi_97;
+        const float xi_101 = xi_100 + xi_96 * -1.0f + xi_98 * -1.0f + xi_99;
+        const float xi_102 = xi_100 * -1.0f + xi_96 + xi_98 + xi_99 * -1.0f;
+        const float xi_103 = xi_50 * -1.0f + xi_81 + xi_82;
+        const float xi_104 = xi_103 + xi_46 + xi_61 + xi_62 + xi_63 + xi_93;
+        const float xi_115 = rho * (u_2 * u_2);
+        const float xi_116 = xi_114 + xi_115 * 0.66666666666666663f + xi_244 * 3.0f + xi_256 * 3.0f;
+        const float xi_117 = omega_even * (xi_112 * 0.66666666666666663f + xi_113 * 1.6666666666666667f + xi_116 + xi_246 * 3.0f + xi_247 * 3.0f + xi_248 * -3.0f + xi_258 * -3.0f + xi_259 * -3.0f + xi_261 * -3.0f);
+        const float xi_124 = omega_bulk * (xi_113 + xi_115 + xi_119 + xi_121 + xi_123 + xi_17 + xi_22 + xi_251);
+        const float xi_127 = omega_even * (xi_112 * 2.3333333333333335f + xi_116 + xi_126 + xi_246 * -2.0f + xi_247 * -2.0f + xi_249 * -5.0f + xi_250 * -5.0f + xi_254 * -5.0f + xi_257 * -5.0f);
+        const float xi_131 = omega_even * (xi_114 + xi_115 * 3.0f + xi_126 + xi_128 + xi_129 + xi_130 + xi_244 * -4.0f + xi_246 * 5.0f + xi_247 * 5.0f + xi_256 * -4.0f + xi_260 * -7.0f + xi_263 * -7.0f + xi_264 * -7.0f + xi_265 * -7.0f);
+        const float xi_156 = xi_115 * -1.0f + xi_256;
+        const float xi_157 = omega_shear * (xi_121 + xi_156 + xi_16 + xi_2 + xi_246 * -1.0f + xi_250 + xi_6);
+        const float xi_158 = xi_157 * 0.125f;
+        const float xi_159 = xi_107 * -0.11904761904761904f + xi_131 * -0.01984126984126984f;
+        const float xi_160 = omega_shear * (xi_112 * -1.0f + xi_113 * 2.0f + xi_120 + xi_123 + xi_125 + xi_156 + xi_244 + xi_246 + xi_247 + xi_252 * -2.0f + xi_253 * -2.0f + xi_9);
+        const float xi_162 = xi_160 * -0.041666666666666664f + xi_161 * -0.16666666666666666f;
+        const float xi_163 = xi_108 * -0.10000000000000001f + xi_117 * -0.050000000000000003f + xi_162;
+        const float xi_164 = xi_111 * 0.028571428571428571f + xi_127 * 0.014285714285714285f + xi_155 + xi_158 + xi_159 + xi_163;
+        const float xi_176 = xi_111 * -0.071428571428571425f + xi_127 * -0.035714285714285712f + xi_159 + xi_160 * 0.083333333333333329f + xi_161 * 0.33333333333333331f;
+        const float xi_187 = xi_107 * 0.095238095238095233f + xi_111 * -0.042857142857142858f + xi_127 * -0.021428571428571429f + xi_131 * 0.015873015873015872f + xi_155 * -1.0f + xi_158 * -1.0f + xi_163;
+        const float xi_190 = xi_157 * 0.0625f;
+        const float xi_191 = xi_131 * 0.013888888888888888f;
+        const float xi_193 = xi_110 * 0.083333333333333329f + xi_124 * 0.041666666666666664f;
+        const float xi_194 = xi_160 * 0.020833333333333332f + xi_161 * 0.083333333333333329f + xi_193;
+        const float xi_195 = xi_165 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
+        const float xi_202 = xi_151 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
+        const float xi_204 = xi_127 * -0.0071428571428571426f;
+        const float xi_205 = xi_198 * (u_2 * xi_197 + xi_132 + xi_17 + xi_261);
+        const float xi_206 = xi_117 * 0.025000000000000001f;
+        const float xi_209 = xi_107 * -0.023809523809523808f + xi_131 * -0.003968253968253968f;
+        const float xi_210 = xi_162 + xi_193 + xi_203 + xi_204 + xi_205 + xi_206 + xi_207 + xi_208 + xi_209;
+        const float xi_226 = xi_162 + xi_193 + xi_203 * -1.0f + xi_204 + xi_205 * -1.0f + xi_206 + xi_207 + xi_208 + xi_209;
+        const float xi_228 = xi_190 * -1.0f;
+        const float xi_229 = xi_127 * 0.017857142857142856f;
+        const float xi_231 = xi_188 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
+        const float xi_233 = xi_198 * (rho * u_0 * u_2 + xi_10 + xi_166 + xi_249);
+        const float xi_234 = xi_232 * -1.0f + xi_233 * -1.0f;
+        const float xi_240 = xi_232 + xi_233;
+        const float xi_243 = xi_186 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
+        const float forceTerm_0 = xi_25 * xi_28 + xi_25 * -1.0f + xi_26 * xi_28 + xi_26 * -1.0f + xi_27 * xi_28 + xi_27 * -1.0f;
+        const float forceTerm_1 = xi_29 + xi_31 * -1.0f + xi_41;
+        const float forceTerm_2 = xi_29 * -1.0f + xi_31 + xi_41;
+        const float forceTerm_3 = xi_42 * -1.0f + xi_44 + xi_48;
+        const float forceTerm_4 = xi_42 + xi_44 * -1.0f + xi_48;
+        const float forceTerm_5 = xi_49 + xi_51 * -1.0f + xi_52;
+        const float forceTerm_6 = xi_49 * -1.0f + xi_51 + xi_52;
+        const float forceTerm_7 = xi_57 + xi_66 + xi_76;
+        const float forceTerm_8 = xi_66 + xi_77 + xi_78;
+        const float forceTerm_9 = xi_57 + xi_78 + xi_80;
+        const float forceTerm_10 = xi_76 + xi_77 + xi_80;
+        const float forceTerm_11 = xi_83 + xi_85 + xi_90;
+        const float forceTerm_12 = xi_83 + xi_91 + xi_92;
+        const float forceTerm_13 = xi_101 + xi_57 + xi_94;
+        const float forceTerm_14 = xi_102 + xi_77 + xi_94;
+        const float forceTerm_15 = xi_103 + xi_85 + xi_92;
+        const float forceTerm_16 = xi_103 + xi_90 + xi_91;
+        const float forceTerm_17 = xi_102 + xi_104 + xi_57;
+        const float forceTerm_18 = xi_101 + xi_104 + xi_77;
+        _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0] = forceTerm_0 + xi_107 * 0.14285714285714285f + xi_108 * 0.20000000000000001f + xi_110 * -1.0f + xi_111 * 0.085714285714285715f + xi_117 * 0.10000000000000001f + xi_124 * -0.5f + xi_127 * 0.042857142857142858f + xi_131 * 0.023809523809523808f + xi_251;
+        _data_pdfs_20_31_10[_stride_pdfs_0 * ctr_0] = forceTerm_1 + xi_135 * -1.0f + xi_140 * -1.0f + xi_151 + xi_164 + xi_246;
+        _data_pdfs_20_32_10[_stride_pdfs_0 * ctr_0] = forceTerm_2 + xi_135 + xi_140 + xi_164 + xi_165 + xi_247;
+        _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0] = forceTerm_3 + xi_168 + xi_170 + xi_175 + xi_176 + xi_253;
+        _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0] = forceTerm_4 + xi_168 * -1.0f + xi_170 * -1.0f + xi_176 + xi_177 + xi_252;
+        _data_pdfs_20_35_10[_stride_pdfs_0 * ctr_0] = forceTerm_5 + xi_179 * -1.0f + xi_181 * -1.0f + xi_186 + xi_187 + xi_256;
+        _data_pdfs_20_36_10[_stride_pdfs_0 * ctr_0] = forceTerm_6 + xi_179 + xi_181 + xi_187 + xi_188 + xi_244;
+        _data_pdfs_20_37_10[_stride_pdfs_0 * ctr_0] = forceTerm_7 + xi_177 + xi_195 + xi_200 + xi_265;
+        _data_pdfs_20_38_10[_stride_pdfs_0 * ctr_0] = forceTerm_8 + xi_175 + xi_195 + xi_201 + xi_260;
+        _data_pdfs_20_39_10[_stride_pdfs_0 * ctr_0] = forceTerm_9 + xi_177 + xi_201 + xi_202 + xi_264;
+        _data_pdfs_20_310_10[_stride_pdfs_0 * ctr_0] = forceTerm_10 + xi_175 + xi_200 + xi_202 + xi_263;
+        _data_pdfs_20_311_10[_stride_pdfs_0 * ctr_0] = forceTerm_11 + xi_210 + xi_219 + xi_224 + xi_248;
+        _data_pdfs_20_312_10[_stride_pdfs_0 * ctr_0] = forceTerm_12 + xi_219 + xi_225 + xi_226 + xi_258;
+        _data_pdfs_20_313_10[_stride_pdfs_0 * ctr_0] = forceTerm_13 + xi_231 + xi_234 + xi_239 + xi_250;
+        _data_pdfs_20_314_10[_stride_pdfs_0 * ctr_0] = forceTerm_14 + xi_231 + xi_240 + xi_241 + xi_257;
+        _data_pdfs_20_315_10[_stride_pdfs_0 * ctr_0] = forceTerm_15 + xi_224 + xi_226 + xi_242 + xi_261;
+        _data_pdfs_20_316_10[_stride_pdfs_0 * ctr_0] = forceTerm_16 + xi_210 + xi_225 + xi_242 + xi_259;
+        _data_pdfs_20_317_10[_stride_pdfs_0 * ctr_0] = forceTerm_17 + xi_239 + xi_240 + xi_243 + xi_254;
+        _data_pdfs_20_318_10[_stride_pdfs_0 * ctr_0] = forceTerm_18 + xi_234 + xi_241 + xi_243 + xi_249;
+      }
+    }
+  }
+}
+} // namespace internal_69764eed2d0964e29e3b97d1054b4693
+
+void CollideSweepSinglePrecisionThermalized::run(IBlock *block) {
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+
+  auto &time_step = this->time_step_;
+  auto &kT = this->kT_;
+  auto &omega_odd = this->omega_odd_;
+  auto &seed = this->seed_;
+  auto &omega_bulk = this->omega_bulk_;
+  auto block_offset_0 = this->block_offset_0_;
+  auto &omega_shear = this->omega_shear_;
+  auto &omega_even = this->omega_even_;
+  auto block_offset_2 = this->block_offset_2_;
+  auto block_offset_1 = this->block_offset_1_;
+  block_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_69764eed2d0964e29e3b97d1054b4693::collidesweepsingleprecisionthermalized_collidesweepsingleprecisionthermalized(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
+}
+
+void CollideSweepSinglePrecisionThermalized::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+
+  auto &time_step = this->time_step_;
+  auto &kT = this->kT_;
+  auto &omega_odd = this->omega_odd_;
+  auto &seed = this->seed_;
+  auto &omega_bulk = this->omega_bulk_;
+  auto block_offset_0 = this->block_offset_0_;
+  auto &omega_shear = this->omega_shear_;
+  auto &omega_even = this->omega_even_;
+  auto block_offset_2 = this->block_offset_2_;
+  auto block_offset_1 = this->block_offset_1_;
+  block_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_69764eed2d0964e29e3b97d1054b4693::collidesweepsingleprecisionthermalized_collidesweepsingleprecisionthermalized(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalized.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalized.h
new file mode 100644
index 00000000000..91353151cdf
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalized.h
@@ -0,0 +1,123 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepSinglePrecisionThermalized.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class CollideSweepSinglePrecisionThermalized {
+public:
+  CollideSweepSinglePrecisionThermalized(
+      BlockDataID forceID_, BlockDataID pdfsID_, uint32_t block_offset_0,
+      uint32_t block_offset_1, uint32_t block_offset_2, float kT,
+      float omega_bulk, float omega_even, float omega_odd, float omega_shear,
+      uint32_t seed, uint32_t time_step)
+      : forceID(forceID_), pdfsID(pdfsID_), block_offset_0_(block_offset_0),
+        block_offset_1_(block_offset_1), block_offset_2_(block_offset_2),
+        kT_(kT), omega_bulk_(omega_bulk), omega_even_(omega_even),
+        omega_odd_(omega_odd), omega_shear_(omega_shear), seed_(seed),
+        time_step_(time_step){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<CollideSweepSinglePrecisionThermalized> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<CollideSweepSinglePrecisionThermalized> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  uint32_t block_offset_0_;
+  uint32_t block_offset_1_;
+  uint32_t block_offset_2_;
+  float kT_;
+  float omega_bulk_;
+  float omega_even_;
+  float omega_odd_;
+  float omega_shear_;
+  uint32_t seed_;
+  uint32_t time_step_;
+  std::function<void(IBlock *, uint32_t &, uint32_t &, uint32_t &)>
+      block_offset_generator =
+          [](IBlock *const, uint32_t &, uint32_t &, uint32_t &) {};
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalizedAVX.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalizedAVX.cpp
new file mode 100644
index 00000000000..0455514ffaa
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalizedAVX.cpp
@@ -0,0 +1,895 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepSinglePrecisionThermalizedAVX.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "CollideSweepSinglePrecisionThermalizedAVX.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#include "philox_rand.h"
+
+#include <immintrin.h>
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_48c9ee502281a70505dce0378c55abd5 {
+static FUNC_PREFIX void collidesweepsingleprecisionthermalizedavx_collidesweepsingleprecisionthermalizedavx(float *RESTRICT const _data_force, float *RESTRICT _data_pdfs, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, uint32_t block_offset_0, uint32_t block_offset_1, uint32_t block_offset_2, float kT, float omega_bulk, float omega_even, float omega_odd, float omega_shear, uint32_t seed, uint32_t time_step) {
+  const float xi_28 = omega_bulk * 0.5f;
+  const float xi_55 = omega_shear * 0.041666666666666664f;
+  const float xi_60 = omega_bulk * 0.041666666666666664f;
+  const float xi_71 = omega_shear * 0.125f;
+  const float xi_109 = 2.4494897427831779f;
+  const float xi_134 = omega_odd * 0.25f;
+  const float xi_145 = omega_odd * 0.083333333333333329f;
+  const float xi_198 = omega_shear * 0.25f;
+  const float xi_211 = omega_odd * 0.041666666666666664f;
+  const float xi_213 = omega_odd * 0.125f;
+  const float rr_0 = 0.0f;
+  const float xi_53 = rr_0 * 0.041666666666666664f;
+  for (int64_t ctr_2 = 0; ctr_2 < _size_force_2; ctr_2 += 1) {
+    float *RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    float *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    float *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    float *RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    float *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_force_1; ctr_1 += 1) {
+      float *RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_36;
+      float *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      float *RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_31;
+      float *RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_32;
+      float *RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_311;
+      float *RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_318;
+      float *RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_313;
+      float *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      float *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      float *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      float *RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_317;
+      float *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      float *RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_35;
+      float *RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_314;
+      float *RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_312;
+      float *RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_316;
+      float *RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_38;
+      float *RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_315;
+      float *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      float *RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_310;
+      float *RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_39;
+      float *RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_37;
+      {
+        for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((_size_force_0) / (8)) * (8); ctr_0 += 8) {
+          const __m256 xi_244 = _mm256_load_ps(&_data_pdfs_20_36_10[ctr_0]);
+          const __m256 xi_245 = _mm256_load_ps(&_data_force_20_32_10[ctr_0]);
+          const __m256 xi_246 = _mm256_load_ps(&_data_pdfs_20_31_10[ctr_0]);
+          const __m256 xi_247 = _mm256_load_ps(&_data_pdfs_20_32_10[ctr_0]);
+          const __m256 xi_248 = _mm256_load_ps(&_data_pdfs_20_311_10[ctr_0]);
+          const __m256 xi_249 = _mm256_load_ps(&_data_pdfs_20_318_10[ctr_0]);
+          const __m256 xi_250 = _mm256_load_ps(&_data_pdfs_20_313_10[ctr_0]);
+          const __m256 xi_251 = _mm256_load_ps(&_data_pdfs_20_30_10[ctr_0]);
+          const __m256 xi_252 = _mm256_load_ps(&_data_pdfs_20_34_10[ctr_0]);
+          const __m256 xi_253 = _mm256_load_ps(&_data_pdfs_20_33_10[ctr_0]);
+          const __m256 xi_254 = _mm256_load_ps(&_data_pdfs_20_317_10[ctr_0]);
+          const __m256 xi_255 = _mm256_load_ps(&_data_force_20_30_10[ctr_0]);
+          const __m256 xi_256 = _mm256_load_ps(&_data_pdfs_20_35_10[ctr_0]);
+          const __m256 xi_257 = _mm256_load_ps(&_data_pdfs_20_314_10[ctr_0]);
+          const __m256 xi_258 = _mm256_load_ps(&_data_pdfs_20_312_10[ctr_0]);
+          const __m256 xi_259 = _mm256_load_ps(&_data_pdfs_20_316_10[ctr_0]);
+          const __m256 xi_260 = _mm256_load_ps(&_data_pdfs_20_38_10[ctr_0]);
+          const __m256 xi_261 = _mm256_load_ps(&_data_pdfs_20_315_10[ctr_0]);
+          const __m256 xi_262 = _mm256_load_ps(&_data_force_20_31_10[ctr_0]);
+          const __m256 xi_263 = _mm256_load_ps(&_data_pdfs_20_310_10[ctr_0]);
+          const __m256 xi_264 = _mm256_load_ps(&_data_pdfs_20_39_10[ctr_0]);
+          const __m256 xi_265 = _mm256_load_ps(&_data_pdfs_20_37_10[ctr_0]);
+
+          __m256 random_3_0{};
+          __m256 random_3_1{};
+          __m256 random_3_2{};
+          __m256 random_3_3{};
+          if (kT > 0.) {
+            philox_float4(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 3, seed, random_3_0, random_3_1, random_3_2, random_3_3);
+          }
+
+          __m256 random_2_0{};
+          __m256 random_2_1{};
+          __m256 random_2_2{};
+          __m256 random_2_3{};
+          if (kT > 0.) {
+            philox_float4(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 2, seed, random_2_0, random_2_1, random_2_2, random_2_3);
+          }
+
+          __m256 random_1_0{};
+          __m256 random_1_1{};
+          __m256 random_1_2{};
+          __m256 random_1_3{};
+          if (kT > 0.) {
+            philox_float4(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 1, seed, random_1_0, random_1_1, random_1_2, random_1_3);
+          }
+
+          __m256 random_0_0{};
+          __m256 random_0_1{};
+          __m256 random_0_2{};
+          __m256 random_0_3{};
+          if (kT > 0.) {
+            philox_float4(time_step, _mm256_add_epi32(_mm256_add_epi32(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), _mm256_set_epi32(ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0, ctr_0)), _mm256_set_epi32(((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)), ((int64_t)(block_offset_0)))), block_offset_1 + ctr_1, block_offset_2 + ctr_2, 0, seed, random_0_0, random_0_1, random_0_2, random_0_3);
+          }
+          const __m256 xi_2 = _mm256_add_ps(xi_249, xi_257);
+          const __m256 xi_3 = _mm256_add_ps(xi_2, xi_252);
+          const __m256 xi_4 = _mm256_add_ps(_mm256_add_ps(xi_246, xi_248), xi_261);
+          const __m256 xi_5 = _mm256_add_ps(xi_256, xi_258);
+          const __m256 xi_6 = _mm256_add_ps(xi_244, xi_254);
+          const __m256 xi_8 = _mm256_mul_ps(xi_264, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_9 = _mm256_mul_ps(xi_265, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_10 = _mm256_mul_ps(xi_254, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_11 = _mm256_mul_ps(xi_250, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_12 = _mm256_mul_ps(xi_253, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_13 = _mm256_add_ps(_mm256_add_ps(xi_10, xi_11), xi_12);
+          const __m256 xi_14 = _mm256_mul_ps(xi_247, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_15 = _mm256_mul_ps(xi_263, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_16 = _mm256_add_ps(xi_14, xi_15);
+          const __m256 xi_17 = _mm256_mul_ps(xi_259, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_18 = _mm256_mul_ps(xi_258, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_19 = _mm256_add_ps(xi_17, xi_18);
+          const __m256 xi_20 = _mm256_mul_ps(xi_249, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_21 = _mm256_add_ps(xi_10, xi_20);
+          const __m256 xi_22 = _mm256_mul_ps(xi_261, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_23 = _mm256_mul_ps(xi_244, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_24 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_17, xi_22), xi_23), xi_248);
+          const __m256 xi_29 = _mm256_mul_ps(xi_262, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f));
+          const __m256 xi_30 = _mm256_mul_ps(xi_262, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f));
+          const __m256 xi_42 = _mm256_mul_ps(xi_255, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f));
+          const __m256 xi_43 = _mm256_mul_ps(xi_255, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f));
+          const __m256 xi_49 = _mm256_mul_ps(xi_245, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f));
+          const __m256 xi_50 = _mm256_mul_ps(xi_245, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f));
+          const __m256 xi_67 = _mm256_mul_ps(xi_262, _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f));
+          const __m256 xi_72 = _mm256_mul_ps(xi_262, _mm256_set_ps(xi_71, xi_71, xi_71, xi_71, xi_71, xi_71, xi_71, xi_71));
+          const __m256 xi_114 = _mm256_mul_ps(xi_251, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_118 = _mm256_mul_ps(xi_248, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_119 = _mm256_add_ps(xi_118, xi_18);
+          const __m256 xi_120 = _mm256_add_ps(_mm256_mul_ps(xi_260, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_8);
+          const __m256 xi_122 = _mm256_mul_ps(xi_257, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_123 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_11, xi_122), xi_15), xi_21);
+          const __m256 xi_125 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_248, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f)), _mm256_mul_ps(xi_258, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f))), _mm256_mul_ps(xi_259, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f))), _mm256_mul_ps(xi_261, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f)));
+          const __m256 xi_126 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_252, _mm256_set_ps(5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f)), _mm256_mul_ps(xi_253, _mm256_set_ps(5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f))), xi_125);
+          const __m256 xi_128 = _mm256_mul_ps(xi_250, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f));
+          const __m256 xi_129 = _mm256_mul_ps(xi_257, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f));
+          const __m256 xi_130 = _mm256_add_ps(_mm256_mul_ps(xi_249, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f)), _mm256_mul_ps(xi_254, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f)));
+          const __m256 xi_132 = _mm256_add_ps(xi_118, xi_258);
+          const __m256 xi_133 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_132, xi_14), xi_22), xi_246), xi_259);
+          const __m256 xi_135 = _mm256_mul_ps(xi_133, _mm256_set_ps(xi_134, xi_134, xi_134, xi_134, xi_134, xi_134, xi_134, xi_134));
+          const __m256 xi_136 = _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_2_3);
+          const __m256 xi_141 = _mm256_mul_ps(xi_265, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f));
+          const __m256 xi_142 = _mm256_mul_ps(xi_263, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f));
+          const __m256 xi_143 = _mm256_add_ps(_mm256_mul_ps(xi_264, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f)), _mm256_mul_ps(xi_260, _mm256_set_ps(-2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f)));
+          const __m256 xi_144 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_141, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_14), xi_142), xi_143), xi_19), xi_4);
+          const __m256 xi_146 = _mm256_mul_ps(xi_144, _mm256_set_ps(xi_145, xi_145, xi_145, xi_145, xi_145, xi_145, xi_145, xi_145));
+          const __m256 xi_147 = _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_1_2);
+          const __m256 xi_152 = _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_0_1);
+          const __m256 xi_166 = _mm256_add_ps(xi_122, xi_250);
+          const __m256 xi_167 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_12, xi_166), xi_20), xi_252), xi_254);
+          const __m256 xi_168 = _mm256_mul_ps(xi_167, _mm256_set_ps(xi_134, xi_134, xi_134, xi_134, xi_134, xi_134, xi_134, xi_134));
+          const __m256 xi_169 = _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_2_1);
+          const __m256 xi_171 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_142, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_13), xi_141), xi_143), xi_3);
+          const __m256 xi_172 = _mm256_mul_ps(xi_171, _mm256_set_ps(xi_145, xi_145, xi_145, xi_145, xi_145, xi_145, xi_145, xi_145));
+          const __m256 xi_173 = _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_2_0);
+          const __m256 xi_178 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_119, xi_23), xi_256), xi_259), xi_261);
+          const __m256 xi_179 = _mm256_mul_ps(xi_178, _mm256_set_ps(xi_134, xi_134, xi_134, xi_134, xi_134, xi_134, xi_134, xi_134));
+          const __m256 xi_180 = _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_2_2);
+          const __m256 xi_182 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_128, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_129, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_130), xi_24), xi_5);
+          const __m256 xi_183 = _mm256_mul_ps(xi_182, _mm256_set_ps(xi_145, xi_145, xi_145, xi_145, xi_145, xi_145, xi_145, xi_145));
+          const __m256 xi_184 = _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_1_3);
+          const __m256 xi_212 = _mm256_mul_ps(xi_182, _mm256_set_ps(xi_211, xi_211, xi_211, xi_211, xi_211, xi_211, xi_211, xi_211));
+          const __m256 xi_214 = _mm256_mul_ps(xi_178, _mm256_set_ps(xi_213, xi_213, xi_213, xi_213, xi_213, xi_213, xi_213, xi_213));
+          const __m256 xi_220 = _mm256_mul_ps(xi_144, _mm256_set_ps(xi_211, xi_211, xi_211, xi_211, xi_211, xi_211, xi_211, xi_211));
+          const __m256 xi_221 = _mm256_mul_ps(xi_133, _mm256_set_ps(xi_213, xi_213, xi_213, xi_213, xi_213, xi_213, xi_213, xi_213));
+          const __m256 xi_235 = _mm256_mul_ps(xi_167, _mm256_set_ps(xi_213, xi_213, xi_213, xi_213, xi_213, xi_213, xi_213, xi_213));
+          const __m256 xi_236 = _mm256_mul_ps(xi_171, _mm256_set_ps(xi_211, xi_211, xi_211, xi_211, xi_211, xi_211, xi_211, xi_211));
+          const __m256 xi_31 = _mm256_mul_ps(xi_30, _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0));
+          const __m256 xi_44 = _mm256_mul_ps(xi_43, _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0));
+          const __m256 xi_51 = _mm256_mul_ps(xi_50, _mm256_set_ps(rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0, rr_0));
+          const __m256 xi_54 = _mm256_mul_ps(xi_255, _mm256_set_ps(xi_53, xi_53, xi_53, xi_53, xi_53, xi_53, xi_53, xi_53));
+          const __m256 xi_59 = _mm256_mul_ps(xi_262, _mm256_set_ps(xi_53, xi_53, xi_53, xi_53, xi_53, xi_53, xi_53, xi_53));
+          const __m256 xi_81 = _mm256_mul_ps(xi_245, _mm256_set_ps(xi_53, xi_53, xi_53, xi_53, xi_53, xi_53, xi_53, xi_53));
+          const __m256 vel0Term = _mm256_add_ps(_mm256_add_ps(xi_260, xi_263), xi_3);
+          const __m256 vel1Term = _mm256_add_ps(xi_265, xi_4);
+          const __m256 vel2Term = _mm256_add_ps(xi_250, xi_5);
+          const __m256 rho = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(vel0Term, vel1Term), vel2Term), xi_247), xi_251), xi_253), xi_259), xi_264), xi_6);
+          const __m256 xi_105 = _mm256_mul_ps(rho, _mm256_set_ps(kT, kT, kT, kT, kT, kT, kT, kT));
+          const __m256 xi_106 = _mm256_sqrt_ps(_mm256_mul_ps(xi_105, _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_mul_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(omega_even, omega_even, omega_even, omega_even, omega_even, omega_even, omega_even, omega_even)), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f)), _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(omega_even, omega_even, omega_even, omega_even, omega_even, omega_even, omega_even, omega_even)), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f)))), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f))));
+          const __m256 xi_107 = _mm256_mul_ps(_mm256_mul_ps(xi_106, _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_3_0)), _mm256_set_ps(3.7416573867739413f, 3.7416573867739413f, 3.7416573867739413f, 3.7416573867739413f, 3.7416573867739413f, 3.7416573867739413f, 3.7416573867739413f, 3.7416573867739413f));
+          const __m256 xi_108 = _mm256_mul_ps(_mm256_mul_ps(xi_106, _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_3_2)), _mm256_set_ps(5.4772255750516612f, 5.4772255750516612f, 5.4772255750516612f, 5.4772255750516612f, 5.4772255750516612f, 5.4772255750516612f, 5.4772255750516612f, 5.4772255750516612f));
+          const __m256 xi_110 = _mm256_mul_ps(_mm256_mul_ps(_mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_1_1), _mm256_set_ps(xi_109, xi_109, xi_109, xi_109, xi_109, xi_109, xi_109, xi_109)), _mm256_sqrt_ps(_mm256_mul_ps(xi_105, _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_mul_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk)), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f)), _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk)), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f)))), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f)))));
+          const __m256 xi_111 = _mm256_mul_ps(_mm256_mul_ps(xi_106, _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_3_1)), _mm256_set_ps(8.3666002653407556f, 8.3666002653407556f, 8.3666002653407556f, 8.3666002653407556f, 8.3666002653407556f, 8.3666002653407556f, 8.3666002653407556f, 8.3666002653407556f));
+          const __m256 xi_137 = _mm256_sqrt_ps(_mm256_mul_ps(xi_105, _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_mul_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(omega_odd, omega_odd, omega_odd, omega_odd, omega_odd, omega_odd, omega_odd, omega_odd)), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f)), _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(omega_odd, omega_odd, omega_odd, omega_odd, omega_odd, omega_odd, omega_odd, omega_odd)), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f)))), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f))));
+          const __m256 xi_138 = _mm256_mul_ps(xi_137, _mm256_set_ps(1.4142135623730951f, 1.4142135623730951f, 1.4142135623730951f, 1.4142135623730951f, 1.4142135623730951f, 1.4142135623730951f, 1.4142135623730951f, 1.4142135623730951f));
+          const __m256 xi_139 = _mm256_mul_ps(xi_138, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f));
+          const __m256 xi_140 = _mm256_mul_ps(xi_136, xi_139);
+          const __m256 xi_148 = _mm256_mul_ps(xi_137, _mm256_set_ps(xi_109, xi_109, xi_109, xi_109, xi_109, xi_109, xi_109, xi_109));
+          const __m256 xi_149 = _mm256_mul_ps(xi_148, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f));
+          const __m256 xi_150 = _mm256_mul_ps(xi_147, xi_149);
+          const __m256 xi_151 = _mm256_add_ps(_mm256_mul_ps(xi_146, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_150, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)));
+          const __m256 xi_153 = _mm256_sqrt_ps(_mm256_mul_ps(xi_105, _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_mul_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f)), _mm256_add_ps(_mm256_mul_ps(_mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear)), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f)))), _mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f))));
+          const __m256 xi_154 = _mm256_mul_ps(xi_153, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f));
+          const __m256 xi_155 = _mm256_mul_ps(xi_152, xi_154);
+          const __m256 xi_161 = _mm256_mul_ps(_mm256_mul_ps(xi_153, _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_0_0)), _mm256_set_ps(1.7320508075688772f, 1.7320508075688772f, 1.7320508075688772f, 1.7320508075688772f, 1.7320508075688772f, 1.7320508075688772f, 1.7320508075688772f, 1.7320508075688772f));
+          const __m256 xi_165 = _mm256_add_ps(xi_146, xi_150);
+          const __m256 xi_170 = _mm256_mul_ps(xi_139, xi_169);
+          const __m256 xi_174 = _mm256_mul_ps(xi_149, xi_173);
+          const __m256 xi_175 = _mm256_add_ps(xi_172, xi_174);
+          const __m256 xi_177 = _mm256_add_ps(_mm256_mul_ps(xi_172, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_174, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)));
+          const __m256 xi_181 = _mm256_mul_ps(xi_139, xi_180);
+          const __m256 xi_185 = _mm256_mul_ps(xi_149, xi_184);
+          const __m256 xi_186 = _mm256_add_ps(_mm256_mul_ps(xi_183, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_185, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)));
+          const __m256 xi_188 = _mm256_add_ps(xi_183, xi_185);
+          const __m256 xi_189 = _mm256_mul_ps(_mm256_mul_ps(xi_152, xi_153), _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f));
+          const __m256 xi_192 = _mm256_mul_ps(xi_107, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f));
+          const __m256 xi_196 = _mm256_mul_ps(xi_154, _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_0_2));
+          const __m256 xi_203 = _mm256_mul_ps(xi_154, _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_1_0));
+          const __m256 xi_207 = _mm256_mul_ps(xi_111, _mm256_set_ps(-0.014285714285714285f, -0.014285714285714285f, -0.014285714285714285f, -0.014285714285714285f, -0.014285714285714285f, -0.014285714285714285f, -0.014285714285714285f, -0.014285714285714285f));
+          const __m256 xi_208 = _mm256_mul_ps(xi_108, _mm256_set_ps(0.050000000000000003f, 0.050000000000000003f, 0.050000000000000003f, 0.050000000000000003f, 0.050000000000000003f, 0.050000000000000003f, 0.050000000000000003f, 0.050000000000000003f));
+          const __m256 xi_215 = _mm256_mul_ps(xi_148, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f));
+          const __m256 xi_216 = _mm256_mul_ps(xi_184, xi_215);
+          const __m256 xi_217 = _mm256_mul_ps(xi_138, _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f));
+          const __m256 xi_218 = _mm256_mul_ps(xi_180, xi_217);
+          const __m256 xi_219 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_212, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_216, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_214), xi_218);
+          const __m256 xi_222 = _mm256_mul_ps(xi_147, xi_215);
+          const __m256 xi_223 = _mm256_mul_ps(xi_136, xi_217);
+          const __m256 xi_224 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_220, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_222, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_221), xi_223);
+          const __m256 xi_225 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_221, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_223, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_220), xi_222);
+          const __m256 xi_227 = _mm256_mul_ps(xi_189, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_230 = _mm256_mul_ps(xi_111, _mm256_set_ps(0.035714285714285712f, 0.035714285714285712f, 0.035714285714285712f, 0.035714285714285712f, 0.035714285714285712f, 0.035714285714285712f, 0.035714285714285712f, 0.035714285714285712f));
+          const __m256 xi_232 = _mm256_mul_ps(xi_154, _mm256_add_ps(_mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f), random_0_3));
+          const __m256 xi_237 = _mm256_mul_ps(xi_169, xi_217);
+          const __m256 xi_238 = _mm256_mul_ps(xi_173, xi_215);
+          const __m256 xi_239 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_235, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_237, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_236), xi_238);
+          const __m256 xi_241 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_236, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_238, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_235), xi_237);
+          const __m256 xi_242 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_214, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_218, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_212), xi_216);
+          const __m256 xi_0 = _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho);
+          const __m256 xi_7 = _mm256_mul_ps(xi_0, _mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f));
+          const __m256 u_0 = _mm256_add_ps(_mm256_mul_ps(xi_0, _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(vel0Term, xi_13), xi_8), xi_9)), _mm256_mul_ps(xi_255, xi_7));
+          const __m256 xi_25 = _mm256_mul_ps(u_0, xi_255);
+          const __m256 xi_37 = _mm256_mul_ps(xi_25, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f));
+          const __m256 xi_38 = _mm256_mul_ps(xi_25, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f));
+          const __m256 xi_39 = _mm256_mul_ps(xi_38, _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear));
+          const __m256 xi_40 = _mm256_add_ps(_mm256_mul_ps(xi_37, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_39);
+          const __m256 xi_56 = _mm256_add_ps(_mm256_mul_ps(_mm256_mul_ps(xi_25, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_set_ps(xi_55, xi_55, xi_55, xi_55, xi_55, xi_55, xi_55, xi_55)), xi_37);
+          const __m256 xi_57 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_43, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_54), xi_56);
+          const __m256 xi_61 = _mm256_mul_ps(_mm256_mul_ps(xi_25, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_set_ps(xi_60, xi_60, xi_60, xi_60, xi_60, xi_60, xi_60, xi_60));
+          const __m256 xi_68 = _mm256_mul_ps(u_0, xi_67);
+          const __m256 xi_73 = _mm256_mul_ps(u_0, xi_72);
+          const __m256 xi_77 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_54, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_43), xi_56);
+          const __m256 xi_84 = _mm256_mul_ps(xi_38, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_95 = _mm256_mul_ps(u_0, xi_245);
+          const __m256 xi_96 = _mm256_mul_ps(xi_95, _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f));
+          const __m256 xi_99 = _mm256_mul_ps(xi_95, _mm256_set_ps(xi_71, xi_71, xi_71, xi_71, xi_71, xi_71, xi_71, xi_71));
+          const __m256 xi_113 = _mm256_mul_ps(rho, _mm256_mul_ps(u_0, u_0));
+          const __m256 u_1 = _mm256_add_ps(_mm256_mul_ps(xi_0, _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(vel1Term, xi_16), xi_19), xi_260), xi_8)), _mm256_mul_ps(xi_262, xi_7));
+          const __m256 xi_26 = _mm256_mul_ps(u_1, xi_262);
+          const __m256 xi_32 = _mm256_mul_ps(xi_26, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f));
+          const __m256 xi_45 = _mm256_mul_ps(xi_26, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f));
+          const __m256 xi_46 = _mm256_mul_ps(xi_45, _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear));
+          const __m256 xi_47 = _mm256_add_ps(_mm256_mul_ps(xi_32, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_46);
+          const __m256 xi_62 = _mm256_mul_ps(_mm256_mul_ps(xi_26, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_set_ps(xi_60, xi_60, xi_60, xi_60, xi_60, xi_60, xi_60, xi_60));
+          const __m256 xi_69 = _mm256_mul_ps(u_1, _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f));
+          const __m256 xi_70 = _mm256_mul_ps(xi_255, xi_69);
+          const __m256 xi_74 = _mm256_mul_ps(u_1, _mm256_set_ps(xi_71, xi_71, xi_71, xi_71, xi_71, xi_71, xi_71, xi_71));
+          const __m256 xi_75 = _mm256_mul_ps(xi_255, xi_74);
+          const __m256 xi_76 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_68, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_70, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_73), xi_75);
+          const __m256 xi_78 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_73, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_75, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_68), xi_70);
+          const __m256 xi_86 = _mm256_mul_ps(xi_245, xi_69);
+          const __m256 xi_88 = _mm256_mul_ps(xi_245, xi_74);
+          const __m256 xi_93 = _mm256_mul_ps(xi_45, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_112 = _mm256_mul_ps(rho, _mm256_mul_ps(u_1, u_1));
+          const __m256 xi_121 = _mm256_add_ps(_mm256_add_ps(xi_112, xi_120), xi_9);
+          const __m256 xi_197 = _mm256_mul_ps(rho, u_1);
+          const __m256 xi_199 = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(u_0, xi_197), xi_120), xi_263), xi_265), _mm256_set_ps(xi_198, xi_198, xi_198, xi_198, xi_198, xi_198, xi_198, xi_198));
+          const __m256 xi_200 = _mm256_add_ps(_mm256_mul_ps(xi_196, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_199, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)));
+          const __m256 xi_201 = _mm256_add_ps(xi_196, xi_199);
+          const __m256 u_2 = _mm256_add_ps(_mm256_mul_ps(xi_0, _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(vel2Term, xi_21), xi_24), xi_257)), _mm256_mul_ps(xi_245, xi_7));
+          const __m256 xi_27 = _mm256_mul_ps(u_2, xi_245);
+          const __m256 xi_33 = _mm256_mul_ps(xi_27, _mm256_set_ps(0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f, 0.16666666666666666f));
+          const __m256 xi_34 = _mm256_mul_ps(xi_27, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f));
+          const __m256 xi_35 = _mm256_mul_ps(xi_34, _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear));
+          const __m256 xi_36 = _mm256_add_ps(_mm256_mul_ps(xi_33, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_35);
+          const __m256 xi_41 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_26, _mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f)), _mm256_mul_ps(_mm256_mul_ps(xi_32, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), xi_36), xi_40);
+          const __m256 xi_48 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_25, _mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f)), _mm256_mul_ps(_mm256_mul_ps(xi_37, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), xi_36), xi_47);
+          const __m256 xi_52 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_27, _mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f)), _mm256_mul_ps(_mm256_mul_ps(xi_33, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear))), xi_40), xi_47);
+          const __m256 xi_58 = _mm256_mul_ps(xi_34, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_63 = _mm256_mul_ps(_mm256_mul_ps(xi_27, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_set_ps(xi_60, xi_60, xi_60, xi_60, xi_60, xi_60, xi_60, xi_60));
+          const __m256 xi_64 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_mul_ps(xi_26, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_set_ps(xi_55, xi_55, xi_55, xi_55, xi_55, xi_55, xi_55, xi_55)), xi_32), xi_61), xi_62), xi_63);
+          const __m256 xi_65 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_59, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_30), xi_64);
+          const __m256 xi_66 = _mm256_add_ps(_mm256_add_ps(xi_35, xi_58), xi_65);
+          const __m256 xi_79 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_30, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_59), xi_64);
+          const __m256 xi_80 = _mm256_add_ps(_mm256_add_ps(xi_35, xi_58), xi_79);
+          const __m256 xi_82 = _mm256_add_ps(_mm256_mul_ps(_mm256_mul_ps(xi_27, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_set_ps(xi_55, xi_55, xi_55, xi_55, xi_55, xi_55, xi_55, xi_55)), xi_33);
+          const __m256 xi_83 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_81, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_50), xi_82);
+          const __m256 xi_85 = _mm256_add_ps(_mm256_add_ps(xi_39, xi_65), xi_84);
+          const __m256 xi_87 = _mm256_mul_ps(u_2, xi_67);
+          const __m256 xi_89 = _mm256_mul_ps(u_2, xi_72);
+          const __m256 xi_90 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_88, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_89, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_86), xi_87);
+          const __m256 xi_91 = _mm256_add_ps(_mm256_add_ps(xi_39, xi_79), xi_84);
+          const __m256 xi_92 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_86, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_87, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_88), xi_89);
+          const __m256 xi_94 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_46, xi_61), xi_62), xi_63), xi_83), xi_93);
+          const __m256 xi_97 = _mm256_mul_ps(u_2, xi_255);
+          const __m256 xi_98 = _mm256_mul_ps(xi_97, _mm256_set_ps(0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f));
+          const __m256 xi_100 = _mm256_mul_ps(xi_97, _mm256_set_ps(xi_71, xi_71, xi_71, xi_71, xi_71, xi_71, xi_71, xi_71));
+          const __m256 xi_101 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_96, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_98, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_100), xi_99);
+          const __m256 xi_102 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_100, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_99, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_96), xi_98);
+          const __m256 xi_103 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_50, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_81), xi_82);
+          const __m256 xi_104 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_103, xi_46), xi_61), xi_62), xi_63), xi_93);
+          const __m256 xi_115 = _mm256_mul_ps(rho, _mm256_mul_ps(u_2, u_2));
+          const __m256 xi_116 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_244, _mm256_set_ps(3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f)), _mm256_mul_ps(xi_256, _mm256_set_ps(3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f))), _mm256_mul_ps(xi_115, _mm256_set_ps(0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f))), xi_114);
+          const __m256 xi_117 = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_246, _mm256_set_ps(3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f)), _mm256_mul_ps(xi_247, _mm256_set_ps(3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f))), _mm256_mul_ps(xi_112, _mm256_set_ps(0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f, 0.66666666666666663f))), _mm256_mul_ps(xi_113, _mm256_set_ps(1.6666666666666667f, 1.6666666666666667f, 1.6666666666666667f, 1.6666666666666667f, 1.6666666666666667f, 1.6666666666666667f, 1.6666666666666667f, 1.6666666666666667f))), _mm256_mul_ps(xi_248, _mm256_set_ps(-3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f))), _mm256_mul_ps(xi_258, _mm256_set_ps(-3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f))), _mm256_mul_ps(xi_259, _mm256_set_ps(-3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f))), _mm256_mul_ps(xi_261, _mm256_set_ps(-3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f, -3.0f))), xi_116), _mm256_set_ps(omega_even, omega_even, omega_even, omega_even, omega_even, omega_even, omega_even, omega_even));
+          const __m256 xi_124 = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_113, xi_115), xi_119), xi_121), xi_123), xi_17), xi_22), xi_251), _mm256_set_ps(omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk, omega_bulk));
+          const __m256 xi_127 = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_112, _mm256_set_ps(2.3333333333333335f, 2.3333333333333335f, 2.3333333333333335f, 2.3333333333333335f, 2.3333333333333335f, 2.3333333333333335f, 2.3333333333333335f, 2.3333333333333335f)), _mm256_mul_ps(xi_246, _mm256_set_ps(-2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f))), _mm256_mul_ps(xi_247, _mm256_set_ps(-2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f))), _mm256_mul_ps(xi_249, _mm256_set_ps(-5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f))), _mm256_mul_ps(xi_250, _mm256_set_ps(-5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f))), _mm256_mul_ps(xi_254, _mm256_set_ps(-5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f))), _mm256_mul_ps(xi_257, _mm256_set_ps(-5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f, -5.0f))), xi_116), xi_126), _mm256_set_ps(omega_even, omega_even, omega_even, omega_even, omega_even, omega_even, omega_even, omega_even));
+          const __m256 xi_131 = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_115, _mm256_set_ps(3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f)), _mm256_mul_ps(xi_246, _mm256_set_ps(5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f))), _mm256_mul_ps(xi_247, _mm256_set_ps(5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f))), _mm256_mul_ps(xi_244, _mm256_set_ps(-4.0f, -4.0f, -4.0f, -4.0f, -4.0f, -4.0f, -4.0f, -4.0f))), _mm256_mul_ps(xi_256, _mm256_set_ps(-4.0f, -4.0f, -4.0f, -4.0f, -4.0f, -4.0f, -4.0f, -4.0f))), _mm256_mul_ps(xi_260, _mm256_set_ps(-7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f))), _mm256_mul_ps(xi_263, _mm256_set_ps(-7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f))), _mm256_mul_ps(xi_264, _mm256_set_ps(-7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f))), _mm256_mul_ps(xi_265, _mm256_set_ps(-7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f, -7.0f))), xi_114), xi_126), xi_128), xi_129), xi_130), _mm256_set_ps(omega_even, omega_even, omega_even, omega_even, omega_even, omega_even, omega_even, omega_even));
+          const __m256 xi_156 = _mm256_add_ps(_mm256_mul_ps(xi_115, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_256);
+          const __m256 xi_157 = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_246, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_121), xi_156), xi_16), xi_2), xi_250), xi_6), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear));
+          const __m256 xi_158 = _mm256_mul_ps(xi_157, _mm256_set_ps(0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f, 0.125f));
+          const __m256 xi_159 = _mm256_add_ps(_mm256_mul_ps(xi_131, _mm256_set_ps(-0.01984126984126984f, -0.01984126984126984f, -0.01984126984126984f, -0.01984126984126984f, -0.01984126984126984f, -0.01984126984126984f, -0.01984126984126984f, -0.01984126984126984f)), _mm256_mul_ps(xi_107, _mm256_set_ps(-0.11904761904761904f, -0.11904761904761904f, -0.11904761904761904f, -0.11904761904761904f, -0.11904761904761904f, -0.11904761904761904f, -0.11904761904761904f, -0.11904761904761904f)));
+          const __m256 xi_160 = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_112, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_113, _mm256_set_ps(2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f))), _mm256_mul_ps(xi_252, _mm256_set_ps(-2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f))), _mm256_mul_ps(xi_253, _mm256_set_ps(-2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f))), xi_120), xi_123), xi_125), xi_156), xi_244), xi_246), xi_247), xi_9), _mm256_set_ps(omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear, omega_shear));
+          const __m256 xi_162 = _mm256_add_ps(_mm256_mul_ps(xi_160, _mm256_set_ps(-0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f, -0.041666666666666664f)), _mm256_mul_ps(xi_161, _mm256_set_ps(-0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f, -0.16666666666666666f)));
+          const __m256 xi_163 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_117, _mm256_set_ps(-0.050000000000000003f, -0.050000000000000003f, -0.050000000000000003f, -0.050000000000000003f, -0.050000000000000003f, -0.050000000000000003f, -0.050000000000000003f, -0.050000000000000003f)), _mm256_mul_ps(xi_108, _mm256_set_ps(-0.10000000000000001f, -0.10000000000000001f, -0.10000000000000001f, -0.10000000000000001f, -0.10000000000000001f, -0.10000000000000001f, -0.10000000000000001f, -0.10000000000000001f))), xi_162);
+          const __m256 xi_164 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_127, _mm256_set_ps(0.014285714285714285f, 0.014285714285714285f, 0.014285714285714285f, 0.014285714285714285f, 0.014285714285714285f, 0.014285714285714285f, 0.014285714285714285f, 0.014285714285714285f)), _mm256_mul_ps(xi_111, _mm256_set_ps(0.028571428571428571f, 0.028571428571428571f, 0.028571428571428571f, 0.028571428571428571f, 0.028571428571428571f, 0.028571428571428571f, 0.028571428571428571f, 0.028571428571428571f))), xi_155), xi_158), xi_159), xi_163);
+          const __m256 xi_176 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_160, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)), _mm256_mul_ps(xi_161, _mm256_set_ps(0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f, 0.33333333333333331f))), _mm256_mul_ps(xi_127, _mm256_set_ps(-0.035714285714285712f, -0.035714285714285712f, -0.035714285714285712f, -0.035714285714285712f, -0.035714285714285712f, -0.035714285714285712f, -0.035714285714285712f, -0.035714285714285712f))), _mm256_mul_ps(xi_111, _mm256_set_ps(-0.071428571428571425f, -0.071428571428571425f, -0.071428571428571425f, -0.071428571428571425f, -0.071428571428571425f, -0.071428571428571425f, -0.071428571428571425f, -0.071428571428571425f))), xi_159);
+          const __m256 xi_187 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_155, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_158, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_131, _mm256_set_ps(0.015873015873015872f, 0.015873015873015872f, 0.015873015873015872f, 0.015873015873015872f, 0.015873015873015872f, 0.015873015873015872f, 0.015873015873015872f, 0.015873015873015872f))), _mm256_mul_ps(xi_107, _mm256_set_ps(0.095238095238095233f, 0.095238095238095233f, 0.095238095238095233f, 0.095238095238095233f, 0.095238095238095233f, 0.095238095238095233f, 0.095238095238095233f, 0.095238095238095233f))), _mm256_mul_ps(xi_127, _mm256_set_ps(-0.021428571428571429f, -0.021428571428571429f, -0.021428571428571429f, -0.021428571428571429f, -0.021428571428571429f, -0.021428571428571429f, -0.021428571428571429f, -0.021428571428571429f))), _mm256_mul_ps(xi_111, _mm256_set_ps(-0.042857142857142858f, -0.042857142857142858f, -0.042857142857142858f, -0.042857142857142858f, -0.042857142857142858f, -0.042857142857142858f, -0.042857142857142858f, -0.042857142857142858f))), xi_163);
+          const __m256 xi_190 = _mm256_mul_ps(xi_157, _mm256_set_ps(0.0625f, 0.0625f, 0.0625f, 0.0625f, 0.0625f, 0.0625f, 0.0625f, 0.0625f));
+          const __m256 xi_191 = _mm256_mul_ps(xi_131, _mm256_set_ps(0.013888888888888888f, 0.013888888888888888f, 0.013888888888888888f, 0.013888888888888888f, 0.013888888888888888f, 0.013888888888888888f, 0.013888888888888888f, 0.013888888888888888f));
+          const __m256 xi_193 = _mm256_add_ps(_mm256_mul_ps(xi_124, _mm256_set_ps(0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f, 0.041666666666666664f)), _mm256_mul_ps(xi_110, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f)));
+          const __m256 xi_194 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_160, _mm256_set_ps(0.020833333333333332f, 0.020833333333333332f, 0.020833333333333332f, 0.020833333333333332f, 0.020833333333333332f, 0.020833333333333332f, 0.020833333333333332f, 0.020833333333333332f)), _mm256_mul_ps(xi_161, _mm256_set_ps(0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f, 0.083333333333333329f))), xi_193);
+          const __m256 xi_195 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_165, xi_189), xi_190), xi_191), xi_192), xi_194);
+          const __m256 xi_202 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_151, xi_189), xi_190), xi_191), xi_192), xi_194);
+          const __m256 xi_204 = _mm256_mul_ps(xi_127, _mm256_set_ps(-0.0071428571428571426f, -0.0071428571428571426f, -0.0071428571428571426f, -0.0071428571428571426f, -0.0071428571428571426f, -0.0071428571428571426f, -0.0071428571428571426f, -0.0071428571428571426f));
+          const __m256 xi_205 = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(u_2, xi_197), xi_132), xi_17), xi_261), _mm256_set_ps(xi_198, xi_198, xi_198, xi_198, xi_198, xi_198, xi_198, xi_198));
+          const __m256 xi_206 = _mm256_mul_ps(xi_117, _mm256_set_ps(0.025000000000000001f, 0.025000000000000001f, 0.025000000000000001f, 0.025000000000000001f, 0.025000000000000001f, 0.025000000000000001f, 0.025000000000000001f, 0.025000000000000001f));
+          const __m256 xi_209 = _mm256_add_ps(_mm256_mul_ps(xi_131, _mm256_set_ps(-0.003968253968253968f, -0.003968253968253968f, -0.003968253968253968f, -0.003968253968253968f, -0.003968253968253968f, -0.003968253968253968f, -0.003968253968253968f, -0.003968253968253968f)), _mm256_mul_ps(xi_107, _mm256_set_ps(-0.023809523809523808f, -0.023809523809523808f, -0.023809523809523808f, -0.023809523809523808f, -0.023809523809523808f, -0.023809523809523808f, -0.023809523809523808f, -0.023809523809523808f)));
+          const __m256 xi_210 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_162, xi_193), xi_203), xi_204), xi_205), xi_206), xi_207), xi_208), xi_209);
+          const __m256 xi_226 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_203, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_205, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), xi_162), xi_193), xi_204), xi_206), xi_207), xi_208), xi_209);
+          const __m256 xi_228 = _mm256_mul_ps(xi_190, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
+          const __m256 xi_229 = _mm256_mul_ps(xi_127, _mm256_set_ps(0.017857142857142856f, 0.017857142857142856f, 0.017857142857142856f, 0.017857142857142856f, 0.017857142857142856f, 0.017857142857142856f, 0.017857142857142856f, 0.017857142857142856f));
+          const __m256 xi_231 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_188, xi_194), xi_209), xi_227), xi_228), xi_229), xi_230);
+          const __m256 xi_233 = _mm256_mul_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(_mm256_mul_ps(rho, u_0), u_2), xi_10), xi_166), xi_249), _mm256_set_ps(xi_198, xi_198, xi_198, xi_198, xi_198, xi_198, xi_198, xi_198));
+          const __m256 xi_234 = _mm256_add_ps(_mm256_mul_ps(xi_232, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_233, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)));
+          const __m256 xi_240 = _mm256_add_ps(xi_232, xi_233);
+          const __m256 xi_243 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(xi_186, xi_194), xi_209), xi_227), xi_228), xi_229), xi_230);
+          const __m256 forceTerm_0 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_25, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_26, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_27, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(xi_25, _mm256_set_ps(xi_28, xi_28, xi_28, xi_28, xi_28, xi_28, xi_28, xi_28))), _mm256_mul_ps(xi_26, _mm256_set_ps(xi_28, xi_28, xi_28, xi_28, xi_28, xi_28, xi_28, xi_28))), _mm256_mul_ps(xi_27, _mm256_set_ps(xi_28, xi_28, xi_28, xi_28, xi_28, xi_28, xi_28, xi_28)));
+          const __m256 forceTerm_1 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_31, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_29), xi_41);
+          const __m256 forceTerm_2 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_29, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_31), xi_41);
+          const __m256 forceTerm_3 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_42, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_44), xi_48);
+          const __m256 forceTerm_4 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_44, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_42), xi_48);
+          const __m256 forceTerm_5 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_51, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_49), xi_52);
+          const __m256 forceTerm_6 = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_49, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), xi_51), xi_52);
+          const __m256 forceTerm_7 = _mm256_add_ps(_mm256_add_ps(xi_57, xi_66), xi_76);
+          const __m256 forceTerm_8 = _mm256_add_ps(_mm256_add_ps(xi_66, xi_77), xi_78);
+          const __m256 forceTerm_9 = _mm256_add_ps(_mm256_add_ps(xi_57, xi_78), xi_80);
+          const __m256 forceTerm_10 = _mm256_add_ps(_mm256_add_ps(xi_76, xi_77), xi_80);
+          const __m256 forceTerm_11 = _mm256_add_ps(_mm256_add_ps(xi_83, xi_85), xi_90);
+          const __m256 forceTerm_12 = _mm256_add_ps(_mm256_add_ps(xi_83, xi_91), xi_92);
+          const __m256 forceTerm_13 = _mm256_add_ps(_mm256_add_ps(xi_101, xi_57), xi_94);
+          const __m256 forceTerm_14 = _mm256_add_ps(_mm256_add_ps(xi_102, xi_77), xi_94);
+          const __m256 forceTerm_15 = _mm256_add_ps(_mm256_add_ps(xi_103, xi_85), xi_92);
+          const __m256 forceTerm_16 = _mm256_add_ps(_mm256_add_ps(xi_103, xi_90), xi_91);
+          const __m256 forceTerm_17 = _mm256_add_ps(_mm256_add_ps(xi_102, xi_104), xi_57);
+          const __m256 forceTerm_18 = _mm256_add_ps(_mm256_add_ps(xi_101, xi_104), xi_77);
+          _mm256_store_ps(&_data_pdfs_20_30_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_110, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_131, _mm256_set_ps(0.023809523809523808f, 0.023809523809523808f, 0.023809523809523808f, 0.023809523809523808f, 0.023809523809523808f, 0.023809523809523808f, 0.023809523809523808f, 0.023809523809523808f))), _mm256_mul_ps(xi_107, _mm256_set_ps(0.14285714285714285f, 0.14285714285714285f, 0.14285714285714285f, 0.14285714285714285f, 0.14285714285714285f, 0.14285714285714285f, 0.14285714285714285f, 0.14285714285714285f))), _mm256_mul_ps(xi_127, _mm256_set_ps(0.042857142857142858f, 0.042857142857142858f, 0.042857142857142858f, 0.042857142857142858f, 0.042857142857142858f, 0.042857142857142858f, 0.042857142857142858f, 0.042857142857142858f))), _mm256_mul_ps(xi_111, _mm256_set_ps(0.085714285714285715f, 0.085714285714285715f, 0.085714285714285715f, 0.085714285714285715f, 0.085714285714285715f, 0.085714285714285715f, 0.085714285714285715f, 0.085714285714285715f))), _mm256_mul_ps(xi_117, _mm256_set_ps(0.10000000000000001f, 0.10000000000000001f, 0.10000000000000001f, 0.10000000000000001f, 0.10000000000000001f, 0.10000000000000001f, 0.10000000000000001f, 0.10000000000000001f))), _mm256_mul_ps(xi_108, _mm256_set_ps(0.20000000000000001f, 0.20000000000000001f, 0.20000000000000001f, 0.20000000000000001f, 0.20000000000000001f, 0.20000000000000001f, 0.20000000000000001f, 0.20000000000000001f))), _mm256_mul_ps(xi_124, _mm256_set_ps(-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f))), forceTerm_0), xi_251));
+          _mm256_store_ps(&_data_pdfs_20_31_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_135, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_140, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), forceTerm_1), xi_151), xi_164), xi_246));
+          _mm256_store_ps(&_data_pdfs_20_32_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_2, xi_135), xi_140), xi_164), xi_165), xi_247));
+          _mm256_store_ps(&_data_pdfs_20_33_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_3, xi_168), xi_170), xi_175), xi_176), xi_253));
+          _mm256_store_ps(&_data_pdfs_20_34_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_168, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_170, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), forceTerm_4), xi_176), xi_177), xi_252));
+          _mm256_store_ps(&_data_pdfs_20_35_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(xi_179, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(xi_181, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), forceTerm_5), xi_186), xi_187), xi_256));
+          _mm256_store_ps(&_data_pdfs_20_36_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_6, xi_179), xi_181), xi_187), xi_188), xi_244));
+          _mm256_store_ps(&_data_pdfs_20_37_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_7, xi_177), xi_195), xi_200), xi_265));
+          _mm256_store_ps(&_data_pdfs_20_38_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_8, xi_175), xi_195), xi_201), xi_260));
+          _mm256_store_ps(&_data_pdfs_20_39_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_9, xi_177), xi_201), xi_202), xi_264));
+          _mm256_store_ps(&_data_pdfs_20_310_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_10, xi_175), xi_200), xi_202), xi_263));
+          _mm256_store_ps(&_data_pdfs_20_311_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_11, xi_210), xi_219), xi_224), xi_248));
+          _mm256_store_ps(&_data_pdfs_20_312_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_12, xi_219), xi_225), xi_226), xi_258));
+          _mm256_store_ps(&_data_pdfs_20_313_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_13, xi_231), xi_234), xi_239), xi_250));
+          _mm256_store_ps(&_data_pdfs_20_314_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_14, xi_231), xi_240), xi_241), xi_257));
+          _mm256_store_ps(&_data_pdfs_20_315_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_15, xi_224), xi_226), xi_242), xi_261));
+          _mm256_store_ps(&_data_pdfs_20_316_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_16, xi_210), xi_225), xi_242), xi_259));
+          _mm256_store_ps(&_data_pdfs_20_317_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_17, xi_239), xi_240), xi_243), xi_254));
+          _mm256_store_ps(&_data_pdfs_20_318_10[ctr_0], _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(forceTerm_18, xi_234), xi_241), xi_243), xi_249));
+        }
+        for (int64_t ctr_0 = (int64_t)((_size_force_0) / (8)) * (8); ctr_0 < _size_force_0; ctr_0 += 1) {
+          const float xi_244 = _data_pdfs_20_36_10[ctr_0];
+          const float xi_245 = _data_force_20_32_10[ctr_0];
+          const float xi_246 = _data_pdfs_20_31_10[ctr_0];
+          const float xi_247 = _data_pdfs_20_32_10[ctr_0];
+          const float xi_248 = _data_pdfs_20_311_10[ctr_0];
+          const float xi_249 = _data_pdfs_20_318_10[ctr_0];
+          const float xi_250 = _data_pdfs_20_313_10[ctr_0];
+          const float xi_251 = _data_pdfs_20_30_10[ctr_0];
+          const float xi_252 = _data_pdfs_20_34_10[ctr_0];
+          const float xi_253 = _data_pdfs_20_33_10[ctr_0];
+          const float xi_254 = _data_pdfs_20_317_10[ctr_0];
+          const float xi_255 = _data_force_20_30_10[ctr_0];
+          const float xi_256 = _data_pdfs_20_35_10[ctr_0];
+          const float xi_257 = _data_pdfs_20_314_10[ctr_0];
+          const float xi_258 = _data_pdfs_20_312_10[ctr_0];
+          const float xi_259 = _data_pdfs_20_316_10[ctr_0];
+          const float xi_260 = _data_pdfs_20_38_10[ctr_0];
+          const float xi_261 = _data_pdfs_20_315_10[ctr_0];
+          const float xi_262 = _data_force_20_31_10[ctr_0];
+          const float xi_263 = _data_pdfs_20_310_10[ctr_0];
+          const float xi_264 = _data_pdfs_20_39_10[ctr_0];
+          const float xi_265 = _data_pdfs_20_37_10[ctr_0];
+
+          float random_3_0{};
+          float random_3_1{};
+          float random_3_2{};
+          float random_3_3{};
+          if (kT > 0.) {
+            philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 3, seed, random_3_0, random_3_1, random_3_2, random_3_3);
+          }
+
+          float random_2_0{};
+          float random_2_1{};
+          float random_2_2{};
+          float random_2_3{};
+          if (kT > 0.) {
+            philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 2, seed, random_2_0, random_2_1, random_2_2, random_2_3);
+          }
+
+          float random_1_0{};
+          float random_1_1{};
+          float random_1_2{};
+          float random_1_3{};
+          if (kT > 0.) {
+            philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 1, seed, random_1_0, random_1_1, random_1_2, random_1_3);
+          }
+
+          float random_0_0{};
+          float random_0_1{};
+          float random_0_2{};
+          float random_0_3{};
+          if (kT > 0.) {
+            philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 0, seed, random_0_0, random_0_1, random_0_2, random_0_3);
+          }
+          const float xi_2 = xi_249 + xi_257;
+          const float xi_3 = xi_2 + xi_252;
+          const float xi_4 = xi_246 + xi_248 + xi_261;
+          const float xi_5 = xi_256 + xi_258;
+          const float xi_6 = xi_244 + xi_254;
+          const float xi_8 = xi_264 * -1.0f;
+          const float xi_9 = xi_265 * -1.0f;
+          const float xi_10 = xi_254 * -1.0f;
+          const float xi_11 = xi_250 * -1.0f;
+          const float xi_12 = xi_253 * -1.0f;
+          const float xi_13 = xi_10 + xi_11 + xi_12;
+          const float xi_14 = xi_247 * -1.0f;
+          const float xi_15 = xi_263 * -1.0f;
+          const float xi_16 = xi_14 + xi_15;
+          const float xi_17 = xi_259 * -1.0f;
+          const float xi_18 = xi_258 * -1.0f;
+          const float xi_19 = xi_17 + xi_18;
+          const float xi_20 = xi_249 * -1.0f;
+          const float xi_21 = xi_10 + xi_20;
+          const float xi_22 = xi_261 * -1.0f;
+          const float xi_23 = xi_244 * -1.0f;
+          const float xi_24 = xi_17 + xi_22 + xi_23 + xi_248;
+          const float xi_29 = xi_262 * 0.16666666666666666f;
+          const float xi_30 = xi_262 * 0.083333333333333329f;
+          const float xi_42 = xi_255 * 0.16666666666666666f;
+          const float xi_43 = xi_255 * 0.083333333333333329f;
+          const float xi_49 = xi_245 * 0.16666666666666666f;
+          const float xi_50 = xi_245 * 0.083333333333333329f;
+          const float xi_67 = xi_262 * 0.25f;
+          const float xi_72 = xi_262 * xi_71;
+          const float xi_114 = xi_251 * -1.0f;
+          const float xi_118 = xi_248 * -1.0f;
+          const float xi_119 = xi_118 + xi_18;
+          const float xi_120 = xi_260 * -1.0f + xi_8;
+          const float xi_122 = xi_257 * -1.0f;
+          const float xi_123 = xi_11 + xi_122 + xi_15 + xi_21;
+          const float xi_125 = xi_248 * 2.0f + xi_258 * 2.0f + xi_259 * 2.0f + xi_261 * 2.0f;
+          const float xi_126 = xi_125 + xi_252 * 5.0f + xi_253 * 5.0f;
+          const float xi_128 = xi_250 * 2.0f;
+          const float xi_129 = xi_257 * 2.0f;
+          const float xi_130 = xi_249 * 2.0f + xi_254 * 2.0f;
+          const float xi_132 = xi_118 + xi_258;
+          const float xi_133 = xi_132 + xi_14 + xi_22 + xi_246 + xi_259;
+          const float xi_135 = xi_133 * xi_134;
+          const float xi_136 = random_2_3 - 0.5f;
+          const float xi_141 = xi_265 * 2.0f;
+          const float xi_142 = xi_263 * 2.0f;
+          const float xi_143 = xi_260 * -2.0f + xi_264 * 2.0f;
+          const float xi_144 = xi_14 + xi_141 * -1.0f + xi_142 + xi_143 + xi_19 + xi_4;
+          const float xi_146 = xi_144 * xi_145;
+          const float xi_147 = random_1_2 - 0.5f;
+          const float xi_152 = random_0_1 - 0.5f;
+          const float xi_166 = xi_122 + xi_250;
+          const float xi_167 = xi_12 + xi_166 + xi_20 + xi_252 + xi_254;
+          const float xi_168 = xi_134 * xi_167;
+          const float xi_169 = random_2_1 - 0.5f;
+          const float xi_171 = xi_13 + xi_141 + xi_142 * -1.0f + xi_143 + xi_3;
+          const float xi_172 = xi_145 * xi_171;
+          const float xi_173 = random_2_0 - 0.5f;
+          const float xi_178 = xi_119 + xi_23 + xi_256 + xi_259 + xi_261;
+          const float xi_179 = xi_134 * xi_178;
+          const float xi_180 = random_2_2 - 0.5f;
+          const float xi_182 = xi_128 * -1.0f + xi_129 * -1.0f + xi_130 + xi_24 + xi_5;
+          const float xi_183 = xi_145 * xi_182;
+          const float xi_184 = random_1_3 - 0.5f;
+          const float xi_212 = xi_182 * xi_211;
+          const float xi_214 = xi_178 * xi_213;
+          const float xi_220 = xi_144 * xi_211;
+          const float xi_221 = xi_133 * xi_213;
+          const float xi_235 = xi_167 * xi_213;
+          const float xi_236 = xi_171 * xi_211;
+          const float xi_31 = rr_0 * xi_30;
+          const float xi_44 = rr_0 * xi_43;
+          const float xi_51 = rr_0 * xi_50;
+          const float xi_54 = xi_255 * xi_53;
+          const float xi_59 = xi_262 * xi_53;
+          const float xi_81 = xi_245 * xi_53;
+          const float vel0Term = xi_260 + xi_263 + xi_3;
+          const float vel1Term = xi_265 + xi_4;
+          const float vel2Term = xi_250 + xi_5;
+          const float rho = vel0Term + vel1Term + vel2Term + xi_247 + xi_251 + xi_253 + xi_259 + xi_264 + xi_6;
+          const float xi_105 = kT * rho;
+          const float xi_106 = powf(xi_105 * (-1.0f * (omega_even * -1.0f + 1.0f) * (omega_even * -1.0f + 1.0f) + 1.0f), 0.5f);
+          const float xi_107 = xi_106 * (random_3_0 - 0.5f) * 3.7416573867739413f;
+          const float xi_108 = xi_106 * (random_3_2 - 0.5f) * 5.4772255750516612f;
+          const float xi_110 = xi_109 * (random_1_1 - 0.5f) * powf(xi_105 * (-1.0f * (omega_bulk * -1.0f + 1.0f) * (omega_bulk * -1.0f + 1.0f) + 1.0f), 0.5f);
+          const float xi_111 = xi_106 * (random_3_1 - 0.5f) * 8.3666002653407556f;
+          const float xi_137 = powf(xi_105 * (-1.0f * (omega_odd * -1.0f + 1.0f) * (omega_odd * -1.0f + 1.0f) + 1.0f), 0.5f);
+          const float xi_138 = xi_137 * 1.4142135623730951f;
+          const float xi_139 = xi_138 * 0.5f;
+          const float xi_140 = xi_136 * xi_139;
+          const float xi_148 = xi_109 * xi_137;
+          const float xi_149 = xi_148 * 0.16666666666666666f;
+          const float xi_150 = xi_147 * xi_149;
+          const float xi_151 = xi_146 * -1.0f + xi_150 * -1.0f;
+          const float xi_153 = powf(xi_105 * (-1.0f * (omega_shear * -1.0f + 1.0f) * (omega_shear * -1.0f + 1.0f) + 1.0f), 0.5f);
+          const float xi_154 = xi_153 * 0.5f;
+          const float xi_155 = xi_152 * xi_154;
+          const float xi_161 = xi_153 * (random_0_0 - 0.5f) * 1.7320508075688772f;
+          const float xi_165 = xi_146 + xi_150;
+          const float xi_170 = xi_139 * xi_169;
+          const float xi_174 = xi_149 * xi_173;
+          const float xi_175 = xi_172 + xi_174;
+          const float xi_177 = xi_172 * -1.0f + xi_174 * -1.0f;
+          const float xi_181 = xi_139 * xi_180;
+          const float xi_185 = xi_149 * xi_184;
+          const float xi_186 = xi_183 * -1.0f + xi_185 * -1.0f;
+          const float xi_188 = xi_183 + xi_185;
+          const float xi_189 = xi_152 * xi_153 * 0.25f;
+          const float xi_192 = xi_107 * 0.083333333333333329f;
+          const float xi_196 = xi_154 * (random_0_2 - 0.5f);
+          const float xi_203 = xi_154 * (random_1_0 - 0.5f);
+          const float xi_207 = xi_111 * -0.014285714285714285f;
+          const float xi_208 = xi_108 * 0.050000000000000003f;
+          const float xi_215 = xi_148 * 0.083333333333333329f;
+          const float xi_216 = xi_184 * xi_215;
+          const float xi_217 = xi_138 * 0.25f;
+          const float xi_218 = xi_180 * xi_217;
+          const float xi_219 = xi_212 * -1.0f + xi_214 + xi_216 * -1.0f + xi_218;
+          const float xi_222 = xi_147 * xi_215;
+          const float xi_223 = xi_136 * xi_217;
+          const float xi_224 = xi_220 * -1.0f + xi_221 + xi_222 * -1.0f + xi_223;
+          const float xi_225 = xi_220 + xi_221 * -1.0f + xi_222 + xi_223 * -1.0f;
+          const float xi_227 = xi_189 * -1.0f;
+          const float xi_230 = xi_111 * 0.035714285714285712f;
+          const float xi_232 = xi_154 * (random_0_3 - 0.5f);
+          const float xi_237 = xi_169 * xi_217;
+          const float xi_238 = xi_173 * xi_215;
+          const float xi_239 = xi_235 * -1.0f + xi_236 + xi_237 * -1.0f + xi_238;
+          const float xi_241 = xi_235 + xi_236 * -1.0f + xi_237 + xi_238 * -1.0f;
+          const float xi_242 = xi_212 + xi_214 * -1.0f + xi_216 + xi_218 * -1.0f;
+          const float xi_0 = ((1.0f) / (rho));
+          const float xi_7 = xi_0 * 0.5f;
+          const float u_0 = xi_0 * (vel0Term + xi_13 + xi_8 + xi_9) + xi_255 * xi_7;
+          const float xi_25 = u_0 * xi_255;
+          const float xi_37 = xi_25 * 0.16666666666666666f;
+          const float xi_38 = xi_25 * 0.083333333333333329f;
+          const float xi_39 = omega_shear * xi_38;
+          const float xi_40 = xi_37 * -1.0f + xi_39;
+          const float xi_56 = xi_25 * xi_55 * -1.0f + xi_37;
+          const float xi_57 = xi_43 * -1.0f + xi_54 + xi_56;
+          const float xi_61 = xi_25 * xi_60 * -1.0f;
+          const float xi_68 = u_0 * xi_67;
+          const float xi_73 = u_0 * xi_72;
+          const float xi_77 = xi_43 + xi_54 * -1.0f + xi_56;
+          const float xi_84 = xi_38 * -1.0f;
+          const float xi_95 = u_0 * xi_245;
+          const float xi_96 = xi_95 * 0.25f;
+          const float xi_99 = xi_71 * xi_95;
+          const float xi_113 = rho * u_0 * u_0;
+          const float u_1 = xi_0 * (vel1Term + xi_16 + xi_19 + xi_260 + xi_8) + xi_262 * xi_7;
+          const float xi_26 = u_1 * xi_262;
+          const float xi_32 = xi_26 * 0.16666666666666666f;
+          const float xi_45 = xi_26 * 0.083333333333333329f;
+          const float xi_46 = omega_shear * xi_45;
+          const float xi_47 = xi_32 * -1.0f + xi_46;
+          const float xi_62 = xi_26 * xi_60 * -1.0f;
+          const float xi_69 = u_1 * 0.25f;
+          const float xi_70 = xi_255 * xi_69;
+          const float xi_74 = u_1 * xi_71;
+          const float xi_75 = xi_255 * xi_74;
+          const float xi_76 = xi_68 * -1.0f + xi_70 * -1.0f + xi_73 + xi_75;
+          const float xi_78 = xi_68 + xi_70 + xi_73 * -1.0f + xi_75 * -1.0f;
+          const float xi_86 = xi_245 * xi_69;
+          const float xi_88 = xi_245 * xi_74;
+          const float xi_93 = xi_45 * -1.0f;
+          const float xi_112 = rho * u_1 * u_1;
+          const float xi_121 = xi_112 + xi_120 + xi_9;
+          const float xi_197 = rho * u_1;
+          const float xi_199 = xi_198 * (u_0 * xi_197 + xi_120 + xi_263 + xi_265);
+          const float xi_200 = xi_196 * -1.0f + xi_199 * -1.0f;
+          const float xi_201 = xi_196 + xi_199;
+          const float u_2 = xi_0 * (vel2Term + xi_21 + xi_24 + xi_257) + xi_245 * xi_7;
+          const float xi_27 = u_2 * xi_245;
+          const float xi_33 = xi_27 * 0.16666666666666666f;
+          const float xi_34 = xi_27 * 0.083333333333333329f;
+          const float xi_35 = omega_shear * xi_34;
+          const float xi_36 = xi_33 * -1.0f + xi_35;
+          const float xi_41 = omega_shear * xi_32 * -1.0f + xi_26 * 0.33333333333333331f + xi_36 + xi_40;
+          const float xi_48 = omega_shear * xi_37 * -1.0f + xi_25 * 0.33333333333333331f + xi_36 + xi_47;
+          const float xi_52 = omega_shear * xi_33 * -1.0f + xi_27 * 0.33333333333333331f + xi_40 + xi_47;
+          const float xi_58 = xi_34 * -1.0f;
+          const float xi_63 = xi_27 * xi_60 * -1.0f;
+          const float xi_64 = xi_26 * xi_55 * -1.0f + xi_32 + xi_61 + xi_62 + xi_63;
+          const float xi_65 = xi_30 + xi_59 * -1.0f + xi_64;
+          const float xi_66 = xi_35 + xi_58 + xi_65;
+          const float xi_79 = xi_30 * -1.0f + xi_59 + xi_64;
+          const float xi_80 = xi_35 + xi_58 + xi_79;
+          const float xi_82 = xi_27 * xi_55 * -1.0f + xi_33;
+          const float xi_83 = xi_50 + xi_81 * -1.0f + xi_82;
+          const float xi_85 = xi_39 + xi_65 + xi_84;
+          const float xi_87 = u_2 * xi_67;
+          const float xi_89 = u_2 * xi_72;
+          const float xi_90 = xi_86 + xi_87 + xi_88 * -1.0f + xi_89 * -1.0f;
+          const float xi_91 = xi_39 + xi_79 + xi_84;
+          const float xi_92 = xi_86 * -1.0f + xi_87 * -1.0f + xi_88 + xi_89;
+          const float xi_94 = xi_46 + xi_61 + xi_62 + xi_63 + xi_83 + xi_93;
+          const float xi_97 = u_2 * xi_255;
+          const float xi_98 = xi_97 * 0.25f;
+          const float xi_100 = xi_71 * xi_97;
+          const float xi_101 = xi_100 + xi_96 * -1.0f + xi_98 * -1.0f + xi_99;
+          const float xi_102 = xi_100 * -1.0f + xi_96 + xi_98 + xi_99 * -1.0f;
+          const float xi_103 = xi_50 * -1.0f + xi_81 + xi_82;
+          const float xi_104 = xi_103 + xi_46 + xi_61 + xi_62 + xi_63 + xi_93;
+          const float xi_115 = rho * u_2 * u_2;
+          const float xi_116 = xi_114 + xi_115 * 0.66666666666666663f + xi_244 * 3.0f + xi_256 * 3.0f;
+          const float xi_117 = omega_even * (xi_112 * 0.66666666666666663f + xi_113 * 1.6666666666666667f + xi_116 + xi_246 * 3.0f + xi_247 * 3.0f + xi_248 * -3.0f + xi_258 * -3.0f + xi_259 * -3.0f + xi_261 * -3.0f);
+          const float xi_124 = omega_bulk * (xi_113 + xi_115 + xi_119 + xi_121 + xi_123 + xi_17 + xi_22 + xi_251);
+          const float xi_127 = omega_even * (xi_112 * 2.3333333333333335f + xi_116 + xi_126 + xi_246 * -2.0f + xi_247 * -2.0f + xi_249 * -5.0f + xi_250 * -5.0f + xi_254 * -5.0f + xi_257 * -5.0f);
+          const float xi_131 = omega_even * (xi_114 + xi_115 * 3.0f + xi_126 + xi_128 + xi_129 + xi_130 + xi_244 * -4.0f + xi_246 * 5.0f + xi_247 * 5.0f + xi_256 * -4.0f + xi_260 * -7.0f + xi_263 * -7.0f + xi_264 * -7.0f + xi_265 * -7.0f);
+          const float xi_156 = xi_115 * -1.0f + xi_256;
+          const float xi_157 = omega_shear * (xi_121 + xi_156 + xi_16 + xi_2 + xi_246 * -1.0f + xi_250 + xi_6);
+          const float xi_158 = xi_157 * 0.125f;
+          const float xi_159 = xi_107 * -0.11904761904761904f + xi_131 * -0.01984126984126984f;
+          const float xi_160 = omega_shear * (xi_112 * -1.0f + xi_113 * 2.0f + xi_120 + xi_123 + xi_125 + xi_156 + xi_244 + xi_246 + xi_247 + xi_252 * -2.0f + xi_253 * -2.0f + xi_9);
+          const float xi_162 = xi_160 * -0.041666666666666664f + xi_161 * -0.16666666666666666f;
+          const float xi_163 = xi_108 * -0.10000000000000001f + xi_117 * -0.050000000000000003f + xi_162;
+          const float xi_164 = xi_111 * 0.028571428571428571f + xi_127 * 0.014285714285714285f + xi_155 + xi_158 + xi_159 + xi_163;
+          const float xi_176 = xi_111 * -0.071428571428571425f + xi_127 * -0.035714285714285712f + xi_159 + xi_160 * 0.083333333333333329f + xi_161 * 0.33333333333333331f;
+          const float xi_187 = xi_107 * 0.095238095238095233f + xi_111 * -0.042857142857142858f + xi_127 * -0.021428571428571429f + xi_131 * 0.015873015873015872f + xi_155 * -1.0f + xi_158 * -1.0f + xi_163;
+          const float xi_190 = xi_157 * 0.0625f;
+          const float xi_191 = xi_131 * 0.013888888888888888f;
+          const float xi_193 = xi_110 * 0.083333333333333329f + xi_124 * 0.041666666666666664f;
+          const float xi_194 = xi_160 * 0.020833333333333332f + xi_161 * 0.083333333333333329f + xi_193;
+          const float xi_195 = xi_165 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
+          const float xi_202 = xi_151 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
+          const float xi_204 = xi_127 * -0.0071428571428571426f;
+          const float xi_205 = xi_198 * (u_2 * xi_197 + xi_132 + xi_17 + xi_261);
+          const float xi_206 = xi_117 * 0.025000000000000001f;
+          const float xi_209 = xi_107 * -0.023809523809523808f + xi_131 * -0.003968253968253968f;
+          const float xi_210 = xi_162 + xi_193 + xi_203 + xi_204 + xi_205 + xi_206 + xi_207 + xi_208 + xi_209;
+          const float xi_226 = xi_162 + xi_193 + xi_203 * -1.0f + xi_204 + xi_205 * -1.0f + xi_206 + xi_207 + xi_208 + xi_209;
+          const float xi_228 = xi_190 * -1.0f;
+          const float xi_229 = xi_127 * 0.017857142857142856f;
+          const float xi_231 = xi_188 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
+          const float xi_233 = xi_198 * (rho * u_0 * u_2 + xi_10 + xi_166 + xi_249);
+          const float xi_234 = xi_232 * -1.0f + xi_233 * -1.0f;
+          const float xi_240 = xi_232 + xi_233;
+          const float xi_243 = xi_186 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
+          const float forceTerm_0 = xi_25 * xi_28 + xi_25 * -1.0f + xi_26 * xi_28 + xi_26 * -1.0f + xi_27 * xi_28 + xi_27 * -1.0f;
+          const float forceTerm_1 = xi_29 + xi_31 * -1.0f + xi_41;
+          const float forceTerm_2 = xi_29 * -1.0f + xi_31 + xi_41;
+          const float forceTerm_3 = xi_42 * -1.0f + xi_44 + xi_48;
+          const float forceTerm_4 = xi_42 + xi_44 * -1.0f + xi_48;
+          const float forceTerm_5 = xi_49 + xi_51 * -1.0f + xi_52;
+          const float forceTerm_6 = xi_49 * -1.0f + xi_51 + xi_52;
+          const float forceTerm_7 = xi_57 + xi_66 + xi_76;
+          const float forceTerm_8 = xi_66 + xi_77 + xi_78;
+          const float forceTerm_9 = xi_57 + xi_78 + xi_80;
+          const float forceTerm_10 = xi_76 + xi_77 + xi_80;
+          const float forceTerm_11 = xi_83 + xi_85 + xi_90;
+          const float forceTerm_12 = xi_83 + xi_91 + xi_92;
+          const float forceTerm_13 = xi_101 + xi_57 + xi_94;
+          const float forceTerm_14 = xi_102 + xi_77 + xi_94;
+          const float forceTerm_15 = xi_103 + xi_85 + xi_92;
+          const float forceTerm_16 = xi_103 + xi_90 + xi_91;
+          const float forceTerm_17 = xi_102 + xi_104 + xi_57;
+          const float forceTerm_18 = xi_101 + xi_104 + xi_77;
+          _data_pdfs_20_30_10[ctr_0] = forceTerm_0 + xi_107 * 0.14285714285714285f + xi_108 * 0.20000000000000001f + xi_110 * -1.0f + xi_111 * 0.085714285714285715f + xi_117 * 0.10000000000000001f + xi_124 * -0.5f + xi_127 * 0.042857142857142858f + xi_131 * 0.023809523809523808f + xi_251;
+          _data_pdfs_20_31_10[ctr_0] = forceTerm_1 + xi_135 * -1.0f + xi_140 * -1.0f + xi_151 + xi_164 + xi_246;
+          _data_pdfs_20_32_10[ctr_0] = forceTerm_2 + xi_135 + xi_140 + xi_164 + xi_165 + xi_247;
+          _data_pdfs_20_33_10[ctr_0] = forceTerm_3 + xi_168 + xi_170 + xi_175 + xi_176 + xi_253;
+          _data_pdfs_20_34_10[ctr_0] = forceTerm_4 + xi_168 * -1.0f + xi_170 * -1.0f + xi_176 + xi_177 + xi_252;
+          _data_pdfs_20_35_10[ctr_0] = forceTerm_5 + xi_179 * -1.0f + xi_181 * -1.0f + xi_186 + xi_187 + xi_256;
+          _data_pdfs_20_36_10[ctr_0] = forceTerm_6 + xi_179 + xi_181 + xi_187 + xi_188 + xi_244;
+          _data_pdfs_20_37_10[ctr_0] = forceTerm_7 + xi_177 + xi_195 + xi_200 + xi_265;
+          _data_pdfs_20_38_10[ctr_0] = forceTerm_8 + xi_175 + xi_195 + xi_201 + xi_260;
+          _data_pdfs_20_39_10[ctr_0] = forceTerm_9 + xi_177 + xi_201 + xi_202 + xi_264;
+          _data_pdfs_20_310_10[ctr_0] = forceTerm_10 + xi_175 + xi_200 + xi_202 + xi_263;
+          _data_pdfs_20_311_10[ctr_0] = forceTerm_11 + xi_210 + xi_219 + xi_224 + xi_248;
+          _data_pdfs_20_312_10[ctr_0] = forceTerm_12 + xi_219 + xi_225 + xi_226 + xi_258;
+          _data_pdfs_20_313_10[ctr_0] = forceTerm_13 + xi_231 + xi_234 + xi_239 + xi_250;
+          _data_pdfs_20_314_10[ctr_0] = forceTerm_14 + xi_231 + xi_240 + xi_241 + xi_257;
+          _data_pdfs_20_315_10[ctr_0] = forceTerm_15 + xi_224 + xi_226 + xi_242 + xi_261;
+          _data_pdfs_20_316_10[ctr_0] = forceTerm_16 + xi_210 + xi_225 + xi_242 + xi_259;
+          _data_pdfs_20_317_10[ctr_0] = forceTerm_17 + xi_239 + xi_240 + xi_243 + xi_254;
+          _data_pdfs_20_318_10[ctr_0] = forceTerm_18 + xi_234 + xi_241 + xi_243 + xi_249;
+        }
+      }
+    }
+  }
+}
+} // namespace internal_48c9ee502281a70505dce0378c55abd5
+
+void CollideSweepSinglePrecisionThermalizedAVX::run(IBlock *block) {
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+
+  auto &time_step = this->time_step_;
+  auto &kT = this->kT_;
+  auto &omega_odd = this->omega_odd_;
+  auto &seed = this->seed_;
+  auto &omega_bulk = this->omega_bulk_;
+  auto block_offset_0 = this->block_offset_0_;
+  auto &omega_shear = this->omega_shear_;
+  auto &omega_even = this->omega_even_;
+  auto block_offset_2 = this->block_offset_2_;
+  auto block_offset_1 = this->block_offset_1_;
+  block_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_48c9ee502281a70505dce0378c55abd5::collidesweepsingleprecisionthermalizedavx_collidesweepsingleprecisionthermalizedavx(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
+}
+
+void CollideSweepSinglePrecisionThermalizedAVX::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+
+  auto &time_step = this->time_step_;
+  auto &kT = this->kT_;
+  auto &omega_odd = this->omega_odd_;
+  auto &seed = this->seed_;
+  auto &omega_bulk = this->omega_bulk_;
+  auto block_offset_0 = this->block_offset_0_;
+  auto &omega_shear = this->omega_shear_;
+  auto &omega_even = this->omega_even_;
+  auto block_offset_2 = this->block_offset_2_;
+  auto block_offset_1 = this->block_offset_1_;
+  block_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_48c9ee502281a70505dce0378c55abd5::collidesweepsingleprecisionthermalizedavx_collidesweepsingleprecisionthermalizedavx(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalizedAVX.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalizedAVX.h
new file mode 100644
index 00000000000..36af5562973
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CollideSweepSinglePrecisionThermalizedAVX.h
@@ -0,0 +1,123 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file CollideSweepSinglePrecisionThermalizedAVX.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class CollideSweepSinglePrecisionThermalizedAVX {
+public:
+  CollideSweepSinglePrecisionThermalizedAVX(
+      BlockDataID forceID_, BlockDataID pdfsID_, uint32_t block_offset_0,
+      uint32_t block_offset_1, uint32_t block_offset_2, float kT,
+      float omega_bulk, float omega_even, float omega_odd, float omega_shear,
+      uint32_t seed, uint32_t time_step)
+      : forceID(forceID_), pdfsID(pdfsID_), block_offset_0_(block_offset_0),
+        block_offset_1_(block_offset_1), block_offset_2_(block_offset_2),
+        kT_(kT), omega_bulk_(omega_bulk), omega_even_(omega_even),
+        omega_odd_(omega_odd), omega_shear_(omega_shear), seed_(seed),
+        time_step_(time_step){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)> getSweep(
+      const shared_ptr<CollideSweepSinglePrecisionThermalizedAVX> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<CollideSweepSinglePrecisionThermalizedAVX> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  uint32_t block_offset_0_;
+  uint32_t block_offset_1_;
+  uint32_t block_offset_2_;
+  float kT_;
+  float omega_bulk_;
+  float omega_even_;
+  float omega_odd_;
+  float omega_shear_;
+  uint32_t seed_;
+  uint32_t time_step_;
+  std::function<void(IBlock *, uint32_t &, uint32_t &, uint32_t &)>
+      block_offset_generator =
+          [](IBlock *const, uint32_t &, uint32_t &, uint32_t &) {};
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.cpp
new file mode 100644
index 00000000000..1ab063d94be
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.cpp
@@ -0,0 +1,118 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file Dynamic_UBB_double_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "Dynamic_UBB_double_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_451fd042b8d7665063ea81b98853365b {
+static FUNC_PREFIX void dynamic_ubb_double_precision_boundary_Dynamic_UBB_double_precision(uint8_t *RESTRICT const _data_indexVector, double *RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) {
+
+  const int32_t f_in_inv_dir_idx[] = {0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13};
+
+  const double weights[] = {0.33333333333333333, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778};
+
+  const int32_t neighbour_offset_x[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1};
+  const int32_t neighbour_offset_y[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0};
+  const int32_t neighbour_offset_z[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1};
+
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[40 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[40 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[40 * ctr_0 + 8]));
+    const int32_t dir = *((int32_t *)(&_data_indexVector[40 * ctr_0 + 12]));
+    const double vel0Term = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_1 + _stride_pdfs_2 * z + 8 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 4 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z + _stride_pdfs_2 + 14 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z - _stride_pdfs_2 + 18 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 + _stride_pdfs_1 * y - _stride_pdfs_1 + _stride_pdfs_2 * z + 10 * _stride_pdfs_3];
+    const double vel1Term = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_1 + _stride_pdfs_2 * z + _stride_pdfs_2 + 11 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_1 + _stride_pdfs_2 * z + _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_1 + _stride_pdfs_2 * z - _stride_pdfs_2 + 15 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x - _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_1 + _stride_pdfs_2 * z + 7 * _stride_pdfs_3];
+    const double vel2Term = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + _stride_pdfs_2 + 5 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y - _stride_pdfs_1 + _stride_pdfs_2 * z + _stride_pdfs_2 + 12 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x - _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z + _stride_pdfs_2 + 13 * _stride_pdfs_3];
+    const double rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z - _stride_pdfs_2 + 6 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y - _stride_pdfs_1 + _stride_pdfs_2 * z + 2 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y - _stride_pdfs_1 + _stride_pdfs_2 * z - _stride_pdfs_2 + 16 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x - _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 3 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x - _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z - _stride_pdfs_2 + 17 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x - _stride_pdfs_0 + _stride_pdfs_1 * y - _stride_pdfs_1 + _stride_pdfs_2 * z + 9 * _stride_pdfs_3];
+    _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 * neighbour_offset_x[dir] + _stride_pdfs_1 * y + _stride_pdfs_1 * neighbour_offset_y[dir] + _stride_pdfs_2 * z + _stride_pdfs_2 * neighbour_offset_z[dir] + _stride_pdfs_3 * f_in_inv_dir_idx[dir]] = rho * (6.0 * ((double)(neighbour_offset_x[dir])) * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) + 6.0 * ((double)(neighbour_offset_y[dir])) * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) + 6.0 * ((double)(neighbour_offset_z[dir])) * *((double *)(&_data_indexVector[40 * ctr_0 + 32]))) * -1.0 * weights[dir] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + _stride_pdfs_3 * dir];
+  }
+}
+} // namespace internal_451fd042b8d7665063ea81b98853365b
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void Dynamic_UBB_double_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_451fd042b8d7665063ea81b98853365b::dynamic_ubb_double_precision_boundary_Dynamic_UBB_double_precision(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+}
+
+void Dynamic_UBB_double_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void Dynamic_UBB_double_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void Dynamic_UBB_double_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace lbm
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h
new file mode 100644
index 00000000000..f2a93a9f94c
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h
@@ -0,0 +1,569 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file Dynamic_UBB_double_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "blockforest/StructuredBlockForest.h"
+#include "core/debug/Debug.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "field/FlagField.h"
+#include "field/GhostLayerField.h"
+
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+class Dynamic_UBB_double_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    int32_t dir;
+    double vel_0;
+    double vel_1;
+    double vel_2;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
+        : x(x_), y(y_), z(z_), dir(dir_), vel_0(), vel_1(), vel_2() {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z && dir == o.dir &&
+             floatIsEqual(vel_0, o.vel_0) && floatIsEqual(vel_1, o.vel_1) &&
+             floatIsEqual(vel_2, o.vel_2);
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  Dynamic_UBB_double_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID pdfsID_,
+      std::function<Vector3<double>(const Cell &,
+                                    const shared_ptr<StructuredBlockForest> &,
+                                    IBlock &)> &velocityCallback)
+      : elementInitaliser(velocityCallback), pdfsID(pdfsID_) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_Dynamic_UBB_double_precision");
+  };
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(blocks, &*blockIt, flagFieldID,
+                                     boundaryFlagUID, domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 1);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 2);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() - 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 3);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() - 1, it.y() + 0, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 4);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 5);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() + 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 6);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() - 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 7);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() - 1, it.y() + 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 8);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 9);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() - 1, it.y() - 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 10);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() - 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 11);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() + 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 12);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() - 1, it.z() + 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 13);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() - 1, it.y() + 0, it.z() + 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 14);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() + 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 15);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() - 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 16);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() - 1, it.z() - 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 17);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() - 1, it.y() + 0, it.z() - 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 18);
+        Vector3<double> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() - 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+  std::function<Vector3<double>(
+      const Cell &, const shared_ptr<StructuredBlockForest> &, IBlock &)>
+      elementInitaliser;
+
+public:
+  BlockDataID pdfsID;
+};
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.cpp
new file mode 100644
index 00000000000..36c70e20e91
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.cpp
@@ -0,0 +1,118 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file Dynamic_UBB_single_precision.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "Dynamic_UBB_single_precision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_efdc97602c407e557fff6737dd9b4d80 {
+static FUNC_PREFIX void dynamic_ubb_single_precision_boundary_Dynamic_UBB_single_precision(uint8_t *RESTRICT const _data_indexVector, float *RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) {
+
+  const int32_t f_in_inv_dir_idx[] = {0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13};
+
+  const float weights[] = {0.33333333333333333f, 0.055555555555555556f, 0.055555555555555556f, 0.055555555555555556f, 0.055555555555555556f, 0.055555555555555556f, 0.055555555555555556f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f};
+
+  const int32_t neighbour_offset_x[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1};
+  const int32_t neighbour_offset_y[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0};
+  const int32_t neighbour_offset_z[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1};
+
+  for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
+    const int32_t x = *((int32_t *)(&_data_indexVector[28 * ctr_0]));
+    const int32_t y = *((int32_t *)(&_data_indexVector[28 * ctr_0 + 4]));
+    const int32_t z = *((int32_t *)(&_data_indexVector[28 * ctr_0 + 8]));
+    const int32_t dir = *((int32_t *)(&_data_indexVector[28 * ctr_0 + 12]));
+    const float vel0Term = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_1 + _stride_pdfs_2 * z + 8 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 4 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z + _stride_pdfs_2 + 14 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z - _stride_pdfs_2 + 18 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 + _stride_pdfs_1 * y - _stride_pdfs_1 + _stride_pdfs_2 * z + 10 * _stride_pdfs_3];
+    const float vel1Term = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_1 + _stride_pdfs_2 * z + _stride_pdfs_2 + 11 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_1 + _stride_pdfs_2 * z + _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_1 + _stride_pdfs_2 * z - _stride_pdfs_2 + 15 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x - _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_1 + _stride_pdfs_2 * z + 7 * _stride_pdfs_3];
+    const float vel2Term = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + _stride_pdfs_2 + 5 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y - _stride_pdfs_1 + _stride_pdfs_2 * z + _stride_pdfs_2 + 12 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x - _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z + _stride_pdfs_2 + 13 * _stride_pdfs_3];
+    const float rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z - _stride_pdfs_2 + 6 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y - _stride_pdfs_1 + _stride_pdfs_2 * z + 2 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y - _stride_pdfs_1 + _stride_pdfs_2 * z - _stride_pdfs_2 + 16 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x - _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 3 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x - _stride_pdfs_0 + _stride_pdfs_1 * y + _stride_pdfs_2 * z - _stride_pdfs_2 + 17 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x - _stride_pdfs_0 + _stride_pdfs_1 * y - _stride_pdfs_1 + _stride_pdfs_2 * z + 9 * _stride_pdfs_3];
+    _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 * neighbour_offset_x[dir] + _stride_pdfs_1 * y + _stride_pdfs_1 * neighbour_offset_y[dir] + _stride_pdfs_2 * z + _stride_pdfs_2 * neighbour_offset_z[dir] + _stride_pdfs_3 * f_in_inv_dir_idx[dir]] = rho * (6.0f * ((float)(neighbour_offset_x[dir])) * *((float *)(&_data_indexVector[28 * ctr_0 + 16])) + 6.0f * ((float)(neighbour_offset_y[dir])) * *((float *)(&_data_indexVector[28 * ctr_0 + 20])) + 6.0f * ((float)(neighbour_offset_z[dir])) * *((float *)(&_data_indexVector[28 * ctr_0 + 24]))) * -1.0f * weights[dir] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + _stride_pdfs_3 * dir];
+  }
+}
+} // namespace internal_efdc97602c407e557fff6737dd9b4d80
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+void Dynamic_UBB_single_precision::run_impl(IBlock *block, IndexVectors::Type type) {
+  auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
+  if (indexVectorSize == 0)
+    return;
+
+  auto pointer = indexVectors->pointerCpu(type);
+
+  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);
+
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  internal_efdc97602c407e557fff6737dd9b4d80::dynamic_ubb_single_precision_boundary_Dynamic_UBB_single_precision(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+}
+
+void Dynamic_UBB_single_precision::run(IBlock *block) {
+  run_impl(block, IndexVectors::ALL);
+}
+
+void Dynamic_UBB_single_precision::inner(IBlock *block) {
+  run_impl(block, IndexVectors::INNER);
+}
+
+void Dynamic_UBB_single_precision::outer(IBlock *block) {
+  run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace lbm
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h
new file mode 100644
index 00000000000..847d63b9ff2
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h
@@ -0,0 +1,569 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file Dynamic_UBB_single_precision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "blockforest/StructuredBlockForest.h"
+#include "core/debug/Debug.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "field/FlagField.h"
+#include "field/GhostLayerField.h"
+
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+class Dynamic_UBB_single_precision {
+public:
+  struct IndexInfo {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    int32_t dir;
+    float vel_0;
+    float vel_1;
+    float vel_2;
+    IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
+        : x(x_), y(y_), z(z_), dir(dir_), vel_0(), vel_1(), vel_2() {}
+    bool operator==(const IndexInfo &o) const {
+      return x == o.x && y == o.y && z == o.z && dir == o.dir &&
+             floatIsEqual(vel_0, o.vel_0) && floatIsEqual(vel_1, o.vel_1) &&
+             floatIsEqual(vel_2, o.vel_2);
+    }
+  };
+
+  class IndexVectors {
+  public:
+    using CpuIndexVector = std::vector<IndexInfo>;
+
+    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+    IndexVectors() = default;
+    bool operator==(IndexVectors const &other) const {
+      return other.cpuVectors_ == cpuVectors_;
+    }
+
+    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
+    IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+    void syncGPU() {}
+
+  private:
+    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+  };
+
+  Dynamic_UBB_single_precision(
+      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID pdfsID_,
+      std::function<Vector3<float>(const Cell &,
+                                   const shared_ptr<StructuredBlockForest> &,
+                                   IBlock &)> &velocityCallback)
+      : elementInitaliser(velocityCallback), pdfsID(pdfsID_) {
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
+        createIdxVector, "IndexField_Dynamic_UBB_single_precision");
+  };
+
+  void run(IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  void inner(IBlock *block);
+
+  void outer(IBlock *block);
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)> getInnerSweep() {
+    return [this](IBlock *b) { this->inner(b); };
+  }
+
+  std::function<void(IBlock *)> getOuterSweep() {
+    return [this](IBlock *b) { this->outer(b); };
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
+                         FlagUID domainFlagUID) {
+    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+      fillFromFlagField<FlagField_T>(blocks, &*blockIt, flagFieldID,
+                                     boundaryFlagUID, domainFlagUID);
+  }
+
+  template <typename FlagField_T>
+  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+                         IBlock *block, ConstBlockDataID flagFieldID,
+                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
+    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block->getData<FlagField_T>(flagFieldID);
+
+    if (!(flagField->flagExists(boundaryFlagUID) &&
+          flagField->flagExists(domainFlagUID)))
+      return;
+
+    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+    auto domainFlag = flagField->getFlag(domainFlagUID);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 1);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 2);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() - 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 3);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() - 1, it.y() + 0, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 4);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 5);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() + 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 6);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 0, it.z() - 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 7);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() - 1, it.y() + 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 8);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 9);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() - 1, it.y() - 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, -1, 0, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 10);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() - 1, it.z() + 0), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 11);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() + 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 12);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() - 1, it.z() + 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 13);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() - 1, it.y() + 0, it.z() + 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, 1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 14);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() + 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, 1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 15);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() + 1, it.z() - 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(0, -1, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 16);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 0, it.y() - 1, it.z() - 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(-1, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 17);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() - 1, it.y() + 0, it.z() - 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    for (auto it = flagField->beginWithGhostLayerXYZ(
+             cell_idx_c(flagField->nrOfGhostLayers() - 1));
+         it != flagField->end(); ++it) {
+      if (!isFlagSet(it, domainFlag))
+        continue;
+
+      if (isFlagSet(it.neighbor(1, 0, -1, 0), boundaryFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z(), 18);
+        Vector3<float> InitialisatonAdditionalData = elementInitaliser(
+            Cell(it.x() + 1, it.y() + 0, it.z() - 1), blocks, *block);
+        element.vel_0 = InitialisatonAdditionalData[0];
+        element.vel_1 = InitialisatonAdditionalData[1];
+        element.vel_2 = InitialisatonAdditionalData[2];
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z()))
+          indexVectorInner.push_back(element);
+        else
+          indexVectorOuter.push_back(element);
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
+
+private:
+  void run_impl(IBlock *block, IndexVectors::Type type);
+
+  BlockDataID indexVectorID;
+  std::function<Vector3<float>(
+      const Cell &, const shared_ptr<StructuredBlockForest> &, IBlock &)>
+      elementInitaliser;
+
+public:
+  BlockDataID pdfsID;
+};
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h
new file mode 100644
index 00000000000..c05a863edba
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h
@@ -0,0 +1,832 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ * Copyright (C) 2020 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+/**
+ * @file
+ * Lattice field accessors.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/a16141524c58ab88386e2a0f8fdd7c63c5edd704/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+#include <core/cell/Cell.h>
+#include <core/cell/CellInterval.h>
+#include <core/math/Matrix3.h>
+#include <core/math/Vector3.h>
+
+#include <field/GhostLayerField.h>
+#include <stencil/D3Q19.h>
+
+#include <array>
+#include <cassert>
+#include <tuple>
+#include <vector>
+
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#endif
+
+namespace walberla {
+namespace lbm {
+namespace accessor {
+
+namespace Population {
+inline std::array<double, 19u>
+get(GhostLayerField<double, uint_t{19u}> const *pdf_field, Cell const &cell) {
+  double const &xyz0 = pdf_field->get(cell, uint_t{0u});
+  std::array<double, 19u> pop;
+  pop[0u] = pdf_field->getF(&xyz0, uint_t{0u});
+  pop[1u] = pdf_field->getF(&xyz0, uint_t{1u});
+  pop[2u] = pdf_field->getF(&xyz0, uint_t{2u});
+  pop[3u] = pdf_field->getF(&xyz0, uint_t{3u});
+  pop[4u] = pdf_field->getF(&xyz0, uint_t{4u});
+  pop[5u] = pdf_field->getF(&xyz0, uint_t{5u});
+  pop[6u] = pdf_field->getF(&xyz0, uint_t{6u});
+  pop[7u] = pdf_field->getF(&xyz0, uint_t{7u});
+  pop[8u] = pdf_field->getF(&xyz0, uint_t{8u});
+  pop[9u] = pdf_field->getF(&xyz0, uint_t{9u});
+  pop[10u] = pdf_field->getF(&xyz0, uint_t{10u});
+  pop[11u] = pdf_field->getF(&xyz0, uint_t{11u});
+  pop[12u] = pdf_field->getF(&xyz0, uint_t{12u});
+  pop[13u] = pdf_field->getF(&xyz0, uint_t{13u});
+  pop[14u] = pdf_field->getF(&xyz0, uint_t{14u});
+  pop[15u] = pdf_field->getF(&xyz0, uint_t{15u});
+  pop[16u] = pdf_field->getF(&xyz0, uint_t{16u});
+  pop[17u] = pdf_field->getF(&xyz0, uint_t{17u});
+  pop[18u] = pdf_field->getF(&xyz0, uint_t{18u});
+  return pop;
+}
+
+inline void set(GhostLayerField<double, uint_t{19u}> *pdf_field,
+                std::array<double, 19u> const &pop, Cell const &cell) {
+  double &xyz0 = pdf_field->get(cell, uint_t{0u});
+  pdf_field->getF(&xyz0, uint_t{0u}) = pop[0u];
+  pdf_field->getF(&xyz0, uint_t{1u}) = pop[1u];
+  pdf_field->getF(&xyz0, uint_t{2u}) = pop[2u];
+  pdf_field->getF(&xyz0, uint_t{3u}) = pop[3u];
+  pdf_field->getF(&xyz0, uint_t{4u}) = pop[4u];
+  pdf_field->getF(&xyz0, uint_t{5u}) = pop[5u];
+  pdf_field->getF(&xyz0, uint_t{6u}) = pop[6u];
+  pdf_field->getF(&xyz0, uint_t{7u}) = pop[7u];
+  pdf_field->getF(&xyz0, uint_t{8u}) = pop[8u];
+  pdf_field->getF(&xyz0, uint_t{9u}) = pop[9u];
+  pdf_field->getF(&xyz0, uint_t{10u}) = pop[10u];
+  pdf_field->getF(&xyz0, uint_t{11u}) = pop[11u];
+  pdf_field->getF(&xyz0, uint_t{12u}) = pop[12u];
+  pdf_field->getF(&xyz0, uint_t{13u}) = pop[13u];
+  pdf_field->getF(&xyz0, uint_t{14u}) = pop[14u];
+  pdf_field->getF(&xyz0, uint_t{15u}) = pop[15u];
+  pdf_field->getF(&xyz0, uint_t{16u}) = pop[16u];
+  pdf_field->getF(&xyz0, uint_t{17u}) = pop[17u];
+  pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u];
+}
+
+inline void broadcast(GhostLayerField<double, uint_t{19u}> *pdf_field,
+                      std::array<double, 19u> const &pop) {
+  WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(pdf_field, {
+    double &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+    pdf_field->getF(&xyz0, uint_t{0u}) = pop[0u];
+    pdf_field->getF(&xyz0, uint_t{1u}) = pop[1u];
+    pdf_field->getF(&xyz0, uint_t{2u}) = pop[2u];
+    pdf_field->getF(&xyz0, uint_t{3u}) = pop[3u];
+    pdf_field->getF(&xyz0, uint_t{4u}) = pop[4u];
+    pdf_field->getF(&xyz0, uint_t{5u}) = pop[5u];
+    pdf_field->getF(&xyz0, uint_t{6u}) = pop[6u];
+    pdf_field->getF(&xyz0, uint_t{7u}) = pop[7u];
+    pdf_field->getF(&xyz0, uint_t{8u}) = pop[8u];
+    pdf_field->getF(&xyz0, uint_t{9u}) = pop[9u];
+    pdf_field->getF(&xyz0, uint_t{10u}) = pop[10u];
+    pdf_field->getF(&xyz0, uint_t{11u}) = pop[11u];
+    pdf_field->getF(&xyz0, uint_t{12u}) = pop[12u];
+    pdf_field->getF(&xyz0, uint_t{13u}) = pop[13u];
+    pdf_field->getF(&xyz0, uint_t{14u}) = pop[14u];
+    pdf_field->getF(&xyz0, uint_t{15u}) = pop[15u];
+    pdf_field->getF(&xyz0, uint_t{16u}) = pop[16u];
+    pdf_field->getF(&xyz0, uint_t{17u}) = pop[17u];
+    pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u];
+  });
+}
+
+inline std::vector<double>
+get(GhostLayerField<double, uint_t{19u}> const *pdf_field,
+    CellInterval const &ci) {
+  std::vector<double> out;
+  out.reserve(ci.numCells() * uint_t(19u));
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        double const &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{0u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{1u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{2u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{3u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{4u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{5u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{6u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{7u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{8u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{9u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{10u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{11u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{12u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{13u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{14u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{15u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{16u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{17u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{18u}));
+      }
+    }
+  }
+  return out;
+}
+
+inline void set(GhostLayerField<double, uint_t{19u}> *pdf_field,
+                std::vector<double> const &values, CellInterval const &ci) {
+  assert(uint_c(values.size()) == ci.numCells() * uint_t(19u));
+  auto values_ptr = values.data();
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        double &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+        pdf_field->getF(&xyz0, uint_t{0u}) = values_ptr[0u];
+        pdf_field->getF(&xyz0, uint_t{1u}) = values_ptr[1u];
+        pdf_field->getF(&xyz0, uint_t{2u}) = values_ptr[2u];
+        pdf_field->getF(&xyz0, uint_t{3u}) = values_ptr[3u];
+        pdf_field->getF(&xyz0, uint_t{4u}) = values_ptr[4u];
+        pdf_field->getF(&xyz0, uint_t{5u}) = values_ptr[5u];
+        pdf_field->getF(&xyz0, uint_t{6u}) = values_ptr[6u];
+        pdf_field->getF(&xyz0, uint_t{7u}) = values_ptr[7u];
+        pdf_field->getF(&xyz0, uint_t{8u}) = values_ptr[8u];
+        pdf_field->getF(&xyz0, uint_t{9u}) = values_ptr[9u];
+        pdf_field->getF(&xyz0, uint_t{10u}) = values_ptr[10u];
+        pdf_field->getF(&xyz0, uint_t{11u}) = values_ptr[11u];
+        pdf_field->getF(&xyz0, uint_t{12u}) = values_ptr[12u];
+        pdf_field->getF(&xyz0, uint_t{13u}) = values_ptr[13u];
+        pdf_field->getF(&xyz0, uint_t{14u}) = values_ptr[14u];
+        pdf_field->getF(&xyz0, uint_t{15u}) = values_ptr[15u];
+        pdf_field->getF(&xyz0, uint_t{16u}) = values_ptr[16u];
+        pdf_field->getF(&xyz0, uint_t{17u}) = values_ptr[17u];
+        pdf_field->getF(&xyz0, uint_t{18u}) = values_ptr[18u];
+        values_ptr += 19u;
+      }
+    }
+  }
+}
+} // namespace Population
+
+namespace Vector {
+inline Vector3<double> get(GhostLayerField<double, uint_t{3u}> const *vec_field,
+                           Cell const &cell) {
+  const double &xyz0 = vec_field->get(cell, uint_t{0u});
+  Vector3<double> vec;
+  vec[0] = vec_field->getF(&xyz0, uint_t{0u});
+  vec[1] = vec_field->getF(&xyz0, uint_t{1u});
+  vec[2] = vec_field->getF(&xyz0, uint_t{2u});
+  return vec;
+}
+
+inline void set(GhostLayerField<double, uint_t{3u}> *vec_field,
+                Vector3<double> const &vec, Cell const &cell) {
+  double &xyz0 = vec_field->get(cell, uint_t{0u});
+  vec_field->getF(&xyz0, uint_t{0u}) = vec[0u];
+  vec_field->getF(&xyz0, uint_t{1u}) = vec[1u];
+  vec_field->getF(&xyz0, uint_t{2u}) = vec[2u];
+}
+
+inline void add(GhostLayerField<double, uint_t{3u}> *vec_field,
+                Vector3<double> const &vec, Cell const &cell) {
+  double &xyz0 = vec_field->get(cell, uint_t{0u});
+  vec_field->getF(&xyz0, uint_t{0u}) += vec[0u];
+  vec_field->getF(&xyz0, uint_t{1u}) += vec[1u];
+  vec_field->getF(&xyz0, uint_t{2u}) += vec[2u];
+}
+
+inline void broadcast(GhostLayerField<double, uint_t{3u}> *vec_field,
+                      Vector3<double> const &vec) {
+  WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(vec_field, {
+    double &xyz0 = vec_field->get(x, y, z, uint_t{0u});
+    vec_field->getF(&xyz0, uint_t{0u}) = vec[0u];
+    vec_field->getF(&xyz0, uint_t{1u}) = vec[1u];
+    vec_field->getF(&xyz0, uint_t{2u}) = vec[2u];
+  });
+}
+
+inline void add_to_all(GhostLayerField<double, uint_t{3u}> *vec_field,
+                       Vector3<double> const &vec) {
+  WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(vec_field, {
+    double &xyz0 = vec_field->get(x, y, z, uint_t{0u});
+    vec_field->getF(&xyz0, uint_t{0u}) += vec[0u];
+    vec_field->getF(&xyz0, uint_t{1u}) += vec[1u];
+    vec_field->getF(&xyz0, uint_t{2u}) += vec[2u];
+  });
+}
+
+inline std::vector<double>
+get(GhostLayerField<double, uint_t{3u}> const *vec_field,
+    CellInterval const &ci) {
+  std::vector<double> out;
+  out.reserve(ci.numCells() * uint_t(3u));
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        const double &xyz0 = vec_field->get(x, y, z, uint_t{0u});
+        out.emplace_back(vec_field->getF(&xyz0, uint_t{0u}));
+        out.emplace_back(vec_field->getF(&xyz0, uint_t{1u}));
+        out.emplace_back(vec_field->getF(&xyz0, uint_t{2u}));
+      }
+    }
+  }
+  return out;
+}
+
+inline void set(GhostLayerField<double, uint_t{3u}> *vec_field,
+                std::vector<double> const &values, CellInterval const &ci) {
+  assert(uint_c(values.size()) == ci.numCells() * uint_t(3u));
+  auto values_ptr = values.data();
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        double &xyz0 = vec_field->get(x, y, z, uint_t{0u});
+        vec_field->getF(&xyz0, uint_t{0u}) = values_ptr[0u];
+        vec_field->getF(&xyz0, uint_t{1u}) = values_ptr[1u];
+        vec_field->getF(&xyz0, uint_t{2u}) = values_ptr[2u];
+        values_ptr += 3u;
+      }
+    }
+  }
+}
+} // namespace Vector
+
+namespace EquilibriumDistribution {
+inline double get(stencil::Direction const direction,
+                  Vector3<double> const &u = Vector3<double>(double(0.0)),
+                  double rho = double(1.0)) {
+
+  using namespace stencil;
+  switch (direction) {
+  case C:
+    return rho * -0.33333333333333331 * (u[0] * u[0]) +
+           rho * -0.33333333333333331 * (u[1] * u[1]) +
+           rho * -0.33333333333333331 * (u[2] * u[2]) +
+           rho * 0.33333333333333331;
+  case N:
+    return rho * -0.16666666666666666 * (u[0] * u[0]) +
+           rho * -0.16666666666666666 * (u[2] * u[2]) +
+           rho * 0.055555555555555552 + rho * 0.16666666666666666 * u[1] +
+           rho * 0.16666666666666666 * (u[1] * u[1]);
+  case S:
+    return rho * -0.16666666666666666 * u[1] +
+           rho * -0.16666666666666666 * (u[0] * u[0]) +
+           rho * -0.16666666666666666 * (u[2] * u[2]) +
+           rho * 0.055555555555555552 +
+           rho * 0.16666666666666666 * (u[1] * u[1]);
+  case W:
+    return rho * -0.16666666666666666 * u[0] +
+           rho * -0.16666666666666666 * (u[1] * u[1]) +
+           rho * -0.16666666666666666 * (u[2] * u[2]) +
+           rho * 0.055555555555555552 +
+           rho * 0.16666666666666666 * (u[0] * u[0]);
+  case E:
+    return rho * -0.16666666666666666 * (u[1] * u[1]) +
+           rho * -0.16666666666666666 * (u[2] * u[2]) +
+           rho * 0.055555555555555552 + rho * 0.16666666666666666 * u[0] +
+           rho * 0.16666666666666666 * (u[0] * u[0]);
+  case T:
+    return rho * -0.16666666666666666 * (u[0] * u[0]) +
+           rho * -0.16666666666666666 * (u[1] * u[1]) +
+           rho * 0.055555555555555552 + rho * 0.16666666666666666 * u[2] +
+           rho * 0.16666666666666666 * (u[2] * u[2]);
+  case B:
+    return rho * -0.16666666666666666 * u[2] +
+           rho * -0.16666666666666666 * (u[0] * u[0]) +
+           rho * -0.16666666666666666 * (u[1] * u[1]) +
+           rho * 0.055555555555555552 +
+           rho * 0.16666666666666666 * (u[2] * u[2]);
+  case NW:
+    return rho * -0.083333333333333329 * u[0] + rho * -0.25 * u[0] * u[1] +
+           rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[1] +
+           rho * 0.083333333333333329 * (u[0] * u[0]) +
+           rho * 0.083333333333333329 * (u[1] * u[1]);
+  case NE:
+    return rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] +
+           rho * 0.083333333333333329 * u[1] +
+           rho * 0.083333333333333329 * (u[0] * u[0]) +
+           rho * 0.083333333333333329 * (u[1] * u[1]) +
+           rho * 0.25 * u[0] * u[1];
+  case SW:
+    return rho * -0.083333333333333329 * u[0] +
+           rho * -0.083333333333333329 * u[1] + rho * 0.027777777777777776 +
+           rho * 0.083333333333333329 * (u[0] * u[0]) +
+           rho * 0.083333333333333329 * (u[1] * u[1]) +
+           rho * 0.25 * u[0] * u[1];
+  case SE:
+    return rho * -0.083333333333333329 * u[1] + rho * -0.25 * u[0] * u[1] +
+           rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] +
+           rho * 0.083333333333333329 * (u[0] * u[0]) +
+           rho * 0.083333333333333329 * (u[1] * u[1]);
+  case TN:
+    return rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[1] +
+           rho * 0.083333333333333329 * u[2] +
+           rho * 0.083333333333333329 * (u[1] * u[1]) +
+           rho * 0.083333333333333329 * (u[2] * u[2]) +
+           rho * 0.25 * u[1] * u[2];
+  case TS:
+    return rho * -0.083333333333333329 * u[1] + rho * -0.25 * u[1] * u[2] +
+           rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[2] +
+           rho * 0.083333333333333329 * (u[1] * u[1]) +
+           rho * 0.083333333333333329 * (u[2] * u[2]);
+  case TW:
+    return rho * -0.083333333333333329 * u[0] + rho * -0.25 * u[0] * u[2] +
+           rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[2] +
+           rho * 0.083333333333333329 * (u[0] * u[0]) +
+           rho * 0.083333333333333329 * (u[2] * u[2]);
+  case TE:
+    return rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] +
+           rho * 0.083333333333333329 * u[2] +
+           rho * 0.083333333333333329 * (u[0] * u[0]) +
+           rho * 0.083333333333333329 * (u[2] * u[2]) +
+           rho * 0.25 * u[0] * u[2];
+  case BN:
+    return rho * -0.083333333333333329 * u[2] + rho * -0.25 * u[1] * u[2] +
+           rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[1] +
+           rho * 0.083333333333333329 * (u[1] * u[1]) +
+           rho * 0.083333333333333329 * (u[2] * u[2]);
+  case BS:
+    return rho * -0.083333333333333329 * u[1] +
+           rho * -0.083333333333333329 * u[2] + rho * 0.027777777777777776 +
+           rho * 0.083333333333333329 * (u[1] * u[1]) +
+           rho * 0.083333333333333329 * (u[2] * u[2]) +
+           rho * 0.25 * u[1] * u[2];
+  case BW:
+    return rho * -0.083333333333333329 * u[0] +
+           rho * -0.083333333333333329 * u[2] + rho * 0.027777777777777776 +
+           rho * 0.083333333333333329 * (u[0] * u[0]) +
+           rho * 0.083333333333333329 * (u[2] * u[2]) +
+           rho * 0.25 * u[0] * u[2];
+  case BE:
+    return rho * -0.083333333333333329 * u[2] + rho * -0.25 * u[0] * u[2] +
+           rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] +
+           rho * 0.083333333333333329 * (u[0] * u[0]) +
+           rho * 0.083333333333333329 * (u[2] * u[2]);
+  default:
+    WALBERLA_ABORT("Invalid Direction")
+  }
+}
+} // namespace EquilibriumDistribution
+
+namespace Equilibrium {
+inline void set(GhostLayerField<double, uint_t{19u}> *pdf_field,
+                Vector3<double> const &u, double const rho, Cell const &cell) {
+
+  double &xyz0 = pdf_field->get(cell, uint_t{0u});
+  pdf_field->getF(&xyz0, uint_t{0u}) =
+      rho * -0.33333333333333331 * (u[0] * u[0]) +
+      rho * -0.33333333333333331 * (u[1] * u[1]) +
+      rho * -0.33333333333333331 * (u[2] * u[2]) + rho * 0.33333333333333331;
+  pdf_field->getF(&xyz0, uint_t{1u}) =
+      rho * -0.16666666666666666 * (u[0] * u[0]) +
+      rho * -0.16666666666666666 * (u[2] * u[2]) + rho * 0.055555555555555552 +
+      rho * 0.16666666666666666 * u[1] +
+      rho * 0.16666666666666666 * (u[1] * u[1]);
+  pdf_field->getF(&xyz0, uint_t{2u}) =
+      rho * -0.16666666666666666 * u[1] +
+      rho * -0.16666666666666666 * (u[0] * u[0]) +
+      rho * -0.16666666666666666 * (u[2] * u[2]) + rho * 0.055555555555555552 +
+      rho * 0.16666666666666666 * (u[1] * u[1]);
+  pdf_field->getF(&xyz0, uint_t{3u}) =
+      rho * -0.16666666666666666 * u[0] +
+      rho * -0.16666666666666666 * (u[1] * u[1]) +
+      rho * -0.16666666666666666 * (u[2] * u[2]) + rho * 0.055555555555555552 +
+      rho * 0.16666666666666666 * (u[0] * u[0]);
+  pdf_field->getF(&xyz0, uint_t{4u}) =
+      rho * -0.16666666666666666 * (u[1] * u[1]) +
+      rho * -0.16666666666666666 * (u[2] * u[2]) + rho * 0.055555555555555552 +
+      rho * 0.16666666666666666 * u[0] +
+      rho * 0.16666666666666666 * (u[0] * u[0]);
+  pdf_field->getF(&xyz0, uint_t{5u}) =
+      rho * -0.16666666666666666 * (u[0] * u[0]) +
+      rho * -0.16666666666666666 * (u[1] * u[1]) + rho * 0.055555555555555552 +
+      rho * 0.16666666666666666 * u[2] +
+      rho * 0.16666666666666666 * (u[2] * u[2]);
+  pdf_field->getF(&xyz0, uint_t{6u}) =
+      rho * -0.16666666666666666 * u[2] +
+      rho * -0.16666666666666666 * (u[0] * u[0]) +
+      rho * -0.16666666666666666 * (u[1] * u[1]) + rho * 0.055555555555555552 +
+      rho * 0.16666666666666666 * (u[2] * u[2]);
+  pdf_field->getF(&xyz0, uint_t{7u}) =
+      rho * -0.083333333333333329 * u[0] + rho * -0.25 * u[0] * u[1] +
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[1] +
+      rho * 0.083333333333333329 * (u[0] * u[0]) +
+      rho * 0.083333333333333329 * (u[1] * u[1]);
+  pdf_field->getF(&xyz0, uint_t{8u}) =
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] +
+      rho * 0.083333333333333329 * u[1] +
+      rho * 0.083333333333333329 * (u[0] * u[0]) +
+      rho * 0.083333333333333329 * (u[1] * u[1]) + rho * 0.25 * u[0] * u[1];
+  pdf_field->getF(&xyz0, uint_t{9u}) =
+      rho * -0.083333333333333329 * u[0] + rho * -0.083333333333333329 * u[1] +
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u[0] * u[0]) +
+      rho * 0.083333333333333329 * (u[1] * u[1]) + rho * 0.25 * u[0] * u[1];
+  pdf_field->getF(&xyz0, uint_t{10u}) =
+      rho * -0.083333333333333329 * u[1] + rho * -0.25 * u[0] * u[1] +
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] +
+      rho * 0.083333333333333329 * (u[0] * u[0]) +
+      rho * 0.083333333333333329 * (u[1] * u[1]);
+  pdf_field->getF(&xyz0, uint_t{11u}) =
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[1] +
+      rho * 0.083333333333333329 * u[2] +
+      rho * 0.083333333333333329 * (u[1] * u[1]) +
+      rho * 0.083333333333333329 * (u[2] * u[2]) + rho * 0.25 * u[1] * u[2];
+  pdf_field->getF(&xyz0, uint_t{12u}) =
+      rho * -0.083333333333333329 * u[1] + rho * -0.25 * u[1] * u[2] +
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[2] +
+      rho * 0.083333333333333329 * (u[1] * u[1]) +
+      rho * 0.083333333333333329 * (u[2] * u[2]);
+  pdf_field->getF(&xyz0, uint_t{13u}) =
+      rho * -0.083333333333333329 * u[0] + rho * -0.25 * u[0] * u[2] +
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[2] +
+      rho * 0.083333333333333329 * (u[0] * u[0]) +
+      rho * 0.083333333333333329 * (u[2] * u[2]);
+  pdf_field->getF(&xyz0, uint_t{14u}) =
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] +
+      rho * 0.083333333333333329 * u[2] +
+      rho * 0.083333333333333329 * (u[0] * u[0]) +
+      rho * 0.083333333333333329 * (u[2] * u[2]) + rho * 0.25 * u[0] * u[2];
+  pdf_field->getF(&xyz0, uint_t{15u}) =
+      rho * -0.083333333333333329 * u[2] + rho * -0.25 * u[1] * u[2] +
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[1] +
+      rho * 0.083333333333333329 * (u[1] * u[1]) +
+      rho * 0.083333333333333329 * (u[2] * u[2]);
+  pdf_field->getF(&xyz0, uint_t{16u}) =
+      rho * -0.083333333333333329 * u[1] + rho * -0.083333333333333329 * u[2] +
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u[1] * u[1]) +
+      rho * 0.083333333333333329 * (u[2] * u[2]) + rho * 0.25 * u[1] * u[2];
+  pdf_field->getF(&xyz0, uint_t{17u}) =
+      rho * -0.083333333333333329 * u[0] + rho * -0.083333333333333329 * u[2] +
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u[0] * u[0]) +
+      rho * 0.083333333333333329 * (u[2] * u[2]) + rho * 0.25 * u[0] * u[2];
+  pdf_field->getF(&xyz0, uint_t{18u}) =
+      rho * -0.083333333333333329 * u[2] + rho * -0.25 * u[0] * u[2] +
+      rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] +
+      rho * 0.083333333333333329 * (u[0] * u[0]) +
+      rho * 0.083333333333333329 * (u[2] * u[2]);
+}
+} // namespace Equilibrium
+
+namespace Density {
+inline double get(GhostLayerField<double, uint_t{19u}> const *pdf_field,
+                  Cell const &cell) {
+  const double &xyz0 = pdf_field->get(cell, uint_t{0u});
+  const double f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+  const double f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+  const double f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+  const double f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+  const double f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+  const double f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+  const double f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+  const double f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+  const double f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+  const double f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+  const double f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+  const double f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+  const double f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+  const double f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+  const double f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+  const double f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+  const double f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+  const double f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+  const double f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+  const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+  const double vel1Term = f_1 + f_11 + f_15 + f_7;
+  const double vel2Term = f_12 + f_13 + f_5;
+  const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term +
+                     vel1Term + vel2Term;
+  return rho;
+}
+
+inline void set(GhostLayerField<double, uint_t{19u}> *pdf_field,
+                double const rho_in, Cell const &cell) {
+  const double &xyz0 = pdf_field->get(cell, uint_t{0u});
+  const double f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+  const double f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+  const double f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+  const double f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+  const double f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+  const double f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+  const double f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+  const double f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+  const double f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+  const double f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+  const double f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+  const double f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+  const double f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+  const double f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+  const double f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+  const double f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+  const double f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+  const double f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+  const double f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+  const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+  const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
+  const double vel1Term = f_1 + f_11 + f_15 + f_7;
+  const double momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
+  const double vel2Term = f_12 + f_13 + f_5;
+  const double momdensity_2 =
+      f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
+  const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term +
+                     vel1Term + vel2Term;
+
+  // calculate current velocity (before density change)
+  const double conversion = double(1) / rho;
+  Vector3<double> velocity;
+  velocity[0u] = momdensity_0 * conversion;
+  velocity[1u] = momdensity_1 * conversion;
+  velocity[2u] = momdensity_2 * conversion;
+
+  Equilibrium::set(pdf_field, velocity, rho_in, cell);
+}
+
+inline std::vector<double>
+get(GhostLayerField<double, uint_t{19u}> const *pdf_field,
+    CellInterval const &ci) {
+  std::vector<double> out;
+  out.reserve(ci.numCells());
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        const double &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+        const double f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+        const double f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+        const double f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+        const double f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+        const double f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+        const double f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+        const double f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+        const double f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+        const double f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+        const double f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+        const double f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+        const double f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+        const double f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+        const double f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+        const double f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+        const double f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+        const double f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+        const double f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+        const double f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+        const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+        const double vel1Term = f_1 + f_11 + f_15 + f_7;
+        const double vel2Term = f_12 + f_13 + f_5;
+        const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 +
+                           vel0Term + vel1Term + vel2Term;
+        out.emplace_back(rho);
+      }
+    }
+  }
+  return out;
+}
+
+inline void set(GhostLayerField<double, uint_t{19u}> *pdf_field,
+                std::vector<double> const &values, CellInterval const &ci) {
+  assert(uint_c(values.size()) == ci.numCells());
+  auto values_it = values.begin();
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        const double &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+        const double f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+        const double f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+        const double f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+        const double f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+        const double f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+        const double f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+        const double f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+        const double f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+        const double f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+        const double f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+        const double f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+        const double f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+        const double f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+        const double f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+        const double f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+        const double f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+        const double f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+        const double f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+        const double f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+        const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+        const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
+        const double vel1Term = f_1 + f_11 + f_15 + f_7;
+        const double momdensity_1 =
+            -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
+        const double vel2Term = f_12 + f_13 + f_5;
+        const double momdensity_2 =
+            f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
+        const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 +
+                           vel0Term + vel1Term + vel2Term;
+
+        // calculate current velocity (before density change)
+        const double conversion = double(1) / rho;
+        Vector3<double> velocity;
+        velocity[0u] = momdensity_0 * conversion;
+        velocity[1u] = momdensity_1 * conversion;
+        velocity[2u] = momdensity_2 * conversion;
+
+        Equilibrium::set(pdf_field, velocity, *values_it, Cell{x, y, z});
+        ++values_it;
+      }
+    }
+  }
+}
+} // namespace Density
+
+namespace Velocity {
+inline void set(GhostLayerField<double, uint_t{19u}> *pdf_field,
+                GhostLayerField<double, uint_t{3u}> const *force_field,
+                Vector3<double> const &u, Cell const &cell) {
+  const double &xyz0 = pdf_field->get(cell, uint_t{0u});
+  const double f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+  const double f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+  const double f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+  const double f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+  const double f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+  const double f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+  const double f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+  const double f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+  const double f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+  const double f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+  const double f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+  const double f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+  const double f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+  const double f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+  const double f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+  const double f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+  const double f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+  const double f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+  const double f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+  const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+  const double vel1Term = f_1 + f_11 + f_15 + f_7;
+  const double vel2Term = f_12 + f_13 + f_5;
+  const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term +
+                     vel1Term + vel2Term;
+
+  const auto x = cell.x();
+  const auto y = cell.y();
+  const auto z = cell.z();
+  const double u_0 =
+      -force_field->get(x, y, z, 0) * 0.50000000000000000 / rho + u[0];
+  const double u_1 =
+      -force_field->get(x, y, z, 1) * 0.50000000000000000 / rho + u[1];
+  const double u_2 =
+      -force_field->get(x, y, z, 2) * 0.50000000000000000 / rho + u[2];
+
+  Equilibrium::set(pdf_field, Vector3<double>(u_0, u_1, u_2), rho, cell);
+}
+} // namespace Velocity
+
+namespace MomentumDensity {
+inline Vector3<double>
+reduce(GhostLayerField<double, uint_t{19u}> const *pdf_field,
+       GhostLayerField<double, uint_t{3u}> const *force_field) {
+  Vector3<double> momentumDensity(double{0});
+  WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, {
+    const double &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+    const double f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+    const double f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+    const double f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+    const double f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+    const double f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+    const double f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+    const double f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+    const double f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+    const double f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+    const double f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+    const double f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+    const double f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+    const double f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+    const double f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+    const double f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+    const double f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+    const double f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+    const double f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+    const double f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+    const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+    const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
+    const double vel1Term = f_1 + f_11 + f_15 + f_7;
+    const double momdensity_1 =
+        -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
+    const double vel2Term = f_12 + f_13 + f_5;
+    const double momdensity_2 =
+        f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
+    const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term +
+                       vel1Term + vel2Term;
+    const double md_0 =
+        force_field->get(x, y, z, 0) * 0.50000000000000000 + momdensity_0;
+    const double md_1 =
+        force_field->get(x, y, z, 1) * 0.50000000000000000 + momdensity_1;
+    const double md_2 =
+        force_field->get(x, y, z, 2) * 0.50000000000000000 + momdensity_2;
+
+    momentumDensity[0u] += md_0;
+    momentumDensity[1u] += md_1;
+    momentumDensity[2u] += md_2;
+  });
+  return momentumDensity;
+}
+} // namespace MomentumDensity
+
+namespace PressureTensor {
+inline Matrix3<double>
+get(GhostLayerField<double, uint_t{19u}> const *pdf_field, Cell const &cell) {
+  const double &xyz0 = pdf_field->get(cell, uint_t{0u});
+  const double f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+  const double f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+  const double f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+  const double f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+  const double f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+  const double f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+  const double f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+  const double f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+  const double f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+  const double f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+  const double f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+  const double f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+  const double f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+  const double f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+  const double f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+  const double f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+  const double f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+  const double f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+  const double f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+  const double p_0 =
+      f_10 + f_13 + f_14 + f_17 + f_18 + f_3 + f_4 + f_7 + f_8 + f_9;
+  const double p_1 = -f_10 - f_7 + f_8 + f_9;
+  const double p_2 = -f_13 + f_14 + f_17 - f_18;
+  const double p_3 = -f_10 - f_7 + f_8 + f_9;
+  const double p_4 =
+      f_1 + f_10 + f_11 + f_12 + f_15 + f_16 + f_2 + f_7 + f_8 + f_9;
+  const double p_5 = f_11 - f_12 - f_15 + f_16;
+  const double p_6 = -f_13 + f_14 + f_17 - f_18;
+  const double p_7 = f_11 - f_12 - f_15 + f_16;
+  const double p_8 =
+      f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18 + f_5 + f_6;
+
+  Matrix3<double> pressureTensor;
+  pressureTensor[0u] = p_0;
+  pressureTensor[1u] = p_1;
+  pressureTensor[2u] = p_2;
+
+  pressureTensor[3u] = p_3;
+  pressureTensor[4u] = p_4;
+  pressureTensor[5u] = p_5;
+
+  pressureTensor[6u] = p_6;
+  pressureTensor[7u] = p_7;
+  pressureTensor[8u] = p_8;
+
+  return pressureTensor;
+}
+} // namespace PressureTensor
+
+} // namespace accessor
+} // namespace lbm
+} // namespace walberla
+
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h
new file mode 100644
index 00000000000..1790c5b984f
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h
@@ -0,0 +1,834 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ * Copyright (C) 2020 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+/**
+ * @file
+ * Lattice field accessors.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/a16141524c58ab88386e2a0f8fdd7c63c5edd704/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+#include <core/cell/Cell.h>
+#include <core/cell/CellInterval.h>
+#include <core/math/Matrix3.h>
+#include <core/math/Vector3.h>
+
+#include <field/GhostLayerField.h>
+#include <stencil/D3Q19.h>
+
+#include <array>
+#include <cassert>
+#include <tuple>
+#include <vector>
+
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#endif
+
+namespace walberla {
+namespace lbm {
+namespace accessor {
+
+namespace Population {
+inline std::array<float, 19u>
+get(GhostLayerField<float, uint_t{19u}> const *pdf_field, Cell const &cell) {
+  float const &xyz0 = pdf_field->get(cell, uint_t{0u});
+  std::array<float, 19u> pop;
+  pop[0u] = pdf_field->getF(&xyz0, uint_t{0u});
+  pop[1u] = pdf_field->getF(&xyz0, uint_t{1u});
+  pop[2u] = pdf_field->getF(&xyz0, uint_t{2u});
+  pop[3u] = pdf_field->getF(&xyz0, uint_t{3u});
+  pop[4u] = pdf_field->getF(&xyz0, uint_t{4u});
+  pop[5u] = pdf_field->getF(&xyz0, uint_t{5u});
+  pop[6u] = pdf_field->getF(&xyz0, uint_t{6u});
+  pop[7u] = pdf_field->getF(&xyz0, uint_t{7u});
+  pop[8u] = pdf_field->getF(&xyz0, uint_t{8u});
+  pop[9u] = pdf_field->getF(&xyz0, uint_t{9u});
+  pop[10u] = pdf_field->getF(&xyz0, uint_t{10u});
+  pop[11u] = pdf_field->getF(&xyz0, uint_t{11u});
+  pop[12u] = pdf_field->getF(&xyz0, uint_t{12u});
+  pop[13u] = pdf_field->getF(&xyz0, uint_t{13u});
+  pop[14u] = pdf_field->getF(&xyz0, uint_t{14u});
+  pop[15u] = pdf_field->getF(&xyz0, uint_t{15u});
+  pop[16u] = pdf_field->getF(&xyz0, uint_t{16u});
+  pop[17u] = pdf_field->getF(&xyz0, uint_t{17u});
+  pop[18u] = pdf_field->getF(&xyz0, uint_t{18u});
+  return pop;
+}
+
+inline void set(GhostLayerField<float, uint_t{19u}> *pdf_field,
+                std::array<float, 19u> const &pop, Cell const &cell) {
+  float &xyz0 = pdf_field->get(cell, uint_t{0u});
+  pdf_field->getF(&xyz0, uint_t{0u}) = pop[0u];
+  pdf_field->getF(&xyz0, uint_t{1u}) = pop[1u];
+  pdf_field->getF(&xyz0, uint_t{2u}) = pop[2u];
+  pdf_field->getF(&xyz0, uint_t{3u}) = pop[3u];
+  pdf_field->getF(&xyz0, uint_t{4u}) = pop[4u];
+  pdf_field->getF(&xyz0, uint_t{5u}) = pop[5u];
+  pdf_field->getF(&xyz0, uint_t{6u}) = pop[6u];
+  pdf_field->getF(&xyz0, uint_t{7u}) = pop[7u];
+  pdf_field->getF(&xyz0, uint_t{8u}) = pop[8u];
+  pdf_field->getF(&xyz0, uint_t{9u}) = pop[9u];
+  pdf_field->getF(&xyz0, uint_t{10u}) = pop[10u];
+  pdf_field->getF(&xyz0, uint_t{11u}) = pop[11u];
+  pdf_field->getF(&xyz0, uint_t{12u}) = pop[12u];
+  pdf_field->getF(&xyz0, uint_t{13u}) = pop[13u];
+  pdf_field->getF(&xyz0, uint_t{14u}) = pop[14u];
+  pdf_field->getF(&xyz0, uint_t{15u}) = pop[15u];
+  pdf_field->getF(&xyz0, uint_t{16u}) = pop[16u];
+  pdf_field->getF(&xyz0, uint_t{17u}) = pop[17u];
+  pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u];
+}
+
+inline void broadcast(GhostLayerField<float, uint_t{19u}> *pdf_field,
+                      std::array<float, 19u> const &pop) {
+  WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(pdf_field, {
+    float &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+    pdf_field->getF(&xyz0, uint_t{0u}) = pop[0u];
+    pdf_field->getF(&xyz0, uint_t{1u}) = pop[1u];
+    pdf_field->getF(&xyz0, uint_t{2u}) = pop[2u];
+    pdf_field->getF(&xyz0, uint_t{3u}) = pop[3u];
+    pdf_field->getF(&xyz0, uint_t{4u}) = pop[4u];
+    pdf_field->getF(&xyz0, uint_t{5u}) = pop[5u];
+    pdf_field->getF(&xyz0, uint_t{6u}) = pop[6u];
+    pdf_field->getF(&xyz0, uint_t{7u}) = pop[7u];
+    pdf_field->getF(&xyz0, uint_t{8u}) = pop[8u];
+    pdf_field->getF(&xyz0, uint_t{9u}) = pop[9u];
+    pdf_field->getF(&xyz0, uint_t{10u}) = pop[10u];
+    pdf_field->getF(&xyz0, uint_t{11u}) = pop[11u];
+    pdf_field->getF(&xyz0, uint_t{12u}) = pop[12u];
+    pdf_field->getF(&xyz0, uint_t{13u}) = pop[13u];
+    pdf_field->getF(&xyz0, uint_t{14u}) = pop[14u];
+    pdf_field->getF(&xyz0, uint_t{15u}) = pop[15u];
+    pdf_field->getF(&xyz0, uint_t{16u}) = pop[16u];
+    pdf_field->getF(&xyz0, uint_t{17u}) = pop[17u];
+    pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u];
+  });
+}
+
+inline std::vector<float>
+get(GhostLayerField<float, uint_t{19u}> const *pdf_field,
+    CellInterval const &ci) {
+  std::vector<float> out;
+  out.reserve(ci.numCells() * uint_t(19u));
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        float const &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{0u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{1u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{2u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{3u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{4u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{5u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{6u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{7u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{8u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{9u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{10u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{11u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{12u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{13u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{14u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{15u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{16u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{17u}));
+        out.emplace_back(pdf_field->getF(&xyz0, uint_t{18u}));
+      }
+    }
+  }
+  return out;
+}
+
+inline void set(GhostLayerField<float, uint_t{19u}> *pdf_field,
+                std::vector<float> const &values, CellInterval const &ci) {
+  assert(uint_c(values.size()) == ci.numCells() * uint_t(19u));
+  auto values_ptr = values.data();
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        float &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+        pdf_field->getF(&xyz0, uint_t{0u}) = values_ptr[0u];
+        pdf_field->getF(&xyz0, uint_t{1u}) = values_ptr[1u];
+        pdf_field->getF(&xyz0, uint_t{2u}) = values_ptr[2u];
+        pdf_field->getF(&xyz0, uint_t{3u}) = values_ptr[3u];
+        pdf_field->getF(&xyz0, uint_t{4u}) = values_ptr[4u];
+        pdf_field->getF(&xyz0, uint_t{5u}) = values_ptr[5u];
+        pdf_field->getF(&xyz0, uint_t{6u}) = values_ptr[6u];
+        pdf_field->getF(&xyz0, uint_t{7u}) = values_ptr[7u];
+        pdf_field->getF(&xyz0, uint_t{8u}) = values_ptr[8u];
+        pdf_field->getF(&xyz0, uint_t{9u}) = values_ptr[9u];
+        pdf_field->getF(&xyz0, uint_t{10u}) = values_ptr[10u];
+        pdf_field->getF(&xyz0, uint_t{11u}) = values_ptr[11u];
+        pdf_field->getF(&xyz0, uint_t{12u}) = values_ptr[12u];
+        pdf_field->getF(&xyz0, uint_t{13u}) = values_ptr[13u];
+        pdf_field->getF(&xyz0, uint_t{14u}) = values_ptr[14u];
+        pdf_field->getF(&xyz0, uint_t{15u}) = values_ptr[15u];
+        pdf_field->getF(&xyz0, uint_t{16u}) = values_ptr[16u];
+        pdf_field->getF(&xyz0, uint_t{17u}) = values_ptr[17u];
+        pdf_field->getF(&xyz0, uint_t{18u}) = values_ptr[18u];
+        values_ptr += 19u;
+      }
+    }
+  }
+}
+} // namespace Population
+
+namespace Vector {
+inline Vector3<float> get(GhostLayerField<float, uint_t{3u}> const *vec_field,
+                          Cell const &cell) {
+  const float &xyz0 = vec_field->get(cell, uint_t{0u});
+  Vector3<float> vec;
+  vec[0] = vec_field->getF(&xyz0, uint_t{0u});
+  vec[1] = vec_field->getF(&xyz0, uint_t{1u});
+  vec[2] = vec_field->getF(&xyz0, uint_t{2u});
+  return vec;
+}
+
+inline void set(GhostLayerField<float, uint_t{3u}> *vec_field,
+                Vector3<float> const &vec, Cell const &cell) {
+  float &xyz0 = vec_field->get(cell, uint_t{0u});
+  vec_field->getF(&xyz0, uint_t{0u}) = vec[0u];
+  vec_field->getF(&xyz0, uint_t{1u}) = vec[1u];
+  vec_field->getF(&xyz0, uint_t{2u}) = vec[2u];
+}
+
+inline void add(GhostLayerField<float, uint_t{3u}> *vec_field,
+                Vector3<float> const &vec, Cell const &cell) {
+  float &xyz0 = vec_field->get(cell, uint_t{0u});
+  vec_field->getF(&xyz0, uint_t{0u}) += vec[0u];
+  vec_field->getF(&xyz0, uint_t{1u}) += vec[1u];
+  vec_field->getF(&xyz0, uint_t{2u}) += vec[2u];
+}
+
+inline void broadcast(GhostLayerField<float, uint_t{3u}> *vec_field,
+                      Vector3<float> const &vec) {
+  WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(vec_field, {
+    float &xyz0 = vec_field->get(x, y, z, uint_t{0u});
+    vec_field->getF(&xyz0, uint_t{0u}) = vec[0u];
+    vec_field->getF(&xyz0, uint_t{1u}) = vec[1u];
+    vec_field->getF(&xyz0, uint_t{2u}) = vec[2u];
+  });
+}
+
+inline void add_to_all(GhostLayerField<float, uint_t{3u}> *vec_field,
+                       Vector3<float> const &vec) {
+  WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(vec_field, {
+    float &xyz0 = vec_field->get(x, y, z, uint_t{0u});
+    vec_field->getF(&xyz0, uint_t{0u}) += vec[0u];
+    vec_field->getF(&xyz0, uint_t{1u}) += vec[1u];
+    vec_field->getF(&xyz0, uint_t{2u}) += vec[2u];
+  });
+}
+
+inline std::vector<float>
+get(GhostLayerField<float, uint_t{3u}> const *vec_field,
+    CellInterval const &ci) {
+  std::vector<float> out;
+  out.reserve(ci.numCells() * uint_t(3u));
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        const float &xyz0 = vec_field->get(x, y, z, uint_t{0u});
+        out.emplace_back(vec_field->getF(&xyz0, uint_t{0u}));
+        out.emplace_back(vec_field->getF(&xyz0, uint_t{1u}));
+        out.emplace_back(vec_field->getF(&xyz0, uint_t{2u}));
+      }
+    }
+  }
+  return out;
+}
+
+inline void set(GhostLayerField<float, uint_t{3u}> *vec_field,
+                std::vector<float> const &values, CellInterval const &ci) {
+  assert(uint_c(values.size()) == ci.numCells() * uint_t(3u));
+  auto values_ptr = values.data();
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        float &xyz0 = vec_field->get(x, y, z, uint_t{0u});
+        vec_field->getF(&xyz0, uint_t{0u}) = values_ptr[0u];
+        vec_field->getF(&xyz0, uint_t{1u}) = values_ptr[1u];
+        vec_field->getF(&xyz0, uint_t{2u}) = values_ptr[2u];
+        values_ptr += 3u;
+      }
+    }
+  }
+}
+} // namespace Vector
+
+namespace EquilibriumDistribution {
+inline float get(stencil::Direction const direction,
+                 Vector3<float> const &u = Vector3<float>(float(0.0)),
+                 float rho = float(1.0)) {
+
+  using namespace stencil;
+  switch (direction) {
+  case C:
+    return rho * -0.33333333333333331f * (u[0] * u[0]) +
+           rho * -0.33333333333333331f * (u[1] * u[1]) +
+           rho * -0.33333333333333331f * (u[2] * u[2]) +
+           rho * 0.33333333333333331f;
+  case N:
+    return rho * -0.16666666666666666f * (u[0] * u[0]) +
+           rho * -0.16666666666666666f * (u[2] * u[2]) +
+           rho * 0.055555555555555552f + rho * 0.16666666666666666f * u[1] +
+           rho * 0.16666666666666666f * (u[1] * u[1]);
+  case S:
+    return rho * -0.16666666666666666f * u[1] +
+           rho * -0.16666666666666666f * (u[0] * u[0]) +
+           rho * -0.16666666666666666f * (u[2] * u[2]) +
+           rho * 0.055555555555555552f +
+           rho * 0.16666666666666666f * (u[1] * u[1]);
+  case W:
+    return rho * -0.16666666666666666f * u[0] +
+           rho * -0.16666666666666666f * (u[1] * u[1]) +
+           rho * -0.16666666666666666f * (u[2] * u[2]) +
+           rho * 0.055555555555555552f +
+           rho * 0.16666666666666666f * (u[0] * u[0]);
+  case E:
+    return rho * -0.16666666666666666f * (u[1] * u[1]) +
+           rho * -0.16666666666666666f * (u[2] * u[2]) +
+           rho * 0.055555555555555552f + rho * 0.16666666666666666f * u[0] +
+           rho * 0.16666666666666666f * (u[0] * u[0]);
+  case T:
+    return rho * -0.16666666666666666f * (u[0] * u[0]) +
+           rho * -0.16666666666666666f * (u[1] * u[1]) +
+           rho * 0.055555555555555552f + rho * 0.16666666666666666f * u[2] +
+           rho * 0.16666666666666666f * (u[2] * u[2]);
+  case B:
+    return rho * -0.16666666666666666f * u[2] +
+           rho * -0.16666666666666666f * (u[0] * u[0]) +
+           rho * -0.16666666666666666f * (u[1] * u[1]) +
+           rho * 0.055555555555555552f +
+           rho * 0.16666666666666666f * (u[2] * u[2]);
+  case NW:
+    return rho * -0.083333333333333329f * u[0] + rho * -0.25f * u[0] * u[1] +
+           rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[1] +
+           rho * 0.083333333333333329f * (u[0] * u[0]) +
+           rho * 0.083333333333333329f * (u[1] * u[1]);
+  case NE:
+    return rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] +
+           rho * 0.083333333333333329f * u[1] +
+           rho * 0.083333333333333329f * (u[0] * u[0]) +
+           rho * 0.083333333333333329f * (u[1] * u[1]) +
+           rho * 0.25f * u[0] * u[1];
+  case SW:
+    return rho * -0.083333333333333329f * u[0] +
+           rho * -0.083333333333333329f * u[1] + rho * 0.027777777777777776f +
+           rho * 0.083333333333333329f * (u[0] * u[0]) +
+           rho * 0.083333333333333329f * (u[1] * u[1]) +
+           rho * 0.25f * u[0] * u[1];
+  case SE:
+    return rho * -0.083333333333333329f * u[1] + rho * -0.25f * u[0] * u[1] +
+           rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] +
+           rho * 0.083333333333333329f * (u[0] * u[0]) +
+           rho * 0.083333333333333329f * (u[1] * u[1]);
+  case TN:
+    return rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[1] +
+           rho * 0.083333333333333329f * u[2] +
+           rho * 0.083333333333333329f * (u[1] * u[1]) +
+           rho * 0.083333333333333329f * (u[2] * u[2]) +
+           rho * 0.25f * u[1] * u[2];
+  case TS:
+    return rho * -0.083333333333333329f * u[1] + rho * -0.25f * u[1] * u[2] +
+           rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[2] +
+           rho * 0.083333333333333329f * (u[1] * u[1]) +
+           rho * 0.083333333333333329f * (u[2] * u[2]);
+  case TW:
+    return rho * -0.083333333333333329f * u[0] + rho * -0.25f * u[0] * u[2] +
+           rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[2] +
+           rho * 0.083333333333333329f * (u[0] * u[0]) +
+           rho * 0.083333333333333329f * (u[2] * u[2]);
+  case TE:
+    return rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] +
+           rho * 0.083333333333333329f * u[2] +
+           rho * 0.083333333333333329f * (u[0] * u[0]) +
+           rho * 0.083333333333333329f * (u[2] * u[2]) +
+           rho * 0.25f * u[0] * u[2];
+  case BN:
+    return rho * -0.083333333333333329f * u[2] + rho * -0.25f * u[1] * u[2] +
+           rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[1] +
+           rho * 0.083333333333333329f * (u[1] * u[1]) +
+           rho * 0.083333333333333329f * (u[2] * u[2]);
+  case BS:
+    return rho * -0.083333333333333329f * u[1] +
+           rho * -0.083333333333333329f * u[2] + rho * 0.027777777777777776f +
+           rho * 0.083333333333333329f * (u[1] * u[1]) +
+           rho * 0.083333333333333329f * (u[2] * u[2]) +
+           rho * 0.25f * u[1] * u[2];
+  case BW:
+    return rho * -0.083333333333333329f * u[0] +
+           rho * -0.083333333333333329f * u[2] + rho * 0.027777777777777776f +
+           rho * 0.083333333333333329f * (u[0] * u[0]) +
+           rho * 0.083333333333333329f * (u[2] * u[2]) +
+           rho * 0.25f * u[0] * u[2];
+  case BE:
+    return rho * -0.083333333333333329f * u[2] + rho * -0.25f * u[0] * u[2] +
+           rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] +
+           rho * 0.083333333333333329f * (u[0] * u[0]) +
+           rho * 0.083333333333333329f * (u[2] * u[2]);
+  default:
+    WALBERLA_ABORT("Invalid Direction")
+  }
+}
+} // namespace EquilibriumDistribution
+
+namespace Equilibrium {
+inline void set(GhostLayerField<float, uint_t{19u}> *pdf_field,
+                Vector3<float> const &u, float const rho, Cell const &cell) {
+
+  float &xyz0 = pdf_field->get(cell, uint_t{0u});
+  pdf_field->getF(&xyz0, uint_t{0u}) =
+      rho * -0.33333333333333331f * (u[0] * u[0]) +
+      rho * -0.33333333333333331f * (u[1] * u[1]) +
+      rho * -0.33333333333333331f * (u[2] * u[2]) + rho * 0.33333333333333331f;
+  pdf_field->getF(&xyz0, uint_t{1u}) =
+      rho * -0.16666666666666666f * (u[0] * u[0]) +
+      rho * -0.16666666666666666f * (u[2] * u[2]) +
+      rho * 0.055555555555555552f + rho * 0.16666666666666666f * u[1] +
+      rho * 0.16666666666666666f * (u[1] * u[1]);
+  pdf_field->getF(&xyz0, uint_t{2u}) =
+      rho * -0.16666666666666666f * u[1] +
+      rho * -0.16666666666666666f * (u[0] * u[0]) +
+      rho * -0.16666666666666666f * (u[2] * u[2]) +
+      rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u[1] * u[1]);
+  pdf_field->getF(&xyz0, uint_t{3u}) =
+      rho * -0.16666666666666666f * u[0] +
+      rho * -0.16666666666666666f * (u[1] * u[1]) +
+      rho * -0.16666666666666666f * (u[2] * u[2]) +
+      rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u[0] * u[0]);
+  pdf_field->getF(&xyz0, uint_t{4u}) =
+      rho * -0.16666666666666666f * (u[1] * u[1]) +
+      rho * -0.16666666666666666f * (u[2] * u[2]) +
+      rho * 0.055555555555555552f + rho * 0.16666666666666666f * u[0] +
+      rho * 0.16666666666666666f * (u[0] * u[0]);
+  pdf_field->getF(&xyz0, uint_t{5u}) =
+      rho * -0.16666666666666666f * (u[0] * u[0]) +
+      rho * -0.16666666666666666f * (u[1] * u[1]) +
+      rho * 0.055555555555555552f + rho * 0.16666666666666666f * u[2] +
+      rho * 0.16666666666666666f * (u[2] * u[2]);
+  pdf_field->getF(&xyz0, uint_t{6u}) =
+      rho * -0.16666666666666666f * u[2] +
+      rho * -0.16666666666666666f * (u[0] * u[0]) +
+      rho * -0.16666666666666666f * (u[1] * u[1]) +
+      rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u[2] * u[2]);
+  pdf_field->getF(&xyz0, uint_t{7u}) =
+      rho * -0.083333333333333329f * u[0] + rho * -0.25f * u[0] * u[1] +
+      rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[1] +
+      rho * 0.083333333333333329f * (u[0] * u[0]) +
+      rho * 0.083333333333333329f * (u[1] * u[1]);
+  pdf_field->getF(&xyz0, uint_t{8u}) =
+      rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] +
+      rho * 0.083333333333333329f * u[1] +
+      rho * 0.083333333333333329f * (u[0] * u[0]) +
+      rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.25f * u[0] * u[1];
+  pdf_field->getF(&xyz0, uint_t{9u}) =
+      rho * -0.083333333333333329f * u[0] +
+      rho * -0.083333333333333329f * u[1] + rho * 0.027777777777777776f +
+      rho * 0.083333333333333329f * (u[0] * u[0]) +
+      rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.25f * u[0] * u[1];
+  pdf_field->getF(&xyz0, uint_t{10u}) =
+      rho * -0.083333333333333329f * u[1] + rho * -0.25f * u[0] * u[1] +
+      rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] +
+      rho * 0.083333333333333329f * (u[0] * u[0]) +
+      rho * 0.083333333333333329f * (u[1] * u[1]);
+  pdf_field->getF(&xyz0, uint_t{11u}) =
+      rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[1] +
+      rho * 0.083333333333333329f * u[2] +
+      rho * 0.083333333333333329f * (u[1] * u[1]) +
+      rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[1] * u[2];
+  pdf_field->getF(&xyz0, uint_t{12u}) =
+      rho * -0.083333333333333329f * u[1] + rho * -0.25f * u[1] * u[2] +
+      rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[2] +
+      rho * 0.083333333333333329f * (u[1] * u[1]) +
+      rho * 0.083333333333333329f * (u[2] * u[2]);
+  pdf_field->getF(&xyz0, uint_t{13u}) =
+      rho * -0.083333333333333329f * u[0] + rho * -0.25f * u[0] * u[2] +
+      rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[2] +
+      rho * 0.083333333333333329f * (u[0] * u[0]) +
+      rho * 0.083333333333333329f * (u[2] * u[2]);
+  pdf_field->getF(&xyz0, uint_t{14u}) =
+      rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] +
+      rho * 0.083333333333333329f * u[2] +
+      rho * 0.083333333333333329f * (u[0] * u[0]) +
+      rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[0] * u[2];
+  pdf_field->getF(&xyz0, uint_t{15u}) =
+      rho * -0.083333333333333329f * u[2] + rho * -0.25f * u[1] * u[2] +
+      rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[1] +
+      rho * 0.083333333333333329f * (u[1] * u[1]) +
+      rho * 0.083333333333333329f * (u[2] * u[2]);
+  pdf_field->getF(&xyz0, uint_t{16u}) =
+      rho * -0.083333333333333329f * u[1] +
+      rho * -0.083333333333333329f * u[2] + rho * 0.027777777777777776f +
+      rho * 0.083333333333333329f * (u[1] * u[1]) +
+      rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[1] * u[2];
+  pdf_field->getF(&xyz0, uint_t{17u}) =
+      rho * -0.083333333333333329f * u[0] +
+      rho * -0.083333333333333329f * u[2] + rho * 0.027777777777777776f +
+      rho * 0.083333333333333329f * (u[0] * u[0]) +
+      rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[0] * u[2];
+  pdf_field->getF(&xyz0, uint_t{18u}) =
+      rho * -0.083333333333333329f * u[2] + rho * -0.25f * u[0] * u[2] +
+      rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] +
+      rho * 0.083333333333333329f * (u[0] * u[0]) +
+      rho * 0.083333333333333329f * (u[2] * u[2]);
+}
+} // namespace Equilibrium
+
+namespace Density {
+inline float get(GhostLayerField<float, uint_t{19u}> const *pdf_field,
+                 Cell const &cell) {
+  const float &xyz0 = pdf_field->get(cell, uint_t{0u});
+  const float f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+  const float f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+  const float f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+  const float f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+  const float f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+  const float f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+  const float f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+  const float f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+  const float f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+  const float f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+  const float f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+  const float f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+  const float f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+  const float f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+  const float f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+  const float f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+  const float f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+  const float f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+  const float f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+  const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+  const float vel1Term = f_1 + f_11 + f_15 + f_7;
+  const float vel2Term = f_12 + f_13 + f_5;
+  const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term +
+                    vel1Term + vel2Term;
+  return rho;
+}
+
+inline void set(GhostLayerField<float, uint_t{19u}> *pdf_field,
+                float const rho_in, Cell const &cell) {
+  const float &xyz0 = pdf_field->get(cell, uint_t{0u});
+  const float f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+  const float f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+  const float f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+  const float f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+  const float f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+  const float f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+  const float f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+  const float f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+  const float f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+  const float f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+  const float f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+  const float f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+  const float f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+  const float f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+  const float f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+  const float f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+  const float f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+  const float f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+  const float f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+  const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+  const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
+  const float vel1Term = f_1 + f_11 + f_15 + f_7;
+  const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
+  const float vel2Term = f_12 + f_13 + f_5;
+  const float momdensity_2 =
+      f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
+  const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term +
+                    vel1Term + vel2Term;
+
+  // calculate current velocity (before density change)
+  const float conversion = float(1) / rho;
+  Vector3<float> velocity;
+  velocity[0u] = momdensity_0 * conversion;
+  velocity[1u] = momdensity_1 * conversion;
+  velocity[2u] = momdensity_2 * conversion;
+
+  Equilibrium::set(pdf_field, velocity, rho_in, cell);
+}
+
+inline std::vector<float>
+get(GhostLayerField<float, uint_t{19u}> const *pdf_field,
+    CellInterval const &ci) {
+  std::vector<float> out;
+  out.reserve(ci.numCells());
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        const float &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+        const float f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+        const float f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+        const float f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+        const float f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+        const float f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+        const float f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+        const float f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+        const float f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+        const float f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+        const float f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+        const float f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+        const float f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+        const float f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+        const float f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+        const float f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+        const float f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+        const float f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+        const float f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+        const float f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+        const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+        const float vel1Term = f_1 + f_11 + f_15 + f_7;
+        const float vel2Term = f_12 + f_13 + f_5;
+        const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term +
+                          vel1Term + vel2Term;
+        out.emplace_back(rho);
+      }
+    }
+  }
+  return out;
+}
+
+inline void set(GhostLayerField<float, uint_t{19u}> *pdf_field,
+                std::vector<float> const &values, CellInterval const &ci) {
+  assert(uint_c(values.size()) == ci.numCells());
+  auto values_it = values.begin();
+  for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+    for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+      for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+        const float &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+        const float f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+        const float f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+        const float f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+        const float f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+        const float f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+        const float f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+        const float f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+        const float f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+        const float f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+        const float f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+        const float f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+        const float f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+        const float f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+        const float f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+        const float f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+        const float f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+        const float f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+        const float f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+        const float f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+        const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+        const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
+        const float vel1Term = f_1 + f_11 + f_15 + f_7;
+        const float momdensity_1 =
+            -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
+        const float vel2Term = f_12 + f_13 + f_5;
+        const float momdensity_2 =
+            f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
+        const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term +
+                          vel1Term + vel2Term;
+
+        // calculate current velocity (before density change)
+        const float conversion = float(1) / rho;
+        Vector3<float> velocity;
+        velocity[0u] = momdensity_0 * conversion;
+        velocity[1u] = momdensity_1 * conversion;
+        velocity[2u] = momdensity_2 * conversion;
+
+        Equilibrium::set(pdf_field, velocity, *values_it, Cell{x, y, z});
+        ++values_it;
+      }
+    }
+  }
+}
+} // namespace Density
+
+namespace Velocity {
+inline void set(GhostLayerField<float, uint_t{19u}> *pdf_field,
+                GhostLayerField<float, uint_t{3u}> const *force_field,
+                Vector3<float> const &u, Cell const &cell) {
+  const float &xyz0 = pdf_field->get(cell, uint_t{0u});
+  const float f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+  const float f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+  const float f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+  const float f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+  const float f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+  const float f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+  const float f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+  const float f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+  const float f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+  const float f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+  const float f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+  const float f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+  const float f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+  const float f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+  const float f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+  const float f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+  const float f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+  const float f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+  const float f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+  const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+  const float vel1Term = f_1 + f_11 + f_15 + f_7;
+  const float vel2Term = f_12 + f_13 + f_5;
+  const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term +
+                    vel1Term + vel2Term;
+
+  const auto x = cell.x();
+  const auto y = cell.y();
+  const auto z = cell.z();
+  const float u_0 =
+      -force_field->get(x, y, z, 0) * 0.50000000000000000f / rho + u[0];
+  const float u_1 =
+      -force_field->get(x, y, z, 1) * 0.50000000000000000f / rho + u[1];
+  const float u_2 =
+      -force_field->get(x, y, z, 2) * 0.50000000000000000f / rho + u[2];
+
+  Equilibrium::set(pdf_field, Vector3<float>(u_0, u_1, u_2), rho, cell);
+}
+} // namespace Velocity
+
+namespace MomentumDensity {
+inline Vector3<float>
+reduce(GhostLayerField<float, uint_t{19u}> const *pdf_field,
+       GhostLayerField<float, uint_t{3u}> const *force_field) {
+  Vector3<float> momentumDensity(float{0});
+  WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, {
+    const float &xyz0 = pdf_field->get(x, y, z, uint_t{0u});
+    const float f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+    const float f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+    const float f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+    const float f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+    const float f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+    const float f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+    const float f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+    const float f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+    const float f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+    const float f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+    const float f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+    const float f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+    const float f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+    const float f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+    const float f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+    const float f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+    const float f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+    const float f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+    const float f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+    const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
+    const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
+    const float vel1Term = f_1 + f_11 + f_15 + f_7;
+    const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
+    const float vel2Term = f_12 + f_13 + f_5;
+    const float momdensity_2 =
+        f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
+    const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term +
+                      vel1Term + vel2Term;
+    const float md_0 =
+        force_field->get(x, y, z, 0) * 0.50000000000000000f + momdensity_0;
+    const float md_1 =
+        force_field->get(x, y, z, 1) * 0.50000000000000000f + momdensity_1;
+    const float md_2 =
+        force_field->get(x, y, z, 2) * 0.50000000000000000f + momdensity_2;
+
+    momentumDensity[0u] += md_0;
+    momentumDensity[1u] += md_1;
+    momentumDensity[2u] += md_2;
+  });
+  return momentumDensity;
+}
+} // namespace MomentumDensity
+
+namespace PressureTensor {
+inline Matrix3<float> get(GhostLayerField<float, uint_t{19u}> const *pdf_field,
+                          Cell const &cell) {
+  const float &xyz0 = pdf_field->get(cell, uint_t{0u});
+  const float f_0 = pdf_field->getF(&xyz0, uint_t{0u});
+  const float f_1 = pdf_field->getF(&xyz0, uint_t{1u});
+  const float f_2 = pdf_field->getF(&xyz0, uint_t{2u});
+  const float f_3 = pdf_field->getF(&xyz0, uint_t{3u});
+  const float f_4 = pdf_field->getF(&xyz0, uint_t{4u});
+  const float f_5 = pdf_field->getF(&xyz0, uint_t{5u});
+  const float f_6 = pdf_field->getF(&xyz0, uint_t{6u});
+  const float f_7 = pdf_field->getF(&xyz0, uint_t{7u});
+  const float f_8 = pdf_field->getF(&xyz0, uint_t{8u});
+  const float f_9 = pdf_field->getF(&xyz0, uint_t{9u});
+  const float f_10 = pdf_field->getF(&xyz0, uint_t{10u});
+  const float f_11 = pdf_field->getF(&xyz0, uint_t{11u});
+  const float f_12 = pdf_field->getF(&xyz0, uint_t{12u});
+  const float f_13 = pdf_field->getF(&xyz0, uint_t{13u});
+  const float f_14 = pdf_field->getF(&xyz0, uint_t{14u});
+  const float f_15 = pdf_field->getF(&xyz0, uint_t{15u});
+  const float f_16 = pdf_field->getF(&xyz0, uint_t{16u});
+  const float f_17 = pdf_field->getF(&xyz0, uint_t{17u});
+  const float f_18 = pdf_field->getF(&xyz0, uint_t{18u});
+  const float p_0 =
+      f_10 + f_13 + f_14 + f_17 + f_18 + f_3 + f_4 + f_7 + f_8 + f_9;
+  const float p_1 = -f_10 - f_7 + f_8 + f_9;
+  const float p_2 = -f_13 + f_14 + f_17 - f_18;
+  const float p_3 = -f_10 - f_7 + f_8 + f_9;
+  const float p_4 =
+      f_1 + f_10 + f_11 + f_12 + f_15 + f_16 + f_2 + f_7 + f_8 + f_9;
+  const float p_5 = f_11 - f_12 - f_15 + f_16;
+  const float p_6 = -f_13 + f_14 + f_17 - f_18;
+  const float p_7 = f_11 - f_12 - f_15 + f_16;
+  const float p_8 =
+      f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18 + f_5 + f_6;
+
+  Matrix3<float> pressureTensor;
+  pressureTensor[0u] = p_0;
+  pressureTensor[1u] = p_1;
+  pressureTensor[2u] = p_2;
+
+  pressureTensor[3u] = p_3;
+  pressureTensor[4u] = p_4;
+  pressureTensor[5u] = p_5;
+
+  pressureTensor[6u] = p_6;
+  pressureTensor[7u] = p_7;
+  pressureTensor[8u] = p_8;
+
+  return pressureTensor;
+}
+} // namespace PressureTensor
+
+} // namespace accessor
+} // namespace lbm
+} // namespace walberla
+
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterDoublePrecision.cpp
new file mode 100644
index 00000000000..5407a10dc6f
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterDoublePrecision.cpp
@@ -0,0 +1,234 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file InitialPDFsSetterDoublePrecision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "InitialPDFsSetterDoublePrecision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_2df07fce91f5444fc18533f996cd1a79 {
+static FUNC_PREFIX void initialpdfssetterdoubleprecision_initialpdfssetterdoubleprecision(double *RESTRICT const _data_force, double *RESTRICT _data_pdfs, double *RESTRICT const _data_velocity, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_velocity_0, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3, double rho_0) {
+  const double rho = rho_0;
+  for (int64_t ctr_2 = 0; ctr_2 < _size_force_2; ctr_2 += 1) {
+    double *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    double *RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2 * ctr_2;
+    double *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    double *RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2 * ctr_2 + _stride_velocity_3;
+    double *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    double *RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2 * ctr_2 + 2 * _stride_velocity_3;
+    double *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    double *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_force_1; ctr_1 += 1) {
+      double *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      double *RESTRICT _data_velocity_20_30_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_30;
+      double *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      double *RESTRICT _data_velocity_20_31_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_31;
+      double *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      double *RESTRICT _data_velocity_20_32_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_32;
+      double *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      double *RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_31;
+      double *RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_32;
+      double *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      double *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      double *RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_35;
+      double *RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_36;
+      double *RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_37;
+      double *RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_38;
+      double *RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_39;
+      double *RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_310;
+      double *RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_311;
+      double *RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_312;
+      double *RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_313;
+      double *RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_314;
+      double *RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_315;
+      double *RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_316;
+      double *RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_317;
+      double *RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_318;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_force_0; ctr_0 += 1) {
+        const double u_0 = -0.5 * ((1.0) / (rho)) * _data_force_20_30_10[_stride_force_0 * ctr_0] + _data_velocity_20_30_10[_stride_velocity_0 * ctr_0];
+        const double u_1 = -0.5 * ((1.0) / (rho)) * _data_force_20_31_10[_stride_force_0 * ctr_0] + _data_velocity_20_31_10[_stride_velocity_0 * ctr_0];
+        const double u_2 = -0.5 * ((1.0) / (rho)) * _data_force_20_32_10[_stride_force_0 * ctr_0] + _data_velocity_20_32_10[_stride_velocity_0 * ctr_0];
+        _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0] = rho * -0.33333333333333331 * (u_0 * u_0) + rho * -0.33333333333333331 * (u_1 * u_1) + rho * -0.33333333333333331 * (u_2 * u_2) + rho * 0.33333333333333331;
+        _data_pdfs_20_31_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * 0.16666666666666666 + rho * -0.16666666666666666 * (u_0 * u_0) + rho * -0.16666666666666666 * (u_2 * u_2) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_1 * u_1);
+        _data_pdfs_20_32_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * -0.16666666666666666 + rho * -0.16666666666666666 * (u_0 * u_0) + rho * -0.16666666666666666 * (u_2 * u_2) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_1 * u_1);
+        _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * -0.16666666666666666 + rho * -0.16666666666666666 * (u_1 * u_1) + rho * -0.16666666666666666 * (u_2 * u_2) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_0 * u_0);
+        _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * 0.16666666666666666 + rho * -0.16666666666666666 * (u_1 * u_1) + rho * -0.16666666666666666 * (u_2 * u_2) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_0 * u_0);
+        _data_pdfs_20_35_10[_stride_pdfs_0 * ctr_0] = rho * u_2 * 0.16666666666666666 + rho * -0.16666666666666666 * (u_0 * u_0) + rho * -0.16666666666666666 * (u_1 * u_1) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_2 * u_2);
+        _data_pdfs_20_36_10[_stride_pdfs_0 * ctr_0] = rho * u_2 * -0.16666666666666666 + rho * -0.16666666666666666 * (u_0 * u_0) + rho * -0.16666666666666666 * (u_1 * u_1) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_2 * u_2);
+        _data_pdfs_20_37_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * -0.25 + rho * u_0 * -0.083333333333333329 + rho * u_1 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_1 * u_1);
+        _data_pdfs_20_38_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * 0.25 + rho * u_0 * 0.083333333333333329 + rho * u_1 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_1 * u_1);
+        _data_pdfs_20_39_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * 0.25 + rho * u_0 * -0.083333333333333329 + rho * u_1 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_1 * u_1);
+        _data_pdfs_20_310_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * -0.25 + rho * u_0 * 0.083333333333333329 + rho * u_1 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_1 * u_1);
+        _data_pdfs_20_311_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * 0.25 + rho * u_1 * 0.083333333333333329 + rho * u_2 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_1 * u_1) + rho * 0.083333333333333329 * (u_2 * u_2);
+        _data_pdfs_20_312_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * -0.25 + rho * u_1 * -0.083333333333333329 + rho * u_2 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_1 * u_1) + rho * 0.083333333333333329 * (u_2 * u_2);
+        _data_pdfs_20_313_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * -0.25 + rho * u_0 * -0.083333333333333329 + rho * u_2 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_2 * u_2);
+        _data_pdfs_20_314_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * 0.25 + rho * u_0 * 0.083333333333333329 + rho * u_2 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_2 * u_2);
+        _data_pdfs_20_315_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * -0.25 + rho * u_1 * 0.083333333333333329 + rho * u_2 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_1 * u_1) + rho * 0.083333333333333329 * (u_2 * u_2);
+        _data_pdfs_20_316_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * 0.25 + rho * u_1 * -0.083333333333333329 + rho * u_2 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_1 * u_1) + rho * 0.083333333333333329 * (u_2 * u_2);
+        _data_pdfs_20_317_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * 0.25 + rho * u_0 * -0.083333333333333329 + rho * u_2 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_2 * u_2);
+        _data_pdfs_20_318_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * -0.25 + rho * u_0 * 0.083333333333333329 + rho * u_2 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_2 * u_2);
+      }
+    }
+  }
+}
+} // namespace internal_2df07fce91f5444fc18533f996cd1a79
+
+void InitialPDFsSetterDoublePrecision::run(IBlock *block) {
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+  auto velocity = block->getData<field::GhostLayerField<double, 3>>(velocityID);
+
+  auto &rho_0 = this->rho_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(velocity->nrOfGhostLayers()));
+  double *RESTRICT const _data_velocity = velocity->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_2df07fce91f5444fc18533f996cd1a79::initialpdfssetterdoubleprecision_initialpdfssetterdoubleprecision(_data_force, _data_pdfs, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3, rho_0);
+}
+
+void InitialPDFsSetterDoublePrecision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+  auto velocity = block->getData<field::GhostLayerField<double, 3>>(velocityID);
+
+  auto &rho_0 = this->rho_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(velocity->nrOfGhostLayers()));
+  double *RESTRICT const _data_velocity = velocity->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_2df07fce91f5444fc18533f996cd1a79::initialpdfssetterdoubleprecision_initialpdfssetterdoubleprecision(_data_force, _data_pdfs, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3, rho_0);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterDoublePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterDoublePrecision.h
new file mode 100644
index 00000000000..31b316dcbb8
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterDoublePrecision.h
@@ -0,0 +1,106 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file InitialPDFsSetterDoublePrecision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class InitialPDFsSetterDoublePrecision {
+public:
+  InitialPDFsSetterDoublePrecision(BlockDataID forceID_, BlockDataID pdfsID_,
+                                   BlockDataID velocityID_, double rho_0)
+      : forceID(forceID_), pdfsID(pdfsID_), velocityID(velocityID_),
+        rho_0_(rho_0){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<InitialPDFsSetterDoublePrecision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<InitialPDFsSetterDoublePrecision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  BlockDataID velocityID;
+  double rho_0_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterSinglePrecision.cpp
new file mode 100644
index 00000000000..0c963fb65e5
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterSinglePrecision.cpp
@@ -0,0 +1,234 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file InitialPDFsSetterSinglePrecision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "InitialPDFsSetterSinglePrecision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_b8085d63d6b7e842485134abbac511e8 {
+static FUNC_PREFIX void initialpdfssettersingleprecision_initialpdfssettersingleprecision(float *RESTRICT const _data_force, float *RESTRICT _data_pdfs, float *RESTRICT const _data_velocity, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_velocity_0, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3, float rho_0) {
+  const float rho = rho_0;
+  for (int64_t ctr_2 = 0; ctr_2 < _size_force_2; ctr_2 += 1) {
+    float *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    float *RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2 * ctr_2;
+    float *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    float *RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2 * ctr_2 + _stride_velocity_3;
+    float *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    float *RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2 * ctr_2 + 2 * _stride_velocity_3;
+    float *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    float *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3;
+    for (int64_t ctr_1 = 0; ctr_1 < _size_force_1; ctr_1 += 1) {
+      float *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      float *RESTRICT _data_velocity_20_30_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_30;
+      float *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      float *RESTRICT _data_velocity_20_31_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_31;
+      float *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      float *RESTRICT _data_velocity_20_32_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_32;
+      float *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      float *RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_31;
+      float *RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_32;
+      float *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      float *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      float *RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_35;
+      float *RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_36;
+      float *RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_37;
+      float *RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_38;
+      float *RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_39;
+      float *RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_310;
+      float *RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_311;
+      float *RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_312;
+      float *RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_313;
+      float *RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_314;
+      float *RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_315;
+      float *RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_316;
+      float *RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_317;
+      float *RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_318;
+      for (int64_t ctr_0 = 0; ctr_0 < _size_force_0; ctr_0 += 1) {
+        const float u_0 = -0.5f * ((1.0f) / (rho)) * _data_force_20_30_10[_stride_force_0 * ctr_0] + _data_velocity_20_30_10[_stride_velocity_0 * ctr_0];
+        const float u_1 = -0.5f * ((1.0f) / (rho)) * _data_force_20_31_10[_stride_force_0 * ctr_0] + _data_velocity_20_31_10[_stride_velocity_0 * ctr_0];
+        const float u_2 = -0.5f * ((1.0f) / (rho)) * _data_force_20_32_10[_stride_force_0 * ctr_0] + _data_velocity_20_32_10[_stride_velocity_0 * ctr_0];
+        _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0] = rho * -0.33333333333333331f * (u_0 * u_0) + rho * -0.33333333333333331f * (u_1 * u_1) + rho * -0.33333333333333331f * (u_2 * u_2) + rho * 0.33333333333333331f;
+        _data_pdfs_20_31_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * 0.16666666666666666f + rho * -0.16666666666666666f * (u_0 * u_0) + rho * -0.16666666666666666f * (u_2 * u_2) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u_1 * u_1);
+        _data_pdfs_20_32_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * -0.16666666666666666f + rho * -0.16666666666666666f * (u_0 * u_0) + rho * -0.16666666666666666f * (u_2 * u_2) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u_1 * u_1);
+        _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * -0.16666666666666666f + rho * -0.16666666666666666f * (u_1 * u_1) + rho * -0.16666666666666666f * (u_2 * u_2) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u_0 * u_0);
+        _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * 0.16666666666666666f + rho * -0.16666666666666666f * (u_1 * u_1) + rho * -0.16666666666666666f * (u_2 * u_2) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u_0 * u_0);
+        _data_pdfs_20_35_10[_stride_pdfs_0 * ctr_0] = rho * u_2 * 0.16666666666666666f + rho * -0.16666666666666666f * (u_0 * u_0) + rho * -0.16666666666666666f * (u_1 * u_1) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u_2 * u_2);
+        _data_pdfs_20_36_10[_stride_pdfs_0 * ctr_0] = rho * u_2 * -0.16666666666666666f + rho * -0.16666666666666666f * (u_0 * u_0) + rho * -0.16666666666666666f * (u_1 * u_1) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u_2 * u_2);
+        _data_pdfs_20_37_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * -0.25f + rho * u_0 * -0.083333333333333329f + rho * u_1 * 0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_0 * u_0) + rho * 0.083333333333333329f * (u_1 * u_1);
+        _data_pdfs_20_38_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * 0.25f + rho * u_0 * 0.083333333333333329f + rho * u_1 * 0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_0 * u_0) + rho * 0.083333333333333329f * (u_1 * u_1);
+        _data_pdfs_20_39_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * 0.25f + rho * u_0 * -0.083333333333333329f + rho * u_1 * -0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_0 * u_0) + rho * 0.083333333333333329f * (u_1 * u_1);
+        _data_pdfs_20_310_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * -0.25f + rho * u_0 * 0.083333333333333329f + rho * u_1 * -0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_0 * u_0) + rho * 0.083333333333333329f * (u_1 * u_1);
+        _data_pdfs_20_311_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * 0.25f + rho * u_1 * 0.083333333333333329f + rho * u_2 * 0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_1 * u_1) + rho * 0.083333333333333329f * (u_2 * u_2);
+        _data_pdfs_20_312_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * -0.25f + rho * u_1 * -0.083333333333333329f + rho * u_2 * 0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_1 * u_1) + rho * 0.083333333333333329f * (u_2 * u_2);
+        _data_pdfs_20_313_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * -0.25f + rho * u_0 * -0.083333333333333329f + rho * u_2 * 0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_0 * u_0) + rho * 0.083333333333333329f * (u_2 * u_2);
+        _data_pdfs_20_314_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * 0.25f + rho * u_0 * 0.083333333333333329f + rho * u_2 * 0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_0 * u_0) + rho * 0.083333333333333329f * (u_2 * u_2);
+        _data_pdfs_20_315_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * -0.25f + rho * u_1 * 0.083333333333333329f + rho * u_2 * -0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_1 * u_1) + rho * 0.083333333333333329f * (u_2 * u_2);
+        _data_pdfs_20_316_10[_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * 0.25f + rho * u_1 * -0.083333333333333329f + rho * u_2 * -0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_1 * u_1) + rho * 0.083333333333333329f * (u_2 * u_2);
+        _data_pdfs_20_317_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * 0.25f + rho * u_0 * -0.083333333333333329f + rho * u_2 * -0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_0 * u_0) + rho * 0.083333333333333329f * (u_2 * u_2);
+        _data_pdfs_20_318_10[_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * -0.25f + rho * u_0 * 0.083333333333333329f + rho * u_2 * -0.083333333333333329f + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u_0 * u_0) + rho * 0.083333333333333329f * (u_2 * u_2);
+      }
+    }
+  }
+}
+} // namespace internal_b8085d63d6b7e842485134abbac511e8
+
+void InitialPDFsSetterSinglePrecision::run(IBlock *block) {
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+  auto velocity = block->getData<field::GhostLayerField<float, 3>>(velocityID);
+
+  auto &rho_0 = this->rho_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(velocity->nrOfGhostLayers()));
+  float *RESTRICT const _data_velocity = velocity->dataAt(0, 0, 0, 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_b8085d63d6b7e842485134abbac511e8::initialpdfssettersingleprecision_initialpdfssettersingleprecision(_data_force, _data_pdfs, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3, rho_0);
+}
+
+void InitialPDFsSetterSinglePrecision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+  auto velocity = block->getData<field::GhostLayerField<float, 3>>(velocityID);
+
+  auto &rho_0 = this->rho_0_;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(velocity->nrOfGhostLayers()));
+  float *RESTRICT const _data_velocity = velocity->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_b8085d63d6b7e842485134abbac511e8::initialpdfssettersingleprecision_initialpdfssettersingleprecision(_data_force, _data_pdfs, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3, rho_0);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterSinglePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterSinglePrecision.h
new file mode 100644
index 00000000000..d6c4553c4e6
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/InitialPDFsSetterSinglePrecision.h
@@ -0,0 +1,106 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file InitialPDFsSetterSinglePrecision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class InitialPDFsSetterSinglePrecision {
+public:
+  InitialPDFsSetterSinglePrecision(BlockDataID forceID_, BlockDataID pdfsID_,
+                                   BlockDataID velocityID_, float rho_0)
+      : forceID(forceID_), pdfsID(pdfsID_), velocityID(velocityID_),
+        rho_0_(rho_0){};
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<InitialPDFsSetterSinglePrecision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<InitialPDFsSetterSinglePrecision> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  BlockDataID velocityID;
+  float rho_0_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecision.cpp
new file mode 100644
index 00000000000..65bbfd9f8d7
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecision.cpp
@@ -0,0 +1,338 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file StreamSweepDoublePrecision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "StreamSweepDoublePrecision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_streamsweepdoubleprecision_streamsweepdoubleprecision {
+static FUNC_PREFIX void streamsweepdoubleprecision_streamsweepdoubleprecision(double *RESTRICT const _data_force, double *RESTRICT const _data_pdfs, double *RESTRICT _data_pdfs_tmp, double *RESTRICT _data_velocity, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3, int64_t const _stride_velocity_0, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3) {
+  for (int64_t ctr_2 = 1; ctr_2 < _size_force_2 - 1; ctr_2 += 1) {
+    double *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    double *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 5 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 11 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 12 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 13 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 14 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 15 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 16 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 17 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3;
+    double *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    double *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    double *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    double *RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2 * ctr_2;
+    double *RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2 * ctr_2 + _stride_velocity_3;
+    double *RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2 * ctr_2 + 2 * _stride_velocity_3;
+    double *RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2;
+    double *RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 2 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 3 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 4 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 5 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 6 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 7 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 8 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 9 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 10 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 11 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 12 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 13 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 14 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 15 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 16 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 17 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 18 * _stride_pdfs_tmp_3;
+    for (int64_t ctr_1 = 1; ctr_1 < _size_force_1 - 1; ctr_1 += 1) {
+      double *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      double *RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
+      double *RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
+      double *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      double *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      double *RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_35;
+      double *RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_36;
+      double *RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
+      double *RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
+      double *RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
+      double *RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
+      double *RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
+      double *RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
+      double *RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_313;
+      double *RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_314;
+      double *RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
+      double *RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
+      double *RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_317;
+      double *RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_318;
+      double *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      double *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      double *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      double *RESTRICT _data_velocity_20_30_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_30;
+      double *RESTRICT _data_velocity_20_31_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_31;
+      double *RESTRICT _data_velocity_20_32_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_32;
+      double *RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_30;
+      double *RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_31;
+      double *RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_32;
+      double *RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_33;
+      double *RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_34;
+      double *RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_35;
+      double *RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_36;
+      double *RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_37;
+      double *RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_38;
+      double *RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_39;
+      double *RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_310;
+      double *RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_311;
+      double *RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_312;
+      double *RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_313;
+      double *RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_314;
+      double *RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_315;
+      double *RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_316;
+      double *RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_317;
+      double *RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_318;
+      for (int64_t ctr_0 = 1; ctr_0 < _size_force_0 - 1; ctr_0 += 1) {
+        const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0];
+        const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0 * ctr_0];
+        const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0 * ctr_0];
+        const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0];
+        const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0];
+        const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0 * ctr_0];
+        const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0 * ctr_0];
+        const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0];
+        const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0];
+        const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0];
+        const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0];
+        const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0 * ctr_0];
+        const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0 * ctr_0];
+        const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0];
+        const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0];
+        const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0 * ctr_0];
+        const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0 * ctr_0];
+        const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0];
+        const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0];
+        const double vel0Term = streamed_10 + streamed_14 + streamed_18 + streamed_4 + streamed_8;
+        const double momdensity_0 = streamed_13 * -1.0 + streamed_17 * -1.0 + streamed_3 * -1.0 + streamed_7 * -1.0 + streamed_9 * -1.0 + vel0Term;
+        const double vel1Term = streamed_1 + streamed_11 + streamed_15 + streamed_7;
+        const double momdensity_1 = streamed_10 * -1.0 + streamed_12 * -1.0 + streamed_16 * -1.0 + streamed_2 * -1.0 + streamed_8 + streamed_9 * -1.0 + vel1Term;
+        const double vel2Term = streamed_12 + streamed_13 + streamed_5;
+        const double rho = streamed_0 + streamed_16 + streamed_17 + streamed_2 + streamed_3 + streamed_6 + streamed_9 + vel0Term + vel1Term + vel2Term;
+        const double momdensity_2 = streamed_11 + streamed_14 + streamed_15 * -1.0 + streamed_16 * -1.0 + streamed_17 * -1.0 + streamed_18 * -1.0 + streamed_6 * -1.0 + vel2Term;
+        const double u_0 = momdensity_0 * ((1.0) / (rho)) + 0.5 * ((1.0) / (rho)) * _data_force_20_30_10[_stride_force_0 * ctr_0];
+        const double u_1 = momdensity_1 * ((1.0) / (rho)) + 0.5 * ((1.0) / (rho)) * _data_force_20_31_10[_stride_force_0 * ctr_0];
+        const double u_2 = momdensity_2 * ((1.0) / (rho)) + 0.5 * ((1.0) / (rho)) * _data_force_20_32_10[_stride_force_0 * ctr_0];
+        _data_velocity_20_30_10[_stride_velocity_0 * ctr_0] = u_0;
+        _data_velocity_20_31_10[_stride_velocity_0 * ctr_0] = u_1;
+        _data_velocity_20_32_10[_stride_velocity_0 * ctr_0] = u_2;
+        _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_0;
+        _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_1;
+        _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_2;
+        _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_3;
+        _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_4;
+        _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_5;
+        _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_6;
+        _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_7;
+        _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_8;
+        _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_9;
+        _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_10;
+        _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_11;
+        _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_12;
+        _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_13;
+        _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_14;
+        _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_15;
+        _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_16;
+        _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_17;
+        _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_18;
+      }
+    }
+  }
+}
+} // namespace internal_streamsweepdoubleprecision_streamsweepdoubleprecision
+
+void StreamSweepDoublePrecision::run(IBlock *block) {
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+  auto velocity = block->getData<field::GhostLayerField<double, 3>>(velocityID);
+  field::GhostLayerField<double, 19> *pdfs_tmp;
+  {
+    // Getting temporary field pdfs_tmp
+    auto it = cache_pdfs_.find(pdfs);
+    if (it != cache_pdfs_.end()) {
+      pdfs_tmp = *it;
+    } else {
+      pdfs_tmp = pdfs->cloneUninitialized();
+      cache_pdfs_.insert(pdfs_tmp);
+    }
+  }
+
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT const _data_pdfs = pdfs->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(velocity->nrOfGhostLayers()));
+  double *RESTRICT _data_velocity = velocity->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 2));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 2));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 2));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+  const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+  const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+  const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+  const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_streamsweepdoubleprecision_streamsweepdoubleprecision::streamsweepdoubleprecision_streamsweepdoubleprecision(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+  pdfs->swapDataPointers(pdfs_tmp);
+}
+
+void StreamSweepDoublePrecision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+  auto velocity = block->getData<field::GhostLayerField<double, 3>>(velocityID);
+  field::GhostLayerField<double, 19> *pdfs_tmp;
+  {
+    // Getting temporary field pdfs_tmp
+    auto it = cache_pdfs_.find(pdfs);
+    if (it != cache_pdfs_.end()) {
+      pdfs_tmp = *it;
+    } else {
+      pdfs_tmp = pdfs->cloneUninitialized();
+      cache_pdfs_.insert(pdfs_tmp);
+    }
+  }
+
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  double *RESTRICT _data_velocity = velocity->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+  const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+  const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+  const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+  const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_streamsweepdoubleprecision_streamsweepdoubleprecision::streamsweepdoubleprecision_streamsweepdoubleprecision(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+  pdfs->swapDataPointers(pdfs_tmp);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecision.h
new file mode 100644
index 00000000000..de3d343cb2d
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecision.h
@@ -0,0 +1,116 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file StreamSweepDoublePrecision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class StreamSweepDoublePrecision {
+public:
+  StreamSweepDoublePrecision(BlockDataID forceID_, BlockDataID pdfsID_,
+                             BlockDataID velocityID_)
+      : forceID(forceID_), pdfsID(pdfsID_), velocityID(velocityID_){};
+
+  ~StreamSweepDoublePrecision() {
+    for (auto p : cache_pdfs_) {
+      delete p;
+    }
+  }
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<StreamSweepDoublePrecision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StreamSweepDoublePrecision> &kernel,
+                         const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  BlockDataID velocityID;
+
+private:
+  std::set<field::GhostLayerField<double, 19> *,
+           field::SwapableCompare<field::GhostLayerField<double, 19> *>>
+      cache_pdfs_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecisionAVX.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecisionAVX.cpp
new file mode 100644
index 00000000000..666330a7003
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecisionAVX.cpp
@@ -0,0 +1,401 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file StreamSweepDoublePrecisionAVX.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "StreamSweepDoublePrecisionAVX.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#include <immintrin.h>
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_91e2c9bdb4c4fa8a405803890749bf98 {
+static FUNC_PREFIX void streamsweepdoubleprecisionavx_streamsweepdoubleprecisionavx(double *RESTRICT const _data_force, double *RESTRICT const _data_pdfs, double *RESTRICT _data_pdfs_tmp, double *RESTRICT _data_velocity, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3) {
+  for (int64_t ctr_2 = 1; ctr_2 < _size_force_2 - 1; ctr_2 += 1) {
+    double *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    double *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 5 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 11 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 12 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 13 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 14 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 15 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 16 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 17 * _stride_pdfs_3;
+    double *RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3;
+    double *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    double *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    double *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    double *RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2 * ctr_2;
+    double *RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2 * ctr_2 + _stride_velocity_3;
+    double *RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2 * ctr_2 + 2 * _stride_velocity_3;
+    double *RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2;
+    double *RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 2 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 3 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 4 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 5 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 6 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 7 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 8 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 9 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 10 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 11 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 12 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 13 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 14 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 15 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 16 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 17 * _stride_pdfs_tmp_3;
+    double *RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 18 * _stride_pdfs_tmp_3;
+    for (int64_t ctr_1 = 1; ctr_1 < _size_force_1 - 1; ctr_1 += 1) {
+      double *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      double *RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
+      double *RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
+      double *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      double *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      double *RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_35;
+      double *RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_36;
+      double *RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
+      double *RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
+      double *RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
+      double *RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
+      double *RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
+      double *RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
+      double *RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_313;
+      double *RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_314;
+      double *RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
+      double *RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
+      double *RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_317;
+      double *RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_318;
+      double *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      double *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      double *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      double *RESTRICT _data_velocity_20_30_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_30;
+      double *RESTRICT _data_velocity_20_31_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_31;
+      double *RESTRICT _data_velocity_20_32_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_32;
+      double *RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_30;
+      double *RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_31;
+      double *RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_32;
+      double *RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_33;
+      double *RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_34;
+      double *RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_35;
+      double *RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_36;
+      double *RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_37;
+      double *RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_38;
+      double *RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_39;
+      double *RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_310;
+      double *RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_311;
+      double *RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_312;
+      double *RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_313;
+      double *RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_314;
+      double *RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_315;
+      double *RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_316;
+      double *RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_317;
+      double *RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_318;
+      {
+        for (int64_t ctr_0 = 1; ctr_0 < (int64_t)((_size_force_0 - 2) / (4)) * (4) + 1; ctr_0 += 4) {
+          const __m256d streamed_0 = _mm256_load_pd(&_data_pdfs_20_30_10[ctr_0]);
+          const __m256d streamed_1 = _mm256_load_pd(&_data_pdfs_20_31_1m1[ctr_0]);
+          const __m256d streamed_2 = _mm256_load_pd(&_data_pdfs_20_32_11[ctr_0]);
+          const __m256d streamed_3 = _mm256_loadu_pd(&_data_pdfs_20_33_10[ctr_0 + 1]);
+          const __m256d streamed_4 = _mm256_loadu_pd(&_data_pdfs_20_34_10[ctr_0 - 1]);
+          const __m256d streamed_5 = _mm256_load_pd(&_data_pdfs_2m1_35_10[ctr_0]);
+          const __m256d streamed_6 = _mm256_load_pd(&_data_pdfs_21_36_10[ctr_0]);
+          const __m256d streamed_7 = _mm256_loadu_pd(&_data_pdfs_20_37_1m1[ctr_0 + 1]);
+          const __m256d streamed_8 = _mm256_loadu_pd(&_data_pdfs_20_38_1m1[ctr_0 - 1]);
+          const __m256d streamed_9 = _mm256_loadu_pd(&_data_pdfs_20_39_11[ctr_0 + 1]);
+          const __m256d streamed_10 = _mm256_loadu_pd(&_data_pdfs_20_310_11[ctr_0 - 1]);
+          const __m256d streamed_11 = _mm256_load_pd(&_data_pdfs_2m1_311_1m1[ctr_0]);
+          const __m256d streamed_12 = _mm256_load_pd(&_data_pdfs_2m1_312_11[ctr_0]);
+          const __m256d streamed_13 = _mm256_loadu_pd(&_data_pdfs_2m1_313_10[ctr_0 + 1]);
+          const __m256d streamed_14 = _mm256_loadu_pd(&_data_pdfs_2m1_314_10[ctr_0 - 1]);
+          const __m256d streamed_15 = _mm256_load_pd(&_data_pdfs_21_315_1m1[ctr_0]);
+          const __m256d streamed_16 = _mm256_load_pd(&_data_pdfs_21_316_11[ctr_0]);
+          const __m256d streamed_17 = _mm256_loadu_pd(&_data_pdfs_21_317_10[ctr_0 + 1]);
+          const __m256d streamed_18 = _mm256_loadu_pd(&_data_pdfs_21_318_10[ctr_0 - 1]);
+          const __m256d vel0Term = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(streamed_10, streamed_14), streamed_18), streamed_4), streamed_8);
+          const __m256d momdensity_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(streamed_13, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(streamed_17, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(streamed_3, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(streamed_7, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(streamed_9, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), vel0Term);
+          const __m256d vel1Term = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(streamed_1, streamed_11), streamed_15), streamed_7);
+          const __m256d momdensity_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(streamed_10, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(streamed_12, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(streamed_16, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(streamed_2, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(streamed_9, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), streamed_8), vel1Term);
+          const __m256d vel2Term = _mm256_add_pd(_mm256_add_pd(streamed_12, streamed_13), streamed_5);
+          const __m256d rho = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(streamed_0, streamed_16), streamed_17), streamed_2), streamed_3), streamed_6), streamed_9), vel0Term), vel1Term), vel2Term);
+          const __m256d momdensity_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(streamed_15, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0)), _mm256_mul_pd(streamed_16, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(streamed_17, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(streamed_18, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), _mm256_mul_pd(streamed_6, _mm256_set_pd(-1.0, -1.0, -1.0, -1.0))), streamed_11), streamed_14), vel2Term);
+          const __m256d u_0 = _mm256_add_pd(_mm256_mul_pd(momdensity_0, _mm256_div_pd(_mm256_set_pd(1.0, 1.0, 1.0, 1.0), rho)), _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.5, 0.5, 0.5, 0.5), _mm256_div_pd(_mm256_set_pd(1.0, 1.0, 1.0, 1.0), rho)), _mm256_load_pd(&_data_force_20_30_10[ctr_0])));
+          const __m256d u_1 = _mm256_add_pd(_mm256_mul_pd(momdensity_1, _mm256_div_pd(_mm256_set_pd(1.0, 1.0, 1.0, 1.0), rho)), _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.5, 0.5, 0.5, 0.5), _mm256_div_pd(_mm256_set_pd(1.0, 1.0, 1.0, 1.0), rho)), _mm256_load_pd(&_data_force_20_31_10[ctr_0])));
+          const __m256d u_2 = _mm256_add_pd(_mm256_mul_pd(momdensity_2, _mm256_div_pd(_mm256_set_pd(1.0, 1.0, 1.0, 1.0), rho)), _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.5, 0.5, 0.5, 0.5), _mm256_div_pd(_mm256_set_pd(1.0, 1.0, 1.0, 1.0), rho)), _mm256_load_pd(&_data_force_20_32_10[ctr_0])));
+          _mm256_store_pd(&_data_velocity_20_30_10[ctr_0], u_0);
+          _mm256_store_pd(&_data_velocity_20_31_10[ctr_0], u_1);
+          _mm256_store_pd(&_data_velocity_20_32_10[ctr_0], u_2);
+          _mm256_store_pd(&_data_pdfs_tmp_20_30_10[ctr_0], streamed_0);
+          _mm256_store_pd(&_data_pdfs_tmp_20_31_10[ctr_0], streamed_1);
+          _mm256_store_pd(&_data_pdfs_tmp_20_32_10[ctr_0], streamed_2);
+          _mm256_store_pd(&_data_pdfs_tmp_20_33_10[ctr_0], streamed_3);
+          _mm256_store_pd(&_data_pdfs_tmp_20_34_10[ctr_0], streamed_4);
+          _mm256_store_pd(&_data_pdfs_tmp_20_35_10[ctr_0], streamed_5);
+          _mm256_store_pd(&_data_pdfs_tmp_20_36_10[ctr_0], streamed_6);
+          _mm256_store_pd(&_data_pdfs_tmp_20_37_10[ctr_0], streamed_7);
+          _mm256_store_pd(&_data_pdfs_tmp_20_38_10[ctr_0], streamed_8);
+          _mm256_store_pd(&_data_pdfs_tmp_20_39_10[ctr_0], streamed_9);
+          _mm256_store_pd(&_data_pdfs_tmp_20_310_10[ctr_0], streamed_10);
+          _mm256_store_pd(&_data_pdfs_tmp_20_311_10[ctr_0], streamed_11);
+          _mm256_store_pd(&_data_pdfs_tmp_20_312_10[ctr_0], streamed_12);
+          _mm256_store_pd(&_data_pdfs_tmp_20_313_10[ctr_0], streamed_13);
+          _mm256_store_pd(&_data_pdfs_tmp_20_314_10[ctr_0], streamed_14);
+          _mm256_store_pd(&_data_pdfs_tmp_20_315_10[ctr_0], streamed_15);
+          _mm256_store_pd(&_data_pdfs_tmp_20_316_10[ctr_0], streamed_16);
+          _mm256_store_pd(&_data_pdfs_tmp_20_317_10[ctr_0], streamed_17);
+          _mm256_store_pd(&_data_pdfs_tmp_20_318_10[ctr_0], streamed_18);
+        }
+        for (int64_t ctr_0 = (int64_t)((_size_force_0 - 2) / (4)) * (4) + 1; ctr_0 < _size_force_0 - 1; ctr_0 += 1) {
+          const double streamed_0 = _data_pdfs_20_30_10[ctr_0];
+          const double streamed_1 = _data_pdfs_20_31_1m1[ctr_0];
+          const double streamed_2 = _data_pdfs_20_32_11[ctr_0];
+          const double streamed_3 = _data_pdfs_20_33_10[ctr_0 + 1];
+          const double streamed_4 = _data_pdfs_20_34_10[ctr_0 - 1];
+          const double streamed_5 = _data_pdfs_2m1_35_10[ctr_0];
+          const double streamed_6 = _data_pdfs_21_36_10[ctr_0];
+          const double streamed_7 = _data_pdfs_20_37_1m1[ctr_0 + 1];
+          const double streamed_8 = _data_pdfs_20_38_1m1[ctr_0 - 1];
+          const double streamed_9 = _data_pdfs_20_39_11[ctr_0 + 1];
+          const double streamed_10 = _data_pdfs_20_310_11[ctr_0 - 1];
+          const double streamed_11 = _data_pdfs_2m1_311_1m1[ctr_0];
+          const double streamed_12 = _data_pdfs_2m1_312_11[ctr_0];
+          const double streamed_13 = _data_pdfs_2m1_313_10[ctr_0 + 1];
+          const double streamed_14 = _data_pdfs_2m1_314_10[ctr_0 - 1];
+          const double streamed_15 = _data_pdfs_21_315_1m1[ctr_0];
+          const double streamed_16 = _data_pdfs_21_316_11[ctr_0];
+          const double streamed_17 = _data_pdfs_21_317_10[ctr_0 + 1];
+          const double streamed_18 = _data_pdfs_21_318_10[ctr_0 - 1];
+          const double vel0Term = streamed_10 + streamed_14 + streamed_18 + streamed_4 + streamed_8;
+          const double momdensity_0 = streamed_13 * -1.0 + streamed_17 * -1.0 + streamed_3 * -1.0 + streamed_7 * -1.0 + streamed_9 * -1.0 + vel0Term;
+          const double vel1Term = streamed_1 + streamed_11 + streamed_15 + streamed_7;
+          const double momdensity_1 = streamed_10 * -1.0 + streamed_12 * -1.0 + streamed_16 * -1.0 + streamed_2 * -1.0 + streamed_8 + streamed_9 * -1.0 + vel1Term;
+          const double vel2Term = streamed_12 + streamed_13 + streamed_5;
+          const double rho = streamed_0 + streamed_16 + streamed_17 + streamed_2 + streamed_3 + streamed_6 + streamed_9 + vel0Term + vel1Term + vel2Term;
+          const double momdensity_2 = streamed_11 + streamed_14 + streamed_15 * -1.0 + streamed_16 * -1.0 + streamed_17 * -1.0 + streamed_18 * -1.0 + streamed_6 * -1.0 + vel2Term;
+          const double u_0 = momdensity_0 * ((1.0) / (rho)) + 0.5 * ((1.0) / (rho)) * _data_force_20_30_10[ctr_0];
+          const double u_1 = momdensity_1 * ((1.0) / (rho)) + 0.5 * ((1.0) / (rho)) * _data_force_20_31_10[ctr_0];
+          const double u_2 = momdensity_2 * ((1.0) / (rho)) + 0.5 * ((1.0) / (rho)) * _data_force_20_32_10[ctr_0];
+          _data_velocity_20_30_10[ctr_0] = u_0;
+          _data_velocity_20_31_10[ctr_0] = u_1;
+          _data_velocity_20_32_10[ctr_0] = u_2;
+          _data_pdfs_tmp_20_30_10[ctr_0] = streamed_0;
+          _data_pdfs_tmp_20_31_10[ctr_0] = streamed_1;
+          _data_pdfs_tmp_20_32_10[ctr_0] = streamed_2;
+          _data_pdfs_tmp_20_33_10[ctr_0] = streamed_3;
+          _data_pdfs_tmp_20_34_10[ctr_0] = streamed_4;
+          _data_pdfs_tmp_20_35_10[ctr_0] = streamed_5;
+          _data_pdfs_tmp_20_36_10[ctr_0] = streamed_6;
+          _data_pdfs_tmp_20_37_10[ctr_0] = streamed_7;
+          _data_pdfs_tmp_20_38_10[ctr_0] = streamed_8;
+          _data_pdfs_tmp_20_39_10[ctr_0] = streamed_9;
+          _data_pdfs_tmp_20_310_10[ctr_0] = streamed_10;
+          _data_pdfs_tmp_20_311_10[ctr_0] = streamed_11;
+          _data_pdfs_tmp_20_312_10[ctr_0] = streamed_12;
+          _data_pdfs_tmp_20_313_10[ctr_0] = streamed_13;
+          _data_pdfs_tmp_20_314_10[ctr_0] = streamed_14;
+          _data_pdfs_tmp_20_315_10[ctr_0] = streamed_15;
+          _data_pdfs_tmp_20_316_10[ctr_0] = streamed_16;
+          _data_pdfs_tmp_20_317_10[ctr_0] = streamed_17;
+          _data_pdfs_tmp_20_318_10[ctr_0] = streamed_18;
+        }
+      }
+    }
+  }
+}
+} // namespace internal_91e2c9bdb4c4fa8a405803890749bf98
+
+void StreamSweepDoublePrecisionAVX::run(IBlock *block) {
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+  auto velocity = block->getData<field::GhostLayerField<double, 3>>(velocityID);
+  field::GhostLayerField<double, 19> *pdfs_tmp;
+  {
+    // Getting temporary field pdfs_tmp
+    auto it = cache_pdfs_.find(pdfs);
+    if (it != cache_pdfs_.end()) {
+      pdfs_tmp = *it;
+    } else {
+      pdfs_tmp = pdfs->cloneUninitialized();
+      cache_pdfs_.insert(pdfs_tmp);
+    }
+  }
+
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT const _data_pdfs = pdfs->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs_tmp->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(velocity->nrOfGhostLayers()));
+  double *RESTRICT _data_velocity = velocity->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)velocity->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 2));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 2));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 2));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+  const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+  const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_91e2c9bdb4c4fa8a405803890749bf98::streamsweepdoubleprecisionavx_streamsweepdoubleprecisionavx(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+  pdfs->swapDataPointers(pdfs_tmp);
+}
+
+void StreamSweepDoublePrecisionAVX::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto force = block->getData<field::GhostLayerField<double, 3>>(forceID);
+  auto pdfs = block->getData<field::GhostLayerField<double, 19>>(pdfsID);
+  auto velocity = block->getData<field::GhostLayerField<double, 3>>(velocityID);
+  field::GhostLayerField<double, 19> *pdfs_tmp;
+  {
+    // Getting temporary field pdfs_tmp
+    auto it = cache_pdfs_.find(pdfs);
+    if (it != cache_pdfs_.end()) {
+      pdfs_tmp = *it;
+    } else {
+      pdfs_tmp = pdfs->cloneUninitialized();
+      cache_pdfs_.insert(pdfs_tmp);
+    }
+  }
+
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(force->nrOfGhostLayers()));
+  double *RESTRICT const _data_force = force->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  double *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs_tmp->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  double *RESTRICT _data_velocity = velocity->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)velocity->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+  const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+  const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_91e2c9bdb4c4fa8a405803890749bf98::streamsweepdoubleprecisionavx_streamsweepdoubleprecisionavx(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+  pdfs->swapDataPointers(pdfs_tmp);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecisionAVX.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecisionAVX.h
new file mode 100644
index 00000000000..5ff5ed1738c
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepDoublePrecisionAVX.h
@@ -0,0 +1,115 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file StreamSweepDoublePrecisionAVX.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class StreamSweepDoublePrecisionAVX {
+public:
+  StreamSweepDoublePrecisionAVX(BlockDataID forceID_, BlockDataID pdfsID_,
+                                BlockDataID velocityID_)
+      : forceID(forceID_), pdfsID(pdfsID_), velocityID(velocityID_){};
+
+  ~StreamSweepDoublePrecisionAVX() {
+    for (auto p : cache_pdfs_) {
+      delete p;
+    }
+  }
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<StreamSweepDoublePrecisionAVX> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<StreamSweepDoublePrecisionAVX> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  BlockDataID velocityID;
+
+private:
+  std::set<field::GhostLayerField<double, 19> *,
+           field::SwapableCompare<field::GhostLayerField<double, 19> *>>
+      cache_pdfs_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecision.cpp
new file mode 100644
index 00000000000..ea431d01eb1
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecision.cpp
@@ -0,0 +1,338 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file StreamSweepSinglePrecision.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "StreamSweepSinglePrecision.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_streamsweepsingleprecision_streamsweepsingleprecision {
+static FUNC_PREFIX void streamsweepsingleprecision_streamsweepsingleprecision(float *RESTRICT const _data_force, float *RESTRICT const _data_pdfs, float *RESTRICT _data_pdfs_tmp, float *RESTRICT _data_velocity, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3, int64_t const _stride_velocity_0, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3) {
+  for (int64_t ctr_2 = 1; ctr_2 < _size_force_2 - 1; ctr_2 += 1) {
+    float *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    float *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 5 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 11 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 12 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 13 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 14 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 15 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 16 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 17 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    float *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    float *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    float *RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2 * ctr_2;
+    float *RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2 * ctr_2 + _stride_velocity_3;
+    float *RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2 * ctr_2 + 2 * _stride_velocity_3;
+    float *RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2;
+    float *RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 2 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 3 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 4 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 5 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 6 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 7 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 8 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 9 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 10 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 11 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 12 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 13 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 14 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 15 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 16 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 17 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 18 * _stride_pdfs_tmp_3;
+    for (int64_t ctr_1 = 1; ctr_1 < _size_force_1 - 1; ctr_1 += 1) {
+      float *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      float *RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
+      float *RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
+      float *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      float *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      float *RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_35;
+      float *RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_36;
+      float *RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
+      float *RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
+      float *RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
+      float *RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
+      float *RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
+      float *RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
+      float *RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_313;
+      float *RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_314;
+      float *RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
+      float *RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
+      float *RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_317;
+      float *RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_318;
+      float *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      float *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      float *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      float *RESTRICT _data_velocity_20_30_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_30;
+      float *RESTRICT _data_velocity_20_31_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_31;
+      float *RESTRICT _data_velocity_20_32_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_32;
+      float *RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_30;
+      float *RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_31;
+      float *RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_32;
+      float *RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_33;
+      float *RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_34;
+      float *RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_35;
+      float *RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_36;
+      float *RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_37;
+      float *RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_38;
+      float *RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_39;
+      float *RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_310;
+      float *RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_311;
+      float *RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_312;
+      float *RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_313;
+      float *RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_314;
+      float *RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_315;
+      float *RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_316;
+      float *RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_317;
+      float *RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_318;
+      for (int64_t ctr_0 = 1; ctr_0 < _size_force_0 - 1; ctr_0 += 1) {
+        const float streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0 * ctr_0];
+        const float streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0 * ctr_0];
+        const float streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0 * ctr_0];
+        const float streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0];
+        const float streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0];
+        const float streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0 * ctr_0];
+        const float streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0 * ctr_0];
+        const float streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0];
+        const float streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0];
+        const float streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0];
+        const float streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0];
+        const float streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0 * ctr_0];
+        const float streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0 * ctr_0];
+        const float streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0];
+        const float streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0];
+        const float streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0 * ctr_0];
+        const float streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0 * ctr_0];
+        const float streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0];
+        const float streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0];
+        const float vel0Term = streamed_10 + streamed_14 + streamed_18 + streamed_4 + streamed_8;
+        const float momdensity_0 = streamed_13 * -1.0f + streamed_17 * -1.0f + streamed_3 * -1.0f + streamed_7 * -1.0f + streamed_9 * -1.0f + vel0Term;
+        const float vel1Term = streamed_1 + streamed_11 + streamed_15 + streamed_7;
+        const float momdensity_1 = streamed_10 * -1.0f + streamed_12 * -1.0f + streamed_16 * -1.0f + streamed_2 * -1.0f + streamed_8 + streamed_9 * -1.0f + vel1Term;
+        const float vel2Term = streamed_12 + streamed_13 + streamed_5;
+        const float rho = streamed_0 + streamed_16 + streamed_17 + streamed_2 + streamed_3 + streamed_6 + streamed_9 + vel0Term + vel1Term + vel2Term;
+        const float momdensity_2 = streamed_11 + streamed_14 + streamed_15 * -1.0f + streamed_16 * -1.0f + streamed_17 * -1.0f + streamed_18 * -1.0f + streamed_6 * -1.0f + vel2Term;
+        const float u_0 = momdensity_0 * ((1.0f) / (rho)) + 0.5f * ((1.0f) / (rho)) * _data_force_20_30_10[_stride_force_0 * ctr_0];
+        const float u_1 = momdensity_1 * ((1.0f) / (rho)) + 0.5f * ((1.0f) / (rho)) * _data_force_20_31_10[_stride_force_0 * ctr_0];
+        const float u_2 = momdensity_2 * ((1.0f) / (rho)) + 0.5f * ((1.0f) / (rho)) * _data_force_20_32_10[_stride_force_0 * ctr_0];
+        _data_velocity_20_30_10[_stride_velocity_0 * ctr_0] = u_0;
+        _data_velocity_20_31_10[_stride_velocity_0 * ctr_0] = u_1;
+        _data_velocity_20_32_10[_stride_velocity_0 * ctr_0] = u_2;
+        _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_0;
+        _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_1;
+        _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_2;
+        _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_3;
+        _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_4;
+        _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_5;
+        _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_6;
+        _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_7;
+        _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_8;
+        _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_9;
+        _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_10;
+        _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_11;
+        _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_12;
+        _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_13;
+        _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_14;
+        _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_15;
+        _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_16;
+        _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_17;
+        _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0 * ctr_0] = streamed_18;
+      }
+    }
+  }
+}
+} // namespace internal_streamsweepsingleprecision_streamsweepsingleprecision
+
+void StreamSweepSinglePrecision::run(IBlock *block) {
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+  auto velocity = block->getData<field::GhostLayerField<float, 3>>(velocityID);
+  field::GhostLayerField<float, 19> *pdfs_tmp;
+  {
+    // Getting temporary field pdfs_tmp
+    auto it = cache_pdfs_.find(pdfs);
+    if (it != cache_pdfs_.end()) {
+      pdfs_tmp = *it;
+    } else {
+      pdfs_tmp = pdfs->cloneUninitialized();
+      cache_pdfs_.insert(pdfs_tmp);
+    }
+  }
+
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT const _data_pdfs = pdfs->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(velocity->nrOfGhostLayers()));
+  float *RESTRICT _data_velocity = velocity->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 2));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 2));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 2));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+  const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+  const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+  const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+  const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_streamsweepsingleprecision_streamsweepsingleprecision::streamsweepsingleprecision_streamsweepsingleprecision(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+  pdfs->swapDataPointers(pdfs_tmp);
+}
+
+void StreamSweepSinglePrecision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+  auto velocity = block->getData<field::GhostLayerField<float, 3>>(velocityID);
+  field::GhostLayerField<float, 19> *pdfs_tmp;
+  {
+    // Getting temporary field pdfs_tmp
+    auto it = cache_pdfs_.find(pdfs);
+    if (it != cache_pdfs_.end()) {
+      pdfs_tmp = *it;
+    } else {
+      pdfs_tmp = pdfs->cloneUninitialized();
+      cache_pdfs_.insert(pdfs_tmp);
+    }
+  }
+
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  float *RESTRICT _data_velocity = velocity->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  const int64_t _stride_force_0 = int64_t(force->xStride());
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+  const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+  const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+  const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+  const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_streamsweepsingleprecision_streamsweepsingleprecision::streamsweepsingleprecision_streamsweepsingleprecision(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+  pdfs->swapDataPointers(pdfs_tmp);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecision.h
new file mode 100644
index 00000000000..3cb474e0b33
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecision.h
@@ -0,0 +1,116 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file StreamSweepSinglePrecision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class StreamSweepSinglePrecision {
+public:
+  StreamSweepSinglePrecision(BlockDataID forceID_, BlockDataID pdfsID_,
+                             BlockDataID velocityID_)
+      : forceID(forceID_), pdfsID(pdfsID_), velocityID(velocityID_){};
+
+  ~StreamSweepSinglePrecision() {
+    for (auto p : cache_pdfs_) {
+      delete p;
+    }
+  }
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<StreamSweepSinglePrecision> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StreamSweepSinglePrecision> &kernel,
+                         const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  BlockDataID velocityID;
+
+private:
+  std::set<field::GhostLayerField<float, 19> *,
+           field::SwapableCompare<field::GhostLayerField<float, 19> *>>
+      cache_pdfs_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecisionAVX.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecisionAVX.cpp
new file mode 100644
index 00000000000..a6778800989
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecisionAVX.cpp
@@ -0,0 +1,401 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file StreamSweepSinglePrecisionAVX.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#include <cmath>
+
+#include "StreamSweepSinglePrecisionAVX.h"
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#include <immintrin.h>
+
+#define FUNC_PREFIX
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning push
+#pragma warning(disable : 1599)
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace pystencils {
+
+namespace internal_5e7ed0276adbfbb1ac4789ac0a0f54c4 {
+static FUNC_PREFIX void streamsweepsingleprecisionavx_streamsweepsingleprecisionavx(float *RESTRICT const _data_force, float *RESTRICT const _data_pdfs, float *RESTRICT _data_pdfs_tmp, float *RESTRICT _data_velocity, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3) {
+  for (int64_t ctr_2 = 1; ctr_2 < _size_force_2 - 1; ctr_2 += 1) {
+    float *RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
+    float *RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 5 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 11 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 12 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 13 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 14 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 15 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 16 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 17 * _stride_pdfs_3;
+    float *RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3;
+    float *RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
+    float *RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
+    float *RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
+    float *RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2 * ctr_2;
+    float *RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2 * ctr_2 + _stride_velocity_3;
+    float *RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2 * ctr_2 + 2 * _stride_velocity_3;
+    float *RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2;
+    float *RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 2 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 3 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 4 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 5 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 6 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 7 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 8 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 9 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 10 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 11 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 12 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 13 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 14 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 15 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 16 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 17 * _stride_pdfs_tmp_3;
+    float *RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 18 * _stride_pdfs_tmp_3;
+    for (int64_t ctr_1 = 1; ctr_1 < _size_force_1 - 1; ctr_1 += 1) {
+      float *RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
+      float *RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
+      float *RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
+      float *RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
+      float *RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
+      float *RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_35;
+      float *RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_36;
+      float *RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
+      float *RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
+      float *RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
+      float *RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
+      float *RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
+      float *RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
+      float *RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_313;
+      float *RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_314;
+      float *RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
+      float *RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
+      float *RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_317;
+      float *RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_318;
+      float *RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
+      float *RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
+      float *RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
+      float *RESTRICT _data_velocity_20_30_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_30;
+      float *RESTRICT _data_velocity_20_31_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_31;
+      float *RESTRICT _data_velocity_20_32_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_32;
+      float *RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_30;
+      float *RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_31;
+      float *RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_32;
+      float *RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_33;
+      float *RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_34;
+      float *RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_35;
+      float *RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_36;
+      float *RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_37;
+      float *RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_38;
+      float *RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_39;
+      float *RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_310;
+      float *RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_311;
+      float *RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_312;
+      float *RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_313;
+      float *RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_314;
+      float *RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_315;
+      float *RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_316;
+      float *RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_317;
+      float *RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_318;
+      {
+        for (int64_t ctr_0 = 1; ctr_0 < (int64_t)((_size_force_0 - 2) / (8)) * (8) + 1; ctr_0 += 8) {
+          const __m256 streamed_0 = _mm256_load_ps(&_data_pdfs_20_30_10[ctr_0]);
+          const __m256 streamed_1 = _mm256_load_ps(&_data_pdfs_20_31_1m1[ctr_0]);
+          const __m256 streamed_2 = _mm256_load_ps(&_data_pdfs_20_32_11[ctr_0]);
+          const __m256 streamed_3 = _mm256_loadu_ps(&_data_pdfs_20_33_10[ctr_0 + 1]);
+          const __m256 streamed_4 = _mm256_loadu_ps(&_data_pdfs_20_34_10[ctr_0 - 1]);
+          const __m256 streamed_5 = _mm256_load_ps(&_data_pdfs_2m1_35_10[ctr_0]);
+          const __m256 streamed_6 = _mm256_load_ps(&_data_pdfs_21_36_10[ctr_0]);
+          const __m256 streamed_7 = _mm256_loadu_ps(&_data_pdfs_20_37_1m1[ctr_0 + 1]);
+          const __m256 streamed_8 = _mm256_loadu_ps(&_data_pdfs_20_38_1m1[ctr_0 - 1]);
+          const __m256 streamed_9 = _mm256_loadu_ps(&_data_pdfs_20_39_11[ctr_0 + 1]);
+          const __m256 streamed_10 = _mm256_loadu_ps(&_data_pdfs_20_310_11[ctr_0 - 1]);
+          const __m256 streamed_11 = _mm256_load_ps(&_data_pdfs_2m1_311_1m1[ctr_0]);
+          const __m256 streamed_12 = _mm256_load_ps(&_data_pdfs_2m1_312_11[ctr_0]);
+          const __m256 streamed_13 = _mm256_loadu_ps(&_data_pdfs_2m1_313_10[ctr_0 + 1]);
+          const __m256 streamed_14 = _mm256_loadu_ps(&_data_pdfs_2m1_314_10[ctr_0 - 1]);
+          const __m256 streamed_15 = _mm256_load_ps(&_data_pdfs_21_315_1m1[ctr_0]);
+          const __m256 streamed_16 = _mm256_load_ps(&_data_pdfs_21_316_11[ctr_0]);
+          const __m256 streamed_17 = _mm256_loadu_ps(&_data_pdfs_21_317_10[ctr_0 + 1]);
+          const __m256 streamed_18 = _mm256_loadu_ps(&_data_pdfs_21_318_10[ctr_0 - 1]);
+          const __m256 vel0Term = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(streamed_10, streamed_14), streamed_18), streamed_4), streamed_8);
+          const __m256 momdensity_0 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(streamed_13, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(streamed_17, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_3, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_7, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_9, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), vel0Term);
+          const __m256 vel1Term = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(streamed_1, streamed_11), streamed_15), streamed_7);
+          const __m256 momdensity_1 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(streamed_10, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(streamed_12, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_16, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_2, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_9, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), streamed_8), vel1Term);
+          const __m256 vel2Term = _mm256_add_ps(_mm256_add_ps(streamed_12, streamed_13), streamed_5);
+          const __m256 rho = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(streamed_0, streamed_16), streamed_17), streamed_2), streamed_3), streamed_6), streamed_9), vel0Term), vel1Term), vel2Term);
+          const __m256 momdensity_2 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(streamed_15, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(streamed_16, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_17, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_18, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_6, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), streamed_11), streamed_14), vel2Term);
+          const __m256 u_0 = _mm256_add_ps(_mm256_mul_ps(momdensity_0, _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_mul_ps(_mm256_mul_ps(_mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f), _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_load_ps(&_data_force_20_30_10[ctr_0])));
+          const __m256 u_1 = _mm256_add_ps(_mm256_mul_ps(momdensity_1, _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_mul_ps(_mm256_mul_ps(_mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f), _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_load_ps(&_data_force_20_31_10[ctr_0])));
+          const __m256 u_2 = _mm256_add_ps(_mm256_mul_ps(momdensity_2, _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_mul_ps(_mm256_mul_ps(_mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f), _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_load_ps(&_data_force_20_32_10[ctr_0])));
+          _mm256_store_ps(&_data_velocity_20_30_10[ctr_0], u_0);
+          _mm256_store_ps(&_data_velocity_20_31_10[ctr_0], u_1);
+          _mm256_store_ps(&_data_velocity_20_32_10[ctr_0], u_2);
+          _mm256_store_ps(&_data_pdfs_tmp_20_30_10[ctr_0], streamed_0);
+          _mm256_store_ps(&_data_pdfs_tmp_20_31_10[ctr_0], streamed_1);
+          _mm256_store_ps(&_data_pdfs_tmp_20_32_10[ctr_0], streamed_2);
+          _mm256_store_ps(&_data_pdfs_tmp_20_33_10[ctr_0], streamed_3);
+          _mm256_store_ps(&_data_pdfs_tmp_20_34_10[ctr_0], streamed_4);
+          _mm256_store_ps(&_data_pdfs_tmp_20_35_10[ctr_0], streamed_5);
+          _mm256_store_ps(&_data_pdfs_tmp_20_36_10[ctr_0], streamed_6);
+          _mm256_store_ps(&_data_pdfs_tmp_20_37_10[ctr_0], streamed_7);
+          _mm256_store_ps(&_data_pdfs_tmp_20_38_10[ctr_0], streamed_8);
+          _mm256_store_ps(&_data_pdfs_tmp_20_39_10[ctr_0], streamed_9);
+          _mm256_store_ps(&_data_pdfs_tmp_20_310_10[ctr_0], streamed_10);
+          _mm256_store_ps(&_data_pdfs_tmp_20_311_10[ctr_0], streamed_11);
+          _mm256_store_ps(&_data_pdfs_tmp_20_312_10[ctr_0], streamed_12);
+          _mm256_store_ps(&_data_pdfs_tmp_20_313_10[ctr_0], streamed_13);
+          _mm256_store_ps(&_data_pdfs_tmp_20_314_10[ctr_0], streamed_14);
+          _mm256_store_ps(&_data_pdfs_tmp_20_315_10[ctr_0], streamed_15);
+          _mm256_store_ps(&_data_pdfs_tmp_20_316_10[ctr_0], streamed_16);
+          _mm256_store_ps(&_data_pdfs_tmp_20_317_10[ctr_0], streamed_17);
+          _mm256_store_ps(&_data_pdfs_tmp_20_318_10[ctr_0], streamed_18);
+        }
+        for (int64_t ctr_0 = (int64_t)((_size_force_0 - 2) / (8)) * (8) + 1; ctr_0 < _size_force_0 - 1; ctr_0 += 1) {
+          const float streamed_0 = _data_pdfs_20_30_10[ctr_0];
+          const float streamed_1 = _data_pdfs_20_31_1m1[ctr_0];
+          const float streamed_2 = _data_pdfs_20_32_11[ctr_0];
+          const float streamed_3 = _data_pdfs_20_33_10[ctr_0 + 1];
+          const float streamed_4 = _data_pdfs_20_34_10[ctr_0 - 1];
+          const float streamed_5 = _data_pdfs_2m1_35_10[ctr_0];
+          const float streamed_6 = _data_pdfs_21_36_10[ctr_0];
+          const float streamed_7 = _data_pdfs_20_37_1m1[ctr_0 + 1];
+          const float streamed_8 = _data_pdfs_20_38_1m1[ctr_0 - 1];
+          const float streamed_9 = _data_pdfs_20_39_11[ctr_0 + 1];
+          const float streamed_10 = _data_pdfs_20_310_11[ctr_0 - 1];
+          const float streamed_11 = _data_pdfs_2m1_311_1m1[ctr_0];
+          const float streamed_12 = _data_pdfs_2m1_312_11[ctr_0];
+          const float streamed_13 = _data_pdfs_2m1_313_10[ctr_0 + 1];
+          const float streamed_14 = _data_pdfs_2m1_314_10[ctr_0 - 1];
+          const float streamed_15 = _data_pdfs_21_315_1m1[ctr_0];
+          const float streamed_16 = _data_pdfs_21_316_11[ctr_0];
+          const float streamed_17 = _data_pdfs_21_317_10[ctr_0 + 1];
+          const float streamed_18 = _data_pdfs_21_318_10[ctr_0 - 1];
+          const float vel0Term = streamed_10 + streamed_14 + streamed_18 + streamed_4 + streamed_8;
+          const float momdensity_0 = streamed_13 * -1.0f + streamed_17 * -1.0f + streamed_3 * -1.0f + streamed_7 * -1.0f + streamed_9 * -1.0f + vel0Term;
+          const float vel1Term = streamed_1 + streamed_11 + streamed_15 + streamed_7;
+          const float momdensity_1 = streamed_10 * -1.0f + streamed_12 * -1.0f + streamed_16 * -1.0f + streamed_2 * -1.0f + streamed_8 + streamed_9 * -1.0f + vel1Term;
+          const float vel2Term = streamed_12 + streamed_13 + streamed_5;
+          const float rho = streamed_0 + streamed_16 + streamed_17 + streamed_2 + streamed_3 + streamed_6 + streamed_9 + vel0Term + vel1Term + vel2Term;
+          const float momdensity_2 = streamed_11 + streamed_14 + streamed_15 * -1.0f + streamed_16 * -1.0f + streamed_17 * -1.0f + streamed_18 * -1.0f + streamed_6 * -1.0f + vel2Term;
+          const float u_0 = momdensity_0 * ((1.0f) / (rho)) + 0.5f * ((1.0f) / (rho)) * _data_force_20_30_10[ctr_0];
+          const float u_1 = momdensity_1 * ((1.0f) / (rho)) + 0.5f * ((1.0f) / (rho)) * _data_force_20_31_10[ctr_0];
+          const float u_2 = momdensity_2 * ((1.0f) / (rho)) + 0.5f * ((1.0f) / (rho)) * _data_force_20_32_10[ctr_0];
+          _data_velocity_20_30_10[ctr_0] = u_0;
+          _data_velocity_20_31_10[ctr_0] = u_1;
+          _data_velocity_20_32_10[ctr_0] = u_2;
+          _data_pdfs_tmp_20_30_10[ctr_0] = streamed_0;
+          _data_pdfs_tmp_20_31_10[ctr_0] = streamed_1;
+          _data_pdfs_tmp_20_32_10[ctr_0] = streamed_2;
+          _data_pdfs_tmp_20_33_10[ctr_0] = streamed_3;
+          _data_pdfs_tmp_20_34_10[ctr_0] = streamed_4;
+          _data_pdfs_tmp_20_35_10[ctr_0] = streamed_5;
+          _data_pdfs_tmp_20_36_10[ctr_0] = streamed_6;
+          _data_pdfs_tmp_20_37_10[ctr_0] = streamed_7;
+          _data_pdfs_tmp_20_38_10[ctr_0] = streamed_8;
+          _data_pdfs_tmp_20_39_10[ctr_0] = streamed_9;
+          _data_pdfs_tmp_20_310_10[ctr_0] = streamed_10;
+          _data_pdfs_tmp_20_311_10[ctr_0] = streamed_11;
+          _data_pdfs_tmp_20_312_10[ctr_0] = streamed_12;
+          _data_pdfs_tmp_20_313_10[ctr_0] = streamed_13;
+          _data_pdfs_tmp_20_314_10[ctr_0] = streamed_14;
+          _data_pdfs_tmp_20_315_10[ctr_0] = streamed_15;
+          _data_pdfs_tmp_20_316_10[ctr_0] = streamed_16;
+          _data_pdfs_tmp_20_317_10[ctr_0] = streamed_17;
+          _data_pdfs_tmp_20_318_10[ctr_0] = streamed_18;
+        }
+      }
+    }
+  }
+}
+} // namespace internal_5e7ed0276adbfbb1ac4789ac0a0f54c4
+
+void StreamSweepSinglePrecisionAVX::run(IBlock *block) {
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+  auto velocity = block->getData<field::GhostLayerField<float, 3>>(velocityID);
+  field::GhostLayerField<float, 19> *pdfs_tmp;
+  {
+    // Getting temporary field pdfs_tmp
+    auto it = cache_pdfs_.find(pdfs);
+    if (it != cache_pdfs_.end()) {
+      pdfs_tmp = *it;
+    } else {
+      pdfs_tmp = pdfs->cloneUninitialized();
+      cache_pdfs_.insert(pdfs_tmp);
+    }
+  }
+
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT const _data_pdfs = pdfs->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs_tmp->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(velocity->nrOfGhostLayers()));
+  float *RESTRICT _data_velocity = velocity->dataAt(-1, -1, -1, 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)velocity->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 2));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 2));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 2));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+  const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+  const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_5e7ed0276adbfbb1ac4789ac0a0f54c4::streamsweepsingleprecisionavx_streamsweepsingleprecisionavx(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+  pdfs->swapDataPointers(pdfs_tmp);
+}
+
+void StreamSweepSinglePrecisionAVX::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
+  CellInterval ci = globalCellInterval;
+  CellInterval blockBB = blocks->getBlockCellBB(*block);
+  blockBB.expand(ghostLayers);
+  ci.intersect(blockBB);
+  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
+  if (ci.empty())
+    return;
+
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+  auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
+  auto velocity = block->getData<field::GhostLayerField<float, 3>>(velocityID);
+  field::GhostLayerField<float, 19> *pdfs_tmp;
+  {
+    // Getting temporary field pdfs_tmp
+    auto it = cache_pdfs_.find(pdfs);
+    if (it != cache_pdfs_.end()) {
+      pdfs_tmp = *it;
+    } else {
+      pdfs_tmp = pdfs->cloneUninitialized();
+      cache_pdfs_.insert(pdfs_tmp);
+    }
+  }
+
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(force->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(force->nrOfGhostLayers()));
+  float *RESTRICT const _data_force = force->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
+  float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
+  float *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)pdfs_tmp->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(velocity->nrOfGhostLayers()));
+  float *RESTRICT _data_velocity = velocity->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+  WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)velocity->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+  const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+  const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+  const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
+  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
+  WALBERLA_ASSERT_EQUAL((uintptr_t)force->dataAt(0, 0, 0, 0) % 32, 0);
+  const int64_t _stride_force_1 = int64_t(force->yStride());
+  const int64_t _stride_force_2 = int64_t(force->zStride());
+  const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+  const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+  const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+  const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+  const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+  const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+  const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+  internal_5e7ed0276adbfbb1ac4789ac0a0f54c4::streamsweepsingleprecisionavx_streamsweepsingleprecisionavx(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+  pdfs->swapDataPointers(pdfs_tmp);
+}
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecisionAVX.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecisionAVX.h
new file mode 100644
index 00000000000..57285414ff5
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/StreamSweepSinglePrecisionAVX.h
@@ -0,0 +1,115 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file StreamSweepSinglePrecisionAVX.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.2, lbmpy v1.2,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class StreamSweepSinglePrecisionAVX {
+public:
+  StreamSweepSinglePrecisionAVX(BlockDataID forceID_, BlockDataID pdfsID_,
+                                BlockDataID velocityID_)
+      : forceID(forceID_), pdfsID(pdfsID_), velocityID(velocityID_){};
+
+  ~StreamSweepSinglePrecisionAVX() {
+    for (auto p : cache_pdfs_) {
+      delete p;
+    }
+  }
+
+  void run(IBlock *block);
+
+  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers, IBlock *block);
+
+  void operator()(IBlock *block) { run(block); }
+
+  static std::function<void(IBlock *)>
+  getSweep(const shared_ptr<StreamSweepSinglePrecisionAVX> &kernel) {
+    return [kernel](IBlock *b) { kernel->run(b); };
+  }
+
+  static std::function<void(IBlock *)> getSweepOnCellInterval(
+      const shared_ptr<StreamSweepSinglePrecisionAVX> &kernel,
+      const shared_ptr<StructuredBlockStorage> &blocks,
+      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
+    return [kernel, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  std::function<void(IBlock *)> getSweep() {
+    return [this](IBlock *b) { this->run(b); };
+  }
+
+  std::function<void(IBlock *)>
+  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
+                         const CellInterval &globalCellInterval,
+                         cell_idx_t ghostLayers = 1) {
+    return [this, blocks, globalCellInterval, ghostLayers](IBlock *b) {
+      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b);
+    };
+  }
+
+  BlockDataID forceID;
+  BlockDataID pdfsID;
+  BlockDataID velocityID;
+
+private:
+  std::set<field::GhostLayerField<float, 19> *,
+           field::SwapableCompare<field::GhostLayerField<float, 19> *>>
+      cache_pdfs_;
+};
+
+} // namespace pystencils
+} // namespace walberla
+
+#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
+    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/myintrin.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/myintrin.h
new file mode 100644
index 00000000000..ca634371953
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/myintrin.h
@@ -0,0 +1,127 @@
+// kernel generated with pystencils v1.0+12.g54b91e2, lbmpy
+// v1.0+9.g19115d4.dirty, lbmpy_walberla/pystencils_walberla from commit
+// e1fe2ad1dcbe8f31ea79d95e8a5a5cc0ee3691f3
+
+#pragma once
+
+#if defined(__SSE2__) || defined(_MSC_VER)
+QUALIFIERS __m128 _my_cvtepu32_ps(const __m128i v) {
+#ifdef __AVX512VL__
+  return _mm_cvtepu32_ps(v);
+#else
+  __m128i v2 = _mm_srli_epi32(v, 1);
+  __m128i v1 = _mm_and_si128(v, _mm_set1_epi32(1));
+  __m128 v2f = _mm_cvtepi32_ps(v2);
+  __m128 v1f = _mm_cvtepi32_ps(v1);
+  return _mm_add_ps(_mm_add_ps(v2f, v2f), v1f);
+#endif
+}
+
+QUALIFIERS void _MY_TRANSPOSE4_EPI32(__m128i &R0, __m128i &R1, __m128i &R2,
+                                     __m128i &R3) {
+  __m128i T0, T1, T2, T3;
+  T0 = _mm_unpacklo_epi32(R0, R1);
+  T1 = _mm_unpacklo_epi32(R2, R3);
+  T2 = _mm_unpackhi_epi32(R0, R1);
+  T3 = _mm_unpackhi_epi32(R2, R3);
+  R0 = _mm_unpacklo_epi64(T0, T1);
+  R1 = _mm_unpackhi_epi64(T0, T1);
+  R2 = _mm_unpacklo_epi64(T2, T3);
+  R3 = _mm_unpackhi_epi64(T2, T3);
+}
+#endif
+
+#if defined(__SSE4_1__) || defined(_MSC_VER)
+#if !defined(__AVX512VL__) && defined(__GNUC__) && __GNUC__ >= 5 &&            \
+    !defined(__clang__)
+__attribute__((optimize("no-associative-math")))
+#endif
+QUALIFIERS __m128d
+_my_cvtepu64_pd(const __m128i x) {
+#ifdef __AVX512VL__
+  return _mm_cvtepu64_pd(x);
+#elif defined(__clang__)
+  return __builtin_convertvector(
+      (uint64_t __attribute__((__vector_size__(16))))x, __m128d);
+#else
+  __m128i xH = _mm_srli_epi64(x, 32);
+  xH = _mm_or_si128(
+      xH, _mm_castpd_si128(_mm_set1_pd(19342813113834066795298816.))); //  2^84
+  __m128i xL = _mm_blend_epi16(
+      x, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)), 0xcc); //  2^52
+  __m128d f =
+      _mm_sub_pd(_mm_castsi128_pd(xH),
+                 _mm_set1_pd(19342813118337666422669312.)); //  2^84 + 2^52
+  return _mm_add_pd(f, _mm_castsi128_pd(xL));
+#endif
+}
+#endif
+
+#ifdef __AVX2__
+QUALIFIERS __m256i _my256_set_m128i(__m128i hi, __m128i lo) {
+#if (!defined(__GNUC__) || __GNUC__ >= 8) || defined(__clang__)
+  return _mm256_set_m128i(hi, lo);
+#else
+  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
+#endif
+}
+
+QUALIFIERS __m256d _my256_set_m128d(__m128d hi, __m128d lo) {
+#if (!defined(__GNUC__) || __GNUC__ >= 8) || defined(__clang__)
+  return _mm256_set_m128d(hi, lo);
+#else
+  return _mm256_insertf128_pd(_mm256_castpd128_pd256(lo), hi, 1);
+#endif
+}
+
+QUALIFIERS __m256 _my256_cvtepu32_ps(const __m256i v) {
+#ifdef __AVX512VL__
+  return _mm256_cvtepu32_ps(v);
+#else
+  __m256i v2 = _mm256_srli_epi32(v, 1);
+  __m256i v1 = _mm256_and_si256(v, _mm256_set1_epi32(1));
+  __m256 v2f = _mm256_cvtepi32_ps(v2);
+  __m256 v1f = _mm256_cvtepi32_ps(v1);
+  return _mm256_add_ps(_mm256_add_ps(v2f, v2f), v1f);
+#endif
+}
+
+#if !defined(__AVX512VL__) && defined(__GNUC__) && __GNUC__ >= 5 &&            \
+    !defined(__clang__)
+__attribute__((optimize("no-associative-math")))
+#endif
+QUALIFIERS __m256d
+_my256_cvtepu64_pd(const __m256i x) {
+#ifdef __AVX512VL__
+  return _mm256_cvtepu64_pd(x);
+#elif defined(__clang__)
+  return __builtin_convertvector(
+      (uint64_t __attribute__((__vector_size__(32))))x, __m256d);
+#else
+  __m256i xH = _mm256_srli_epi64(x, 32);
+  xH = _mm256_or_si256(xH, _mm256_castpd_si256(_mm256_set1_pd(
+                               19342813113834066795298816.))); //  2^84
+  __m256i xL = _mm256_blend_epi16(
+      x, _mm256_castpd_si256(_mm256_set1_pd(0x0010000000000000)),
+      0xcc); //  2^52
+  __m256d f = _mm256_sub_pd(
+      _mm256_castsi256_pd(xH),
+      _mm256_set1_pd(19342813118337666422669312.)); //  2^84 + 2^52
+  return _mm256_add_pd(f, _mm256_castsi256_pd(xL));
+#endif
+}
+#endif
+
+#ifdef __AVX512F__
+QUALIFIERS __m512i _my512_set_m128i(__m128i d, __m128i c, __m128i b,
+                                    __m128i a) {
+  return _mm512_inserti32x4(
+      _mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512(a), b, 1), c,
+                         2),
+      d, 3);
+}
+
+QUALIFIERS __m512d _my512_set_m256d(__m256d b, __m256d a) {
+  return _mm512_insertf64x4(_mm512_castpd256_pd512(a), b, 1);
+}
+#endif
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/philox_rand.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/philox_rand.h
new file mode 100644
index 00000000000..3c97a91f984
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/philox_rand.h
@@ -0,0 +1,1299 @@
+// kernel generated with pystencils v1.0+12.g54b91e2, lbmpy
+// v1.0+9.g19115d4.dirty, lbmpy_walberla/pystencils_walberla from commit
+// e1fe2ad1dcbe8f31ea79d95e8a5a5cc0ee3691f3
+
+#include <cstdint>
+
+#if defined(__SSE2__) || defined(_MSC_VER)
+#include <emmintrin.h> // SSE2
+#endif
+#ifdef __AVX2__
+#include <immintrin.h> // AVX*
+#elif defined(__SSE4_1__) || defined(_MSC_VER)
+#include <smmintrin.h> // SSE4
+#ifdef __FMA__
+#include <immintrin.h> // FMA
+#endif
+#endif
+
+#ifdef __ARM_NEON
+#include <arm_neon.h>
+#endif
+#ifdef __ARM_FEATURE_SVE
+#include <arm_sve.h>
+#endif
+
+#if defined(__powerpc__) && defined(__GNUC__) && !defined(__clang__) &&        \
+    !defined(__xlC__)
+#include <ppu_intrinsics.h>
+#endif
+#ifdef __ALTIVEC__
+#include <altivec.h>
+#undef bool
+#ifndef _ARCH_PWR8
+#include <pveclib/vec_int64_ppc.h>
+#endif
+#endif
+
+#ifndef __CUDA_ARCH__
+#define QUALIFIERS inline
+#include "myintrin.h"
+#else
+#define QUALIFIERS static __forceinline__ __device__
+#endif
+
+#define PHILOX_W32_0 (0x9E3779B9)
+#define PHILOX_W32_1 (0xBB67AE85)
+#define PHILOX_M4x32_0 (0xD2511F53)
+#define PHILOX_M4x32_1 (0xCD9E8D57)
+#define TWOPOW53_INV_DOUBLE (1.1102230246251565e-16)
+#define TWOPOW32_INV_FLOAT (2.3283064e-10f)
+
+typedef std::uint32_t uint32;
+typedef std::uint64_t uint64;
+
+#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) &&           \
+    __ARM_FEATURE_SVE_BITS > 0
+typedef svfloat32_t svfloat32_st
+    __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));
+typedef svfloat64_t svfloat64_st
+    __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));
+#elif defined(__ARM_FEATURE_SVE)
+typedef svfloat32_t svfloat32_st;
+typedef svfloat64_t svfloat64_st;
+#endif
+
+QUALIFIERS uint32 mulhilo32(uint32 a, uint32 b, uint32 *hip) {
+#ifndef __CUDA_ARCH__
+  // host code
+#if defined(__powerpc__) && (!defined(__clang__) || defined(__xlC__))
+  *hip = __mulhwu(a, b);
+  return a * b;
+#else
+  uint64 product = ((uint64)a) * ((uint64)b);
+  *hip = product >> 32;
+  return (uint32)product;
+#endif
+#else
+  // device code
+  *hip = __umulhi(a, b);
+  return a * b;
+#endif
+}
+
+QUALIFIERS void _philox4x32round(uint32 *ctr, uint32 *key) {
+  uint32 hi0;
+  uint32 hi1;
+  uint32 lo0 = mulhilo32(PHILOX_M4x32_0, ctr[0], &hi0);
+  uint32 lo1 = mulhilo32(PHILOX_M4x32_1, ctr[2], &hi1);
+
+  ctr[0] = hi1 ^ ctr[1] ^ key[0];
+  ctr[1] = lo1;
+  ctr[2] = hi0 ^ ctr[3] ^ key[1];
+  ctr[3] = lo0;
+}
+
+QUALIFIERS void _philox4x32bumpkey(uint32 *key) {
+  key[0] += PHILOX_W32_0;
+  key[1] += PHILOX_W32_1;
+}
+
+QUALIFIERS double _uniform_double_hq(uint32 x, uint32 y) {
+  double z = (double)((uint64)x ^ ((uint64)y << (53 - 32)));
+  return z * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE / 2.0);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               double &rnd1, double &rnd2) {
+  uint32 key[2] = {key0, key1};
+  uint32 ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  rnd1 = _uniform_double_hq(ctr[0], ctr[1]);
+  rnd2 = _uniform_double_hq(ctr[2], ctr[3]);
+}
+
+QUALIFIERS void philox_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2,
+                              uint32 ctr3, uint32 key0, uint32 key1,
+                              float &rnd1, float &rnd2, float &rnd3,
+                              float &rnd4) {
+  uint32 key[2] = {key0, key1};
+  uint32 ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  rnd1 = (float)(ctr[0]) * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT / 2.0f);
+  rnd2 = (float)(ctr[1]) * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT / 2.0f);
+  rnd3 = (float)(ctr[2]) * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT / 2.0f);
+  rnd4 = (float)(ctr[3]) * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT / 2.0f);
+}
+
+#ifndef __CUDA_ARCH__
+#if defined(__SSE4_1__) || defined(_MSC_VER)
+QUALIFIERS void _philox4x32round(__m128i *ctr, __m128i *key) {
+  __m128i lohi0a = _mm_mul_epu32(ctr[0], _mm_set1_epi32(PHILOX_M4x32_0));
+  __m128i lohi0b =
+      _mm_mul_epu32(_mm_srli_epi64(ctr[0], 32), _mm_set1_epi32(PHILOX_M4x32_0));
+  __m128i lohi1a = _mm_mul_epu32(ctr[2], _mm_set1_epi32(PHILOX_M4x32_1));
+  __m128i lohi1b =
+      _mm_mul_epu32(_mm_srli_epi64(ctr[2], 32), _mm_set1_epi32(PHILOX_M4x32_1));
+
+  lohi0a = _mm_shuffle_epi32(lohi0a, 0xD8);
+  lohi0b = _mm_shuffle_epi32(lohi0b, 0xD8);
+  lohi1a = _mm_shuffle_epi32(lohi1a, 0xD8);
+  lohi1b = _mm_shuffle_epi32(lohi1b, 0xD8);
+
+  __m128i lo0 = _mm_unpacklo_epi32(lohi0a, lohi0b);
+  __m128i hi0 = _mm_unpackhi_epi32(lohi0a, lohi0b);
+  __m128i lo1 = _mm_unpacklo_epi32(lohi1a, lohi1b);
+  __m128i hi1 = _mm_unpackhi_epi32(lohi1a, lohi1b);
+
+  ctr[0] = _mm_xor_si128(_mm_xor_si128(hi1, ctr[1]), key[0]);
+  ctr[1] = lo1;
+  ctr[2] = _mm_xor_si128(_mm_xor_si128(hi0, ctr[3]), key[1]);
+  ctr[3] = lo0;
+}
+
+QUALIFIERS void _philox4x32bumpkey(__m128i *key) {
+  key[0] = _mm_add_epi32(key[0], _mm_set1_epi32(PHILOX_W32_0));
+  key[1] = _mm_add_epi32(key[1], _mm_set1_epi32(PHILOX_W32_1));
+}
+
+template <bool high>
+QUALIFIERS __m128d _uniform_double_hq(__m128i x, __m128i y) {
+  // convert 32 to 64 bit
+  if (high) {
+    x = _mm_unpackhi_epi32(x, _mm_setzero_si128());
+    y = _mm_unpackhi_epi32(y, _mm_setzero_si128());
+  } else {
+    x = _mm_unpacklo_epi32(x, _mm_setzero_si128());
+    y = _mm_unpacklo_epi32(y, _mm_setzero_si128());
+  }
+
+  // calculate z = x ^ y << (53 - 32))
+  __m128i z = _mm_sll_epi64(y, _mm_set1_epi64x(53 - 32));
+  z = _mm_xor_si128(x, z);
+
+  // convert uint64 to double
+  __m128d rs = _my_cvtepu64_pd(z);
+  // calculate rs * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0)
+#ifdef __FMA__
+  rs = _mm_fmadd_pd(rs, _mm_set1_pd(TWOPOW53_INV_DOUBLE),
+                    _mm_set1_pd(TWOPOW53_INV_DOUBLE / 2.0));
+#else
+  rs = _mm_mul_pd(rs, _mm_set1_pd(TWOPOW53_INV_DOUBLE));
+  rs = _mm_add_pd(rs, _mm_set1_pd(TWOPOW53_INV_DOUBLE / 2.0));
+#endif
+
+  return rs;
+}
+
+QUALIFIERS void philox_float4(__m128i ctr0, __m128i ctr1, __m128i ctr2,
+                              __m128i ctr3, uint32 key0, uint32 key1,
+                              __m128 &rnd1, __m128 &rnd2, __m128 &rnd3,
+                              __m128 &rnd4) {
+  __m128i key[2] = {_mm_set1_epi32(key0), _mm_set1_epi32(key1)};
+  __m128i ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  // convert uint32 to float
+  rnd1 = _my_cvtepu32_ps(ctr[0]);
+  rnd2 = _my_cvtepu32_ps(ctr[1]);
+  rnd3 = _my_cvtepu32_ps(ctr[2]);
+  rnd4 = _my_cvtepu32_ps(ctr[3]);
+  // calculate rnd * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f)
+#ifdef __FMA__
+  rnd1 = _mm_fmadd_ps(rnd1, _mm_set1_ps(TWOPOW32_INV_FLOAT),
+                      _mm_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+  rnd2 = _mm_fmadd_ps(rnd2, _mm_set1_ps(TWOPOW32_INV_FLOAT),
+                      _mm_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+  rnd3 = _mm_fmadd_ps(rnd3, _mm_set1_ps(TWOPOW32_INV_FLOAT),
+                      _mm_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+  rnd4 = _mm_fmadd_ps(rnd4, _mm_set1_ps(TWOPOW32_INV_FLOAT),
+                      _mm_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+#else
+  rnd1 = _mm_mul_ps(rnd1, _mm_set1_ps(TWOPOW32_INV_FLOAT));
+  rnd1 = _mm_add_ps(rnd1, _mm_set1_ps(TWOPOW32_INV_FLOAT / 2.0f));
+  rnd2 = _mm_mul_ps(rnd2, _mm_set1_ps(TWOPOW32_INV_FLOAT));
+  rnd2 = _mm_add_ps(rnd2, _mm_set1_ps(TWOPOW32_INV_FLOAT / 2.0f));
+  rnd3 = _mm_mul_ps(rnd3, _mm_set1_ps(TWOPOW32_INV_FLOAT));
+  rnd3 = _mm_add_ps(rnd3, _mm_set1_ps(TWOPOW32_INV_FLOAT / 2.0f));
+  rnd4 = _mm_mul_ps(rnd4, _mm_set1_ps(TWOPOW32_INV_FLOAT));
+  rnd4 = _mm_add_ps(rnd4, _mm_set1_ps(TWOPOW32_INV_FLOAT / 2.0f));
+#endif
+}
+
+QUALIFIERS void philox_double2(__m128i ctr0, __m128i ctr1, __m128i ctr2,
+                               __m128i ctr3, uint32 key0, uint32 key1,
+                               __m128d &rnd1lo, __m128d &rnd1hi,
+                               __m128d &rnd2lo, __m128d &rnd2hi) {
+  __m128i key[2] = {_mm_set1_epi32(key0), _mm_set1_epi32(key1)};
+  __m128i ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  rnd1lo = _uniform_double_hq<false>(ctr[0], ctr[1]);
+  rnd1hi = _uniform_double_hq<true>(ctr[0], ctr[1]);
+  rnd2lo = _uniform_double_hq<false>(ctr[2], ctr[3]);
+  rnd2hi = _uniform_double_hq<true>(ctr[2], ctr[3]);
+}
+
+QUALIFIERS void philox_float4(uint32 ctr0, __m128i ctr1, uint32 ctr2,
+                              uint32 ctr3, uint32 key0, uint32 key1,
+                              __m128 &rnd1, __m128 &rnd2, __m128 &rnd3,
+                              __m128 &rnd4) {
+  __m128i ctr0v = _mm_set1_epi32(ctr0);
+  __m128i ctr2v = _mm_set1_epi32(ctr2);
+  __m128i ctr3v = _mm_set1_epi32(ctr3);
+
+  philox_float4(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, rnd2, rnd3, rnd4);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, __m128i ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               __m128d &rnd1lo, __m128d &rnd1hi,
+                               __m128d &rnd2lo, __m128d &rnd2hi) {
+  __m128i ctr0v = _mm_set1_epi32(ctr0);
+  __m128i ctr2v = _mm_set1_epi32(ctr2);
+  __m128i ctr3v = _mm_set1_epi32(ctr3);
+
+  philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1lo, rnd1hi, rnd2lo,
+                 rnd2hi);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, __m128i ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               __m128d &rnd1, __m128d &rnd2) {
+  __m128i ctr0v = _mm_set1_epi32(ctr0);
+  __m128i ctr2v = _mm_set1_epi32(ctr2);
+  __m128i ctr3v = _mm_set1_epi32(ctr3);
+
+  __m128d ignore;
+  philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, ignore, rnd2,
+                 ignore);
+}
+#endif
+
+#ifdef __ALTIVEC__
+QUALIFIERS void _philox4x32round(__vector unsigned int *ctr,
+                                 __vector unsigned int *key) {
+#ifndef _ARCH_PWR8
+  __vector unsigned int lo0 = vec_mul(ctr[0], vec_splats(PHILOX_M4x32_0));
+  __vector unsigned int lo1 = vec_mul(ctr[2], vec_splats(PHILOX_M4x32_1));
+  __vector unsigned int hi0 = vec_mulhuw(ctr[0], vec_splats(PHILOX_M4x32_0));
+  __vector unsigned int hi1 = vec_mulhuw(ctr[2], vec_splats(PHILOX_M4x32_1));
+#elif defined(_ARCH_PWR10)
+  __vector unsigned int lo0 = vec_mul(ctr[0], vec_splats(PHILOX_M4x32_0));
+  __vector unsigned int lo1 = vec_mul(ctr[2], vec_splats(PHILOX_M4x32_1));
+  __vector unsigned int hi0 = vec_mulh(ctr[0], vec_splats(PHILOX_M4x32_0));
+  __vector unsigned int hi1 = vec_mulh(ctr[2], vec_splats(PHILOX_M4x32_1));
+#else
+  __vector unsigned int lohi0a =
+      (__vector unsigned int)vec_mule(ctr[0], vec_splats(PHILOX_M4x32_0));
+  __vector unsigned int lohi0b =
+      (__vector unsigned int)vec_mulo(ctr[0], vec_splats(PHILOX_M4x32_0));
+  __vector unsigned int lohi1a =
+      (__vector unsigned int)vec_mule(ctr[2], vec_splats(PHILOX_M4x32_1));
+  __vector unsigned int lohi1b =
+      (__vector unsigned int)vec_mulo(ctr[2], vec_splats(PHILOX_M4x32_1));
+
+#ifdef __LITTLE_ENDIAN__
+  __vector unsigned int lo0 = vec_mergee(lohi0a, lohi0b);
+  __vector unsigned int lo1 = vec_mergee(lohi1a, lohi1b);
+  __vector unsigned int hi0 = vec_mergeo(lohi0a, lohi0b);
+  __vector unsigned int hi1 = vec_mergeo(lohi1a, lohi1b);
+#else
+  __vector unsigned int lo0 = vec_mergeo(lohi0a, lohi0b);
+  __vector unsigned int lo1 = vec_mergeo(lohi1a, lohi1b);
+  __vector unsigned int hi0 = vec_mergee(lohi0a, lohi0b);
+  __vector unsigned int hi1 = vec_mergee(lohi1a, lohi1b);
+#endif
+#endif
+
+  ctr[0] = vec_xor(vec_xor(hi1, ctr[1]), key[0]);
+  ctr[1] = lo1;
+  ctr[2] = vec_xor(vec_xor(hi0, ctr[3]), key[1]);
+  ctr[3] = lo0;
+}
+
+QUALIFIERS void _philox4x32bumpkey(__vector unsigned int *key) {
+  key[0] = vec_add(key[0], vec_splats(PHILOX_W32_0));
+  key[1] = vec_add(key[1], vec_splats(PHILOX_W32_1));
+}
+
+#ifdef __VSX__
+template <bool high>
+QUALIFIERS __vector double _uniform_double_hq(__vector unsigned int x,
+                                              __vector unsigned int y) {
+  // convert 32 to 64 bit
+#ifdef __LITTLE_ENDIAN__
+  if (high) {
+    x = vec_mergel(x, vec_splats(0U));
+    y = vec_mergel(y, vec_splats(0U));
+  } else {
+    x = vec_mergeh(x, vec_splats(0U));
+    y = vec_mergeh(y, vec_splats(0U));
+  }
+#else
+  if (high) {
+    x = vec_mergel(vec_splats(0U), x);
+    y = vec_mergel(vec_splats(0U), y);
+  } else {
+    x = vec_mergeh(vec_splats(0U), x);
+    y = vec_mergeh(vec_splats(0U), y);
+  }
+#endif
+
+  // calculate z = x ^ y << (53 - 32))
+#ifdef _ARCH_PWR8
+  __vector unsigned long long z =
+      vec_sl((__vector unsigned long long)y, vec_splats(53ULL - 32ULL));
+#else
+  __vector unsigned long long z =
+      vec_vsld((__vector unsigned long long)y, vec_splats(53ULL - 32ULL));
+#endif
+  z = vec_xor((__vector unsigned long long)x, z);
+
+  // convert uint64 to double
+#ifdef __xlC__
+  __vector double rs = vec_ctd(z, 0);
+#else
+  __vector double rs = vec_ctf(z, 0);
+#endif
+  // calculate rs * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0)
+  rs = vec_madd(rs, vec_splats(TWOPOW53_INV_DOUBLE),
+                vec_splats(TWOPOW53_INV_DOUBLE / 2.0));
+
+  return rs;
+}
+#endif
+
+QUALIFIERS void philox_float4(__vector unsigned int ctr0,
+                              __vector unsigned int ctr1,
+                              __vector unsigned int ctr2,
+                              __vector unsigned int ctr3, uint32 key0,
+                              uint32 key1, __vector float &rnd1,
+                              __vector float &rnd2, __vector float &rnd3,
+                              __vector float &rnd4) {
+  __vector unsigned int key[2] = {vec_splats(key0), vec_splats(key1)};
+  __vector unsigned int ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  // convert uint32 to float
+  rnd1 = vec_ctf(ctr[0], 0);
+  rnd2 = vec_ctf(ctr[1], 0);
+  rnd3 = vec_ctf(ctr[2], 0);
+  rnd4 = vec_ctf(ctr[3], 0);
+  // calculate rnd * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f)
+  rnd1 = vec_madd(rnd1, vec_splats(TWOPOW32_INV_FLOAT),
+                  vec_splats(TWOPOW32_INV_FLOAT / 2.0f));
+  rnd2 = vec_madd(rnd2, vec_splats(TWOPOW32_INV_FLOAT),
+                  vec_splats(TWOPOW32_INV_FLOAT / 2.0f));
+  rnd3 = vec_madd(rnd3, vec_splats(TWOPOW32_INV_FLOAT),
+                  vec_splats(TWOPOW32_INV_FLOAT / 2.0f));
+  rnd4 = vec_madd(rnd4, vec_splats(TWOPOW32_INV_FLOAT),
+                  vec_splats(TWOPOW32_INV_FLOAT / 2.0f));
+}
+
+#ifdef __VSX__
+QUALIFIERS void philox_double2(__vector unsigned int ctr0,
+                               __vector unsigned int ctr1,
+                               __vector unsigned int ctr2,
+                               __vector unsigned int ctr3, uint32 key0,
+                               uint32 key1, __vector double &rnd1lo,
+                               __vector double &rnd1hi, __vector double &rnd2lo,
+                               __vector double &rnd2hi) {
+  __vector unsigned int key[2] = {vec_splats(key0), vec_splats(key1)};
+  __vector unsigned int ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  rnd1lo = _uniform_double_hq<false>(ctr[0], ctr[1]);
+  rnd1hi = _uniform_double_hq<true>(ctr[0], ctr[1]);
+  rnd2lo = _uniform_double_hq<false>(ctr[2], ctr[3]);
+  rnd2hi = _uniform_double_hq<true>(ctr[2], ctr[3]);
+}
+#endif
+
+QUALIFIERS void philox_float4(uint32 ctr0, __vector unsigned int ctr1,
+                              uint32 ctr2, uint32 ctr3, uint32 key0,
+                              uint32 key1, __vector float &rnd1,
+                              __vector float &rnd2, __vector float &rnd3,
+                              __vector float &rnd4) {
+  __vector unsigned int ctr0v = vec_splats(ctr0);
+  __vector unsigned int ctr2v = vec_splats(ctr2);
+  __vector unsigned int ctr3v = vec_splats(ctr3);
+
+  philox_float4(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, rnd2, rnd3, rnd4);
+}
+
+QUALIFIERS void philox_float4(uint32 ctr0, __vector int ctr1, uint32 ctr2,
+                              uint32 ctr3, uint32 key0, uint32 key1,
+                              __vector float &rnd1, __vector float &rnd2,
+                              __vector float &rnd3, __vector float &rnd4) {
+  philox_float4(ctr0, (__vector unsigned int)ctr1, ctr2, ctr3, key0, key1, rnd1,
+                rnd2, rnd3, rnd4);
+}
+
+#ifdef __VSX__
+QUALIFIERS void philox_double2(uint32 ctr0, __vector unsigned int ctr1,
+                               uint32 ctr2, uint32 ctr3, uint32 key0,
+                               uint32 key1, __vector double &rnd1lo,
+                               __vector double &rnd1hi, __vector double &rnd2lo,
+                               __vector double &rnd2hi) {
+  __vector unsigned int ctr0v = vec_splats(ctr0);
+  __vector unsigned int ctr2v = vec_splats(ctr2);
+  __vector unsigned int ctr3v = vec_splats(ctr3);
+
+  philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1lo, rnd1hi, rnd2lo,
+                 rnd2hi);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, __vector unsigned int ctr1,
+                               uint32 ctr2, uint32 ctr3, uint32 key0,
+                               uint32 key1, __vector double &rnd1,
+                               __vector double &rnd2) {
+  __vector unsigned int ctr0v = vec_splats(ctr0);
+  __vector unsigned int ctr2v = vec_splats(ctr2);
+  __vector unsigned int ctr3v = vec_splats(ctr3);
+
+  __vector double ignore;
+  philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, ignore, rnd2,
+                 ignore);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, __vector int ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               __vector double &rnd1, __vector double &rnd2) {
+  philox_double2(ctr0, (__vector unsigned int)ctr1, ctr2, ctr3, key0, key1,
+                 rnd1, rnd2);
+}
+#endif
+#endif
+
+#if defined(__ARM_NEON)
+QUALIFIERS void _philox4x32round(uint32x4_t *ctr, uint32x4_t *key) {
+  uint32x4_t lohi0a = vreinterpretq_u32_u64(
+      vmull_u32(vget_low_u32(ctr[0]), vdup_n_u32(PHILOX_M4x32_0)));
+  uint32x4_t lohi0b = vreinterpretq_u32_u64(
+      vmull_high_u32(ctr[0], vdupq_n_u32(PHILOX_M4x32_0)));
+  uint32x4_t lohi1a = vreinterpretq_u32_u64(
+      vmull_u32(vget_low_u32(ctr[2]), vdup_n_u32(PHILOX_M4x32_1)));
+  uint32x4_t lohi1b = vreinterpretq_u32_u64(
+      vmull_high_u32(ctr[2], vdupq_n_u32(PHILOX_M4x32_1)));
+
+  uint32x4_t lo0 = vuzp1q_u32(lohi0a, lohi0b);
+  uint32x4_t lo1 = vuzp1q_u32(lohi1a, lohi1b);
+  uint32x4_t hi0 = vuzp2q_u32(lohi0a, lohi0b);
+  uint32x4_t hi1 = vuzp2q_u32(lohi1a, lohi1b);
+
+  ctr[0] = veorq_u32(veorq_u32(hi1, ctr[1]), key[0]);
+  ctr[1] = lo1;
+  ctr[2] = veorq_u32(veorq_u32(hi0, ctr[3]), key[1]);
+  ctr[3] = lo0;
+}
+
+QUALIFIERS void _philox4x32bumpkey(uint32x4_t *key) {
+  key[0] = vaddq_u32(key[0], vdupq_n_u32(PHILOX_W32_0));
+  key[1] = vaddq_u32(key[1], vdupq_n_u32(PHILOX_W32_1));
+}
+
+template <bool high>
+QUALIFIERS float64x2_t _uniform_double_hq(uint32x4_t x, uint32x4_t y) {
+  // convert 32 to 64 bit
+  if (high) {
+    x = vzip2q_u32(x, vdupq_n_u32(0));
+    y = vzip2q_u32(y, vdupq_n_u32(0));
+  } else {
+    x = vzip1q_u32(x, vdupq_n_u32(0));
+    y = vzip1q_u32(y, vdupq_n_u32(0));
+  }
+
+  // calculate z = x ^ y << (53 - 32))
+  uint64x2_t z = vshlq_n_u64(vreinterpretq_u64_u32(y), 53 - 32);
+  z = veorq_u64(vreinterpretq_u64_u32(x), z);
+
+  // convert uint64 to double
+  float64x2_t rs = vcvtq_f64_u64(z);
+  // calculate rs * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0)
+  rs = vfmaq_f64(vdupq_n_f64(TWOPOW53_INV_DOUBLE / 2.0),
+                 vdupq_n_f64(TWOPOW53_INV_DOUBLE), rs);
+
+  return rs;
+}
+
+QUALIFIERS void philox_float4(uint32x4_t ctr0, uint32x4_t ctr1, uint32x4_t ctr2,
+                              uint32x4_t ctr3, uint32 key0, uint32 key1,
+                              float32x4_t &rnd1, float32x4_t &rnd2,
+                              float32x4_t &rnd3, float32x4_t &rnd4) {
+  uint32x4_t key[2] = {vdupq_n_u32(key0), vdupq_n_u32(key1)};
+  uint32x4_t ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  // convert uint32 to float
+  rnd1 = vcvtq_f32_u32(ctr[0]);
+  rnd2 = vcvtq_f32_u32(ctr[1]);
+  rnd3 = vcvtq_f32_u32(ctr[2]);
+  rnd4 = vcvtq_f32_u32(ctr[3]);
+  // calculate rnd * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f)
+  rnd1 = vfmaq_f32(vdupq_n_f32(TWOPOW32_INV_FLOAT / 2.0),
+                   vdupq_n_f32(TWOPOW32_INV_FLOAT), rnd1);
+  rnd2 = vfmaq_f32(vdupq_n_f32(TWOPOW32_INV_FLOAT / 2.0),
+                   vdupq_n_f32(TWOPOW32_INV_FLOAT), rnd2);
+  rnd3 = vfmaq_f32(vdupq_n_f32(TWOPOW32_INV_FLOAT / 2.0),
+                   vdupq_n_f32(TWOPOW32_INV_FLOAT), rnd3);
+  rnd4 = vfmaq_f32(vdupq_n_f32(TWOPOW32_INV_FLOAT / 2.0),
+                   vdupq_n_f32(TWOPOW32_INV_FLOAT), rnd4);
+}
+
+QUALIFIERS void philox_double2(uint32x4_t ctr0, uint32x4_t ctr1,
+                               uint32x4_t ctr2, uint32x4_t ctr3, uint32 key0,
+                               uint32 key1, float64x2_t &rnd1lo,
+                               float64x2_t &rnd1hi, float64x2_t &rnd2lo,
+                               float64x2_t &rnd2hi) {
+  uint32x4_t key[2] = {vdupq_n_u32(key0), vdupq_n_u32(key1)};
+  uint32x4_t ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  rnd1lo = _uniform_double_hq<false>(ctr[0], ctr[1]);
+  rnd1hi = _uniform_double_hq<true>(ctr[0], ctr[1]);
+  rnd2lo = _uniform_double_hq<false>(ctr[2], ctr[3]);
+  rnd2hi = _uniform_double_hq<true>(ctr[2], ctr[3]);
+}
+
+QUALIFIERS void philox_float4(uint32 ctr0, uint32x4_t ctr1, uint32 ctr2,
+                              uint32 ctr3, uint32 key0, uint32 key1,
+                              float32x4_t &rnd1, float32x4_t &rnd2,
+                              float32x4_t &rnd3, float32x4_t &rnd4) {
+  uint32x4_t ctr0v = vdupq_n_u32(ctr0);
+  uint32x4_t ctr2v = vdupq_n_u32(ctr2);
+  uint32x4_t ctr3v = vdupq_n_u32(ctr3);
+
+  philox_float4(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, rnd2, rnd3, rnd4);
+}
+
+QUALIFIERS void philox_float4(uint32 ctr0, int32x4_t ctr1, uint32 ctr2,
+                              uint32 ctr3, uint32 key0, uint32 key1,
+                              float32x4_t &rnd1, float32x4_t &rnd2,
+                              float32x4_t &rnd3, float32x4_t &rnd4) {
+  philox_float4(ctr0, vreinterpretq_u32_s32(ctr1), ctr2, ctr3, key0, key1, rnd1,
+                rnd2, rnd3, rnd4);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, uint32x4_t ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               float64x2_t &rnd1lo, float64x2_t &rnd1hi,
+                               float64x2_t &rnd2lo, float64x2_t &rnd2hi) {
+  uint32x4_t ctr0v = vdupq_n_u32(ctr0);
+  uint32x4_t ctr2v = vdupq_n_u32(ctr2);
+  uint32x4_t ctr3v = vdupq_n_u32(ctr3);
+
+  philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1lo, rnd1hi, rnd2lo,
+                 rnd2hi);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, uint32x4_t ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               float64x2_t &rnd1, float64x2_t &rnd2) {
+  uint32x4_t ctr0v = vdupq_n_u32(ctr0);
+  uint32x4_t ctr2v = vdupq_n_u32(ctr2);
+  uint32x4_t ctr3v = vdupq_n_u32(ctr3);
+
+  float64x2_t ignore;
+  philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, ignore, rnd2,
+                 ignore);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, int32x4_t ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               float64x2_t &rnd1, float64x2_t &rnd2) {
+  philox_double2(ctr0, vreinterpretq_u32_s32(ctr1), ctr2, ctr3, key0, key1,
+                 rnd1, rnd2);
+}
+#endif
+
+#if defined(__ARM_FEATURE_SVE)
+QUALIFIERS void _philox4x32round(svuint32x4_t &ctr, svuint32x2_t &key) {
+  svuint32_t lo0 =
+      svmul_u32_x(svptrue_b32(), svget4_u32(ctr, 0), svdup_u32(PHILOX_M4x32_0));
+  svuint32_t lo1 =
+      svmul_u32_x(svptrue_b32(), svget4_u32(ctr, 2), svdup_u32(PHILOX_M4x32_1));
+  svuint32_t hi0 = svmulh_u32_x(svptrue_b32(), svget4_u32(ctr, 0),
+                                svdup_u32(PHILOX_M4x32_0));
+  svuint32_t hi1 = svmulh_u32_x(svptrue_b32(), svget4_u32(ctr, 2),
+                                svdup_u32(PHILOX_M4x32_1));
+
+  ctr = svset4_u32(
+      ctr, 0,
+      sveor_u32_x(svptrue_b32(),
+                  sveor_u32_x(svptrue_b32(), hi1, svget4_u32(ctr, 1)),
+                  svget2_u32(key, 0)));
+  ctr = svset4_u32(ctr, 1, lo1);
+  ctr = svset4_u32(
+      ctr, 2,
+      sveor_u32_x(svptrue_b32(),
+                  sveor_u32_x(svptrue_b32(), hi0, svget4_u32(ctr, 3)),
+                  svget2_u32(key, 1)));
+  ctr = svset4_u32(ctr, 3, lo0);
+}
+
+QUALIFIERS void _philox4x32bumpkey(svuint32x2_t &key) {
+  key = svset2_u32(
+      key, 0,
+      svadd_u32_x(svptrue_b32(), svget2_u32(key, 0), svdup_u32(PHILOX_W32_0)));
+  key = svset2_u32(
+      key, 1,
+      svadd_u32_x(svptrue_b32(), svget2_u32(key, 1), svdup_u32(PHILOX_W32_1)));
+}
+
+template <bool high>
+QUALIFIERS svfloat64_t _uniform_double_hq(svuint32_t x, svuint32_t y) {
+  // convert 32 to 64 bit
+  if (high) {
+    x = svzip2_u32(x, svdup_u32(0));
+    y = svzip2_u32(y, svdup_u32(0));
+  } else {
+    x = svzip1_u32(x, svdup_u32(0));
+    y = svzip1_u32(y, svdup_u32(0));
+  }
+
+  // calculate z = x ^ y << (53 - 32))
+  svuint64_t z =
+      svlsl_n_u64_x(svptrue_b64(), svreinterpret_u64_u32(y), 53 - 32);
+  z = sveor_u64_x(svptrue_b64(), svreinterpret_u64_u32(x), z);
+
+  // convert uint64 to double
+  svfloat64_t rs = svcvt_f64_u64_x(svptrue_b64(), z);
+  // calculate rs * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0)
+  rs = svmad_f64_x(svptrue_b64(), rs, svdup_f64(TWOPOW53_INV_DOUBLE),
+                   svdup_f64(TWOPOW53_INV_DOUBLE / 2.0));
+
+  return rs;
+}
+
+QUALIFIERS void philox_float4(svuint32_t ctr0, svuint32_t ctr1, svuint32_t ctr2,
+                              svuint32_t ctr3, uint32 key0, uint32 key1,
+                              svfloat32_st &rnd1, svfloat32_st &rnd2,
+                              svfloat32_st &rnd3, svfloat32_st &rnd4) {
+  svuint32x2_t key = svcreate2_u32(svdup_u32(key0), svdup_u32(key1));
+  svuint32x4_t ctr = svcreate4_u32(ctr0, ctr1, ctr2, ctr3);
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  // convert uint32 to float
+  rnd1 = svcvt_f32_u32_x(svptrue_b32(), svget4_u32(ctr, 0));
+  rnd2 = svcvt_f32_u32_x(svptrue_b32(), svget4_u32(ctr, 1));
+  rnd3 = svcvt_f32_u32_x(svptrue_b32(), svget4_u32(ctr, 2));
+  rnd4 = svcvt_f32_u32_x(svptrue_b32(), svget4_u32(ctr, 3));
+  // calculate rnd * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f)
+  rnd1 = svmad_f32_x(svptrue_b32(), rnd1, svdup_f32(TWOPOW32_INV_FLOAT),
+                     svdup_f32(TWOPOW32_INV_FLOAT / 2.0));
+  rnd2 = svmad_f32_x(svptrue_b32(), rnd2, svdup_f32(TWOPOW32_INV_FLOAT),
+                     svdup_f32(TWOPOW32_INV_FLOAT / 2.0));
+  rnd3 = svmad_f32_x(svptrue_b32(), rnd3, svdup_f32(TWOPOW32_INV_FLOAT),
+                     svdup_f32(TWOPOW32_INV_FLOAT / 2.0));
+  rnd4 = svmad_f32_x(svptrue_b32(), rnd4, svdup_f32(TWOPOW32_INV_FLOAT),
+                     svdup_f32(TWOPOW32_INV_FLOAT / 2.0));
+}
+
+QUALIFIERS void philox_double2(svuint32_t ctr0, svuint32_t ctr1,
+                               svuint32_t ctr2, svuint32_t ctr3, uint32 key0,
+                               uint32 key1, svfloat64_st &rnd1lo,
+                               svfloat64_st &rnd1hi, svfloat64_st &rnd2lo,
+                               svfloat64_st &rnd2hi) {
+  svuint32x2_t key = svcreate2_u32(svdup_u32(key0), svdup_u32(key1));
+  svuint32x4_t ctr = svcreate4_u32(ctr0, ctr1, ctr2, ctr3);
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  rnd1lo = _uniform_double_hq<false>(svget4_u32(ctr, 0), svget4_u32(ctr, 1));
+  rnd1hi = _uniform_double_hq<true>(svget4_u32(ctr, 0), svget4_u32(ctr, 1));
+  rnd2lo = _uniform_double_hq<false>(svget4_u32(ctr, 2), svget4_u32(ctr, 3));
+  rnd2hi = _uniform_double_hq<true>(svget4_u32(ctr, 2), svget4_u32(ctr, 3));
+}
+
+QUALIFIERS void philox_float4(uint32 ctr0, svuint32_t ctr1, uint32 ctr2,
+                              uint32 ctr3, uint32 key0, uint32 key1,
+                              svfloat32_st &rnd1, svfloat32_st &rnd2,
+                              svfloat32_st &rnd3, svfloat32_st &rnd4) {
+  svuint32_t ctr0v = svdup_u32(ctr0);
+  svuint32_t ctr2v = svdup_u32(ctr2);
+  svuint32_t ctr3v = svdup_u32(ctr3);
+
+  philox_float4(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, rnd2, rnd3, rnd4);
+}
+
+QUALIFIERS void philox_float4(uint32 ctr0, svint32_t ctr1, uint32 ctr2,
+                              uint32 ctr3, uint32 key0, uint32 key1,
+                              svfloat32_st &rnd1, svfloat32_st &rnd2,
+                              svfloat32_st &rnd3, svfloat32_st &rnd4) {
+  philox_float4(ctr0, svreinterpret_u32_s32(ctr1), ctr2, ctr3, key0, key1, rnd1,
+                rnd2, rnd3, rnd4);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, svuint32_t ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               svfloat64_st &rnd1lo, svfloat64_st &rnd1hi,
+                               svfloat64_st &rnd2lo, svfloat64_st &rnd2hi) {
+  svuint32_t ctr0v = svdup_u32(ctr0);
+  svuint32_t ctr2v = svdup_u32(ctr2);
+  svuint32_t ctr3v = svdup_u32(ctr3);
+
+  philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1lo, rnd1hi, rnd2lo,
+                 rnd2hi);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, svuint32_t ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               svfloat64_st &rnd1, svfloat64_st &rnd2) {
+  svuint32_t ctr0v = svdup_u32(ctr0);
+  svuint32_t ctr2v = svdup_u32(ctr2);
+  svuint32_t ctr3v = svdup_u32(ctr3);
+
+  svfloat64_st ignore;
+  philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, ignore, rnd2,
+                 ignore);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, svint32_t ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               svfloat64_st &rnd1, svfloat64_st &rnd2) {
+  philox_double2(ctr0, svreinterpret_u32_s32(ctr1), ctr2, ctr3, key0, key1,
+                 rnd1, rnd2);
+}
+#endif
+
+#ifdef __AVX2__
+QUALIFIERS void _philox4x32round(__m256i *ctr, __m256i *key) {
+  __m256i lohi0a = _mm256_mul_epu32(ctr[0], _mm256_set1_epi32(PHILOX_M4x32_0));
+  __m256i lohi0b = _mm256_mul_epu32(_mm256_srli_epi64(ctr[0], 32),
+                                    _mm256_set1_epi32(PHILOX_M4x32_0));
+  __m256i lohi1a = _mm256_mul_epu32(ctr[2], _mm256_set1_epi32(PHILOX_M4x32_1));
+  __m256i lohi1b = _mm256_mul_epu32(_mm256_srli_epi64(ctr[2], 32),
+                                    _mm256_set1_epi32(PHILOX_M4x32_1));
+
+  lohi0a = _mm256_shuffle_epi32(lohi0a, 0xD8);
+  lohi0b = _mm256_shuffle_epi32(lohi0b, 0xD8);
+  lohi1a = _mm256_shuffle_epi32(lohi1a, 0xD8);
+  lohi1b = _mm256_shuffle_epi32(lohi1b, 0xD8);
+
+  __m256i lo0 = _mm256_unpacklo_epi32(lohi0a, lohi0b);
+  __m256i hi0 = _mm256_unpackhi_epi32(lohi0a, lohi0b);
+  __m256i lo1 = _mm256_unpacklo_epi32(lohi1a, lohi1b);
+  __m256i hi1 = _mm256_unpackhi_epi32(lohi1a, lohi1b);
+
+  ctr[0] = _mm256_xor_si256(_mm256_xor_si256(hi1, ctr[1]), key[0]);
+  ctr[1] = lo1;
+  ctr[2] = _mm256_xor_si256(_mm256_xor_si256(hi0, ctr[3]), key[1]);
+  ctr[3] = lo0;
+}
+
+QUALIFIERS void _philox4x32bumpkey(__m256i *key) {
+  key[0] = _mm256_add_epi32(key[0], _mm256_set1_epi32(PHILOX_W32_0));
+  key[1] = _mm256_add_epi32(key[1], _mm256_set1_epi32(PHILOX_W32_1));
+}
+
+template <bool high>
+QUALIFIERS __m256d _uniform_double_hq(__m256i x, __m256i y) {
+  // convert 32 to 64 bit
+  if (high) {
+    x = _mm256_cvtepu32_epi64(_mm256_extracti128_si256(x, 1));
+    y = _mm256_cvtepu32_epi64(_mm256_extracti128_si256(y, 1));
+  } else {
+    x = _mm256_cvtepu32_epi64(_mm256_extracti128_si256(x, 0));
+    y = _mm256_cvtepu32_epi64(_mm256_extracti128_si256(y, 0));
+  }
+
+  // calculate z = x ^ y << (53 - 32))
+  __m256i z = _mm256_sll_epi64(y, _mm_set1_epi64x(53 - 32));
+  z = _mm256_xor_si256(x, z);
+
+  // convert uint64 to double
+  __m256d rs = _my256_cvtepu64_pd(z);
+  // calculate rs * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0)
+#ifdef __FMA__
+  rs = _mm256_fmadd_pd(rs, _mm256_set1_pd(TWOPOW53_INV_DOUBLE),
+                       _mm256_set1_pd(TWOPOW53_INV_DOUBLE / 2.0));
+#else
+  rs = _mm256_mul_pd(rs, _mm256_set1_pd(TWOPOW53_INV_DOUBLE));
+  rs = _mm256_add_pd(rs, _mm256_set1_pd(TWOPOW53_INV_DOUBLE / 2.0));
+#endif
+
+  return rs;
+}
+
+QUALIFIERS void philox_float4(__m256i ctr0, __m256i ctr1, __m256i ctr2,
+                              __m256i ctr3, uint32 key0, uint32 key1,
+                              __m256 &rnd1, __m256 &rnd2, __m256 &rnd3,
+                              __m256 &rnd4) {
+  __m256i key[2] = {_mm256_set1_epi32(key0), _mm256_set1_epi32(key1)};
+  __m256i ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  // convert uint32 to float
+  rnd1 = _my256_cvtepu32_ps(ctr[0]);
+  rnd2 = _my256_cvtepu32_ps(ctr[1]);
+  rnd3 = _my256_cvtepu32_ps(ctr[2]);
+  rnd4 = _my256_cvtepu32_ps(ctr[3]);
+  // calculate rnd * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f)
+#ifdef __FMA__
+  rnd1 = _mm256_fmadd_ps(rnd1, _mm256_set1_ps(TWOPOW32_INV_FLOAT),
+                         _mm256_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+  rnd2 = _mm256_fmadd_ps(rnd2, _mm256_set1_ps(TWOPOW32_INV_FLOAT),
+                         _mm256_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+  rnd3 = _mm256_fmadd_ps(rnd3, _mm256_set1_ps(TWOPOW32_INV_FLOAT),
+                         _mm256_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+  rnd4 = _mm256_fmadd_ps(rnd4, _mm256_set1_ps(TWOPOW32_INV_FLOAT),
+                         _mm256_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+#else
+  rnd1 = _mm256_mul_ps(rnd1, _mm256_set1_ps(TWOPOW32_INV_FLOAT));
+  rnd1 = _mm256_add_ps(rnd1, _mm256_set1_ps(TWOPOW32_INV_FLOAT / 2.0f));
+  rnd2 = _mm256_mul_ps(rnd2, _mm256_set1_ps(TWOPOW32_INV_FLOAT));
+  rnd2 = _mm256_add_ps(rnd2, _mm256_set1_ps(TWOPOW32_INV_FLOAT / 2.0f));
+  rnd3 = _mm256_mul_ps(rnd3, _mm256_set1_ps(TWOPOW32_INV_FLOAT));
+  rnd3 = _mm256_add_ps(rnd3, _mm256_set1_ps(TWOPOW32_INV_FLOAT / 2.0f));
+  rnd4 = _mm256_mul_ps(rnd4, _mm256_set1_ps(TWOPOW32_INV_FLOAT));
+  rnd4 = _mm256_add_ps(rnd4, _mm256_set1_ps(TWOPOW32_INV_FLOAT / 2.0f));
+#endif
+}
+
+QUALIFIERS void philox_double2(__m256i ctr0, __m256i ctr1, __m256i ctr2,
+                               __m256i ctr3, uint32 key0, uint32 key1,
+                               __m256d &rnd1lo, __m256d &rnd1hi,
+                               __m256d &rnd2lo, __m256d &rnd2hi) {
+  __m256i key[2] = {_mm256_set1_epi32(key0), _mm256_set1_epi32(key1)};
+  __m256i ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  rnd1lo = _uniform_double_hq<false>(ctr[0], ctr[1]);
+  rnd1hi = _uniform_double_hq<true>(ctr[0], ctr[1]);
+  rnd2lo = _uniform_double_hq<false>(ctr[2], ctr[3]);
+  rnd2hi = _uniform_double_hq<true>(ctr[2], ctr[3]);
+}
+
+QUALIFIERS void philox_float4(uint32 ctr0, __m256i ctr1, uint32 ctr2,
+                              uint32 ctr3, uint32 key0, uint32 key1,
+                              __m256 &rnd1, __m256 &rnd2, __m256 &rnd3,
+                              __m256 &rnd4) {
+  __m256i ctr0v = _mm256_set1_epi32(ctr0);
+  __m256i ctr2v = _mm256_set1_epi32(ctr2);
+  __m256i ctr3v = _mm256_set1_epi32(ctr3);
+
+  philox_float4(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, rnd2, rnd3, rnd4);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, __m256i ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               __m256d &rnd1lo, __m256d &rnd1hi,
+                               __m256d &rnd2lo, __m256d &rnd2hi) {
+  __m256i ctr0v = _mm256_set1_epi32(ctr0);
+  __m256i ctr2v = _mm256_set1_epi32(ctr2);
+  __m256i ctr3v = _mm256_set1_epi32(ctr3);
+
+  philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1lo, rnd1hi, rnd2lo,
+                 rnd2hi);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, __m256i ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               __m256d &rnd1, __m256d &rnd2) {
+#if 0
+    __m256i ctr0v = _mm256_set1_epi32(ctr0);
+    __m256i ctr2v = _mm256_set1_epi32(ctr2);
+    __m256i ctr3v = _mm256_set1_epi32(ctr3);
+
+    __m256d ignore;
+    philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, ignore, rnd2, ignore);
+#else
+  __m128d rnd1lo, rnd1hi, rnd2lo, rnd2hi;
+  philox_double2(ctr0, _mm256_extractf128_si256(ctr1, 0), ctr2, ctr3, key0,
+                 key1, rnd1lo, rnd1hi, rnd2lo, rnd2hi);
+  rnd1 = _my256_set_m128d(rnd1hi, rnd1lo);
+  rnd2 = _my256_set_m128d(rnd2hi, rnd2lo);
+#endif
+}
+#endif
+
+#ifdef __AVX512F__
+QUALIFIERS void _philox4x32round(__m512i *ctr, __m512i *key) {
+  __m512i lohi0a = _mm512_mul_epu32(ctr[0], _mm512_set1_epi32(PHILOX_M4x32_0));
+  __m512i lohi0b = _mm512_mul_epu32(_mm512_srli_epi64(ctr[0], 32),
+                                    _mm512_set1_epi32(PHILOX_M4x32_0));
+  __m512i lohi1a = _mm512_mul_epu32(ctr[2], _mm512_set1_epi32(PHILOX_M4x32_1));
+  __m512i lohi1b = _mm512_mul_epu32(_mm512_srli_epi64(ctr[2], 32),
+                                    _mm512_set1_epi32(PHILOX_M4x32_1));
+
+  lohi0a = _mm512_shuffle_epi32(lohi0a, _MM_PERM_DBCA);
+  lohi0b = _mm512_shuffle_epi32(lohi0b, _MM_PERM_DBCA);
+  lohi1a = _mm512_shuffle_epi32(lohi1a, _MM_PERM_DBCA);
+  lohi1b = _mm512_shuffle_epi32(lohi1b, _MM_PERM_DBCA);
+
+  __m512i lo0 = _mm512_unpacklo_epi32(lohi0a, lohi0b);
+  __m512i hi0 = _mm512_unpackhi_epi32(lohi0a, lohi0b);
+  __m512i lo1 = _mm512_unpacklo_epi32(lohi1a, lohi1b);
+  __m512i hi1 = _mm512_unpackhi_epi32(lohi1a, lohi1b);
+
+  ctr[0] = _mm512_xor_si512(_mm512_xor_si512(hi1, ctr[1]), key[0]);
+  ctr[1] = lo1;
+  ctr[2] = _mm512_xor_si512(_mm512_xor_si512(hi0, ctr[3]), key[1]);
+  ctr[3] = lo0;
+}
+
+QUALIFIERS void _philox4x32bumpkey(__m512i *key) {
+  key[0] = _mm512_add_epi32(key[0], _mm512_set1_epi32(PHILOX_W32_0));
+  key[1] = _mm512_add_epi32(key[1], _mm512_set1_epi32(PHILOX_W32_1));
+}
+
+template <bool high>
+QUALIFIERS __m512d _uniform_double_hq(__m512i x, __m512i y) {
+  // convert 32 to 64 bit
+  if (high) {
+    x = _mm512_cvtepu32_epi64(_mm512_extracti64x4_epi64(x, 1));
+    y = _mm512_cvtepu32_epi64(_mm512_extracti64x4_epi64(y, 1));
+  } else {
+    x = _mm512_cvtepu32_epi64(_mm512_extracti64x4_epi64(x, 0));
+    y = _mm512_cvtepu32_epi64(_mm512_extracti64x4_epi64(y, 0));
+  }
+
+  // calculate z = x ^ y << (53 - 32))
+  __m512i z = _mm512_sll_epi64(y, _mm_set1_epi64x(53 - 32));
+  z = _mm512_xor_si512(x, z);
+
+  // convert uint64 to double
+  __m512d rs = _mm512_cvtepu64_pd(z);
+  // calculate rs * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0)
+  rs = _mm512_fmadd_pd(rs, _mm512_set1_pd(TWOPOW53_INV_DOUBLE),
+                       _mm512_set1_pd(TWOPOW53_INV_DOUBLE / 2.0));
+
+  return rs;
+}
+
+QUALIFIERS void philox_float4(__m512i ctr0, __m512i ctr1, __m512i ctr2,
+                              __m512i ctr3, uint32 key0, uint32 key1,
+                              __m512 &rnd1, __m512 &rnd2, __m512 &rnd3,
+                              __m512 &rnd4) {
+  __m512i key[2] = {_mm512_set1_epi32(key0), _mm512_set1_epi32(key1)};
+  __m512i ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  // convert uint32 to float
+  rnd1 = _mm512_cvtepu32_ps(ctr[0]);
+  rnd2 = _mm512_cvtepu32_ps(ctr[1]);
+  rnd3 = _mm512_cvtepu32_ps(ctr[2]);
+  rnd4 = _mm512_cvtepu32_ps(ctr[3]);
+  // calculate rnd * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f)
+  rnd1 = _mm512_fmadd_ps(rnd1, _mm512_set1_ps(TWOPOW32_INV_FLOAT),
+                         _mm512_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+  rnd2 = _mm512_fmadd_ps(rnd2, _mm512_set1_ps(TWOPOW32_INV_FLOAT),
+                         _mm512_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+  rnd3 = _mm512_fmadd_ps(rnd3, _mm512_set1_ps(TWOPOW32_INV_FLOAT),
+                         _mm512_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+  rnd4 = _mm512_fmadd_ps(rnd4, _mm512_set1_ps(TWOPOW32_INV_FLOAT),
+                         _mm512_set1_ps(TWOPOW32_INV_FLOAT / 2.0));
+}
+
+QUALIFIERS void philox_double2(__m512i ctr0, __m512i ctr1, __m512i ctr2,
+                               __m512i ctr3, uint32 key0, uint32 key1,
+                               __m512d &rnd1lo, __m512d &rnd1hi,
+                               __m512d &rnd2lo, __m512d &rnd2hi) {
+  __m512i key[2] = {_mm512_set1_epi32(key0), _mm512_set1_epi32(key1)};
+  __m512i ctr[4] = {ctr0, ctr1, ctr2, ctr3};
+  _philox4x32round(ctr, key); // 1
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 2
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 3
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 4
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 5
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 6
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 7
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 8
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 9
+  _philox4x32bumpkey(key);
+  _philox4x32round(ctr, key); // 10
+
+  rnd1lo = _uniform_double_hq<false>(ctr[0], ctr[1]);
+  rnd1hi = _uniform_double_hq<true>(ctr[0], ctr[1]);
+  rnd2lo = _uniform_double_hq<false>(ctr[2], ctr[3]);
+  rnd2hi = _uniform_double_hq<true>(ctr[2], ctr[3]);
+}
+
+QUALIFIERS void philox_float4(uint32 ctr0, __m512i ctr1, uint32 ctr2,
+                              uint32 ctr3, uint32 key0, uint32 key1,
+                              __m512 &rnd1, __m512 &rnd2, __m512 &rnd3,
+                              __m512 &rnd4) {
+  __m512i ctr0v = _mm512_set1_epi32(ctr0);
+  __m512i ctr2v = _mm512_set1_epi32(ctr2);
+  __m512i ctr3v = _mm512_set1_epi32(ctr3);
+
+  philox_float4(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, rnd2, rnd3, rnd4);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, __m512i ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               __m512d &rnd1lo, __m512d &rnd1hi,
+                               __m512d &rnd2lo, __m512d &rnd2hi) {
+  __m512i ctr0v = _mm512_set1_epi32(ctr0);
+  __m512i ctr2v = _mm512_set1_epi32(ctr2);
+  __m512i ctr3v = _mm512_set1_epi32(ctr3);
+
+  philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1lo, rnd1hi, rnd2lo,
+                 rnd2hi);
+}
+
+QUALIFIERS void philox_double2(uint32 ctr0, __m512i ctr1, uint32 ctr2,
+                               uint32 ctr3, uint32 key0, uint32 key1,
+                               __m512d &rnd1, __m512d &rnd2) {
+#if 0
+    __m512i ctr0v = _mm512_set1_epi32(ctr0);
+    __m512i ctr2v = _mm512_set1_epi32(ctr2);
+    __m512i ctr3v = _mm512_set1_epi32(ctr3);
+
+    __m512d ignore;
+    philox_double2(ctr0v, ctr1, ctr2v, ctr3v, key0, key1, rnd1, ignore, rnd2, ignore);
+#else
+  __m256d rnd1lo, rnd1hi, rnd2lo, rnd2hi;
+  philox_double2(ctr0, _mm512_extracti64x4_epi64(ctr1, 0), ctr2, ctr3, key0,
+                 key1, rnd1lo, rnd1hi, rnd2lo, rnd2hi);
+  rnd1 = _my512_set_m256d(rnd1hi, rnd1lo);
+  rnd2 = _my512_set_m256d(rnd2hi, rnd2lo);
+#endif
+}
+#endif
+#endif
diff --git a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp
new file mode 100644
index 00000000000..7e065049f8c
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <walberla_bridge/Architecture.hpp>
+
+#include "generated_kernels/Dynamic_UBB_double_precision.h"
+#include "generated_kernels/Dynamic_UBB_single_precision.h"
+#include "generated_kernels/FieldAccessorsDoublePrecision.h"
+#include "generated_kernels/FieldAccessorsSinglePrecision.h"
+#include "generated_kernels/InitialPDFsSetterDoublePrecision.h"
+#include "generated_kernels/InitialPDFsSetterSinglePrecision.h"
+#include "generated_kernels/StreamSweepDoublePrecision.h"
+#include "generated_kernels/StreamSweepSinglePrecision.h"
+
+#ifdef __AVX2__
+#include "generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.h"
+#include "generated_kernels/CollideSweepDoublePrecisionThermalizedAVX.h"
+#include "generated_kernels/CollideSweepSinglePrecisionLeesEdwardsAVX.h"
+#include "generated_kernels/CollideSweepSinglePrecisionThermalizedAVX.h"
+#else
+#include "generated_kernels/CollideSweepDoublePrecisionLeesEdwards.h"
+#include "generated_kernels/CollideSweepDoublePrecisionThermalized.h"
+#include "generated_kernels/CollideSweepSinglePrecisionLeesEdwards.h"
+#include "generated_kernels/CollideSweepSinglePrecisionThermalized.h"
+#endif
+
+namespace walberla {
+namespace detail {
+
+using lbmpy::Arch;
+
+template <typename FT = double, Arch AT = Arch::CPU> struct KernelTrait {
+#ifdef __AVX2__
+  using CollisionModelThermalized =
+      pystencils::CollideSweepDoublePrecisionThermalizedAVX;
+  using CollisionModelLeesEdwards =
+      pystencils::CollideSweepDoublePrecisionLeesEdwardsAVX;
+#else
+  using CollisionModelThermalized =
+      pystencils::CollideSweepDoublePrecisionThermalized;
+  using CollisionModelLeesEdwards =
+      pystencils::CollideSweepDoublePrecisionLeesEdwards;
+#endif
+  using StreamSweep = pystencils::StreamSweepDoublePrecision;
+  using InitialPDFsSetter = pystencils::InitialPDFsSetterDoublePrecision;
+};
+
+template <> struct KernelTrait<float, Arch::CPU> {
+#ifdef __AVX2__
+  using CollisionModelThermalized =
+      pystencils::CollideSweepSinglePrecisionThermalizedAVX;
+  using CollisionModelLeesEdwards =
+      pystencils::CollideSweepSinglePrecisionLeesEdwardsAVX;
+#else
+  using CollisionModelThermalized =
+      pystencils::CollideSweepSinglePrecisionThermalized;
+  using CollisionModelLeesEdwards =
+      pystencils::CollideSweepSinglePrecisionLeesEdwards;
+#endif
+  using StreamSweep = pystencils::StreamSweepSinglePrecision;
+  using InitialPDFsSetter = pystencils::InitialPDFsSetterSinglePrecision;
+};
+
+template <typename FT = double, Arch AT = Arch::CPU>
+struct BoundaryHandlingTrait {
+  using Dynamic_UBB = lbm::Dynamic_UBB_double_precision;
+};
+
+template <> struct BoundaryHandlingTrait<float, Arch::CPU> {
+  using Dynamic_UBB = lbm::Dynamic_UBB_single_precision;
+};
+
+} // namespace detail
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/lb_walberla_init.cpp b/src/walberla_bridge/src/lattice_boltzmann/lb_walberla_init.cpp
new file mode 100644
index 00000000000..9ce57049b42
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/lb_walberla_init.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "LBWalberlaImpl.hpp"
+
+#include <walberla_bridge/Architecture.hpp>
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+#include <walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp>
+
+#include <memory>
+
+std::shared_ptr<LBWalberlaBase>
+new_lb_walberla(std::shared_ptr<LatticeWalberla> const &lattice,
+                double viscosity, double density, bool single_precision) {
+  if (single_precision) {
+    return std::make_shared<walberla::LBWalberlaImpl<float, lbmpy::Arch::CPU>>(
+        lattice, viscosity, density);
+  }
+  return std::make_shared<walberla::LBWalberlaImpl<double, lbmpy::Arch::CPU>>(
+      lattice, viscosity, density);
+}
diff --git a/src/core/grid_based_algorithms/electrokinetics.cpp b/src/walberla_bridge/src/lattice_boltzmann/lb_walberla_init.cu
similarity index 93%
rename from src/core/grid_based_algorithms/electrokinetics.cpp
rename to src/walberla_bridge/src/lattice_boltzmann/lb_walberla_init.cu
index c1c311d13f9..9e75c37075d 100644
--- a/src/core/grid_based_algorithms/electrokinetics.cpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/lb_walberla_init.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 The ESPResSo project
+ * Copyright (C) 2022-2023 The ESPResSo project
  *
  * This file is part of ESPResSo.
  *
diff --git a/src/walberla_bridge/src/walberla_init.cpp b/src/walberla_bridge/src/walberla_init.cpp
new file mode 100644
index 00000000000..cf6f72870fa
--- /dev/null
+++ b/src/walberla_bridge/src/walberla_init.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <walberla_bridge/utils/ResourceManager.hpp>
+#include <walberla_bridge/walberla_init.hpp>
+
+#include <core/mpi/Environment.h>
+#include <core/mpi/MPIManager.h>
+
+#include <cassert>
+#include <memory>
+
+/** @brief waLBerla MPI communicator. */
+static std::shared_ptr<walberla::mpi::MPIManager> walberla_mpi_comm;
+/** @brief waLBerla MPI environment (destructor depends on the communicator). */
+static std::shared_ptr<walberla::mpi::Environment> walberla_mpi_env;
+
+namespace walberla {
+
+void mpi_init() {
+  assert(::walberla_mpi_env == nullptr);
+  assert(::walberla_mpi_comm == nullptr);
+  int argc = 0;
+  char **argv = nullptr;
+  ::walberla_mpi_env = std::make_shared<walberla::mpi::Environment>(argc, argv);
+  ::walberla_mpi_comm = walberla::MPIManager::instance();
+}
+
+std::unique_ptr<ResourceManager> get_vtk_dependent_resources() {
+  auto vtk_dependencies = std::make_unique<ResourceManager>();
+  // waLBerla MPI communicator (singleton)
+  vtk_dependencies->acquire_lock(::walberla_mpi_comm);
+  // waLBerla MPI environment (destructor depends on the MPI communicator)
+  vtk_dependencies->acquire_lock(::walberla_mpi_env);
+  return vtk_dependencies;
+}
+
+} // namespace walberla
diff --git a/src/walberla_bridge/tests/CMakeLists.txt b/src/walberla_bridge/tests/CMakeLists.txt
new file mode 100644
index 00000000000..5abe24f1499
--- /dev/null
+++ b/src/walberla_bridge/tests/CMakeLists.txt
@@ -0,0 +1,74 @@
+#
+# Copyright (C) 2020-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+include(unit_test)
+
+function(ESPRESSO_WALBERLA_UNIT_TEST)
+  cmake_parse_arguments(TEST "" "NAME;NUM_PROC" "SRC;DEPENDS" ${ARGN})
+  unit_test(NAME ${TEST_NAME} NUM_PROC ${TEST_NUM_PROC} SRC ${TEST_SRC} DEPENDS
+            ${TEST_DEPENDS})
+  set_target_properties(${TEST_NAME} PROPERTIES CXX_CLANG_TIDY "")
+  target_include_directories(${TEST_NAME} PRIVATE ${WALBERLA_INCLUDE_DIRS}
+                                                  ${walberla_BINARY_DIR}/src)
+  target_link_libraries(${TEST_NAME} PRIVATE ${WALBERLA_LIBS})
+endfunction()
+
+espresso_walberla_unit_test(
+  NAME ResourceManager_test SRC ResourceManager_test.cpp DEPENDS
+  espresso::walberla espresso::walberla::cpp_flags)
+
+espresso_walberla_unit_test(
+  NAME kernels_unit_tests SRC kernels_unit_tests.cpp DEPENDS espresso::walberla
+  espresso::walberla::cpp_flags espresso::utils)
+
+espresso_walberla_unit_test(
+  NAME LatticeWalberla_unit_tests SRC LatticeWalberla_unit_tests.cpp DEPENDS
+  espresso::walberla espresso::walberla::cpp_flags espresso::utils Boost::mpi
+  NUM_PROC 2)
+
+espresso_walberla_unit_test(
+  NAME LBWalberlaImpl_unit_tests SRC LBWalberlaImpl_unit_tests.cpp DEPENDS
+  espresso::walberla espresso::walberla::cpp_flags espresso::utils Boost::mpi
+  NUM_PROC 2)
+
+espresso_walberla_unit_test(
+  NAME LBWalberlaImpl_bspline_tests SRC LBWalberlaImpl_bspline_tests.cpp
+  DEPENDS espresso::walberla espresso::walberla::cpp_flags espresso::utils
+  Boost::mpi NUM_PROC 2)
+
+if(NOT (ESPRESSO_BUILD_WITH_ASAN OR ESPRESSO_BUILD_WITH_UBSAN))
+  espresso_walberla_unit_test(
+    NAME LBWalberlaImpl_statistical_tests SRC
+    LBWalberlaImpl_statistical_tests.cpp DEPENDS espresso::walberla
+    espresso::walberla::cpp_flags espresso::utils Boost::mpi)
+endif()
+
+espresso_walberla_unit_test(
+  NAME LBWalberlaImpl_flow_tests SRC LBWalberlaImpl_flow_tests.cpp DEPENDS
+  espresso::walberla espresso::walberla::cpp_flags espresso::utils Boost::mpi)
+
+espresso_walberla_unit_test(
+  NAME LBWalberlaImpl_lees_edwards_test SRC LBWalberlaImpl_lees_edwards.cpp
+  DEPENDS espresso::walberla espresso::walberla::cpp_flags espresso::utils
+  Boost::mpi)
+
+espresso_walberla_unit_test(
+  NAME EKinWalberlaImpl_unit_tests SRC EKinWalberlaImpl_unit_tests.cpp DEPENDS
+  espresso::walberla espresso::walberla::cpp_flags espresso::utils Boost::mpi
+  NUM_PROC 2)
diff --git a/src/walberla_bridge/tests/EKinWalberlaImpl_unit_tests.cpp b/src/walberla_bridge/tests/EKinWalberlaImpl_unit_tests.cpp
new file mode 100644
index 00000000000..c72afd106af
--- /dev/null
+++ b/src/walberla_bridge/tests/EKinWalberlaImpl_unit_tests.cpp
@@ -0,0 +1,583 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define BOOST_TEST_MODULE EK walberla node setters and getters test
+#define BOOST_TEST_DYN_LINK
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#define BOOST_TEST_NO_MAIN
+
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include "tests_common_ek.hpp"
+
+#include <walberla_bridge/VTKHandle.hpp>
+#include <walberla_bridge/electrokinetics/EKinWalberlaBase.hpp>
+#include <walberla_bridge/electrokinetics/ek_walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <boost/mpi/collectives/all_reduce.hpp>
+#include <boost/mpi/communicator.hpp>
+#include <boost/multi_array.hpp>
+
+#include <mpi.h>
+
+#include <cmath>
+#include <functional>
+#include <initializer_list>
+#include <memory>
+#include <stdexcept>
+#include <unordered_map>
+#include <vector>
+
+using Utils::hadamard_product;
+using Utils::Vector3d;
+using Utils::Vector3i;
+
+namespace bdata = boost::unit_test::data;
+
+static EKTestParameters params; // populated in main()
+
+BOOST_DATA_TEST_CASE(dimensions, bdata::make(all_eks()), ek_generator) {
+  using boost::test_tools::per_element;
+  auto ek = ek_generator(params);
+  auto constexpr zero = Vector3i{0, 0, 0};
+
+  auto const grid_dim = ek->get_lattice().get_grid_dimensions();
+  BOOST_TEST(grid_dim == params.grid_dimensions, per_element());
+
+  auto const [my_left, my_right] = ek->get_lattice().get_local_domain();
+  auto const my_size = my_right - my_left;
+  BOOST_TEST(my_size > zero, per_element());
+  BOOST_TEST(my_left >= zero, per_element());
+  BOOST_TEST(my_right <= params.grid_dimensions, per_element());
+}
+
+BOOST_AUTO_TEST_CASE(stencil_size) {
+  auto constexpr stencil_size = std::size_t{9u};
+  auto ek = std::make_shared<walberla::EKinWalberlaImpl<stencil_size, float>>(
+      params.lattice, params.diffusion, 0., params.valency, params.ext_efield,
+      params.density, params.advection, params.friction_coupling);
+  BOOST_CHECK_EQUAL(ek->stencil_size(), stencil_size);
+}
+
+BOOST_DATA_TEST_CASE(set_diffusion, bdata::make(all_eks()), ek_generator) {
+  auto ek = ek_generator(params);
+  auto new_diffusion = 0.005;
+  ek->set_diffusion(new_diffusion);
+  BOOST_CHECK_CLOSE(ek->get_diffusion(), new_diffusion, 1E-11);
+}
+
+BOOST_DATA_TEST_CASE(set_valency, bdata::make(all_eks()), ek_generator) {
+  auto ek = ek_generator(params);
+  auto new_valency = 2.;
+  ek->set_valency(new_valency);
+  BOOST_CHECK_CLOSE(ek->get_valency(), new_valency, 1E-11);
+}
+
+BOOST_DATA_TEST_CASE(set_kT, bdata::make(all_eks()), ek_generator) {
+  auto ek = ek_generator(params);
+  auto new_kT = 2.;
+  ek->set_kT(new_kT);
+  BOOST_CHECK_CLOSE(ek->get_kT(), new_kT, 1E-11);
+}
+
+BOOST_DATA_TEST_CASE(set_advection, bdata::make(all_eks()), ek_generator) {
+  auto ek = ek_generator(params);
+  auto new_advection = false;
+  ek->set_advection(new_advection);
+  BOOST_CHECK_EQUAL(ek->get_advection(), new_advection);
+}
+
+BOOST_DATA_TEST_CASE(set_coupling, bdata::make(all_eks()), ek_generator) {
+  auto ek = ek_generator(params);
+  auto new_friction_coupling = false;
+  ek->set_friction_coupling(new_friction_coupling);
+  BOOST_CHECK_EQUAL(ek->get_friction_coupling(), new_friction_coupling);
+}
+
+BOOST_DATA_TEST_CASE(initial_state, bdata::make(all_eks()), ek_generator) {
+  auto ek = ek_generator(params);
+  for (auto const &node : local_nodes_incl_ghosts(ek->get_lattice())) {
+    auto const consider_ghosts = !ek->get_lattice().node_in_local_domain(node);
+    BOOST_CHECK(!(*ek->get_node_is_boundary(node, consider_ghosts)));
+    if (ek->get_lattice().node_in_local_domain(node)) {
+      BOOST_CHECK_CLOSE((*ek->get_node_density(node)), params.density, 1E-10);
+    }
+  }
+
+  BOOST_CHECK_CLOSE(ek->get_diffusion(), params.diffusion, 1E-11);
+  BOOST_CHECK_CLOSE(ek->get_valency(), params.valency, 1E-11);
+  BOOST_CHECK_EQUAL(ek->get_advection(), params.advection);
+  BOOST_CHECK_EQUAL(ek->get_friction_coupling(), params.friction_coupling);
+}
+
+BOOST_DATA_TEST_CASE(kT_unthermalized, bdata::make(unthermalized_eks()),
+                     ek_generator) {
+  auto ek = ek_generator(params);
+  BOOST_CHECK_EQUAL(ek->get_kT(), 0.);
+}
+
+BOOST_DATA_TEST_CASE(kT_thermalized, bdata::make(thermalized_eks()),
+                     ek_generator) {
+  auto ek = ek_generator(params);
+  BOOST_CHECK_EQUAL(ek->get_kT(), params.kT);
+}
+
+BOOST_DATA_TEST_CASE(node_flux_boundary, bdata::make(all_eks()), ek_generator) {
+  auto ek = ek_generator(params);
+  auto const flux = Vector3d{{0.2, 3.8, 4.2}};
+  auto const n_ghost_layers =
+      static_cast<int>(ek->get_lattice().get_ghost_layers());
+  for (auto const &node : std::vector<Vector3i>{
+           {-n_ghost_layers, 0, 0}, {0, 0, 0}, {0, 1, 2}, {9, 9, 9}}) {
+    if (ek->get_lattice().node_in_local_halo(node)) {
+      {
+        auto const res = ek->get_node_is_boundary(node, true);
+        // Did we get a value?
+        BOOST_REQUIRE(res);
+        // Should not be a boundary node
+        BOOST_CHECK(*res == false);
+      }
+      {
+        BOOST_CHECK(ek->set_node_flux_boundary(node, flux));
+        {
+          auto const res = ek->get_node_is_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == true);
+        }
+        {
+          auto const res = ek->get_node_is_flux_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == true);
+        }
+        {
+          auto const res = ek->get_node_is_density_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == false);
+        }
+      }
+      {
+        auto const flux_check = ek->get_node_flux_at_boundary(node, true);
+        // Do we have a value
+        BOOST_REQUIRE(flux_check);
+        // Check the value
+        BOOST_CHECK_SMALL((*flux_check - flux).norm(), 1E-12);
+      }
+      {
+        BOOST_CHECK(ek->remove_node_from_flux_boundary(node));
+        {
+          auto const res = ek->get_node_is_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == false);
+        }
+        {
+          auto const res = ek->get_node_is_flux_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == false);
+        }
+        {
+          auto const res = ek->get_node_is_density_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == false);
+        }
+      }
+    } else {
+      // Not in the local halo.
+      BOOST_CHECK(!ek->set_node_flux_boundary(node, flux));
+      BOOST_CHECK(!ek->get_node_flux_at_boundary(node));
+      BOOST_CHECK(!ek->remove_node_from_flux_boundary(node));
+      BOOST_CHECK(!ek->get_node_is_flux_boundary(node));
+    }
+  }
+
+  ek->clear_flux_boundaries();
+  for (auto const &node : local_nodes_incl_ghosts(ek->get_lattice())) {
+    BOOST_CHECK(!(*ek->get_node_is_flux_boundary(node, true)));
+  }
+}
+
+BOOST_DATA_TEST_CASE(node_dens_boundary, bdata::make(all_eks()), ek_generator) {
+  auto ek = ek_generator(params);
+  auto const density = 0.2;
+  auto const n_ghost_layers =
+      static_cast<int>(ek->get_lattice().get_ghost_layers());
+  for (auto const &node : std::vector<Vector3i>{
+           {-n_ghost_layers, 0, 0}, {0, 0, 0}, {0, 1, 2}, {9, 9, 9}}) {
+    if (ek->get_lattice().node_in_local_halo(node)) {
+      {
+        auto const res = ek->get_node_is_boundary(node, true);
+        // Did we get a value?
+        BOOST_REQUIRE(res);
+        // Should not be a boundary node
+        BOOST_CHECK(*res == false);
+      }
+      {
+        BOOST_CHECK(ek->set_node_density_boundary(node, density));
+        {
+          auto const res = ek->get_node_is_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == true);
+        }
+        {
+          auto const res = ek->get_node_is_density_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == true);
+        }
+        {
+          auto const res = ek->get_node_is_flux_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == false);
+        }
+      }
+      {
+        auto const density_check = ek->get_node_density_at_boundary(node, true);
+        // Do we have a value
+        BOOST_REQUIRE(density_check);
+        // Check the value
+        BOOST_CHECK_SMALL(std::abs(*density_check - density), 1E-12);
+      }
+      {
+        BOOST_CHECK(ek->remove_node_from_density_boundary(node));
+        {
+          auto const res = ek->get_node_is_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == false);
+        }
+        {
+          auto const res = ek->get_node_is_density_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == false);
+        }
+        {
+          auto const res = ek->get_node_is_flux_boundary(node, true);
+          BOOST_REQUIRE(res);
+          BOOST_CHECK(*res == false);
+        }
+      }
+    } else {
+      // Not in the local halo.
+      BOOST_CHECK(!ek->set_node_density_boundary(node, density));
+      BOOST_CHECK(!ek->get_node_density_at_boundary(node));
+      BOOST_CHECK(!ek->remove_node_from_density_boundary(node));
+      BOOST_CHECK(!ek->get_node_is_density_boundary(node));
+    }
+  }
+
+  ek->clear_density_boundaries();
+  for (auto const &node : local_nodes_incl_ghosts(ek->get_lattice())) {
+    BOOST_CHECK(!(*ek->get_node_is_density_boundary(node, true)));
+  }
+}
+
+BOOST_DATA_TEST_CASE(update_flux_boundary_from_shape, bdata::make(all_eks()),
+                     ek_generator) {
+  auto ek = ek_generator(params);
+  auto const n_ghost_layers =
+      static_cast<int>(ek->get_lattice().get_ghost_layers());
+  auto const flux = Vector3d{{0.2, 3.8, 4.2}};
+
+  auto const vec3to4 = [](Utils::Vector<int, 3> const &d, int v) {
+    return Utils::Vector<int, 4>{{d[0], d[1], d[2], v}};
+  };
+
+  auto const nodes = std::vector<Vector3i>{
+      {-n_ghost_layers, 0, 0}, {0, 0, 0}, {0, 1, 2}, {9, 9, 9}};
+  // set up boundary
+  {
+    auto const n_grid_points = Utils::product(params.grid_dimensions);
+    boost::multi_array<int, 3> raster_3d(params.grid_dimensions);
+    boost::multi_array<double, 4> flux_3d(vec3to4(params.grid_dimensions, 3));
+    BOOST_CHECK_EQUAL(raster_3d.num_elements(), n_grid_points);
+    for (auto const &node : nodes) {
+      auto const idx = (node + params.grid_dimensions) % params.grid_dimensions;
+      raster_3d(idx) = 1;
+      for (auto const i : {0, 1, 2}) {
+        flux_3d(vec3to4(idx, i)) = flux[i];
+      }
+    }
+    std::vector<int> raster_flat(raster_3d.data(),
+                                 raster_3d.data() + raster_3d.num_elements());
+    std::vector<double> flux_flat(flux_3d.data(),
+                                  flux_3d.data() + flux_3d.num_elements());
+    ek->update_flux_boundary_from_shape(raster_flat, flux_flat);
+  }
+
+  for (auto const &node : nodes) {
+    if (ek->get_lattice().node_in_local_halo(node)) {
+      {
+        auto const res = ek->get_node_is_boundary(node, true);
+        BOOST_REQUIRE(res);
+        BOOST_CHECK(*res == true);
+      }
+      {
+        auto const res = ek->get_node_is_flux_boundary(node, true);
+        BOOST_REQUIRE(res);
+        BOOST_CHECK(*res == true);
+      }
+      {
+        auto const res = ek->get_node_is_density_boundary(node, true);
+        BOOST_REQUIRE(res);
+        BOOST_CHECK(*res == false);
+      }
+      {
+        auto const flux_check = ek->get_node_flux_at_boundary(node, true);
+        // Do we have a value
+        BOOST_REQUIRE(flux_check);
+        // Check the value
+        BOOST_CHECK_SMALL((*flux_check - flux).norm(), 1E-12);
+      }
+    } else {
+      // Not in the local halo.
+      BOOST_CHECK(!ek->get_node_flux_at_boundary(node));
+    }
+  }
+
+  ek->clear_flux_boundaries();
+  ek->ghost_communication();
+  for (auto const &node : local_nodes_incl_ghosts(ek->get_lattice())) {
+    BOOST_CHECK(!(*ek->get_node_is_flux_boundary(node, true)));
+  }
+}
+
+BOOST_DATA_TEST_CASE(update_density_boundary_from_shape, bdata::make(all_eks()),
+                     ek_generator) {
+  auto ek = ek_generator(params);
+  auto const n_ghost_layers =
+      static_cast<int>(ek->get_lattice().get_ghost_layers());
+  auto const density = 0.2;
+
+  auto const nodes = std::vector<Vector3i>{
+      {-n_ghost_layers, 0, 0}, {0, 0, 0}, {0, 1, 2}, {9, 9, 9}};
+  // set up boundary
+  {
+    auto const n_grid_points = Utils::product(params.grid_dimensions);
+    boost::multi_array<int, 3> raster_3d(params.grid_dimensions);
+    boost::multi_array<double, 3> dens_3d(params.grid_dimensions);
+    BOOST_CHECK_EQUAL(raster_3d.num_elements(), n_grid_points);
+    for (auto const &node : nodes) {
+      auto const idx = (node + params.grid_dimensions) % params.grid_dimensions;
+      raster_3d(idx) = 1;
+      dens_3d(idx) = density;
+    }
+    std::vector<int> raster_flat(raster_3d.data(),
+                                 raster_3d.data() + raster_3d.num_elements());
+    std::vector<double> dens_flat(dens_3d.data(),
+                                  dens_3d.data() + dens_3d.num_elements());
+    ek->update_density_boundary_from_shape(raster_flat, dens_flat);
+  }
+
+  for (auto const &node : nodes) {
+    if (ek->get_lattice().node_in_local_halo(node)) {
+      {
+        auto const res = ek->get_node_is_boundary(node, true);
+        BOOST_REQUIRE(res);
+        BOOST_CHECK(*res == true);
+      }
+      {
+        auto const res = ek->get_node_is_density_boundary(node, true);
+        BOOST_REQUIRE(res);
+        BOOST_CHECK(*res == true);
+      }
+      {
+        auto const res = ek->get_node_is_flux_boundary(node, true);
+        BOOST_REQUIRE(res);
+        BOOST_CHECK(*res == false);
+      }
+      {
+        auto const density_check = ek->get_node_density_at_boundary(node, true);
+        // Do we have a value
+        BOOST_REQUIRE(density_check);
+        // Check the value
+        BOOST_CHECK_SMALL(std::abs(*density_check - density), 1E-12);
+      }
+    } else {
+      // Not in the local halo.
+      BOOST_CHECK(!ek->get_node_density_at_boundary(node));
+    }
+  }
+
+  ek->clear_density_boundaries();
+  ek->ghost_communication();
+  for (auto const &node : local_nodes_incl_ghosts(ek->get_lattice())) {
+    BOOST_CHECK(!(*ek->get_node_is_density_boundary(node, true)));
+  }
+}
+
+BOOST_DATA_TEST_CASE(domain_and_halo, bdata::make(all_eks()), ek_generator) {
+  auto ek = ek_generator(params);
+  auto const n_ghost_layers = ek->get_lattice().get_ghost_layers();
+  auto const [my_left, my_right] = ek->get_lattice().get_local_domain();
+
+  for (auto const &n : all_nodes_incl_ghosts(ek->get_lattice())) {
+    auto const pos = n + Vector3d::broadcast(.5);
+    int is_local = 0;
+    // Nodes in local domain
+    if (Vector3d(n) >= my_left and Vector3d(n) < my_right) {
+      BOOST_CHECK(ek->get_lattice().node_in_local_domain(n));
+      BOOST_CHECK(ek->get_lattice().node_in_local_halo(n));
+
+      BOOST_CHECK(ek->get_lattice().pos_in_local_domain(pos));
+      BOOST_CHECK(ek->get_lattice().pos_in_local_halo(pos));
+      is_local = 1;
+    } else {
+      // in local halo?
+      if ((n + Vector3d::broadcast(n_ghost_layers)) >= my_left and
+          (n - Vector3d::broadcast(n_ghost_layers)) < my_right) {
+        BOOST_CHECK(!ek->get_lattice().node_in_local_domain(n));
+        BOOST_CHECK(ek->get_lattice().node_in_local_halo(n));
+
+        BOOST_CHECK(!ek->get_lattice().pos_in_local_domain(pos));
+        BOOST_CHECK(ek->get_lattice().pos_in_local_halo(pos));
+      } else {
+        // neither in domain nor in halo
+        BOOST_CHECK(!ek->get_lattice().node_in_local_domain(n));
+        BOOST_CHECK(!ek->get_lattice().node_in_local_halo(n));
+
+        BOOST_CHECK(!ek->get_lattice().pos_in_local_domain(pos));
+        BOOST_CHECK(!ek->get_lattice().pos_in_local_halo(pos));
+      }
+    }
+
+    // If the cell is in the global physical domain
+    // check that only one mpi rank said the node was local
+    auto constexpr origin = Vector3i{0, 0, 0};
+    if (n >= origin and n < params.grid_dimensions) {
+      boost::mpi::communicator world;
+      auto const is_local_sum =
+          boost::mpi::all_reduce(world, is_local, std::plus<int>());
+      BOOST_CHECK(is_local_sum == 1);
+    }
+  }
+}
+
+static auto fold_node(Vector3i n) {
+  for (unsigned int i = 0; i < 3; i++) {
+    if (n[i] < 0) {
+      n[i] += params.grid_dimensions[i];
+    } else if (n[i] >= params.grid_dimensions[i]) {
+      n[i] -= params.grid_dimensions[i];
+    }
+  }
+  return n;
+}
+
+BOOST_DATA_TEST_CASE(set_node_density, bdata::make(all_eks()), ek_generator) {
+  auto ek = ek_generator(params);
+
+  auto n_dens = [](Vector3i const &node) {
+    return 1. + static_cast<double>(Utils::product(fold_node(node))) * 1e-6;
+  };
+
+  // Assign densities
+  for (auto const &node : all_nodes_incl_ghosts(ek->get_lattice())) {
+    if (ek->get_lattice().node_in_local_domain(node)) {
+      BOOST_CHECK(ek->set_node_density(node, n_dens(node)));
+    } else {
+      // Check that access to node density is not possible
+      BOOST_CHECK(!ek->set_node_density(node, 0.));
+    }
+  }
+
+  ek->ghost_communication();
+
+  // check densities
+  for (auto const &node : all_nodes_incl_ghosts(ek->get_lattice())) {
+    auto constexpr eps = 1E-8;
+    if (ek->get_lattice().node_in_local_halo(node)) {
+      auto const consider_ghosts =
+          !ek->get_lattice().node_in_local_domain(node);
+      auto res = ek->get_node_density(node, consider_ghosts);
+      BOOST_REQUIRE(res);                          // value available?
+      BOOST_CHECK_SMALL(*res - n_dens(node), eps); // value correct?
+    } else {
+      // Check that access to node density is not possible
+      BOOST_CHECK(!ek->get_node_density(node));
+    }
+  }
+}
+
+BOOST_DATA_TEST_CASE(vtk_exceptions,
+                     bdata::make(EkGeneratorVector{unthermalized_eks()[0]}),
+                     ek_generator) {
+  std::unordered_map<std::string, double> const units = {{"density", 1.}};
+  auto ek = ek_generator(params);
+  auto const flag =
+      static_cast<std::underlying_type_t<OutputVTK>>(OutputVTK::density);
+  // cannot create the same observable twice
+  ek->create_vtk(1u, 0u, flag, units, "density", "vtk_out", "step");
+  BOOST_CHECK_THROW(
+      ek->create_vtk(1u, 0u, flag, units, "density", "vtk_out", "step"),
+      std::runtime_error);
+  // cannot manually call an automatic observable
+  ek->create_vtk(1u, 0u, flag, units, "auto", "vtk_out", "step");
+  BOOST_CHECK_THROW(ek->write_vtk("vtk_out/auto"), std::runtime_error);
+  // cannot activate a manual observable
+  ek->create_vtk(0u, 0u, flag, units, "manual", "vtk_out", "step");
+  BOOST_CHECK_THROW(ek->switch_vtk("vtk_out/manual", 0), std::runtime_error);
+  // cannot call or activate observables that haven't been registered yet
+  BOOST_CHECK_THROW(ek->write_vtk("unknown"), std::runtime_error);
+  BOOST_CHECK_THROW(ek->switch_vtk("unknown", 0), std::runtime_error);
+}
+
+BOOST_AUTO_TEST_CASE(ek_exceptions) {
+  auto ek = std::make_shared<walberla::EKinWalberlaImpl<>>(
+      params.lattice, params.diffusion, 0., params.valency, params.ext_efield,
+      params.density, params.advection, params.friction_coupling);
+  BOOST_CHECK_THROW(ek->integrate(std::size_t{}, std::size_t{}, std::size_t{}),
+                    std::runtime_error);
+  // no diffusion leads to early exit
+  ek->set_diffusion(0.);
+  ek->integrate(std::size_t{}, std::size_t{}, std::size_t{});
+}
+
+int main(int argc, char **argv) {
+  int n_nodes;
+  Vector3i mpi_shape{};
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &n_nodes);
+  MPI_Dims_create(n_nodes, 3, mpi_shape.data());
+  walberla::mpi_init();
+
+  params.seed = 0u;
+  params.kT = 1.3E-4;
+  params.density = 1.4;
+  params.diffusion = 0.003;
+  params.valency = 1.;
+  params.advection = true;
+  params.friction_coupling = true;
+  params.ext_efield = Vector3d{0.01, 0.02, 0.03};
+  params.grid_dimensions = Vector3i{12, 12, 18};
+  params.box_dimensions = Vector3d{12, 12, 18};
+  params.lattice =
+      std::make_shared<LatticeWalberla>(params.grid_dimensions, mpi_shape, 1u);
+
+  auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv);
+  MPI_Finalize();
+  return res;
+}
+
+#else // WALBERLA
+int main(int argc, char **argv) {}
+#endif
diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_bspline_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_bspline_tests.cpp
new file mode 100644
index 00000000000..a0123cbe67e
--- /dev/null
+++ b/src/walberla_bridge/tests/LBWalberlaImpl_bspline_tests.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define BOOST_TEST_MODULE Walberla interpolation test
+#define BOOST_TEST_DYN_LINK
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#define BOOST_TEST_NO_MAIN
+
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include "tests_common_lb.hpp"
+
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+#include <walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <mpi.h>
+
+#include <cassert>
+#include <cmath>
+#include <numeric>
+
+using Utils::Vector3d;
+using Utils::Vector3i;
+
+namespace bdata = boost::unit_test::data;
+
+static LBTestParameters params; // populated in main()
+
+BOOST_DATA_TEST_CASE(force_interpolation_bspline, bdata::make(all_lbs()),
+                     lb_generator) {
+  auto lb = lb_generator(params);
+
+  /* Check the bspline weights sum up to 1 in each direction.
+   * The position for the interpolation is sampled uniformly
+   * in the range (-0.5, +0.5) around the LB node mid point.
+   */
+
+  constexpr auto dx = 0.02;
+  auto const f = Vector3d{{-1.0, 0.5, 1.5}};
+  auto offset = Vector3d::broadcast(-0.5 + dx);
+  int index = 0;
+  for (auto const &n : local_nodes_incl_ghosts(lb->get_lattice(), false)) {
+    if (lb->get_lattice().node_in_local_halo(n)) {
+      index = (index + 1) % 3;
+      offset[index] = std::fmod(offset[index] + 0.5, 1. - dx) - 0.5 + dx;
+      auto const pos = n + offset;
+      lb->add_force_at_pos(pos, f);
+      // Check neighboring nodes for bspline weights
+      Vector3d sum{};
+      for (int x : {0, 1}) {
+        for (int y : {0, 1}) {
+          for (int z : {0, 1}) {
+            Vector3i const check_node{{n[0] - x, n[1] - y, n[2] - z}};
+            if (lb->get_lattice().node_in_local_halo(check_node)) {
+              auto const res = lb->get_node_force_to_be_applied(check_node);
+              sum += *res;
+            }
+          }
+        }
+      }
+      BOOST_CHECK_SMALL((sum - f).norm(), 1E-10);
+      // Apply counter force to clear force field
+      lb->add_force_at_pos(pos, -f);
+    }
+  }
+}
+
+BOOST_DATA_TEST_CASE(velocity_interpolation_bspline, bdata::make(all_lbs()),
+                     lb_generator) {
+  auto lb = lb_generator(params);
+
+  /* Check linear interpolation of the velocity. LB cells can couple
+   * to particles that are at most 1 agrid away from the cell mid point.
+   * The test assigns a velocity to every third node on a simple cubic
+   * lattice with lattice constant l0 = 3 * agrid. A particle moving on a
+   * line along the x-, y- or z-axis should experience a coupling whose
+   * profile is a series of peaks with formula:
+   *   f(x) = sum_i v_i * max(0, 1 - abs(x - x_i))
+   * where x_i are the peak centers and v_i their velocity.
+   * In ASCII art: _/\_/\_/\_/\_
+   */
+
+  // make sure the lattice constant is commensurate with the box dimensions
+  assert(params.grid_dimensions[0] % 3 == 0 and
+         params.grid_dimensions[1] % 3 == 0 and
+         params.grid_dimensions[2] % 3 == 0);
+
+  // set node velocities on a simple cubic lattice
+  auto const vel = Vector3d{{-1., 0.5, 1.5}};
+  for (auto const &n : local_nodes_incl_ghosts(lb->get_lattice(), false)) {
+    if (lb->get_lattice().node_in_local_domain(n)) {
+      if ((n[0] + 2) % 3 == 0 and (n[1] + 2) % 3 == 0 and (n[2] + 2) % 3 == 0) {
+        BOOST_CHECK(lb->set_node_velocity(n, vel));
+      }
+    }
+  }
+
+  lb->ghost_communication();
+
+  for (double x = 0.0; x < params.box_dimensions[0]; x += 0.3) {
+    for (double y = 0.1; y < params.box_dimensions[1]; y += 0.3) {
+      for (double z = 0.2; z < params.box_dimensions[2]; z += 0.3) {
+        Vector3d const pos{x, y, z};
+        if (lb->get_lattice().pos_in_local_domain(pos)) {
+          auto const factor = std::accumulate(
+              pos.begin(), pos.end(), 1., [](double a, double x) {
+                return a * std::max(0., 1. - std::fabs(std::fmod(x, 3.) - 1.5));
+              });
+          auto const ref = factor * vel;
+          auto const res = lb->get_velocity_at_pos(pos, true);
+          BOOST_CHECK(res); // locally available
+          BOOST_CHECK_SMALL((*res - ref).norm(), 1E-10);
+        }
+      }
+    }
+  }
+}
+
+// TODO: check last applied force on a ghost node, i.e. when two forces
+// are applied at (agrid/2, 0, 0) and (box_l - agrid/2, 0, 0)
+
+int main(int argc, char **argv) {
+  int n_nodes;
+  Vector3i mpi_shape{};
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &n_nodes);
+  MPI_Dims_create(n_nodes, 3, mpi_shape.data());
+  walberla::mpi_init();
+
+  params.seed = 0u;
+  params.kT = 1.3E-4;
+  params.viscosity = 0.003;
+  params.density = 1.4;
+  params.grid_dimensions = Vector3i{12, 6, 9};
+  params.box_dimensions = Vector3d{12, 6, 9};
+  params.lattice =
+      std::make_shared<LatticeWalberla>(params.grid_dimensions, mpi_shape, 1u);
+
+  auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv);
+  MPI_Finalize();
+  return res;
+}
+
+#else // WALBERLA
+int main(int argc, char **argv) {}
+#endif
diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp
new file mode 100644
index 00000000000..ce0adc84103
--- /dev/null
+++ b/src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define BOOST_TEST_MODULE Walberla point force test
+#define BOOST_TEST_DYN_LINK
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#define BOOST_TEST_NO_MAIN
+
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include "tests_common_lb.hpp"
+
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+#include <walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <boost/mpi/collectives/all_reduce.hpp>
+#include <boost/mpi/communicator.hpp>
+
+#include <mpi.h>
+
+#include <cmath>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <vector>
+
+using Utils::hadamard_product;
+using Utils::Vector3d;
+using Utils::Vector3i;
+
+namespace bdata = boost::unit_test::data;
+
+static LBTestParameters params; // populated in main()
+
+BOOST_DATA_TEST_CASE(integrate_with_point_force_thermalized,
+                     bdata::make(thermalized_lbs()), lb_generator) {
+  auto lb = lb_generator(params);
+  boost::mpi::communicator world;
+
+  // Check that momentum stays zero after initial integration
+  lb->integrate();
+  lb->integrate();
+  auto mom_local = lb->get_momentum();
+  auto mom = boost::mpi::all_reduce(world, mom_local, std::plus<Vector3d>());
+  BOOST_CHECK_SMALL(mom.norm(), 1E-10);
+
+  // Check that momentum changes as expected when applying forces
+  // auto f = Vector3d{0.15, 0.25, -0.22};
+  // auto f = Vector3d{0.0006, -0.0013, 0.000528};
+  auto const f1 = Vector3d{0., 0., 0.};
+  auto const f2 = Vector3d{0.1, 0.2, -0.3};
+  lb->set_external_force(f1);
+  auto const force_node = Vector3i{{1, 1, 1}};
+  lb->add_force_at_pos(force_node + Vector3d::broadcast(.5), f2);
+  lb->integrate();
+  for (auto const &n : all_nodes_incl_ghosts(lb->get_lattice())) {
+    if (lb->get_lattice().node_in_local_halo(n)) {
+      auto const laf = *(lb->get_node_last_applied_force(n, true));
+      if (n == force_node) {
+        BOOST_CHECK_SMALL((laf - f1 - f2).norm(), 1E-10);
+      } else {
+        BOOST_CHECK_SMALL((laf - f1).norm(), 1E-10);
+      }
+    }
+  }
+  mom_local = lb->get_momentum();
+  mom = boost::mpi::all_reduce(world, mom_local, std::plus<Vector3d>());
+
+  // Expected momentum = momentum added in prev. time step
+  // + f/2 from velocity shift due to last applied forces
+  auto mom_exp = 1.5 * f1 * Utils::product(params.grid_dimensions) + 1.5 * f2;
+  auto d = mom - mom_exp;
+  BOOST_CHECK_SMALL((mom - mom_exp).norm(), 1E-10);
+  std::cout << "thermalized: " << mom << " | " << mom_exp << " | " << d << "\n";
+
+  // check that momentum doesn't drift when no force is applied again
+  lb->set_external_force(Vector3d{});
+  // The expected moment is just that applied during a single time step
+  // No f/2 correction, since no force was applied in last time step
+  mom_exp = 1.0 * f1 * Utils::product(params.grid_dimensions) + 1.0 * f2;
+  lb->integrate();
+  mom_local = lb->get_momentum();
+  mom = boost::mpi::all_reduce(world, mom_local, std::plus<Vector3d>());
+  BOOST_CHECK_SMALL((mom - mom_exp).norm(), 1E-10);
+}
+
+// this can be merged with the thermalized test, once that passes
+BOOST_DATA_TEST_CASE(integrate_with_point_force_unthermalized,
+                     bdata::make(unthermalized_lbs()), lb_generator) {
+  auto lb = lb_generator(params);
+  boost::mpi::communicator world;
+
+  // Check that momentum stays zero after initial integration
+  lb->integrate();
+  BOOST_CHECK_SMALL(lb->get_momentum().norm(), 1E-10);
+
+  // Check that momentum changes as expected when applying forces
+  // auto f = Vector3d{0.0006, -0.0013, 0.000528};
+  auto const f1 = Vector3d{0., 0., 0.};
+  auto const f2 = Vector3d{0.095, 0.23, -0.52};
+  lb->set_external_force(f1);
+  lb->add_force_at_pos(Utils::Vector3d{2, 2, 2}, f2);
+  lb->integrate();
+
+  auto mom_local = lb->get_momentum();
+  auto mom = boost::mpi::all_reduce(world, mom_local, std::plus<Vector3d>());
+
+  // Expected momentum = momentum added in prev. time step
+  // + f/2 from velocity shift due to last applied forces
+  auto mom_exp = 1.5 * f1 * Utils::product(params.grid_dimensions) + 1.5 * f2;
+  auto d = mom - mom_exp;
+  std::cout << mom << " | " << mom_exp << " | " << d << "\n";
+  BOOST_CHECK_SMALL((mom - mom_exp).norm(), 1E-10);
+
+  // check that momentum doesn't drift when no force is applied again
+  lb->set_external_force(Vector3d{});
+  lb->integrate();
+  // The expected moment is just that applied during a single time step
+  // No f/2 correction, since no force was applied in last time step
+  mom_exp = 1.0 * f1 * Utils::product(params.grid_dimensions) + 1.0 * f2;
+  mom_local = lb->get_momentum();
+  mom = boost::mpi::all_reduce(world, mom_local, std::plus<Vector3d>());
+  BOOST_CHECK_SMALL((mom - mom_exp).norm(), 1E-10);
+}
+
+int main(int argc, char **argv) {
+  int n_nodes;
+  Vector3i mpi_shape{};
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &n_nodes);
+  MPI_Dims_create(n_nodes, 3, mpi_shape.data());
+  walberla::mpi_init();
+
+  params.seed = 0u;
+  params.kT = 1.1E-4;
+  params.viscosity = 0.02;
+  params.density = 1.4;
+  params.grid_dimensions = Vector3i{12, 12, 18};
+  params.box_dimensions = Vector3d{6, 6, 9};
+  params.lattice =
+      std::make_shared<LatticeWalberla>(params.grid_dimensions, mpi_shape, 1u);
+
+  auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv);
+  MPI_Finalize();
+  return res;
+}
+
+#else // WALBERLA
+int main(int argc, char **argv) {}
+#endif
diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards.cpp
new file mode 100644
index 00000000000..8e66ed037ec
--- /dev/null
+++ b/src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define BOOST_TEST_MODULE Walberla point force test
+#define BOOST_TEST_DYN_LINK
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#define BOOST_TEST_NO_MAIN
+
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include "tests_common_lb.hpp"
+
+#include "../src/lattice_boltzmann/LBWalberlaImpl.hpp"
+
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+#include <walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <boost/mpi/collectives/all_reduce.hpp>
+#include <boost/mpi/communicator.hpp>
+
+#include <mpi.h>
+
+#include <cmath>
+#include <functional>
+#include <iostream>
+#include <math.h>
+#include <memory>
+#include <vector>
+
+using Utils::hadamard_product;
+using Utils::Vector3d;
+using Utils::Vector3i;
+
+namespace bdata = boost::unit_test::data;
+auto constexpr v0 = 0.064;
+static Vector3i mpi_shape{};
+
+static double u_expected(double x, double t, double nu, double v_0, double h,
+                         int k_max = 100) {
+  auto u = x / h - 0.5;
+  for (int k = 1; k <= k_max; k++) {
+    u += 1.0 / (M_PI * k) * exp(-4 * M_PI * M_PI * nu * k * k / (h * h) * t) *
+         sin(2 * M_PI / h * k * x);
+  }
+  return v_0 * u;
+}
+
+BOOST_AUTO_TEST_CASE(test_transient_shear) {
+  using LBImplementation = walberla::LBWalberlaImpl<double, lbmpy::Arch::CPU>;
+  double density = 1;
+  double viscosity = 1. / 7.;
+  auto lattice =
+      std::make_shared<LatticeWalberla>(Vector3i{8, 64, 8}, mpi_shape, 1);
+  auto lb = LBImplementation(lattice, viscosity, density);
+  auto le_pack = std::make_unique<LeesEdwardsPack>(
+      0u, 1u, []() { return 0.0; }, [=]() { return v0; });
+  lb.set_collision_model(std::move(le_pack));
+  lb.ghost_communication();
+  auto const grid_size_y = lattice->get_grid_dimensions()[1];
+  for (int i = 0; i < 200; i++) {
+    lb.integrate();
+    if (i < grid_size_y / 2.)
+      continue;
+    for (double y :
+         {0., 0.13 * grid_size_y, 0.7 * grid_size_y, 1. * grid_size_y}) {
+      auto u = lb.get_velocity_at_pos(Vector3d{4, y, 4}, true);
+      auto expected = u_expected(y, i, viscosity, v0, grid_size_y);
+      BOOST_CHECK_SMALL((*u)[0] - expected, 3E-5);
+    }
+  }
+}
+
+static auto setup_lb_with_offset(double offset) {
+  using LBImplementation = walberla::LBWalberlaImpl<double, lbmpy::Arch::CPU>;
+  auto density = 1.;
+  auto viscosity = 1. / 7.;
+  auto lattice =
+      std::make_shared<LatticeWalberla>(Vector3i{10, 10, 10}, mpi_shape, 1);
+  auto lb = std::make_shared<LBImplementation>(lattice, viscosity, density);
+  auto le_pack = std::make_unique<LeesEdwardsPack>(
+      0u, 1u, [=]() { return offset; }, []() { return 0.0; });
+  lb->set_collision_model(std::move(le_pack));
+  lb->ghost_communication();
+  return lb;
+}
+
+BOOST_AUTO_TEST_CASE(test_interpolation_force) {
+  auto const offset = 2;
+  auto lb = setup_lb_with_offset(offset);
+  auto const shape = lb->get_lattice().get_grid_dimensions();
+  auto const xz = shape[0] / 2;
+  auto const y_max = shape[1] - 1;
+
+  auto const force_pos = Vector3d{xz + 0.5, y_max + 0.5, xz + 0.5};
+  auto const force_node = Vector3i{xz, y_max, xz};
+  auto const f1 = Vector3d{0.3, -0.2, 0.3};
+  lb->add_force_at_pos(force_pos, f1);
+
+  lb->integrate();
+
+  auto const ghost_node = Vector3i{force_node[0] - offset, -1, force_node[2]};
+  auto const laf = *(lb->get_node_last_applied_force(ghost_node, true));
+  BOOST_CHECK_SMALL((laf - f1).norm(), 1E-10);
+}
+
+BOOST_AUTO_TEST_CASE(test_interpolation_velocity) {
+  auto const offset = 2;
+  auto lb = setup_lb_with_offset(offset);
+  auto const shape = lb->get_lattice().get_grid_dimensions();
+  auto const xz = shape[0] / 2;
+  auto const y_max = shape[1] - 1;
+
+  auto const source_node = Vector3i{xz, y_max, xz};
+  auto const v = Vector3d{0.3, -0.2, 0.3};
+  lb->set_node_velocity(source_node, v);
+
+  lb->ghost_communication();
+
+  auto const ghost_node = Vector3i{source_node[0] - offset, -1, source_node[2]};
+  auto const ghost_vel = *(lb->get_node_velocity(ghost_node, true));
+  BOOST_CHECK_SMALL((ghost_vel - v).norm(), 1E-10);
+}
+
+BOOST_AUTO_TEST_CASE(test_interpolation_pdf) {
+  auto const offset = 2;
+  auto lb = setup_lb_with_offset(offset);
+  auto const shape = lb->get_lattice().get_grid_dimensions();
+  auto const xz = shape[0] / 2;
+  auto const y_max = shape[1] - 1;
+
+  auto const source_node = Vector3i{xz, y_max, xz};
+
+  std::vector<double> source_pop(19);
+  auto x = -1.;
+  std::for_each(source_pop.begin(), source_pop.end(), [&x](auto &v) {
+    v = x;
+    x += .1;
+  });
+  lb->set_node_population(source_node, source_pop);
+  lb->ghost_communication();
+
+  auto const ghost_node = Vector3i{source_node[0] - offset, -1, source_node[2]};
+  auto const ghost_pop = *(lb->get_node_population(ghost_node, true));
+  for (unsigned int i = 0u; i < source_pop.size(); ++i) {
+    BOOST_CHECK_EQUAL(source_pop[i], ghost_pop[i]);
+  }
+}
+
+int main(int argc, char **argv) {
+  int n_nodes;
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &n_nodes);
+  MPI_Dims_create(n_nodes, 3, mpi_shape.data());
+  walberla::mpi_init();
+
+  auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv);
+  MPI_Finalize();
+  return res;
+}
+
+#else // WALBERLA
+int main(int argc, char **argv) {}
+#endif
diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_statistical_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_statistical_tests.cpp
new file mode 100644
index 00000000000..9b3db0fdd98
--- /dev/null
+++ b/src/walberla_bridge/tests/LBWalberlaImpl_statistical_tests.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define BOOST_TEST_MODULE Walberla statistical tests
+#define BOOST_TEST_DYN_LINK
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#define BOOST_TEST_NO_MAIN
+
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include "tests_common_lb.hpp"
+
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+#include <walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <boost/mpi/collectives/all_reduce.hpp>
+#include <boost/mpi/communicator.hpp>
+
+#include <mpi.h>
+
+#include <cmath>
+#include <functional>
+#include <iostream>
+
+using Utils::hadamard_product;
+using Utils::Vector3d;
+using Utils::Vector3i;
+
+namespace bdata = boost::unit_test::data;
+
+static LBTestParameters params; // populated in main()
+
+BOOST_DATA_TEST_CASE(velocity_fluctuation, bdata::make(thermalized_lbs()),
+                     lb_generator) {
+  auto lb = lb_generator(params);
+
+  // Warmup
+  for (int i = 0; i < 200; i++)
+    lb->integrate();
+
+  // Sample
+  int steps = 800;
+
+  auto const [my_left, my_right] = lb->get_lattice().get_local_domain();
+  auto const denominator = Utils::product(my_right - my_left);
+
+  Vector3d sum_v_local{}, sum_v_square_local{};
+
+  for (int i = 0; i < steps; i++) {
+    Vector3d step_v{}, step_v_square{};
+    for (int x = static_cast<int>(my_left[0]);
+         x < static_cast<int>(my_right[0]); x++) {
+      for (int y = static_cast<int>(my_left[1]);
+           y < static_cast<int>(my_right[1]); y++) {
+        for (int z = static_cast<int>(my_left[2]);
+             z < static_cast<int>(my_right[2]); z++) {
+          const Vector3i node{{x, y, z}};
+          auto v = *(lb->get_node_velocity(node));
+          auto rho = *(lb->get_node_density(node));
+          step_v += v * rho;
+          step_v_square += rho * hadamard_product(v, v);
+        }
+      }
+    }
+    step_v /= denominator;
+    step_v_square /= denominator;
+
+    sum_v_local += step_v;
+    sum_v_square_local += step_v_square;
+    std::cout << sum_v_square_local / static_cast<double>(i + 1) << std::endl;
+
+    lb->integrate();
+    lb->integrate();
+    lb->integrate();
+  }
+
+  // aggregate
+  boost::mpi::communicator world;
+  auto sum_v =
+      boost::mpi::all_reduce(world, sum_v_local, std::plus<Vector3d>());
+  auto sum_v_square =
+      boost::mpi::all_reduce(world, sum_v_square_local, std::plus<Vector3d>());
+  sum_v /= static_cast<double>(world.size());
+  sum_v_square /= static_cast<double>(world.size());
+
+  // check
+  auto const tol_v = 3E-6;
+  BOOST_CHECK_SMALL(std::abs(sum_v[0] / steps), tol_v * 100); // boost oddity
+  BOOST_CHECK_SMALL(std::abs(sum_v[1] / steps), tol_v * 100);
+  BOOST_CHECK_SMALL(std::abs(sum_v[2] / steps), tol_v * 100);
+
+  const double tol_kT = 5; // this is in percent ...
+  BOOST_CHECK_CLOSE(sum_v_square[0] / steps, params.kT, tol_kT);
+  BOOST_CHECK_CLOSE(sum_v_square[1] / steps, params.kT, tol_kT);
+  BOOST_CHECK_CLOSE(sum_v_square[2] / steps, params.kT, tol_kT);
+}
+
+int main(int argc, char **argv) {
+  int n_nodes;
+  Vector3i mpi_shape{};
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &n_nodes);
+  MPI_Dims_create(n_nodes, 3, mpi_shape.data());
+  walberla::mpi_init();
+
+  params.seed = 1u;
+  params.kT = 1.1E-4;
+  params.viscosity = 0.02;
+  params.density = 1.4;
+  params.grid_dimensions = Vector3i{12, 12, 18};
+  params.box_dimensions = Vector3d{6, 6, 9};
+  params.lattice =
+      std::make_shared<LatticeWalberla>(params.grid_dimensions, mpi_shape, 1u);
+
+  auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv);
+  MPI_Finalize();
+  return res;
+}
+
+#else // WALBERLA
+int main(int argc, char **argv) {}
+#endif
diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp
new file mode 100644
index 00000000000..8e3e285fddd
--- /dev/null
+++ b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp
@@ -0,0 +1,632 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define BOOST_TEST_MODULE LB walberla node setters and getters test
+#define BOOST_TEST_DYN_LINK
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#define BOOST_TEST_NO_MAIN
+
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include "tests_common_lb.hpp"
+
+#include <walberla_bridge/Architecture.hpp>
+#include <walberla_bridge/VTKHandle.hpp>
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+#include <walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <boost/mpi/collectives/all_reduce.hpp>
+#include <boost/mpi/communicator.hpp>
+#include <boost/multi_array.hpp>
+
+#include <mpi.h>
+
+#include <functional>
+#include <initializer_list>
+#include <memory>
+#include <stdexcept>
+#include <unordered_map>
+#include <vector>
+
+using Utils::hadamard_product;
+using Utils::Vector3d;
+using Utils::Vector3i;
+
+namespace bdata = boost::unit_test::data;
+
+static LBTestParameters params; // populated in main()
+static Vector3i mpi_shape;
+
+BOOST_DATA_TEST_CASE(dimensions, bdata::make(all_lbs()), lb_generator) {
+  using boost::test_tools::per_element;
+  auto lb = lb_generator(params);
+  auto constexpr zero = Vector3i{0, 0, 0};
+
+  auto const grid_dim = lb->get_lattice().get_grid_dimensions();
+  BOOST_TEST(grid_dim == params.grid_dimensions, per_element());
+
+  auto const [my_left, my_right] = lb->get_lattice().get_local_domain();
+  auto const my_size = my_right - my_left;
+  BOOST_TEST(my_size > zero, per_element());
+  BOOST_TEST(my_left >= zero, per_element());
+  BOOST_TEST(my_right <= params.grid_dimensions, per_element());
+}
+
+BOOST_DATA_TEST_CASE(set_viscosity, bdata::make(all_lbs()), lb_generator) {
+  auto lb = lb_generator(params);
+  auto new_viscosity = 2.;
+  lb->set_viscosity(new_viscosity);
+  BOOST_CHECK_CLOSE(lb->get_viscosity(), new_viscosity, 1E-11);
+}
+
+BOOST_DATA_TEST_CASE(initial_state, bdata::make(all_lbs()), lb_generator) {
+  auto lb = lb_generator(params);
+  auto const pressure = Utils::VectorXd<9>{1., 0., 0., 0., 1., 0., 0., 0., 1.} *
+                        params.density / 3.;
+  for (auto const &node : local_nodes_incl_ghosts(lb->get_lattice())) {
+    auto const consider_ghosts = !lb->get_lattice().node_in_local_domain(node);
+    BOOST_CHECK(!(*lb->get_node_is_boundary(node, consider_ghosts)));
+    if (lb->get_lattice().node_in_local_domain(node)) {
+      BOOST_CHECK((*lb->get_node_force_to_be_applied(node)) == Vector3d{});
+      BOOST_CHECK((*lb->get_node_last_applied_force(node)) == Vector3d{});
+      BOOST_CHECK((*lb->get_node_velocity(node)) == Vector3d{});
+      BOOST_CHECK_CLOSE((*lb->get_node_density(node)), params.density, 1E-10);
+      BOOST_CHECK_LE((*lb->get_node_pressure_tensor(node) - pressure).norm(),
+                     1E-9);
+    }
+  }
+
+  boost::mpi::communicator world;
+  auto const local_pressure_tensor = lb->get_pressure_tensor();
+  auto const global_pressure_tensor = local_pressure_tensor * world.size();
+  BOOST_CHECK_LE((global_pressure_tensor - pressure).norm(), 1E-9);
+  BOOST_CHECK_LE(lb->get_momentum().norm(), 1E-11);
+  BOOST_CHECK_CLOSE(lb->get_viscosity(), params.viscosity, 1E-11);
+}
+
+BOOST_DATA_TEST_CASE(kT_unthermalized, bdata::make(unthermalized_lbs()),
+                     lb_generator) {
+  auto lb = lb_generator(params);
+  BOOST_CHECK_EQUAL(lb->get_kT(), 0.);
+}
+
+BOOST_DATA_TEST_CASE(kT_thermalized, bdata::make(thermalized_lbs()),
+                     lb_generator) {
+  auto lb = lb_generator(params);
+  BOOST_CHECK_EQUAL(lb->get_kT(), params.kT);
+}
+
+BOOST_DATA_TEST_CASE(per_node_boundary, bdata::make(all_lbs()), lb_generator) {
+  auto lb = lb_generator(params);
+  auto const vel = Vector3d{{0.2, 3.8, 4.2}};
+  auto const n_ghost_layers =
+      static_cast<int>(lb->get_lattice().get_ghost_layers());
+  for (auto const &node : std::vector<Vector3i>{
+           {-n_ghost_layers, 0, 0}, {0, 0, 0}, {0, 1, 2}, {9, 9, 9}}) {
+    if (lb->get_lattice().node_in_local_halo(node)) {
+      {
+        auto const res = lb->get_node_is_boundary(node, true);
+        // Did we get a value?
+        BOOST_REQUIRE(res);
+        // Should not be a boundary node
+        BOOST_CHECK(*res == false);
+      }
+      {
+        BOOST_CHECK(lb->set_node_velocity_at_boundary(node, vel));
+        auto const res = lb->get_node_is_boundary(node, true);
+        // Did we get a value?
+        BOOST_REQUIRE(res);
+        // Should be a boundary node
+        BOOST_CHECK(*res == true);
+      }
+      {
+        auto const vel_check = lb->get_node_velocity_at_boundary(node, true);
+        // Do we have a value
+        BOOST_REQUIRE(vel_check);
+        // Check the value
+        BOOST_CHECK_SMALL((*vel_check - vel).norm(), 1E-12);
+      }
+      {
+        BOOST_CHECK(lb->remove_node_from_boundary(node));
+        auto const res = lb->get_node_is_boundary(node, true);
+        // Did we get a value?
+        BOOST_REQUIRE(res);
+        // Should not be a boundary node
+        BOOST_CHECK(*res == false);
+      }
+    } else {
+      // Not in the local halo.
+      BOOST_CHECK(!lb->set_node_velocity_at_boundary(node, vel));
+      BOOST_CHECK(!lb->get_node_velocity_at_boundary(node));
+      BOOST_CHECK(!lb->remove_node_from_boundary(node));
+      BOOST_CHECK(!lb->get_node_is_boundary(node));
+    }
+  }
+
+  lb->clear_boundaries();
+  for (auto const &node : local_nodes_incl_ghosts(lb->get_lattice())) {
+    BOOST_CHECK(!(*lb->get_node_is_boundary(node, true)));
+  }
+}
+
+BOOST_DATA_TEST_CASE(update_boundary_from_shape, bdata::make(all_lbs()),
+                     lb_generator) {
+  auto lb = lb_generator(params);
+  auto const n_ghost_layers =
+      static_cast<int>(lb->get_lattice().get_ghost_layers());
+  auto const vel = Vector3d{{0.2, 3.8, 4.2}};
+
+  auto const vec3to4 = [](Utils::Vector<int, 3> const &d, int v) {
+    return Utils::Vector<int, 4>{{d[0], d[1], d[2], v}};
+  };
+
+  auto const nodes = std::vector<Vector3i>{
+      {-n_ghost_layers, 0, 0}, {0, 0, 0}, {0, 1, 2}, {9, 9, 9}};
+  // set up boundary
+  {
+    auto const n_grid_points = Utils::product(params.grid_dimensions);
+    boost::multi_array<int, 3> raster_3d(params.grid_dimensions);
+    boost::multi_array<double, 4> vel_3d(vec3to4(params.grid_dimensions, 3));
+    BOOST_CHECK_EQUAL(raster_3d.num_elements(), n_grid_points);
+    for (auto const &node : nodes) {
+      auto const idx = (node + params.grid_dimensions) % params.grid_dimensions;
+      raster_3d(idx) = 1;
+      for (auto const i : {0, 1, 2}) {
+        vel_3d(vec3to4(idx, i)) = vel[i];
+      }
+    }
+    std::vector<int> raster_flat(raster_3d.data(),
+                                 raster_3d.data() + raster_3d.num_elements());
+    std::vector<double> vel_flat(vel_3d.data(),
+                                 vel_3d.data() + vel_3d.num_elements());
+    lb->update_boundary_from_shape(raster_flat, vel_flat);
+  }
+
+  for (auto const &node : nodes) {
+    if (lb->get_lattice().node_in_local_halo(node)) {
+      {
+        auto const res = lb->get_node_is_boundary(node, true);
+        // Did we get a value?
+        BOOST_REQUIRE(res);
+        // Should be a boundary node
+        BOOST_CHECK(*res == true);
+      }
+      {
+        auto const vel_check = lb->get_node_velocity_at_boundary(node, true);
+        // Do we have a value
+        BOOST_REQUIRE(vel_check);
+        // Check the value
+        BOOST_CHECK_SMALL((*vel_check - vel).norm(), 1E-12);
+      }
+    } else {
+      // Not in the local halo.
+      BOOST_CHECK(!lb->get_node_velocity_at_boundary(node));
+    }
+  }
+
+  lb->clear_boundaries();
+  lb->ghost_communication();
+  for (auto const &node : local_nodes_incl_ghosts(lb->get_lattice())) {
+    BOOST_CHECK(!(*lb->get_node_is_boundary(node, true)));
+  }
+}
+
+BOOST_DATA_TEST_CASE(domain_and_halo, bdata::make(all_lbs()), lb_generator) {
+  auto lb = lb_generator(params);
+  auto const n_ghost_layers = lb->get_lattice().get_ghost_layers();
+  auto const [my_left, my_right] = lb->get_lattice().get_local_domain();
+
+  for (auto const &n : all_nodes_incl_ghosts(lb->get_lattice())) {
+    auto const pos = n + Vector3d::broadcast(.5);
+    int is_local = 0;
+    // Nodes in local domain
+    if (Vector3d(n) >= my_left and Vector3d(n) < my_right) {
+      BOOST_CHECK(lb->get_lattice().node_in_local_domain(n));
+      BOOST_CHECK(lb->get_lattice().node_in_local_halo(n));
+
+      BOOST_CHECK(lb->get_lattice().pos_in_local_domain(pos));
+      BOOST_CHECK(lb->get_lattice().pos_in_local_halo(pos));
+      is_local = 1;
+    } else {
+      // in local halo?
+      if ((n + Vector3d::broadcast(n_ghost_layers)) >= my_left and
+          (n - Vector3d::broadcast(n_ghost_layers)) < my_right) {
+        BOOST_CHECK(!lb->get_lattice().node_in_local_domain(n));
+        BOOST_CHECK(lb->get_lattice().node_in_local_halo(n));
+
+        BOOST_CHECK(!lb->get_lattice().pos_in_local_domain(pos));
+        BOOST_CHECK(lb->get_lattice().pos_in_local_halo(pos));
+      } else {
+        // neither in domain nor in halo
+        BOOST_CHECK(!lb->get_lattice().node_in_local_domain(n));
+        BOOST_CHECK(!lb->get_lattice().node_in_local_halo(n));
+
+        BOOST_CHECK(!lb->get_lattice().pos_in_local_domain(pos));
+        BOOST_CHECK(!lb->get_lattice().pos_in_local_halo(pos));
+      }
+    }
+
+    // If the cell is in the global physical domain
+    // check that only one mpi rank said the node was local
+    auto constexpr origin = Vector3i{0, 0, 0};
+    if (n >= origin and n < params.grid_dimensions) {
+      boost::mpi::communicator world;
+      auto const is_local_sum =
+          boost::mpi::all_reduce(world, is_local, std::plus<int>());
+      BOOST_CHECK(is_local_sum == 1);
+    }
+  }
+}
+
+static auto fold_node(Vector3i n) {
+  for (unsigned int i = 0; i < 3; i++) {
+    if (n[i] < 0) {
+      n[i] += params.grid_dimensions[i];
+    } else if (n[i] >= params.grid_dimensions[i]) {
+      n[i] -= params.grid_dimensions[i];
+    }
+  }
+  return n;
+}
+
+BOOST_DATA_TEST_CASE(velocity_at_node_and_pos, bdata::make(all_lbs()),
+                     lb_generator) {
+  auto lb = lb_generator(params);
+
+  // Values
+  auto n_pos = [](Vector3i const &n) { return n + Vector3d::broadcast(.5); };
+
+  auto n_vel = [](Vector3i const &node) {
+    return fold_node(node) + Vector3d{{1., 2., -.5}};
+  };
+
+  // Assign velocities
+  for (auto const &node : all_nodes_incl_ghosts(lb->get_lattice())) {
+    if (lb->get_lattice().node_in_local_domain(node)) {
+      BOOST_CHECK(lb->set_node_velocity(node, n_vel(node)));
+    } else {
+      // Check that access to node velocity is not possible
+      BOOST_CHECK(!lb->set_node_velocity(node, Vector3d{}));
+    }
+  }
+
+  lb->ghost_communication();
+
+  // check velocities
+  for (auto const &node : all_nodes_incl_ghosts(lb->get_lattice())) {
+    auto constexpr eps = 1E-8;
+    if (lb->get_lattice().node_in_local_halo(node)) {
+      auto const consider_ghosts =
+          !lb->get_lattice().node_in_local_domain(node);
+      auto res = lb->get_node_velocity(node, consider_ghosts);
+      BOOST_REQUIRE(res);                                  // value available?
+      BOOST_CHECK_SMALL((*res - n_vel(node)).norm(), eps); // value correct?
+      // Check that the interpolated velocity at the node pos equals the node
+      // vel
+      res = lb->get_velocity_at_pos(n_pos(node), consider_ghosts);
+      BOOST_REQUIRE(res);                                  // value available?
+      BOOST_CHECK_SMALL((*res - n_vel(node)).norm(), eps); // value correct?
+    } else {
+      // Check that access to node velocity is not possible
+      BOOST_CHECK(!lb->get_node_velocity(node));
+      BOOST_CHECK(!lb->get_velocity_at_pos(n_pos(node), true));
+    }
+  }
+
+  {
+    // check interpolation works for edge cases (box corners)
+    auto const [low_corner, up_corner] = params.lattice->get_local_domain();
+    BOOST_CHECK(lb->get_velocity_at_pos(low_corner));
+    BOOST_CHECK(lb->get_velocity_at_pos(up_corner - Vector3d::broadcast(1e-6)));
+    // check interpolation fails outside local domain
+    auto const pos_outside_domain =
+        low_corner - Utils::Vector3d::broadcast(0.6);
+    BOOST_CHECK_THROW(lb->get_velocity_at_pos(pos_outside_domain, true),
+                      std::runtime_error);
+  }
+}
+
+BOOST_DATA_TEST_CASE(interpolated_density_at_pos, bdata::make(all_lbs()),
+                     lb_generator) {
+  auto lb = lb_generator(params);
+
+  // Values
+  auto n_pos = [](Vector3i const &n) { return n + Vector3d::broadcast(.5); };
+
+  auto n_dens = [](Vector3i const &node) {
+    return 1. + static_cast<double>(Utils::product(fold_node(node))) * 1e-6;
+  };
+
+  // Assign densities
+  for (auto const &node : all_nodes_incl_ghosts(lb->get_lattice())) {
+    if (lb->get_lattice().node_in_local_domain(node)) {
+      BOOST_CHECK(lb->set_node_density(node, n_dens(node)));
+    } else {
+      // Check that access to node density is not possible
+      BOOST_CHECK(!lb->set_node_density(node, 0.));
+    }
+  }
+
+  lb->ghost_communication();
+
+  // check densities
+  for (auto const &node : all_nodes_incl_ghosts(lb->get_lattice())) {
+    auto constexpr eps = 1E-8;
+    if (lb->get_lattice().node_in_local_halo(node)) {
+      if (lb->get_lattice().node_in_local_domain(node)) {
+        auto res = lb->get_node_density(node);
+        BOOST_REQUIRE(res);                          // value available?
+        BOOST_CHECK_SMALL(*res - n_dens(node), eps); // value correct?
+        // Check that the interpolated density at the node pos equals the node
+        // density
+        res = lb->get_interpolated_density_at_pos(n_pos(node));
+        BOOST_REQUIRE(res);                          // value available?
+        BOOST_CHECK_SMALL(*res - n_dens(node), eps); // value correct?
+      } else {
+        BOOST_CHECK(!lb->get_node_density(node));
+        BOOST_CHECK(!lb->get_interpolated_density_at_pos(n_pos(node), false));
+      }
+    } else {
+      // Check that access to node density is not possible
+      BOOST_CHECK(!lb->get_node_density(node));
+      BOOST_CHECK(!lb->get_interpolated_density_at_pos(n_pos(node), true));
+    }
+  }
+
+  {
+    // check interpolation works for edge cases (box corners)
+    auto const [low_corner, up_corner] = params.lattice->get_local_domain();
+    BOOST_CHECK(lb->get_interpolated_density_at_pos(low_corner));
+    BOOST_CHECK(lb->get_interpolated_density_at_pos(up_corner -
+                                                    Vector3d::broadcast(1e-6)));
+    // check interpolation fails outside local domain
+    auto const pos_outside_domain =
+        low_corner - Utils::Vector3d::broadcast(0.6);
+    BOOST_CHECK_THROW(
+        lb->get_interpolated_density_at_pos(pos_outside_domain, true),
+        std::runtime_error);
+  }
+}
+
+BOOST_DATA_TEST_CASE(total_momentum, bdata::make(all_lbs()), lb_generator) {
+  auto lb = lb_generator(params);
+  auto const n1 = Vector3i{{1, 2, 3}};
+  auto const n2 = Vector3i{{9, 2, 10}};
+  auto const v1 = Vector3d{{1.5, 2.5, -2.2}};
+  auto const v2 = Vector3d{{-.5, 3.5, -.2}};
+  if (lb->get_lattice().node_in_local_domain(n1)) {
+    lb->set_node_velocity(n1, v1);
+  }
+  if (lb->get_lattice().node_in_local_domain(n2)) {
+    lb->set_node_velocity(n2, v2);
+  }
+
+  boost::mpi::communicator world;
+  auto const mom_local = lb->get_momentum();
+  auto const mom_exp = params.density * (v1 + v2);
+  auto const mom =
+      boost::mpi::all_reduce(world, mom_local, std::plus<Vector3d>());
+  BOOST_CHECK_SMALL((mom - mom_exp).norm(), 1E-10);
+}
+
+BOOST_DATA_TEST_CASE(forces_interpolation, bdata::make(all_lbs()),
+                     lb_generator) {
+  auto lb = lb_generator(params);
+
+  // todo: check a less symmetrical situation, where the force is applied not
+  // in the middle between the nodes
+
+  for (Vector3i n : all_nodes_incl_ghosts(lb->get_lattice())) {
+    if (lb->get_lattice().node_in_local_halo(n)) {
+      auto const pos = 1. * n; // Mid point between nodes
+      auto const f = Vector3d{{1., 2., -3.5}};
+      lb->add_force_at_pos(pos, f);
+      // Check neighboring nodes for force to be applied
+      for (int x : {0, 1})
+        for (int y : {0, 1})
+          for (int z : {0, 1}) {
+            auto const check_node = Vector3i{{n[0] - x, n[1] - y, n[2] - z}};
+            if (lb->get_lattice().node_in_local_halo(check_node)) {
+              auto const res = lb->get_node_force_to_be_applied(check_node);
+              BOOST_CHECK_SMALL(((*res) - f / 8.).norm(), 1E-10);
+            }
+          }
+      // Apply counter force to clear force field
+      lb->add_force_at_pos(pos, -f);
+    }
+  }
+}
+
+BOOST_DATA_TEST_CASE(forces_book_keeping, bdata::make(all_lbs()),
+                     lb_generator) {
+  auto lb = lb_generator(params);
+
+  // Forces added go to force_to_be_applied. After integration, they should be
+  // in last_applied_force, where they are used for velocity calculation
+
+  Vector3i const origin{};
+  Vector3i const middle = params.grid_dimensions / 2;
+  Vector3i const right = params.grid_dimensions - Vector3i{{1, 1, 1}};
+
+  Vector3d const f{{1., -2., 3.1}};
+
+  for (auto n : {origin, middle, right}) {
+    // Add force to node position
+    if (lb->get_lattice().node_in_local_domain(n)) {
+      lb->add_force_at_pos(n + Vector3d::broadcast(.5), f);
+      BOOST_CHECK_SMALL((*(lb->get_node_force_to_be_applied(n)) - f).norm(),
+                        1E-10);
+    }
+    lb->integrate();
+    // Check nodes incl some of the ghosts
+    for (auto cn : {n, n + params.grid_dimensions, n - params.grid_dimensions,
+                    n + Vector3i{{params.grid_dimensions[0], 0, 0}}}) {
+      if (lb->get_lattice().node_in_local_halo(cn)) {
+        BOOST_CHECK_SMALL(
+            (*(lb->get_node_last_applied_force(cn, true)) - f).norm(), 1E-10);
+        BOOST_CHECK_SMALL((*(lb->get_node_force_to_be_applied(cn))).norm(),
+                          1E-10);
+      }
+    }
+    lb->integrate();
+    for (auto cn : {n, n + params.grid_dimensions, n - params.grid_dimensions,
+                    n + Vector3i{{params.grid_dimensions[0], 0, 0}}}) {
+      if (lb->get_lattice().node_in_local_halo(cn)) {
+        BOOST_CHECK_SMALL((*(lb->get_node_last_applied_force(cn, true))).norm(),
+                          1E-10);
+        BOOST_CHECK_SMALL((*(lb->get_node_force_to_be_applied(cn))).norm(),
+                          1E-10);
+      }
+    }
+  }
+}
+
+BOOST_DATA_TEST_CASE(force_in_corner, bdata::make(all_lbs()), lb_generator) {
+  auto lb = lb_generator(params);
+  boost::mpi::communicator world;
+
+  // Add forces in all box corners. If domain boundaries are treated correctly
+  // each corner node should get 1/8 of the force.
+
+  auto const l = params.box_dimensions;
+  auto const f = Vector3d{{0.1, .02, -0.3}};
+  for (double x : {0., l[0]}) {
+    for (double y : {0., l[1]}) {
+      for (double z : {0., l[2]}) {
+        auto const pos = Vector3d{x, y, z};
+        static_cast<void>(lb->add_force_at_pos(pos, f));
+      }
+    }
+  }
+
+  // check forces to be applied
+  // Each corner node should have 1/8 of the force
+  auto const tol = 1E-10;
+  int count_local = 0;
+  int count = 0;
+  for (auto const &c : corner_nodes(params.grid_dimensions)) {
+    auto const res = lb->get_node_force_to_be_applied(c);
+    if (res) {
+      BOOST_CHECK_SMALL(((*res) - f / 8.).norm(), tol);
+      ++count_local;
+    }
+  };
+  count = boost::mpi::all_reduce(world, count_local, std::plus<int>());
+  BOOST_CHECK_EQUAL(count, 8);
+
+  lb->integrate();
+
+  // check applied forces from last integration step
+  count_local = 0;
+  for (auto const &c : corner_nodes(params.grid_dimensions)) {
+    auto const res = lb->get_node_last_applied_force(c);
+    if (res) {
+      BOOST_CHECK_SMALL(((*res) - f / 8.).norm(), tol);
+      ++count_local;
+    }
+  };
+  count = boost::mpi::all_reduce(world, count_local, std::plus<int>());
+  BOOST_CHECK_EQUAL(count, 8);
+}
+
+BOOST_DATA_TEST_CASE(vtk_exceptions,
+                     bdata::make(LbGeneratorVector{unthermalized_lbs()[0]}),
+                     lb_generator) {
+  std::unordered_map<std::string, double> const units = {{"density", 1.}};
+  auto lb = lb_generator(params);
+  auto const flag =
+      static_cast<std::underlying_type_t<OutputVTK>>(OutputVTK::density);
+  // cannot create the same observable twice
+  lb->create_vtk(1u, 0u, flag, units, "density", "vtk_out", "step");
+  BOOST_CHECK_THROW(
+      lb->create_vtk(1u, 0u, flag, units, "density", "vtk_out", "step"),
+      std::runtime_error);
+  // cannot manually call an automatic observable
+  lb->create_vtk(1u, 0u, flag, units, "auto", "vtk_out", "step");
+  BOOST_CHECK_THROW(lb->write_vtk("vtk_out/auto"), std::runtime_error);
+  // cannot activate a manual observable
+  lb->create_vtk(0u, 0u, flag, units, "manual", "vtk_out", "step");
+  BOOST_CHECK_THROW(lb->switch_vtk("vtk_out/manual", 0), std::runtime_error);
+  // cannot call or activate observables that haven't been registered yet
+  BOOST_CHECK_THROW(lb->write_vtk("unknown"), std::runtime_error);
+  BOOST_CHECK_THROW(lb->switch_vtk("unknown", 0), std::runtime_error);
+}
+
+BOOST_AUTO_TEST_CASE(lb_exceptions) {
+  using LB = walberla::LBWalberlaImpl<double, lbmpy::Arch::CPU>;
+  auto lb_lattice_without_ghosts =
+      std::make_shared<LatticeWalberla>(params.grid_dimensions, mpi_shape, 0u);
+  BOOST_CHECK_THROW(LB(lb_lattice_without_ghosts, 1., 1.), std::runtime_error);
+}
+
+BOOST_AUTO_TEST_CASE(le_sweep) {
+  auto const get_pos_offset = []() { return 0.123; };
+  auto const get_shift = []() { return 0.456; };
+  auto const make_kernel = [&](unsigned int n_ghost_layers,
+                               unsigned int shear_direction,
+                               unsigned int shear_plane_normal) {
+    using LB = walberla::LBWalberlaImpl<double, lbmpy::Arch::CPU>;
+    using VectorField = typename LB::VectorField;
+    using Sweep = walberla::InterpolateAndShiftAtBoundary<VectorField, double>;
+    auto const sweep = std::make_shared<Sweep>(
+        nullptr, walberla::BlockDataID{}, walberla::BlockDataID{},
+        n_ghost_layers, shear_direction, shear_plane_normal, get_pos_offset,
+        get_shift);
+    BOOST_CHECK_CLOSE(sweep->get_pos_offset(), 0.123, 1e-10);
+    BOOST_CHECK_CLOSE(sweep->get_shift(), 0.456, 1e-10);
+  };
+  auto constexpr n_ghost_layers = 1u;
+  make_kernel(n_ghost_layers, 0u, 1u);
+  make_kernel(n_ghost_layers, 1u, 2u);
+  make_kernel(n_ghost_layers, 2u, 0u);
+  BOOST_CHECK_THROW(make_kernel(2u, 0u, 1u), std::domain_error);
+  BOOST_CHECK_THROW(make_kernel(0u, 0u, 1u), std::domain_error);
+}
+
+int main(int argc, char **argv) {
+  int n_nodes;
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &n_nodes);
+  MPI_Dims_create(n_nodes, 3, mpi_shape.data());
+  walberla::mpi_init();
+
+  params.seed = 0u;
+  params.kT = 1.3E-4;
+  params.viscosity = 0.003;
+  params.density = 1.4;
+  params.grid_dimensions = Vector3i{12, 12, 18};
+  params.box_dimensions = Vector3d{12, 12, 18};
+  params.lattice =
+      std::make_shared<LatticeWalberla>(params.grid_dimensions, mpi_shape, 1u);
+
+  auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv);
+  MPI_Finalize();
+  return res;
+}
+
+#else // WALBERLA
+int main(int argc, char **argv) {}
+#endif
diff --git a/src/walberla_bridge/tests/LatticeWalberla_unit_tests.cpp b/src/walberla_bridge/tests/LatticeWalberla_unit_tests.cpp
new file mode 100644
index 00000000000..c26ad40a084
--- /dev/null
+++ b/src/walberla_bridge/tests/LatticeWalberla_unit_tests.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2020-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define BOOST_TEST_MODULE LatticeWalberla tests
+#define BOOST_TEST_DYN_LINK
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#define BOOST_TEST_NO_MAIN
+
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include "tests_common.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <boost/mpi/collectives/all_reduce.hpp>
+#include <boost/mpi/communicator.hpp>
+
+#include <mpi.h>
+
+#include <functional>
+#include <stdexcept>
+#include <type_traits>
+
+using Utils::Vector3d;
+using Utils::Vector3i;
+
+namespace bdata = boost::unit_test::data;
+
+static LatticeTestParameters params; // populated in main()
+static Vector3i mpi_shape;           // populated in main
+
+BOOST_DATA_TEST_CASE(domain_and_halo, bdata::xrange(3u), n_ghost_layers) {
+  auto const lattice =
+      LatticeWalberla(params.grid_dimensions, mpi_shape, n_ghost_layers);
+  auto const [my_left, my_right] = lattice.get_local_domain();
+
+  for (auto const &n : all_nodes_incl_ghosts(lattice)) {
+    auto const pos = n + Vector3d::broadcast(.5);
+    int is_local = 0;
+    // Nodes in local domain
+    if (Vector3d(n) >= my_left and Vector3d(n) < my_right) {
+      BOOST_CHECK(lattice.node_in_local_domain(n));
+      BOOST_CHECK(lattice.node_in_local_halo(n));
+
+      BOOST_CHECK(lattice.pos_in_local_domain(pos));
+      BOOST_CHECK(lattice.pos_in_local_halo(pos));
+      is_local = 1;
+    } else {
+      // in local halo?
+      if ((n + Vector3d::broadcast(n_ghost_layers)) >= my_left and
+          (n - Vector3d::broadcast(n_ghost_layers)) < my_right) {
+        BOOST_CHECK(!lattice.node_in_local_domain(n));
+        BOOST_CHECK(lattice.node_in_local_halo(n));
+
+        BOOST_CHECK(!lattice.pos_in_local_domain(pos));
+        BOOST_CHECK(lattice.pos_in_local_halo(pos));
+      } else {
+        // neither in domain nor in halo
+        BOOST_CHECK(!lattice.node_in_local_domain(n));
+        BOOST_CHECK(!lattice.node_in_local_halo(n));
+
+        BOOST_CHECK(!lattice.pos_in_local_domain(pos));
+        BOOST_CHECK(!lattice.pos_in_local_halo(pos));
+      }
+    }
+
+    // If the cell is in the global physical domain
+    // check that only one mpi rank said the node was local
+    constexpr auto origin = Vector3i{0, 0, 0};
+    if (n >= origin and n < params.grid_dimensions) {
+      boost::mpi::communicator world;
+      auto const is_local_sum =
+          boost::mpi::all_reduce(world, is_local, std::plus<int>());
+      BOOST_CHECK(is_local_sum == 1);
+    }
+  }
+}
+
+BOOST_AUTO_TEST_CASE(exceptions) {
+  for (int i : {0, 1, 2}) {
+    auto node_grid = Vector3i::broadcast(1);
+    auto grid_dims = Vector3i::broadcast(1);
+    grid_dims[i] = 3;
+    node_grid[i] = 2;
+    BOOST_CHECK_THROW(LatticeWalberla(grid_dims, node_grid, 1u),
+                      std::runtime_error);
+  }
+}
+
+int main(int argc, char **argv) {
+  MPI_Init(&argc, &argv);
+  int n_nodes;
+
+  MPI_Comm_size(MPI_COMM_WORLD, &n_nodes);
+  MPI_Dims_create(n_nodes, 3, mpi_shape.data());
+
+  params.grid_dimensions = Vector3i{12, 12, 18};
+  params.box_dimensions = Vector3d{12, 12, 18};
+
+  walberla::mpi_init();
+  auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv);
+  MPI_Finalize();
+  return res;
+}
+
+#else // WALBERLA
+int main(int argc, char **argv) {}
+#endif
diff --git a/src/walberla_bridge/tests/ResourceManager_test.cpp b/src/walberla_bridge/tests/ResourceManager_test.cpp
new file mode 100644
index 00000000000..2d0cb51148e
--- /dev/null
+++ b/src/walberla_bridge/tests/ResourceManager_test.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define BOOST_TEST_MODULE resources manager
+#define BOOST_TEST_DYN_LINK
+
+#include <boost/test/unit_test.hpp>
+
+#include <walberla_bridge/utils/ResourceManager.hpp>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace Testing {
+
+static std::vector<std::string> logger;
+
+template <char Name> class LogWriter {
+
+public:
+  LogWriter() { logger.emplace_back(std::string(1, Name) + "()"); }
+  ~LogWriter() { logger.emplace_back("~" + std::string(1, Name) + "()"); }
+};
+
+} // namespace Testing
+
+BOOST_AUTO_TEST_CASE(destruction_order) {
+  // instantiate three resources in a specific order
+  auto obj_a = std::make_shared<Testing::LogWriter<'A'>>();
+  auto obj_b = std::make_shared<Testing::LogWriter<'B'>>();
+  auto obj_c = std::make_shared<Testing::LogWriter<'C'>>();
+  auto obj_d = std::make_shared<Testing::LogWriter<'D'>>();
+  BOOST_REQUIRE_EQUAL(Testing::logger.size(), 4ul);
+  BOOST_CHECK_EQUAL(Testing::logger[0], "A()");
+  BOOST_CHECK_EQUAL(Testing::logger[1], "B()");
+  BOOST_CHECK_EQUAL(Testing::logger[2], "C()");
+  BOOST_CHECK_EQUAL(Testing::logger[3], "D()");
+
+  // lock resources in some order (but *not* the reverse order of construction!)
+  auto manager = std::make_unique<ResourceManager>();
+  manager->acquire_lock(obj_c);
+  manager->acquire_lock(obj_a);
+  manager->acquire_lock(obj_d);
+  manager->acquire_lock(obj_b);
+  BOOST_REQUIRE_EQUAL(Testing::logger.size(), 4ul);
+
+  // resetting the local shared pointers should not make the resources expire
+  obj_a.reset();
+  obj_b.reset();
+  obj_c.reset();
+  obj_d.reset();
+  BOOST_REQUIRE_EQUAL(Testing::logger.size(), 4ul);
+
+  // the manager should free the resources in the reverse order of their locking
+  manager.reset();
+  BOOST_REQUIRE_EQUAL(Testing::logger.size(), 8ul);
+  BOOST_CHECK_EQUAL(Testing::logger[4], "~B()");
+  BOOST_CHECK_EQUAL(Testing::logger[5], "~D()");
+  BOOST_CHECK_EQUAL(Testing::logger[6], "~A()");
+  BOOST_CHECK_EQUAL(Testing::logger[7], "~C()");
+}
diff --git a/src/walberla_bridge/tests/kernels_unit_tests.cpp b/src/walberla_bridge/tests/kernels_unit_tests.cpp
new file mode 100644
index 00000000000..fed6eb4f067
--- /dev/null
+++ b/src/walberla_bridge/tests/kernels_unit_tests.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2021-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define BOOST_TEST_MODULE Walberla kernels
+#define BOOST_TEST_DYN_LINK
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include <boost/test/unit_test.hpp>
+
+#include "../src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h"
+#include "../src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h"
+#include "../src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h"
+#include "../src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h"
+
+#include <walberla_bridge/utils/walberla_utils.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <cmath>
+#include <limits>
+
+bool operator!=(
+    const walberla::lbm::Dynamic_UBB_single_precision::IndexInfo &lhs,
+    const walberla::lbm::Dynamic_UBB_single_precision::IndexInfo &rhs) {
+  return not(lhs == rhs);
+}
+
+bool operator!=(
+    const walberla::lbm::Dynamic_UBB_double_precision::IndexInfo &lhs,
+    const walberla::lbm::Dynamic_UBB_double_precision::IndexInfo &rhs) {
+  return not(lhs == rhs);
+}
+
+bool operator!=(
+    const walberla::lbm::Dynamic_UBB_single_precision::IndexVectors &lhs,
+    const walberla::lbm::Dynamic_UBB_single_precision::IndexVectors &rhs) {
+  return not(lhs == rhs);
+}
+
+bool operator!=(
+    const walberla::lbm::Dynamic_UBB_double_precision::IndexVectors &lhs,
+    const walberla::lbm::Dynamic_UBB_double_precision::IndexVectors &rhs) {
+  return not(lhs == rhs);
+}
+
+BOOST_AUTO_TEST_CASE(dynamic_ubb) {
+  using Dynamic_UBB_f = walberla::lbm::Dynamic_UBB_single_precision;
+  using Dynamic_UBB_d = walberla::lbm::Dynamic_UBB_double_precision;
+
+  // check IndexInfo
+  auto vel1_f = Dynamic_UBB_f::IndexInfo(1, 2, 3, 0);
+  auto vel2_f = Dynamic_UBB_f::IndexInfo(1, 2, 3, 0);
+  auto vel1_d = Dynamic_UBB_d::IndexInfo(1, 2, 3, 0);
+  auto vel2_d = Dynamic_UBB_d::IndexInfo(1, 2, 3, 0);
+  vel1_f.vel_0 = vel2_f.vel_0 = 1.0f;
+  vel1_f.vel_1 = vel2_f.vel_1 = 2.0f;
+  vel1_f.vel_2 = vel2_f.vel_2 = 3.0f;
+  vel1_d.vel_0 = vel2_d.vel_0 = 1.0;
+  vel1_d.vel_1 = vel2_d.vel_1 = 2.0;
+  vel1_d.vel_2 = vel2_d.vel_2 = 3.0;
+  BOOST_TEST((vel1_f == vel2_f));
+  BOOST_TEST((vel1_d == vel2_d));
+  vel2_f.vel_2 += 1.0f;
+  vel2_d.vel_2 += 1.0;
+  BOOST_TEST((vel1_f != vel2_f));
+  BOOST_TEST((vel1_d != vel2_d));
+  vel2_f.vel_2 += 1.0f;
+  vel2_d.vel_2 += 1.0;
+  BOOST_TEST((vel1_f != vel2_f));
+  BOOST_TEST((vel1_d != vel2_d));
+
+  // check IndexVector
+  auto vec1_f = Dynamic_UBB_f::IndexVectors();
+  auto vec2_f = Dynamic_UBB_f::IndexVectors();
+  auto vec1_d = Dynamic_UBB_d::IndexVectors();
+  auto vec2_d = Dynamic_UBB_d::IndexVectors();
+  vec1_f.indexVector(Dynamic_UBB_f::IndexVectors::Type::ALL).push_back(vel1_f);
+  vec2_f.indexVector(Dynamic_UBB_f::IndexVectors::Type::ALL).push_back(vel1_f);
+  vec1_d.indexVector(Dynamic_UBB_d::IndexVectors::Type::ALL).push_back(vel1_d);
+  vec2_d.indexVector(Dynamic_UBB_d::IndexVectors::Type::ALL).push_back(vel1_d);
+  BOOST_TEST((vec1_f == vec2_f));
+  BOOST_TEST((vec1_d == vec2_d));
+  vec1_f.indexVector(Dynamic_UBB_f::IndexVectors::Type::ALL).push_back(vel1_f);
+  vec2_f.indexVector(Dynamic_UBB_f::IndexVectors::Type::ALL).push_back(vel2_f);
+  vec1_d.indexVector(Dynamic_UBB_d::IndexVectors::Type::ALL).push_back(vel1_d);
+  vec2_d.indexVector(Dynamic_UBB_d::IndexVectors::Type::ALL).push_back(vel2_d);
+  BOOST_TEST((vec1_f != vec2_f));
+  BOOST_TEST((vec1_d != vec2_d));
+}
+
+static auto clamp_zero(double value) {
+  auto constexpr epsilon = std::numeric_limits<float>::epsilon();
+  return (std::abs(value) < 5.f * epsilon) ? 0. : value;
+}
+
+static auto clamp_zero(float value) {
+  auto constexpr epsilon = std::numeric_limits<float>::epsilon();
+  return (std::abs(value) < 5.f * epsilon) ? 0.f : value;
+}
+
+BOOST_AUTO_TEST_CASE(macroscopic_accessor_equilibrium_distribution) {
+  using namespace walberla::stencil;
+  using namespace walberla::lbm::accessor;
+
+  auto const x = std::sqrt(1. / 3.);
+  auto const u = Utils::Vector3d::broadcast(x);
+  auto const u_f = walberla::to_vector3<float>(u);
+  auto const u_d = walberla::to_vector3<double>(u);
+  auto const rho_f = 0.2f;
+  auto const rho_d = 0.2;
+  auto const tol_f = 100.f * 5e-7f;
+  auto const tol_d = 100. * 5e-9;
+
+  {
+    auto const direction = Direction::C;
+    auto const ref_d = rho_d * 1. / 3. * (1. - u.norm2());
+    auto const ref_f = static_cast<float>(ref_d);
+    auto const pop_f = EquilibriumDistribution::get(direction, u_f, rho_f);
+    auto const pop_d = EquilibriumDistribution::get(direction, u_d, rho_d);
+    BOOST_CHECK_CLOSE(clamp_zero(pop_f), clamp_zero(ref_f), tol_f);
+    BOOST_CHECK_CLOSE(clamp_zero(pop_d), clamp_zero(ref_d), tol_d);
+  }
+  {
+    auto const ref_d = rho_d * (1. / 18. - 1. / 6. * (x * x - x));
+    auto const ref_f = static_cast<float>(ref_d);
+    for (auto const direction : {Direction::N, Direction::E, Direction::T}) {
+      auto const pop_f = EquilibriumDistribution::get(direction, u_f, rho_f);
+      auto const pop_d = EquilibriumDistribution::get(direction, u_d, rho_d);
+      BOOST_CHECK_CLOSE(clamp_zero(pop_f), clamp_zero(ref_f), tol_f);
+      BOOST_CHECK_CLOSE(clamp_zero(pop_d), clamp_zero(ref_d), tol_d);
+    }
+  }
+  {
+    auto const ref_d = rho_d * (1. / 18. - 1. / 6. * (x + x * x));
+    auto const ref_f = static_cast<float>(ref_d);
+    for (auto const direction : {Direction::S, Direction::W, Direction::B}) {
+      auto const pop_f = EquilibriumDistribution::get(direction, u_f, rho_f);
+      auto const pop_d = EquilibriumDistribution::get(direction, u_d, rho_d);
+      BOOST_CHECK_CLOSE(clamp_zero(pop_f), clamp_zero(ref_f), tol_f);
+      BOOST_CHECK_CLOSE(clamp_zero(pop_d), clamp_zero(ref_d), tol_d);
+    }
+  }
+  {
+    auto const ref_d = rho_d * (1. / 36. - 1. / 12. * x * x);
+    auto const ref_f = static_cast<float>(ref_d);
+    for (auto const direction : {Direction::NW, Direction::SE, Direction::TS,
+                                 Direction::TW, Direction::BN, Direction::BE}) {
+      auto const pop_f = EquilibriumDistribution::get(direction, u_f, rho_f);
+      auto const pop_d = EquilibriumDistribution::get(direction, u_d, rho_d);
+      BOOST_CHECK_CLOSE(clamp_zero(pop_f), clamp_zero(ref_f), tol_f);
+      BOOST_CHECK_CLOSE(clamp_zero(pop_d), clamp_zero(ref_d), tol_d);
+    }
+  }
+  {
+    auto const ref_d = rho_d * (1. / 36. + 5. / 12. * x * x + 2. / 12. * x);
+    auto const ref_f = static_cast<float>(ref_d);
+    for (auto const direction : {Direction::NE, Direction::TN, Direction::TE}) {
+      auto const pop_f = EquilibriumDistribution::get(direction, u_f, rho_f);
+      auto const pop_d = EquilibriumDistribution::get(direction, u_d, rho_d);
+      BOOST_CHECK_CLOSE(clamp_zero(pop_f), clamp_zero(ref_f), tol_f);
+      BOOST_CHECK_CLOSE(clamp_zero(pop_d), clamp_zero(ref_d), tol_d);
+    }
+  }
+  {
+    auto const ref_d = rho_d * (1. / 36. + 5. / 12. * x * x - 2. / 12. * x);
+    auto const ref_f = static_cast<float>(ref_d);
+    for (auto const direction : {Direction::SW, Direction::BS, Direction::BW}) {
+      auto const pop_f = EquilibriumDistribution::get(direction, u_f, rho_f);
+      auto const pop_d = EquilibriumDistribution::get(direction, u_d, rho_d);
+      BOOST_CHECK_CLOSE(clamp_zero(pop_f), clamp_zero(ref_f), tol_f);
+      BOOST_CHECK_CLOSE(clamp_zero(pop_d), clamp_zero(ref_d), tol_d);
+    }
+  }
+}
+
+#endif
diff --git a/src/walberla_bridge/tests/tests_common.hpp b/src/walberla_bridge/tests/tests_common.hpp
new file mode 100644
index 00000000000..65c8c49c17a
--- /dev/null
+++ b/src/walberla_bridge/tests/tests_common.hpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <initializer_list>
+#include <memory>
+#include <vector>
+
+struct LatticeTestParameters {
+  Utils::Vector3d box_dimensions;
+  Utils::Vector3i grid_dimensions;
+  std::shared_ptr<LatticeWalberla> lattice;
+};
+
+inline auto all_nodes_incl_ghosts(LatticeWalberla const &lattice,
+                                  bool with_ghosts = true) {
+  auto const &grid_dimensions = lattice.get_grid_dimensions();
+  auto const gl =
+      (with_ghosts) ? static_cast<int>(lattice.get_ghost_layers()) : 0;
+  std::vector<Utils::Vector3i> res;
+  for (auto x = -gl; x < grid_dimensions[0] + gl; ++x) {
+    for (auto y = -gl; y < grid_dimensions[1] + gl; ++y) {
+      for (auto z = -gl; z < grid_dimensions[2] + gl; ++z) {
+        res.push_back(Utils::Vector3i{x, y, z});
+      }
+    }
+  }
+  return res;
+}
+
+inline auto local_nodes_incl_ghosts(LatticeWalberla const &lattice,
+                                    bool with_ghosts = true) {
+  auto const [left, right] = lattice.get_local_grid_range();
+  auto const gl =
+      (with_ghosts) ? static_cast<int>(lattice.get_ghost_layers()) : 0;
+  std::vector<Utils::Vector3i> res;
+  for (auto x = left[0] - gl; x < right[0] + gl; ++x) {
+    for (auto y = left[1] - gl; y < right[1] + gl; ++y) {
+      for (auto z = left[2] - gl; z < right[2] + gl; ++z) {
+        res.push_back(Utils::Vector3i{x, y, z});
+      }
+    }
+  }
+  return res;
+}
+
+inline auto corner_nodes(Utils::Vector3i const &n) {
+  std::vector<Utils::Vector3i> res;
+  for (auto i : {0, n[0] - 1}) {
+    for (auto j : {0, n[1] - 1}) {
+      for (auto k : {0, n[2] - 1}) {
+        res.emplace_back(Utils::Vector3i{i, j, k});
+      }
+    }
+  }
+  return res;
+}
+
+#endif // WALBERLA
diff --git a/src/walberla_bridge/tests/tests_common_ek.hpp b/src/walberla_bridge/tests/tests_common_ek.hpp
new file mode 100644
index 00000000000..1b3a67df16a
--- /dev/null
+++ b/src/walberla_bridge/tests/tests_common_ek.hpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "tests_common.hpp"
+
+#include "../src/electrokinetics/EKinWalberlaImpl.hpp"
+
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/EKinWalberlaBase.hpp>
+#include <walberla_bridge/electrokinetics/ek_walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+struct EKTestParameters : public LatticeTestParameters {
+  unsigned int seed;
+  double kT;
+  double density;
+  double diffusion;
+  double valency;
+  bool advection;
+  bool friction_coupling;
+  Utils::Vector3d ext_efield;
+  Utils::Vector3d box_dimensions;
+  Utils::Vector3i grid_dimensions;
+  std::shared_ptr<LatticeWalberla> lattice;
+};
+
+using EkGeneratorVector = std::vector<
+    std::function<std::shared_ptr<EKinWalberlaBase>(EKTestParameters const &)>>;
+
+inline EkGeneratorVector unthermalized_eks() {
+  using EKImplementation = walberla::EKinWalberlaImpl<>;
+  EkGeneratorVector eks;
+
+  eks.push_back([](EKTestParameters const &params) {
+    auto ptr = std::make_shared<EKImplementation>(
+        params.lattice, params.diffusion, 0., params.valency, params.ext_efield,
+        params.density, params.advection, params.friction_coupling);
+    ptr->ghost_communication();
+    return ptr;
+  });
+  return eks;
+}
+
+inline EkGeneratorVector thermalized_eks() {
+  using EKImplementation = walberla::EKinWalberlaImpl<>;
+  EkGeneratorVector eks;
+
+  eks.push_back([](EKTestParameters const &params) {
+    auto ptr = std::make_shared<EKImplementation>(
+        params.lattice, params.diffusion, params.kT, params.valency,
+        params.ext_efield, params.density, params.advection,
+        params.friction_coupling);
+    ptr->ghost_communication();
+    return ptr;
+  });
+  return eks;
+}
+
+inline EkGeneratorVector all_eks() {
+  auto eks = unthermalized_eks();
+  auto thermalized = thermalized_eks();
+  eks.insert(eks.end(), thermalized.begin(), thermalized.end());
+  return eks;
+}
+
+// Disable printing of type which does not support it
+BOOST_TEST_DONT_PRINT_LOG_VALUE(EkGeneratorVector::value_type)
+
+#endif // WALBERLA
diff --git a/src/walberla_bridge/tests/tests_common_lb.hpp b/src/walberla_bridge/tests/tests_common_lb.hpp
new file mode 100644
index 00000000000..675e6173ea7
--- /dev/null
+++ b/src/walberla_bridge/tests/tests_common_lb.hpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2019-2023 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "config/config.hpp"
+
+#ifdef WALBERLA
+
+#include "tests_common.hpp"
+
+#include "../src/lattice_boltzmann/LBWalberlaImpl.hpp"
+
+#include <walberla_bridge/Architecture.hpp>
+#include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
+#include <walberla_bridge/lattice_boltzmann/lb_walberla_init.hpp>
+
+#include <utils/Vector.hpp>
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+struct LBTestParameters : public LatticeTestParameters {
+  unsigned int seed;
+  double kT;
+  double viscosity;
+  double density;
+  Utils::Vector3d box_dimensions;
+  Utils::Vector3i grid_dimensions;
+  std::shared_ptr<LatticeWalberla> lattice;
+};
+
+using LbGeneratorVector = std::vector<
+    std::function<std::shared_ptr<LBWalberlaBase>(LBTestParameters const &)>>;
+
+inline LbGeneratorVector unthermalized_lbs() {
+  using LBImplementation = walberla::LBWalberlaImpl<double, lbmpy::Arch::CPU>;
+  LbGeneratorVector lbs;
+
+  // Unthermalized D3Q19 MRT
+  lbs.push_back([](LBTestParameters const &params) {
+    auto ptr = std::make_shared<LBImplementation>(
+        params.lattice, params.viscosity, params.density);
+    ptr->set_collision_model(0.0, params.seed);
+    ptr->ghost_communication();
+    return ptr;
+  });
+  return lbs;
+}
+
+inline LbGeneratorVector thermalized_lbs() {
+  using LBImplementation = walberla::LBWalberlaImpl<double, lbmpy::Arch::CPU>;
+  LbGeneratorVector lbs;
+
+  // Thermalized D3Q19 MRT
+  lbs.push_back([](LBTestParameters const &params) {
+    auto ptr = std::make_shared<LBImplementation>(
+        params.lattice, params.viscosity, params.density);
+    ptr->set_collision_model(params.kT, params.seed);
+    ptr->ghost_communication();
+    return ptr;
+  });
+  return lbs;
+}
+
+inline LbGeneratorVector all_lbs() {
+  auto lbs = unthermalized_lbs();
+  auto thermalized = thermalized_lbs();
+  lbs.insert(lbs.end(), thermalized.begin(), thermalized.end());
+  return lbs;
+}
+
+// Disable printing of type which does not support it
+BOOST_TEST_DONT_PRINT_LOG_VALUE(LbGeneratorVector::value_type)
+
+#endif // WALBERLA
diff --git a/testsuite/python/CMakeLists.txt b/testsuite/python/CMakeLists.txt
index 0c63b06706a..ab80f19e579 100644
--- a/testsuite/python/CMakeLists.txt
+++ b/testsuite/python/CMakeLists.txt
@@ -189,12 +189,12 @@ endfunction(checkpoint_test)
 # feature with zero or more options; separate features with 2 underscores and
 # options with 1 underscore (options can appear in any order). For example,
 # "p3m_cpu__lb_cpu_ascii" generates modes P3M, P3M.CPU, LB, LB.CPU, LB.ASCII.
-checkpoint_test(MODES therm_lb__p3m_cpu__lj__lb_cpu_ascii SUFFIX 1_core
-                MAX_NUM_PROC 1)
-checkpoint_test(MODES therm_lb__p3m_cpu__lj__lb_cpu_ascii)
-checkpoint_test(MODES therm_lb__elc_cpu__lj__lb_cpu_binary)
-checkpoint_test(MODES therm_lb__elc_gpu__lj__lb_gpu_ascii GPU_SLOTS 3)
-checkpoint_test(MODES therm_lb__p3m_gpu__lj__lb_gpu_binary GPU_SLOTS 3)
+checkpoint_test(MODES therm_lb__p3m_cpu__lj__lb_walberla_cpu_ascii SUFFIX
+                1_core MAX_NUM_PROC 1)
+checkpoint_test(MODES therm_lb__p3m_cpu__lj__lb_walberla_cpu_ascii)
+checkpoint_test(MODES therm_lb__elc_cpu__lj__lb_walberla_cpu_binary)
+checkpoint_test(MODES therm_lb__elc_gpu__lj__lb_walberla_cpu_ascii GPU_SLOTS 3)
+checkpoint_test(MODES therm_lb__p3m_gpu__lj__lb_walberla_cpu_binary GPU_SLOTS 3)
 checkpoint_test(MODES therm_npt__int_npt)
 checkpoint_test(MODES int_sd__lj)
 checkpoint_test(MODES dp3m_cpu__therm_langevin__int_nvt)
@@ -259,29 +259,25 @@ python_test(FILE constant_pH.py MAX_NUM_PROC 1)
 python_test(FILE constant_pH_stats.py MAX_NUM_PROC 4 LABELS long)
 python_test(FILE canonical_ensemble.py MAX_NUM_PROC 2)
 python_test(FILE writevtf.py MAX_NUM_PROC 4)
-python_test(FILE lb_stokes_sphere.py MAX_NUM_PROC 4 GPU_SLOTS 1 LABELS long)
+# python_test(FILE lb_stokes_sphere.py MAX_NUM_PROC 4 GPU_SLOTS 1 LABELS long)
 python_test(FILE lb_pressure_tensor.py MAX_NUM_PROC 1 GPU_SLOTS 3 LABELS long)
-python_test(FILE ek_fluctuations.py MAX_NUM_PROC 1 GPU_SLOTS 1)
-python_test(FILE ek_charged_plate.py MAX_NUM_PROC 1 GPU_SLOTS 1)
-python_test(FILE ek_eof_one_species.py MAX_NUM_PROC 1 GPU_SLOTS 2 SUFFIX x
-            ARGUMENTS Test__axis_x DEPENDENCIES unittest_generator.py)
-python_test(FILE ek_eof_one_species.py MAX_NUM_PROC 1 GPU_SLOTS 2 SUFFIX y
-            ARGUMENTS Test__axis_y DEPENDENCIES unittest_generator.py)
-python_test(FILE ek_eof_one_species.py MAX_NUM_PROC 1 GPU_SLOTS 2 SUFFIX z
-            ARGUMENTS Test__axis_z DEPENDENCIES unittest_generator.py)
+# python_test(FILE ek_fluctuations.py MAX_NUM_PROC 1) # TODO
 python_test(FILE exclusions.py MAX_NUM_PROC 2)
 python_test(FILE langevin_thermostat.py MAX_NUM_PROC 1)
 python_test(FILE langevin_thermostat_stats.py MAX_NUM_PROC 1 LABELS long)
 python_test(FILE brownian_dynamics.py MAX_NUM_PROC 1)
 python_test(FILE brownian_dynamics_stats.py MAX_NUM_PROC 1 LABELS long)
 python_test(FILE lees_edwards.py MAX_NUM_PROC 4)
+python_test(FILE lb_lees_edwards.py MAX_NUM_PROC 1)
+python_test(FILE lb_lees_edwards_particle_coupling.py MAX_NUM_PROC 1)
+python_test(FILE lb_planar_couette.py MAX_NUM_PROC 1)
 python_test(FILE nsquare.py MAX_NUM_PROC 4)
 python_test(FILE virtual_sites_relative.py MAX_NUM_PROC 2)
 python_test(FILE virtual_sites_relative_pbc.py MAX_NUM_PROC 2)
 python_test(FILE virtual_sites_tracers.py MAX_NUM_PROC 2 DEPENDENCIES
             virtual_sites_tracers_common.py)
-python_test(FILE virtual_sites_tracers_gpu.py MAX_NUM_PROC 2 GPU_SLOTS 1
-            DEPENDENCIES virtual_sites_tracers_common.py)
+# python_test(FILE virtual_sites_tracers_gpu.py MAX_NUM_PROC 2 GPU_SLOTS 1
+# DEPENDENCIES virtual_sites_tracers_common.py)
 python_test(FILE regular_decomposition.py MAX_NUM_PROC 4)
 python_test(FILE hybrid_decomposition.py MAX_NUM_PROC 1 SUFFIX 1_core)
 python_test(FILE hybrid_decomposition.py MAX_NUM_PROC 4)
@@ -298,7 +294,6 @@ python_test(FILE lb.py MAX_NUM_PROC 2 GPU_SLOTS 1)
 python_test(FILE lb_stats.py MAX_NUM_PROC 2 GPU_SLOTS 2 LABELS long)
 python_test(FILE lb_stats.py MAX_NUM_PROC 1 GPU_SLOTS 2 LABELS long SUFFIX
             1_core)
-python_test(FILE lb_vtk.py MAX_NUM_PROC 2 GPU_SLOTS 1)
 python_test(FILE force_cap.py MAX_NUM_PROC 2)
 python_test(FILE dpd.py MAX_NUM_PROC 4)
 python_test(FILE dpd_stats.py MAX_NUM_PROC 4 LABELS long)
@@ -311,7 +306,6 @@ python_test(FILE coulomb_mixed_periodicity.py MAX_NUM_PROC 4)
 python_test(FILE coulomb_cloud_wall_duplicated.py MAX_NUM_PROC 4 GPU_SLOTS 3)
 python_test(FILE collision_detection.py MAX_NUM_PROC 4)
 python_test(FILE collision_detection_interface.py MAX_NUM_PROC 2)
-python_test(FILE lb_get_u_at_pos.py MAX_NUM_PROC 4 GPU_SLOTS 1)
 python_test(FILE lj.py MAX_NUM_PROC 4)
 python_test(FILE pairs.py MAX_NUM_PROC 4)
 python_test(FILE polymer_linear.py MAX_NUM_PROC 4)
@@ -339,10 +333,9 @@ python_test(FILE drude.py MAX_NUM_PROC 2)
 python_test(FILE thermostats_anisotropic.py MAX_NUM_PROC 4)
 python_test(FILE thermalized_bond.py MAX_NUM_PROC 4)
 python_test(FILE thole.py MAX_NUM_PROC 4)
-python_test(FILE lb_slice.py MAX_NUM_PROC 1)
-python_test(FILE lb_switch.py MAX_NUM_PROC 1 GPU_SLOTS 1)
+python_test(FILE lb_slice.py MAX_NUM_PROC 2)
 python_test(FILE lb_boundary_velocity.py MAX_NUM_PROC 1)
-python_test(FILE lb_boundary_volume_force.py MAX_NUM_PROC 4)
+# python_test(FILE lb_boundary_volume_force.py MAX_NUM_PROC 2) # TODO
 python_test(FILE lb_circular_couette.py MAX_NUM_PROC 2 GPU_SLOTS 1)
 python_test(FILE lb_thermo_virtual.py MAX_NUM_PROC 2 GPU_SLOTS 1)
 python_test(FILE lb_poiseuille.py MAX_NUM_PROC 4 GPU_SLOTS 1)
@@ -356,14 +349,15 @@ python_test(FILE lb_boundary.py MAX_NUM_PROC 2 GPU_SLOTS 1)
 python_test(FILE lb_streaming.py MAX_NUM_PROC 4 GPU_SLOTS 1)
 python_test(FILE lb_shear.py MAX_NUM_PROC 2 GPU_SLOTS 1)
 python_test(FILE lb_thermostat.py MAX_NUM_PROC 2 GPU_SLOTS 1)
-python_test(FILE lb_buoyancy_force.py MAX_NUM_PROC 4 GPU_SLOTS 1)
-python_test(FILE lb_momentum_conservation.py MAX_NUM_PROC 4 GPU_SLOTS 1)
-python_test(FILE lb_momentum_conservation.py MAX_NUM_PROC 1 GPU_SLOTS 1 SUFFIX
-            1_core)
+# python_test(FILE lb_buoyancy_force.py MAX_NUM_PROC 2 GPU_SLOTS 1) # TODO
+python_test(FILE lb_momentum_conservation.py MAX_NUM_PROC 2 GPU_SLOTS 1 LABELS
+            long)
+python_test(FILE lb_momentum_conservation.py MAX_NUM_PROC 1 GPU_SLOTS 1 LABELS
+            long SUFFIX 1_core)
+python_test(FILE lb_mass_conservation.py MAX_NUM_PROC 2)
 python_test(FILE p3m_electrostatic_pressure.py MAX_NUM_PROC 2 GPU_SLOTS 1)
 python_test(FILE p3m_madelung.py MAX_NUM_PROC 2 GPU_SLOTS 2 LABELS long)
 python_test(FILE sigint.py DEPENDENCIES sigint_child.py NO_MPI)
-python_test(FILE lb_density.py MAX_NUM_PROC 1)
 python_test(FILE observable_chain.py MAX_NUM_PROC 4)
 python_test(FILE mpiio.py MAX_NUM_PROC 4)
 python_test(FILE mpiio_exceptions.py MAX_NUM_PROC 1)
@@ -391,6 +385,21 @@ python_test(FILE integrator_exceptions.py MAX_NUM_PROC 1)
 python_test(FILE utils.py MAX_NUM_PROC 1)
 python_test(FILE npt_thermostat.py MAX_NUM_PROC 4)
 python_test(FILE box_geometry.py MAX_NUM_PROC 1)
+python_test(FILE lattice.py MAX_NUM_PROC 4)
+python_test(FILE lattice_vtk.py MAX_NUM_PROC 4)
+if(${ESPRESSO_TEST_NP} GREATER_EQUAL 6)
+  python_test(FILE lattice_vtk.py MAX_NUM_PROC 6 SUFFIX 6_cores)
+endif()
+python_test(FILE ek_interface.py MAX_NUM_PROC 2)
+python_test(FILE ek_diffusion.py MAX_NUM_PROC 1)
+python_test(FILE ek_noflux.py MAX_NUM_PROC 1)
+python_test(FILE ek_eof.py MAX_NUM_PROC 1)
+python_test(FILE ek_fixedflux.py MAX_NUM_PROC 1)
+python_test(FILE ek_bulk_reactions.py MAX_NUM_PROC 1)
+python_test(FILE ek_indexed_reactions.py MAX_NUM_PROC 1)
+python_test(FILE ek_fixeddensity.py MAX_NUM_PROC 1)
+python_test(FILE ek_boundary.py MAX_NUM_PROC 2)
+python_test(FILE ek_slice.py MAX_NUM_PROC 2)
 
 set(ESPRESSO_CTEST_RESOURCE_SPEC_FILE resources.json)
 configure_file(
diff --git a/testsuite/python/actor.py b/testsuite/python/actor.py
index 2a2792fcd82..9687537a328 100644
--- a/testsuite/python/actor.py
+++ b/testsuite/python/actor.py
@@ -23,12 +23,84 @@
 """
 
 import unittest as ut
-import espressomd.lb
 import espressomd.actors
 import espressomd.highlander
-
-
-class TestActor(espressomd.lb.FluidActor):
+import espressomd.utils as utils
+
+
+class BaseActor:
+
+    """
+    Abstract base class for interactions affecting particles in the system,
+    such as LB fluids. Derived classes must implement the interface to the
+    relevant core objects and global variables.
+    """
+
+    # Keys in active_list have to match the method name.
+    active_list = dict(HydrodynamicInteraction=False)
+
+    def __init__(self, **kwargs):
+        self._isactive = False
+        utils.check_valid_keys(self.valid_keys(), kwargs.keys())
+        utils.check_required_keys(self.required_keys(), kwargs.keys())
+        self._params = self.default_params()
+        self._params.update(kwargs)
+
+    def _activate(self):
+        inter = self._get_interaction_type()
+        if inter in BaseActor.active_list:
+            if BaseActor.active_list[inter]:
+                raise espressomd.highlander.ThereCanOnlyBeOne(
+                    self.__class__.__bases__[0])
+            BaseActor.active_list[inter] = True
+
+        self.validate_params()
+        self._activate_method()
+        utils.handle_errors("Activation of an actor")
+        self._isactive = True
+
+    def _deactivate(self):
+        self._deactivate_method()
+        utils.handle_errors("Deactivation of an actor")
+        self._isactive = False
+        inter = self._get_interaction_type()
+        if inter in BaseActor.active_list:
+            if not BaseActor.active_list[inter]:
+                raise Exception(
+                    f"Class not registered in Actor.active_list: {self.__class__.__bases__[0].__name__}")
+            BaseActor.active_list[inter] = False
+
+    def get_params(self):
+        """Get interaction parameters"""
+        # If this instance refers to an actual interaction defined in the es
+        # core, load current parameters from there
+        if self.is_active():
+            update = self._get_params_from_es_core()
+            self._params.update(update)
+        return self._params
+
+    def set_params(self, **p):
+        """Update the given parameters."""
+        # Check if keys are valid
+        utils.check_valid_keys(self.valid_keys(), p.keys())
+
+        # When an interaction is newly activated, all required keys must be
+        # given
+        if not self.is_active():
+            utils.check_required_keys(self.required_keys(), p.keys())
+
+        self._params.update(p)
+        # validate updated parameters
+        self.validate_params()
+        # Put in values given by the user
+        if self.is_active():
+            self._set_params_in_es_core()
+
+    def is_active(self):
+        return self._isactive
+
+
+class TestActor(BaseActor):
 
     def __init__(self, *args, **kwargs):
         self._core_args = None
@@ -62,6 +134,15 @@ def _deactivate_method(self):
     def validate_params(self):
         self._validated = True
 
+    def _get_interaction_type(self):
+        return None
+
+
+class TestHydrodynamicActor(TestActor):
+
+    def _get_interaction_type(self):
+        return "HydrodynamicInteraction"
+
 
 class ActorTest(ut.TestCase):
 
@@ -69,7 +150,6 @@ def test_ctor(self):
         a = TestActor(a=False, c=False)
         self.assertFalse(a.is_active())
         self.assertEqual(a.get_params(), a.default_params())
-        self.assertEqual(a.system, None)
 
     def test_params_non_active(self):
         a = TestActor(a=True, c=True)
@@ -160,14 +240,18 @@ def test_deactivation(self):
 
     def test_unique(self):
         # an actor can only be added once
-        actor = TestActor(a=False, c=False)
+        actor = TestHydrodynamicActor(a=False, c=False)
         self.actors.add(actor)
         with self.assertRaises(espressomd.highlander.ThereCanOnlyBeOne):
             self.actors.add(actor)
+        with self.assertRaises(espressomd.highlander.ThereCanOnlyBeOne):
+            actor._activate()
         # an actor can only be removed once
         self.actors.remove(actor)
-        with self.assertRaises(Exception):
+        with self.assertRaisesRegex(Exception, "Actor is not active"):
             self.actors.remove(actor)
+        with self.assertRaisesRegex(Exception, "Class not registered.*: TestActor"):
+            actor._deactivate()
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/array_properties.py b/testsuite/python/array_properties.py
index 02340ab5cf3..8a205bd9e6c 100644
--- a/testsuite/python/array_properties.py
+++ b/testsuite/python/array_properties.py
@@ -187,13 +187,14 @@ def test_rot_aniso(self):
 
         self.assert_copy_is_writable(self.partcl.gamma_rot)
 
+    @utx.skipIfMissingFeatures("WALBERLA")
     def test_lb(self):
-        lbf = espressomd.lb.LBFluid(agrid=0.5, dens=1, visc=1, tau=0.01)
+        lbf = espressomd.lb.LBFluidWalberla(
+            agrid=0.5, density=1., kinematic_viscosity=1., tau=0.01)
         self.system.actors.add(lbf)
 
         self.assert_operator_usage_raises(lbf[0, 0, 0].velocity)
         self.assert_operator_usage_raises(lbf[0, 0, 0].pressure_tensor)
-        self.assert_operator_usage_raises(lbf[0, 0, 0].pressure_tensor_neq)
         self.assert_operator_usage_raises(lbf[0, 0, 0].population)
 
     @utx.skipIfMissingFeatures(["THERMOSTAT_PER_PARTICLE",
diff --git a/testsuite/python/ek_boundary.py b/testsuite/python/ek_boundary.py
new file mode 100644
index 00000000000..1c5da29bceb
--- /dev/null
+++ b/testsuite/python/ek_boundary.py
@@ -0,0 +1,172 @@
+#
+# Copyright (C) 2010-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import numpy as np
+import unittest as ut
+import unittest_decorators as utx
+
+import espressomd
+import espressomd.lb
+import espressomd.shapes
+import espressomd.electrokinetics
+
+
+class EKBoundariesBase:
+    system = espressomd.System(box_l=[10.0, 5.0, 5.0])
+    system.cell_system.skin = 0.1
+    ek_species_params = {"kT": 1.5,
+                         "density": 0.85,
+                         "valency": 0.0,
+                         "diffusion": 0.1,
+                         "advection": False,
+                         "friction_coupling": False,
+                         "tau": 1.0}
+
+    wall_shape1 = espressomd.shapes.Wall(normal=[1., 0., 0.], dist=2.5)
+    wall_shape2 = espressomd.shapes.Wall(normal=[-1., 0., 0.], dist=-7.5)
+
+    def setUp(self):
+        self.lattice = self.ek_lattice_class(agrid=0.5, n_ghost_layers=1)
+
+    def tearDown(self):
+        self.system.ekcontainer.clear()
+
+    def make_default_ek_species(self):
+        return self.ek_species_class(
+            lattice=self.lattice,
+            single_precision=self.ek_params["single_precision"],
+            **self.ek_species_params)
+
+    def check_boundary_flags(self, ek_species, attr, value1, value2):
+        def generator(value, shape):
+            value_grid = np.tile(value, shape)
+            if value_grid.shape[-1] == 1:
+                value_grid = np.squeeze(value_grid, axis=-1)
+            return value_grid
+
+        accessor = np.vectorize(
+            lambda obj: np.copy(getattr(obj, attr)),
+            signature=f"()->({'n' if attr == 'flux' else ''})")
+
+        slice1 = ek_species[:5, :, :]
+        slice2 = ek_species[15:, :, :]
+        slice3 = ek_species[5:15, :, :]
+        np.testing.assert_equal(np.copy(slice1.is_boundary), True)
+        np.testing.assert_equal(np.copy(slice2.is_boundary), True)
+        np.testing.assert_equal(np.copy(slice3.is_boundary), False)
+        field = f"{attr}_boundary"
+
+        np.testing.assert_allclose(accessor(np.copy(getattr(slice1, field))),
+                                   generator(value1, [5, 10, 10, 1]))
+        np.testing.assert_allclose(accessor(np.copy(getattr(slice2, field))),
+                                   generator(value2, [5, 10, 10, 1]))
+        getattr(ek_species, f"clear_{attr}_boundaries")()
+        np.testing.assert_equal(
+            np.copy(ek_species[:, :, :].is_boundary), False)
+
+    def test_flux_boundary_flags(self):
+        flux1 = 1e-3 * np.array([1., 2., 3.])
+        flux2 = 1e-3 * np.array([4., 5., 6.])
+
+        # check with two shapes
+        ek_species = self.make_default_ek_species()
+        value_shape = tuple(ek_species.shape) + (3,)
+        ek_species.add_boundary_from_shape(
+            shape=self.wall_shape1, value=flux1,
+            boundary_type=espressomd.electrokinetics.FluxBoundary)
+        ek_species.add_boundary_from_shape(
+            shape=self.wall_shape2, value=flux2 * np.ones(value_shape),
+            boundary_type=espressomd.electrokinetics.FluxBoundary)
+        self.check_boundary_flags(ek_species, "flux", flux1, flux2)
+
+        # check with union of two shapes
+        ek_species = self.make_default_ek_species()
+        union = espressomd.shapes.Union()
+        union.add([self.wall_shape1, self.wall_shape2])
+        ek_species.add_boundary_from_shape(
+            shape=union, value=flux1,
+            boundary_type=espressomd.electrokinetics.FluxBoundary)
+        self.check_boundary_flags(ek_species, "flux", flux1, flux1)
+
+    def test_density_boundary_flags(self):
+        density1 = 1.
+        density2 = 2.
+
+        # check with two shapes
+        ek_species = self.make_default_ek_species()
+        value_shape = tuple(ek_species.shape) + (1,)
+        ek_species.add_boundary_from_shape(
+            shape=self.wall_shape1, value=density1,
+            boundary_type=espressomd.electrokinetics.DensityBoundary)
+        ek_species.add_boundary_from_shape(
+            shape=self.wall_shape2, value=density2 * np.ones(value_shape),
+            boundary_type=espressomd.electrokinetics.DensityBoundary)
+        self.check_boundary_flags(ek_species, "density", density1, density2)
+
+        # check with union of two shapes
+        ek_species = self.make_default_ek_species()
+        union = espressomd.shapes.Union()
+        union.add([self.wall_shape1, self.wall_shape2])
+        ek_species.add_boundary_from_shape(
+            shape=union, value=density1,
+            boundary_type=espressomd.electrokinetics.DensityBoundary)
+        self.check_boundary_flags(ek_species, "density", density1, density1)
+
+    def test_exceptions(self):
+        ek_species = self.make_default_ek_species()
+        with self.assertRaisesRegex(TypeError, "Parameter 'boundary_type' must be a subclass of FluxBoundary or DensityBoundary"):
+            ek_species.add_boundary_from_shape(
+                shape=self.wall_shape1, value=[0., 0., 0.],
+                boundary_type=espressomd.lb.VelocityBounceBack)
+        with self.assertRaisesRegex(ValueError, "expected an espressomd.shapes.Shape"):
+            ek_species.add_boundary_from_shape(
+                shape=ek_species, value=[0., 0., 0.],
+                boundary_type=espressomd.electrokinetics.FluxBoundary)
+        with self.assertRaisesRegex(ValueError, r"Cannot process density value grid of shape \(3,\)"):
+            ek_species.add_boundary_from_shape(
+                shape=self.wall_shape1, value=[0., 0., 0.],
+                boundary_type=espressomd.electrokinetics.DensityBoundary)
+        with self.assertRaisesRegex(ValueError, r"Cannot process flux value grid of shape \(1,\)"):
+            ek_species.add_boundary_from_shape(
+                shape=self.wall_shape1, value=0.,
+                boundary_type=espressomd.electrokinetics.FluxBoundary)
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class EKBoundariesWalberla(EKBoundariesBase, ut.TestCase):
+
+    """Test for the Walberla implementation of the LB in double-precision."""
+
+    ek_lattice_class = espressomd.electrokinetics.LatticeWalberla
+    ek_species_class = espressomd.electrokinetics.EKSpecies
+    ek_params = {"single_precision": False}
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class EKBoundariesWalberlaSinglePrecision(EKBoundariesBase, ut.TestCase):
+
+    """Test for the Walberla implementation of the LB in single-precision."""
+
+    ek_lattice_class = espressomd.electrokinetics.LatticeWalberla
+    ek_species_class = espressomd.electrokinetics.EKSpecies
+    ek_params = {"single_precision": True}
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/ek_bulk_reactions.py b/testsuite/python/ek_bulk_reactions.py
new file mode 100644
index 00000000000..be6c539458f
--- /dev/null
+++ b/testsuite/python/ek_bulk_reactions.py
@@ -0,0 +1,155 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import unittest as ut
+import unittest_decorators as utx
+import espressomd
+import espressomd.electrokinetics
+import numpy as np
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class EKReaction(ut.TestCase):
+    BOX_L = 11.
+    AGRID = 1.1
+    INITIAL_DENSITY = 1.0
+    DIFFUSION_COEFFICIENT = 0.1
+    TIME = 500
+    TAU = 1.9
+
+    system = espressomd.System(box_l=[BOX_L, BOX_L, BOX_L])
+    system.time_step = TAU
+    system.cell_system.skin = 0.4
+
+    def tearDown(self) -> None:
+        self.system.ekcontainer.clear()
+        self.system.ekreactions.clear()
+
+    def analytic_density_base(
+            self, time: float, coeffs, rate_constant: float, init_density: float) -> float:
+        """
+        Calculates the base density of a species after a given time of a reaction.
+        The reaction is defined via the stoichiometric coefficient of the educts.
+        To calculate the effective species density this base density needs to be multiplied by its stoichiometric coefficient.
+        For the product density, one needs to subtract this density from the init density.
+        """
+        order = sum(coeffs)
+        factor = rate_constant
+        for coeff in coeffs:
+            factor *= coeff**coeff
+        init_dens_factor = init_density ** (1 - order)
+        return (init_dens_factor + (order - 1) *
+                factor * time)**(1 / (1 - order))
+
+    def test_reaction_single(self):
+        self.detail_test_reaction(single_precision=True)
+
+    def test_reaction_double(self):
+        self.detail_test_reaction(single_precision=False)
+
+    def detail_test_reaction(self, single_precision: bool):
+
+        relative_precision: float = 1E-6 if single_precision else 1E-7
+
+        lattice = espressomd.electrokinetics.LatticeWalberla(
+            n_ghost_layers=1, agrid=self.AGRID)
+
+        eksolver = espressomd.electrokinetics.EKNone(lattice=lattice)
+
+        self.system.ekcontainer.tau = self.TAU
+
+        reaction_rate: float = 1e-5
+
+        stoech_coeffs = [2.0, 1.0, 1.2, 2.2]
+        product_coeff = 1.5
+        educt_species = []
+        reactants = []
+        for coeff in stoech_coeffs:
+            species = espressomd.electrokinetics.EKSpecies(
+                lattice=lattice, density=coeff * self.INITIAL_DENSITY,
+                diffusion=self.DIFFUSION_COEFFICIENT, valency=0.0,
+                advection=False, friction_coupling=False,
+                single_precision=single_precision, tau=self.TAU)
+            self.system.ekcontainer.add(species)
+            reactants.append(
+                espressomd.electrokinetics.EKReactant(
+                    ekspecies=species,
+                    stoech_coeff=-coeff,
+                    order=coeff))
+            educt_species.append(species)
+
+        ek_species_product = espressomd.electrokinetics.EKSpecies(
+            lattice=lattice, density=0.0, diffusion=self.DIFFUSION_COEFFICIENT,
+            valency=0.0, advection=False, friction_coupling=False,
+            single_precision=single_precision, tau=self.TAU)
+        self.system.ekcontainer.add(ek_species_product)
+        reactants.append(
+            espressomd.electrokinetics.EKReactant(
+                ekspecies=ek_species_product,
+                stoech_coeff=product_coeff,
+                order=0.0))
+
+        self.system.ekcontainer.solver = eksolver
+
+        reaction = espressomd.electrokinetics.EKBulkReaction(
+            reactants=reactants, coefficient=reaction_rate, lattice=lattice, tau=self.TAU)
+
+        self.system.ekreactions.add(reaction)
+
+        self.system.integrator.run(self.TIME)
+
+        domain_volume = np.product(ek_species_product.shape)
+        analytic_time = (self.TIME + 0.5) * self.system.time_step
+
+        measured_educt_densities = np.zeros(len(stoech_coeffs))
+        for i, educt in enumerate(educt_species):
+            measured_educt_densities[i] = np.sum(
+                educt[:, :, :].density) / domain_volume
+        measured_product_density = np.sum(
+            ek_species_product[:, :, :].density) / domain_volume
+
+        analytic_educt_densities = np.zeros(len(stoech_coeffs))
+        for i, coeff in enumerate(stoech_coeffs):
+            analytic_educt_densities[i] = coeff * self.analytic_density_base(
+                analytic_time, stoech_coeffs, reaction_rate, self.INITIAL_DENSITY)
+        analytic_product_density = product_coeff * \
+            (self.INITIAL_DENSITY -
+                self.analytic_density_base(analytic_time, stoech_coeffs,
+                                           reaction_rate, self.INITIAL_DENSITY))
+
+        np.testing.assert_allclose(
+            measured_educt_densities / measured_educt_densities[0],
+            analytic_educt_densities / analytic_educt_densities[0],
+            rtol=relative_precision, atol=0)
+        np.testing.assert_allclose(
+            measured_educt_densities,
+            analytic_educt_densities,
+            rtol=2 * reaction_rate,
+            atol=0)
+        np.testing.assert_allclose(
+            measured_product_density,
+            analytic_product_density,
+            rtol=2 *
+            reaction_rate *
+            len(stoech_coeffs),
+            atol=0)
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/ek_charged_plate.py b/testsuite/python/ek_charged_plate.py
deleted file mode 100644
index 6feba520d9e..00000000000
--- a/testsuite/python/ek_charged_plate.py
+++ /dev/null
@@ -1,183 +0,0 @@
-#
-# Copyright (C) 2011-2022 The ESPResSo project
-#
-# This file is part of ESPResSo.
-#
-# ESPResSo is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# ESPResSo is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-import unittest as ut
-import unittest_decorators as utx
-import espressomd
-import espressomd.electrokinetics
-import math
-
-##########################################################################
-# Set up the System #
-##########################################################################
-# Build plates using two ek species.
-
-
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(["ELECTROKINETICS"])
-class ek_charged_plate(ut.TestCase):
-
-    system = espressomd.System(box_l=[1.0, 1.0, 1.0])
-
-    def test(self):
-        system = self.system
-
-        # Set parameters
-        box_x = 20
-        box_y = 20
-        box_z = 20
-        system.box_l = [box_x, box_y, box_z]
-        system.cell_system.skin = 0.2
-        system.time_step = 0.1
-        system.periodicity = [True, True, True]
-        bjerrum_length = 2.13569
-        agrid = 0.5
-
-        system.thermostat.turn_off()
-
-        # Setup the Fluid
-        ek = espressomd.electrokinetics.Electrokinetics(
-            agrid=agrid,
-            lb_density=1.0,
-            viscosity=1.0,
-            friction=1.0,
-            T=1.0,
-            prefactor=bjerrum_length,
-            stencil="linkcentered",
-            advection=False,
-            es_coupling=True)
-
-        positive_ions = espressomd.electrokinetics.Species(
-            density=0.0, D=0.0, valency=1.0)
-        negative_ions = espressomd.electrokinetics.Species(
-            density=0.0, D=0.0, valency=-1.0)
-        ek.add_species(positive_ions)
-        ek.add_species(negative_ions)
-        system.actors.add(ek)
-
-        ##################################################################
-        # X
-        # Setup EK species
-        for i in range(int(box_y / agrid)):
-            for j in range(int(box_z / agrid)):
-                positive_ions[10, i, j].density = 1.0 / agrid
-                negative_ions[30, i, j].density = 1.0 / agrid
-
-        # Setup MD particle and integrate
-        p = system.part.add(pos=[0, 0, 0], q=-1.0, type=0)
-        force_difference = 0.0
-
-        for i in range(7, 14):
-            p.pos = [i, 0, 0]
-            system.integrator.run(0)
-
-            # Check Force
-            expected_force = -2 * math.pi * bjerrum_length
-            particle_force = p.f
-            if abs(expected_force - particle_force[0]) > force_difference:
-                force_difference = abs(expected_force - particle_force[0])
-
-        self.assertLess(force_difference, 1.0e-04,
-                        f"Force accuracy in X not achieved, allowed "
-                        f"deviation: 1.0e-04, measured: {force_difference}")
-
-        # Unset species
-        for i in range(int(box_y / agrid)):
-            for j in range(int(box_z / agrid)):
-                positive_ions[10, i, j].density = 0.0
-                negative_ions[30, i, j].density = 0.0
-
-        ##################################################################
-        # Y
-        # Setup EK species
-        for i in range(int(box_x / agrid)):
-            for j in range(int(box_z / agrid)):
-                positive_ions[i, 10, j].density = 1.0 / agrid
-                negative_ions[i, 30, j].density = 1.0 / agrid
-
-        # Setup MD particle and integrate
-        force_difference = 0.0
-
-        for i in range(7, 14):
-            p.pos = [0, i, 0]
-            system.integrator.run(0)
-
-            # Check Force
-            expected_force = -2 * math.pi * bjerrum_length
-            particle_force = p.f
-            if abs(expected_force - particle_force[1]) > force_difference:
-                force_difference = abs(expected_force - particle_force[1])
-
-        self.assertLess(force_difference, 1.0e-04,
-                        f"Force accuracy in Y not achieved, allowed "
-                        f"deviation: 1.0e-04, measured: {force_difference}")
-
-        # Unset species
-        for i in range(int(box_x / agrid)):
-            for j in range(int(box_z / agrid)):
-                positive_ions[i, 10, j].density = 0.0
-                negative_ions[i, 30, j].density = 0.0
-
-        ##################################################################
-        # Y
-        # Setup EK species
-        for i in range(int(box_x / agrid)):
-            for j in range(int(box_y / agrid)):
-                positive_ions[i, j, 10].density = 1.0 / agrid
-                negative_ions[i, j, 30].density = 1.0 / agrid
-
-        # Setup MD particle and integrate
-        force_difference = 0.0
-
-        for i in range(7, 14):
-            p.pos = [0, 0, i]
-            system.integrator.run(0)
-
-            # Check Force
-            expected_force = -2 * math.pi * bjerrum_length
-            particle_force = p.f
-            if abs(expected_force - particle_force[2]) > force_difference:
-                force_difference = abs(expected_force - particle_force[2])
-
-        self.assertLess(force_difference, 1.0e-04,
-                        f"Force accuracy in Z not achieved, allowed "
-                        f"deviation: 1.0e-04, measured: {force_difference}")
-
-        # Unset species
-        for i in range(int(box_x / agrid)):
-            for j in range(int(box_y / agrid)):
-                positive_ions[i, j, 10].density = 0.0
-                negative_ions[i, j, 30].density = 0.0
-
-        # Test error when trying to change ekin parameters after initialisation
-        with self.assertRaises(RuntimeError):
-            ek._params.update({'agrid': 3, 'T': 0.01})
-            ek._set_params_in_es_core()
-
-        # Check errors from the constructor
-        with self.assertRaisesRegex(ValueError, r"The following keys have to be given as keyword arguments: "
-                                                r"\[.+\], got \[.+\] \(missing \['D'\]\)"):
-            espressomd.electrokinetics.Species(density=0, valency=1)
-        with self.assertRaisesRegex(ValueError, r"Only the following keys can be given as keyword arguments: "
-                                                r"\[.+\], got \[.+\] \(unknown \['U'\]\)"):
-            espressomd.electrokinetics.Species(density=0, valency=1, D=0, U=1)
-
-
-if __name__ == "__main__":
-    ut.main()
diff --git a/testsuite/python/ek_diffusion.py b/testsuite/python/ek_diffusion.py
new file mode 100644
index 00000000000..0f979ae551b
--- /dev/null
+++ b/testsuite/python/ek_diffusion.py
@@ -0,0 +1,126 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import unittest as ut
+import unittest_decorators as utx
+import espressomd
+import espressomd.electrokinetics
+import numpy as np
+import scipy.optimize
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class EKDiffusion(ut.TestCase):
+    BOX_L = 15.5
+    AGRID = 0.5
+    DENSITY = 1
+    DIFFUSION_COEFFICIENT = 0.05
+    TAU = 0.9
+    TIMESTEPS = int(65 / TAU)
+
+    system = espressomd.System(box_l=[BOX_L, BOX_L, BOX_L])
+    system.time_step = TAU
+    system.cell_system.skin = 0.4
+
+    def tearDown(self) -> None:
+        self.system.ekcontainer.clear()
+
+    def analytical_density(self, pos: np.ndarray, time: int, D: float):
+        return (4 * np.pi * D * time)**(-3 / 2) * \
+            np.exp(-np.sum(np.square(pos), axis=-1) / (4 * D * time))
+
+    def test_diffusion_single(self):
+        self.detail_test_diffusion(single_precision=True)
+
+    def test_diffusion_double(self):
+        self.detail_test_diffusion(single_precision=False)
+
+    def detail_test_diffusion(self, single_precision: bool):
+        """
+        Testing EK for simple diffusion of a point droplet
+        """
+
+        decimal_precision: int = 7 if single_precision else 10
+
+        lattice = espressomd.electrokinetics.LatticeWalberla(
+            n_ghost_layers=1, agrid=self.AGRID)
+
+        ekspecies = espressomd.electrokinetics.EKSpecies(
+            lattice=lattice, density=0.0, valency=0.0, advection=False,
+            diffusion=self.DIFFUSION_COEFFICIENT, friction_coupling=False,
+            single_precision=single_precision, tau=self.TAU)
+
+        eksolver = espressomd.electrokinetics.EKNone(lattice=lattice)
+
+        self.system.ekcontainer.tau = self.TAU
+        self.system.ekcontainer.solver = eksolver
+        self.system.ekcontainer.add(ekspecies)
+
+        center = np.asarray(lattice.shape // 2, dtype=int)
+
+        ekspecies[center].density = self.DENSITY
+
+        # check that the density in the domain is what is expected
+        np.testing.assert_almost_equal(
+            np.sum(ekspecies[:, :, :].density), self.DENSITY, decimal_precision)
+
+        # calculate physical positions
+        positions = np.empty((*lattice.shape, 3))
+        positions[..., 2], positions[..., 1], positions[..., 0] = np.meshgrid(
+            *map(lambda x: np.arange(0, x) - x / 2, lattice.shape))
+        positions += 0.5
+        positions *= self.AGRID
+
+        self.system.integrator.run(self.TIMESTEPS)
+
+        simulated_density = np.copy(ekspecies[:, :, :].density)
+
+        # check that the density is conserved
+        np.testing.assert_almost_equal(
+            np.sum(simulated_density), self.DENSITY, decimal_precision)
+        assert np.all(simulated_density >= 0.), "EK has negative densities"
+
+        # check that the maximum is in the right place
+        peak = np.unravel_index(
+            np.argmax(simulated_density, axis=None),
+            lattice.shape)
+        np.testing.assert_equal(peak, center)
+
+        calc_density = self.analytical_density(
+            positions, self.TIMESTEPS * self.TAU, self.DIFFUSION_COEFFICIENT) * self.AGRID ** 3
+
+        target = [self.TIMESTEPS * self.TAU, self.DIFFUSION_COEFFICIENT]
+
+        popt, _ = scipy.optimize.curve_fit(self.analytical_density,
+                                           positions.reshape(-1, 3),
+                                           simulated_density.reshape(
+                                               -1) / self.AGRID ** 3,
+                                           p0=target,
+                                           bounds=([0, 0], [np.inf, np.inf]))
+
+        np.testing.assert_allclose(
+            popt[0], self.TIMESTEPS * self.TAU, rtol=0.1)
+        np.testing.assert_allclose(
+            popt[1], self.DIFFUSION_COEFFICIENT, rtol=0.1)
+        np.testing.assert_allclose(
+            calc_density, simulated_density, atol=1e-5, rtol=0.)
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/ek_eof.py b/testsuite/python/ek_eof.py
new file mode 100644
index 00000000000..c66f4450524
--- /dev/null
+++ b/testsuite/python/ek_eof.py
@@ -0,0 +1,199 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import numpy as np
+import scipy.optimize
+import unittest as ut
+import unittest_decorators as utx
+
+import espressomd
+import espressomd.lb
+import espressomd.shapes
+import espressomd.electrokinetics
+
+
+@utx.skipIfMissingFeatures(["WALBERLA", "WALBERLA_FFT"])
+class EKEOF:
+    BOX_L = [45., 9., 9.]
+    AGRID = 1.5
+    DENSITY = 1
+    DIFFUSION_COEFFICIENT = 0.25
+    TIMESTEPS = 5000
+    TAU = 1.6
+
+    system = espressomd.System(box_l=BOX_L)
+    system.time_step = TAU
+    system.cell_system.skin = 0.4
+
+    def tearDown(self):
+        self.system.actors.clear()
+        self.system.ekcontainer.clear()
+
+    def test_eof(self):
+        """
+        Testing EK for the electroosmotic flow
+        """
+
+        eps0 = 0.015
+        epsR = 18.5
+        kT = 2.
+        offset = self.AGRID
+        d = self.system.box_l[0] - 2 * offset
+        valency = 1.1
+        external_electric_field = np.asarray([0.0, 0.001, 0.0])
+
+        visc = 1. / 6.
+        eta = 1.0 * visc
+
+        density = 0.0006
+
+        lattice = self.ek_lattice_class(n_ghost_layers=1, agrid=self.AGRID)
+
+        ekspecies = self.ek_species_class(
+            lattice=lattice, density=density, kT=kT, valency=valency,
+            diffusion=self.DIFFUSION_COEFFICIENT, friction_coupling=True,
+            advection=True, ext_efield=external_electric_field,
+            tau=self.TAU, **self.ek_params)
+        ekwallcharge = self.ek_species_class(
+            lattice=lattice, density=0.0, kT=kT, diffusion=0.0, tau=self.TAU,
+            valency=-valency, friction_coupling=False, advection=False,
+            **self.ek_params)
+
+        eksolver = self.ek_solver_class(
+            lattice=lattice, permittivity=eps0 * epsR, **self.ek_params)
+
+        self.system.ekcontainer.tau = self.TAU
+        self.system.ekcontainer.solver = eksolver
+        self.system.ekcontainer.add(ekspecies)
+        self.system.ekcontainer.add(ekwallcharge)
+
+        lb_fluid = espressomd.lb.LBFluidWalberla(
+            lattice=lattice, density=1.0, kinematic_viscosity=visc,
+            tau=self.TAU, **self.ek_params)
+        self.system.actors.add(lb_fluid)
+
+        wall_bot = espressomd.shapes.Wall(normal=[1, 0, 0], dist=offset)
+        wall_top = espressomd.shapes.Wall(
+            normal=[-1, 0, 0], dist=-self.BOX_L[0] + offset)
+        for obj in (wall_bot, wall_top):
+            ekspecies.add_boundary_from_shape(
+                obj, [0.0, 0.0, 0.0], espressomd.electrokinetics.FluxBoundary)
+            ekspecies.add_boundary_from_shape(
+                obj, 0.0, espressomd.electrokinetics.DensityBoundary)
+            lb_fluid.add_boundary_from_shape(obj, [0.0, 0.0, 0.0])
+
+        ekspecies[0, :, :].density = 0.0
+        ekspecies[-1, :, :].density = 0.0
+
+        density_wall = density * d / 2
+        sigma = -valency * density_wall
+
+        ekwallcharge[0, :, :].density = density_wall
+        ekwallcharge[-1, :, :].density = density_wall
+
+        self.system.integrator.run(self.TIMESTEPS)
+
+        def transcendental_equation(
+                x, valency, distance, kT, sigma, eps0, epsR):
+            return x * np.tan(valency * distance / (4. * kT) * x) + \
+                sigma / (eps0 * epsR)
+
+        integration_constant = scipy.optimize.fsolve(
+            lambda x: transcendental_equation(
+                x=x,
+                valency=valency,
+                distance=d,
+                kT=kT,
+                sigma=sigma,
+                eps0=eps0,
+                epsR=epsR),
+            0.1)
+
+        def calc_analytic_density(
+                x, integration_constant, valency, kT, eps0, epsR):
+            return (epsR * eps0) * integration_constant**2 / (2 * kT) / np.square(
+                np.cos(valency * integration_constant / (2 * kT) * x))
+
+        def calc_analytic_velocity(x, integration_constant, valency,
+                                   distance, kT, eps0, epsR, eta, external_electric_field):
+            return 2 * np.linalg.norm(external_electric_field) * epsR * eps0 * kT / (eta * valency) * (
+                np.log(np.cos(valency * integration_constant / (2 * kT) * x)) - np.log(np.cos(distance * valency * integration_constant / (4 * kT))))
+
+        simulated_density = ekspecies[:, 0, 0].density.squeeze()
+        simulated_velocity = lb_fluid[:, 0, 0].velocity.squeeze()[:, 1]
+
+        x_sim = (np.arange(lattice.shape[0]) -
+                 lattice.shape[0] / 2 + 0.5) * self.AGRID
+
+        analytic_density = calc_analytic_density(
+            x=x_sim,
+            integration_constant=integration_constant,
+            valency=valency,
+            kT=kT,
+            eps0=eps0,
+            epsR=epsR)
+        analytic_density[np.logical_or(
+            x_sim < -self.system.box_l[0] / 2 + offset,
+            x_sim > self.system.box_l[0] / 2 - offset)] = 0.
+        np.testing.assert_allclose(
+            simulated_density, analytic_density, rtol=2e-2)
+
+        analytic_velocity = calc_analytic_velocity(
+            x=x_sim,
+            integration_constant=integration_constant,
+            valency=valency,
+            distance=d,
+            kT=kT,
+            eps0=eps0,
+            epsR=epsR,
+            eta=eta,
+            external_electric_field=external_electric_field)
+        analytic_velocity[np.logical_or(
+            x_sim < -self.system.box_l[0] / 2 + offset,
+            x_sim > self.system.box_l[0] / 2 - offset)] = 0.
+        np.testing.assert_allclose(
+            simulated_velocity,
+            analytic_velocity,
+            rtol=2e-2)
+
+
+@utx.skipIfMissingFeatures(["WALBERLA", "WALBERLA_FFT"])
+class EKTestWalberla(EKEOF, ut.TestCase):
+
+    """Test for the Walberla implementation of the EK in double-precision."""
+
+    ek_lattice_class = espressomd.electrokinetics.LatticeWalberla
+    ek_species_class = espressomd.electrokinetics.EKSpecies
+    ek_solver_class = espressomd.electrokinetics.EKFFT
+    ek_params = {"single_precision": False}
+
+
+@utx.skipIfMissingFeatures(["WALBERLA", "WALBERLA_FFT"])
+class EKTestWalberlaSinglePrecision(EKEOF, ut.TestCase):
+
+    """Test for the Walberla implementation of the EK in single-precision."""
+
+    ek_lattice_class = espressomd.electrokinetics.LatticeWalberla
+    ek_species_class = espressomd.electrokinetics.EKSpecies
+    ek_solver_class = espressomd.electrokinetics.EKFFT
+    ek_params = {"single_precision": True}
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/ek_eof_one_species.py b/testsuite/python/ek_eof_one_species.py
deleted file mode 100644
index 5d90b971629..00000000000
--- a/testsuite/python/ek_eof_one_species.py
+++ /dev/null
@@ -1,492 +0,0 @@
-#
-# Copyright (C) 2011-2022 The ESPResSo project
-#
-# This file is part of ESPResSo.
-#
-# ESPResSo is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# ESPResSo is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-import unittest as ut
-import unittest_decorators as utx
-import unittest_generator as utg
-import pathlib
-import tempfile
-import contextlib
-
-import math
-import numpy as np
-
-with contextlib.suppress(ImportError):
-    import vtk
-    import vtk.util.numpy_support
-
-import espressomd
-import espressomd.electrokinetics
-import espressomd.shapes
-
-config = utg.TestGenerator()
-modes = config.get_modes()
-
-
-##########################################################################
-# Utility functions
-##########################################################################
-
-def solve(xi, d, bjerrum_length, sigma, valency, el_char=1.0):
-    # root finding function
-    return xi * math.tan(xi * d / 2.0) + 2.0 * math.pi * \
-        bjerrum_length * sigma / (valency * el_char)
-
-
-def density(x, xi, bjerrum_length):
-    return (xi * xi) / (2.0 * math.pi * bjerrum_length *
-                        math.cos(xi * x) * math.cos(xi * x))
-
-
-def velocity(x, xi, d, bjerrum_length, force, visc_kinematic, density_water):
-    return force * math.log(math.cos(xi * x) / math.cos(xi * d / 2.0)) / \
-        (2.0 * math.pi * bjerrum_length * visc_kinematic * density_water)
-
-
-def pressure_tensor_offdiagonal(x, xi, bjerrum_length, force):
-    # calculate the nonzero component of the pressure tensor
-    return force * xi * math.tan(xi * x) / (2.0 * math.pi * bjerrum_length)
-
-
-def hydrostatic_pressure(ek, tensor_entry, box_x, box_y, box_z, agrid):
-    """
-    Calculate the hydrostatic pressure.
-
-    Technically, the LB simulates a compressible fluid, whose pressure
-    tensor contains an additional term on the diagonal, proportional to
-    the divergence of the velocity. We neglect this contribution, which
-    creates a small error in the direction normal to the wall, which
-    should decay with the simulation time.
-    """
-    offset = ek[int(box_x / (2 * agrid)), int(box_y / (2 * agrid)),
-                int(box_z / (2 * agrid))].pressure_tensor[tensor_entry]
-    return 0.0 + offset
-
-
-##########################################################################
-#                          Set up the System                             #
-##########################################################################
-# Set the slit pore geometry. The width is the non-periodic part of the
-# geometry. The padding is used to ensure that there is no field outside
-# the slit.
-
-params_base = dict([
-    ('dt', 1.0 / 7),
-    ('integration_length', 2300),
-    ('agrid', 1. / 3),
-    ('density_water', 26.15),
-    ('friction', 1.9),
-    ('width', 20.0),
-    ('thickness', 3.0),
-    ('sigma', -0.04),
-    ('padding', 6.0),
-    ('force', 0.07),
-    ('temperature', 1.1),
-    ('viscosity_kinematic', 1.7),
-    ('bjerrum_length', 0.8),
-    ('sigma', -0.04),
-    ('valency', 1.0),
-])
-params_base['density_counterions'] = -2.0 * \
-    params_base['sigma'] / params_base['width']
-
-if "AXIS.X" in modes:
-    axis = "x"
-elif "AXIS.Y" in modes:
-    axis = "y"
-else:
-    assert "AXIS.Z" in modes
-    axis = "z"
-
-params = {
-    "x": dict([
-        ('box_x', params_base['thickness']),
-        ('box_y', params_base['thickness']),
-        ('box_z', params_base['width'] + 2 * params_base['padding']),
-        ('ext_force_density', [params_base['force'], 0.0, 0.0]),
-        ('wall_normal_1', [0, 0, 1]),
-        ('wall_normal_2', [0, 0, -1]),
-        ('periodic_dirs', (0, 1)),
-        ('non_periodic_dir', 2),
-        ('n_roll_index', 0),
-        ('calculated_pressure_xy', 0.0),
-        ('calculated_pressure_yz', 0.0)
-    ]),
-    "y": dict([
-        ('box_x', params_base['width'] + 2 * params_base['padding']),
-        ('box_y', params_base['thickness']),
-        ('box_z', params_base['thickness']),
-        ('ext_force_density', [0.0, params_base['force'], 0.0]),
-        ('wall_normal_1', [1, 0, 0]),
-        ('wall_normal_2', [-1, 0, 0]),
-        ('periodic_dirs', (1, 2)),
-        ('non_periodic_dir', 0),
-        ('n_roll_index', 1),
-        ('calculated_pressure_xz', 0.0),
-        ('calculated_pressure_yz', 0.0)
-    ]),
-    "z": dict([
-        ('box_x', params_base['thickness']),
-        ('box_y', params_base['width'] + 2 * params_base['padding']),
-        ('box_z', params_base['thickness']),
-        ('ext_force_density', [0.0, 0.0, params_base['force']]),
-        ('wall_normal_1', [0, 1, 0]),
-        ('wall_normal_2', [0, -1, 0]),
-        ('periodic_dirs', (0, 2)),
-        ('non_periodic_dir', 1),
-        ('n_roll_index', 2),
-        ('calculated_pressure_xy', 0.0),
-        ('calculated_pressure_xz', 0.0)
-    ])
-}[axis]
-
-
-def bisection():
-    args = [params_base[k]
-            for k in ('width', 'bjerrum_length', 'sigma', 'valency')]
-    # initial parameters for bisection scheme
-    size = math.pi / (2.0 * params_base['width'])
-    pnt0 = 0.0
-    pntm = pnt0 + size
-    pnt1 = pnt0 + 1.9 * size
-    # the bisection scheme
-    tol = 1.0e-08
-    while size > tol:
-        size /= 2.0
-        val0, val1, valm = map(lambda x: solve(x, *args), [pnt0, pnt1, pntm])
-        assert val0 < 0.0 and val1 > 0.0, "Bisection method failed"
-        if valm < 0.0:
-            pnt0 = pntm
-            pntm += size
-        else:
-            pnt1 = pntm
-            pntm -= size
-    return pntm
-
-
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(["ELECTROKINETICS", "EK_BOUNDARIES"])
-@utx.skipIfMissingModules("vtk")
-class ek_eof_one_species(ut.TestCase):
-    system = espressomd.System(box_l=[1.0, 1.0, 1.0])
-    xi = bisection()
-
-    def parse_vtk(self, filepath, name, shape):
-        reader = vtk.vtkStructuredPointsReader()
-        reader.SetFileName(str(filepath))
-        reader.ReadAllVectorsOn()
-        reader.ReadAllScalarsOn()
-        reader.Update()
-
-        data = reader.GetOutput()
-        points = data.GetPointData()
-
-        return vtk.util.numpy_support.vtk_to_numpy(
-            points.GetArray(name)).reshape(shape, order='F')
-
-    @classmethod
-    def setUpClass(cls):
-        system = cls.system
-        system.box_l = [params['box_x'], params['box_y'], params['box_z']]
-        system.time_step = params_base['dt']
-        system.cell_system.skin = 0.1
-        system.thermostat.turn_off()
-
-        # Set up the (LB) electrokinetics fluid
-        ek = cls.ek = espressomd.electrokinetics.Electrokinetics(
-            agrid=params_base['agrid'],
-            lb_density=params_base['density_water'],
-            viscosity=params_base['viscosity_kinematic'],
-            friction=params_base['friction'],
-            T=params_base['temperature'],
-            prefactor=params_base['bjerrum_length'] *
-            params_base['temperature'],
-            stencil="linkcentered")
-
-        counterions = cls.counterions = espressomd.electrokinetics.Species(
-            density=params_base['density_counterions'],
-            D=0.3,
-            valency=params_base['valency'],
-            ext_force_density=params['ext_force_density'])
-        ek.add_species(counterions)
-
-        # Set up the walls confining the fluid and carrying charge
-        ek_wall1 = espressomd.ekboundaries.EKBoundary(
-            charge_density=params_base['sigma'] /
-            params_base['padding'],
-            shape=espressomd.shapes.Wall(
-                normal=params['wall_normal_1'],
-                dist=params_base['padding']))
-        ek_wall2 = espressomd.ekboundaries.EKBoundary(
-            charge_density=params_base['sigma'] /
-            params_base['padding'],
-            shape=espressomd.shapes.Wall(
-                normal=params['wall_normal_2'],
-                dist=-(params_base['padding'] + params_base['width'])))
-        system.ekboundaries.add(ek_wall1)
-        system.ekboundaries.add(ek_wall2)
-        system.actors.add(ek)
-
-        # Integrate the system
-        system.integrator.run(params_base['integration_length'])
-
-    def test(self):
-        # compare the various quantities to the analytic results
-        total_velocity_difference = 0.0
-        total_density_difference = 0.0
-        total_pressure_difference_xx = 0.0
-        total_pressure_difference_yy = 0.0
-        total_pressure_difference_zz = 0.0
-        total_pressure_difference_xy = 0.0
-        total_pressure_difference_yz = 0.0
-        total_pressure_difference_xz = 0.0
-
-        system = self.system
-        ek = self.ek
-        counterions = self.counterions
-        for i in range(
-                int(system.box_l[params['non_periodic_dir']] / params_base['agrid'])):
-            if (i *
-                params_base['agrid'] >= params_base['padding'] and i *
-                params_base['agrid'] < system.box_l[params['non_periodic_dir']] -
-                    params_base['padding']):
-                position = i * params_base['agrid'] - params_base['padding'] - \
-                    params_base['width'] / 2.0 + params_base['agrid'] / 2.0
-
-                # density
-                index = np.array([int(system.box_l[params['periodic_dirs'][0]] /
-                                      (2 * params_base['agrid'])),
-                                  int(system.box_l[params['periodic_dirs'][1]] /
-                                      (2 * params_base['agrid'])), i])
-                index = np.roll(index, params['n_roll_index'])
-                measured_density = counterions[index].density
-                calculated_density = density(
-                    position, self.xi, params_base['bjerrum_length'])
-                density_difference = abs(measured_density - calculated_density)
-                total_density_difference += density_difference
-
-                # velocity
-                measured_velocity = ek[index].velocity[int(
-                    np.nonzero(params['ext_force_density'])[0])]
-                calculated_velocity = velocity(
-                    position,
-                    self.xi,
-                    params_base['width'],
-                    params_base['bjerrum_length'],
-                    params_base['force'],
-                    params_base['viscosity_kinematic'],
-                    params_base['density_water'])
-                velocity_difference = abs(
-                    measured_velocity - calculated_velocity)
-                total_velocity_difference = total_velocity_difference + \
-                    velocity_difference
-
-                # diagonal pressure tensor
-                measured_pressure_xx = ek[index].pressure_tensor[(0, 0)]
-                calculated_pressure_xx = hydrostatic_pressure(
-                    ek,
-                    (0, 0),
-                    system.box_l[params['periodic_dirs'][0]],
-                    system.box_l[params['periodic_dirs'][1]],
-                    params['box_z'],
-                    params_base['agrid'])
-                measured_pressure_yy = ek[index].pressure_tensor[(1, 1)]
-                calculated_pressure_yy = hydrostatic_pressure(
-                    ek,
-                    (1, 1),
-                    system.box_l[params['periodic_dirs'][0]],
-                    system.box_l[params['periodic_dirs'][1]],
-                    params['box_z'],
-                    params_base['agrid'])
-                measured_pressure_zz = ek[index].pressure_tensor[(2, 2)]
-                calculated_pressure_zz = hydrostatic_pressure(
-                    ek,
-                    (2, 2),
-                    system.box_l[params['periodic_dirs'][0]],
-                    system.box_l[params['periodic_dirs'][1]],
-                    params['box_z'],
-                    params_base['agrid'])
-
-                pressure_difference_xx = abs(
-                    measured_pressure_xx - calculated_pressure_xx)
-                pressure_difference_yy = abs(
-                    measured_pressure_yy - calculated_pressure_yy)
-                pressure_difference_zz = abs(
-                    measured_pressure_zz - calculated_pressure_zz)
-
-                total_pressure_difference_xx = total_pressure_difference_xx + \
-                    pressure_difference_xx
-                total_pressure_difference_yy = total_pressure_difference_yy + \
-                    pressure_difference_yy
-                total_pressure_difference_zz = total_pressure_difference_zz + \
-                    pressure_difference_zz
-
-                calculated_pressure_offdiagonal = pressure_tensor_offdiagonal(
-                    position, self.xi, params_base['bjerrum_length'], params_base['force'])
-                # xy component pressure tensor
-                measured_pressure_xy = ek[index].pressure_tensor[(0, 1)]
-                calculated_pressure_xy = 0.0
-                if 'calculated_pressure_xy' not in params:
-                    calculated_pressure_xy = calculated_pressure_offdiagonal
-                pressure_difference_xy = abs(
-                    measured_pressure_xy - calculated_pressure_xy)
-                total_pressure_difference_xy = total_pressure_difference_xy + \
-                    pressure_difference_xy
-
-                # yz component pressure tensor
-                measured_pressure_yz = ek[index].pressure_tensor[(1, 2)]
-                calculated_pressure_yz = 0.0
-                if 'calculated_pressure_yz' not in params:
-                    calculated_pressure_yz = calculated_pressure_offdiagonal
-                pressure_difference_yz = abs(
-                    measured_pressure_yz - calculated_pressure_yz)
-                total_pressure_difference_yz = total_pressure_difference_yz + \
-                    pressure_difference_yz
-
-                # xz component pressure tensor
-                measured_pressure_xz = ek[index].pressure_tensor[(0, 2)]
-                calculated_pressure_xz = 0.0
-                if 'calculated_pressure_xz' not in params:
-                    calculated_pressure_xz = calculated_pressure_offdiagonal
-                pressure_difference_xz = abs(
-                    measured_pressure_xz - calculated_pressure_xz)
-                total_pressure_difference_xz = total_pressure_difference_xz + \
-                    pressure_difference_xz
-
-        scale_factor = params_base['agrid'] / params_base['width']
-        total_density_difference *= scale_factor
-        total_velocity_difference *= scale_factor
-        total_pressure_difference_xx *= scale_factor
-        total_pressure_difference_yy *= scale_factor
-        total_pressure_difference_zz *= scale_factor
-        total_pressure_difference_xy *= scale_factor
-        total_pressure_difference_yz *= scale_factor
-        total_pressure_difference_xz *= scale_factor
-
-        self.assertLess(total_density_difference, 1.0e-04,
-                        "Density accuracy not achieved")
-        self.assertLess(total_velocity_difference, 1.0e-04,
-                        "Velocity accuracy not achieved")
-        self.assertLess(total_pressure_difference_xx, 1.0e-04,
-                        "Pressure accuracy xx component not achieved")
-        self.assertLess(total_pressure_difference_yy, 1.0e-04,
-                        "Pressure accuracy yy component not achieved")
-        self.assertLess(total_pressure_difference_zz, 1.0e-04,
-                        "Pressure accuracy zz component not achieved")
-        self.assertLess(total_pressure_difference_xy, 1.0e-04,
-                        "Pressure accuracy xy component not achieved")
-        self.assertLess(total_pressure_difference_yz, 1.0e-04,
-                        "Pressure accuracy yz component not achieved")
-        self.assertLess(total_pressure_difference_xz, 1.0e-04,
-                        "Pressure accuracy xz component not achieved")
-
-    @utx.skipIfMissingModules("vtk")
-    def test_vtk(self):
-        ek = self.ek
-        counterions = self.counterions
-        grid_dims = list(
-            map(int, np.round(self.system.box_l / params_base['agrid'])))
-
-        # write VTK files
-        with tempfile.TemporaryDirectory() as tmp_directory:
-            path_vtk_root = pathlib.Path(tmp_directory)
-            path_vtk_root.mkdir(parents=True, exist_ok=True)
-            path_vtk_boundary = path_vtk_root / "boundary.vtk"
-            path_vtk_velocity = path_vtk_root / "velocity.vtk"
-            path_vtk_potential = path_vtk_root / "potential.vtk"
-            path_vtk_lbdensity = path_vtk_root / "density.vtk"
-            path_vtk_lbforce = path_vtk_root / "lbforce.vtk"
-            path_vtk_density = path_vtk_root / "lbdensity.vtk"
-            path_vtk_flux = path_vtk_root / "flux.vtk"
-            path_vtk_flux_link = path_vtk_root / "flux_link.vtk"
-            if espressomd.has_features('EK_DEBUG'):
-                path_vtk_flux_fluc = path_vtk_root / "flux_fluc.vtk"
-            ek.write_vtk_boundary(str(path_vtk_boundary))
-            ek.write_vtk_velocity(str(path_vtk_velocity))
-            ek.write_vtk_potential(str(path_vtk_potential))
-            ek.write_vtk_density(str(path_vtk_lbdensity))
-            if espressomd.has_features('EK_DEBUG') or espressomd.has_features(
-                    'VIRTUAL_SITES_INERTIALESS_TRACERS'):
-                ek.write_vtk_lbforce(str(path_vtk_lbforce))
-            counterions.write_vtk_density(str(path_vtk_density))
-            counterions.write_vtk_flux(str(path_vtk_flux))
-            if espressomd.has_features('EK_DEBUG'):
-                counterions.write_vtk_flux_fluc(str(path_vtk_flux_fluc))
-            counterions.write_vtk_flux_link(str(path_vtk_flux_link))
-
-            # load VTK files to check they are correctly formatted
-            get_vtk = self.parse_vtk
-            vtk_boundary = get_vtk(path_vtk_boundary, "boundary", grid_dims)
-            vtk_velocity = get_vtk(
-                path_vtk_velocity, "velocity", grid_dims + [3])
-            vtk_potential = get_vtk(path_vtk_potential, "potential", grid_dims)
-            vtk_lbdensity = get_vtk(
-                path_vtk_lbdensity, "density_lb", grid_dims)
-            if espressomd.has_features('EK_DEBUG') or espressomd.has_features(
-                    'VIRTUAL_SITES_INERTIALESS_TRACERS'):
-                get_vtk(path_vtk_lbforce, "lbforce", grid_dims + [3])
-            vtk_density = get_vtk(path_vtk_density, "density_1", grid_dims)
-            vtk_flux = get_vtk(path_vtk_flux, "flux_1", grid_dims + [3])
-            if espressomd.has_features('EK_DEBUG'):
-                get_vtk(path_vtk_flux_fluc, "flux_fluc_1", grid_dims + [4])
-            get_vtk(path_vtk_flux_link, "flux_link_1", grid_dims + [13])
-
-        # check VTK files against the EK grid
-        species_density = np.zeros(grid_dims)
-        species_flux = np.zeros(grid_dims + [3])
-        ek_potential = np.zeros(grid_dims)
-        ek_velocity = np.zeros(grid_dims + [3])
-        for i in range(grid_dims[0]):
-            for j in range(grid_dims[1]):
-                for k in range(grid_dims[2]):
-                    index = np.array([i, j, k])
-                    species_density[i, j, k] = counterions[index].density
-                    species_flux[i, j, k] = counterions[index].flux
-                    ek_potential[i, j, k] = ek[index].potential
-                    ek_velocity[i, j, k] = ek[index].velocity
-
-        np.testing.assert_allclose(vtk_velocity, ek_velocity, atol=1e-6)
-        np.testing.assert_allclose(vtk_potential, ek_potential, atol=1e-6)
-        np.testing.assert_allclose(vtk_density, species_density, atol=1e-6)
-        np.testing.assert_allclose(vtk_flux, species_flux, atol=1e-6)
-
-        # check VTK files against the EK parameters
-        dens = params_base['density_water']
-        left_dist = int(params_base['padding'] / params_base['agrid'])
-        right_dist = int(-params_base['padding'] / params_base['agrid'])
-        thickness = int(params_base['thickness'] / params_base['agrid'])
-        i = np.roll([0, 0, right_dist], params['n_roll_index'])
-        j = np.roll([thickness, thickness, left_dist], params['n_roll_index'])
-        mask_left = np.zeros(grid_dims, dtype=bool)
-        mask_left[:j[0], :j[1], :j[2]] = True
-        mask_right = np.zeros(grid_dims, dtype=bool)
-        mask_right[i[0]:, i[1]:, i[2]:] = True
-        mask_outside = np.logical_or(mask_left, mask_right)
-        mask_inside = np.logical_not(mask_outside)
-        np.testing.assert_allclose(vtk_lbdensity[mask_inside], dens, atol=1e-4)
-        np.testing.assert_allclose(vtk_lbdensity[mask_outside], 0, atol=1e-6)
-        np.testing.assert_allclose(vtk_boundary[mask_left], 1, atol=1e-6)
-        np.testing.assert_allclose(vtk_boundary[mask_left], 1, atol=1e-6)
-        np.testing.assert_allclose(vtk_boundary[mask_right], 2, atol=1e-6)
-        np.testing.assert_allclose(vtk_boundary[mask_inside], 0, atol=1e-6)
-
-
-if __name__ == "__main__":
-    config.bind_test_class(ek_eof_one_species)
-    ut.main()
diff --git a/testsuite/python/ek_fixeddensity.py b/testsuite/python/ek_fixeddensity.py
new file mode 100644
index 00000000000..c1b3f29d13e
--- /dev/null
+++ b/testsuite/python/ek_fixeddensity.py
@@ -0,0 +1,109 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import numpy as np
+import unittest as ut
+import unittest_decorators as utx
+
+import espressomd
+import espressomd.electrokinetics
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class EKFixedDensity(ut.TestCase):
+    AGRID = 1.1
+    BOX_L = np.array([32., 3., 3.]) * AGRID
+    DENSITY = 1
+
+    DIFFUSION_COEFFICIENT = 0.2
+    TIME = 5000
+    TAU = 1.81
+
+    INLET_CONCENTRATION = 1.0
+    OUTLET_CONCENTRATION = 0.01
+
+    system = espressomd.System(box_l=BOX_L)
+    system.time_step = TAU
+    system.cell_system.skin = 0.4
+
+    def tearDown(self) -> None:
+        self.system.ekcontainer.clear()
+
+    def test_constant_density_bc_single(self):
+        self.detail_test_constant_density_bc(single_precision=True)
+
+    def test_constant_density_bc_double(self):
+        self.detail_test_constant_density_bc(single_precision=False)
+
+    def detail_test_constant_density_bc(self, single_precision: bool):
+        """ effective 1D system with linear equilibrium profile """
+
+        decimal_precision: int = 5 if single_precision else 7
+
+        lattice = espressomd.electrokinetics.LatticeWalberla(
+            n_ghost_layers=1, agrid=self.AGRID)
+
+        ekspecies = espressomd.electrokinetics.EKSpecies(
+            lattice=lattice, density=0.0, diffusion=self.DIFFUSION_COEFFICIENT,
+            valency=0.0, advection=False, friction_coupling=False,
+            single_precision=single_precision, tau=self.TAU)
+
+        eksolver = espressomd.electrokinetics.EKNone(lattice=lattice)
+
+        self.system.ekcontainer.tau = self.TAU
+        self.system.ekcontainer.solver = eksolver
+        self.system.ekcontainer.add(ekspecies)
+
+        # left and right no flux
+        ekspecies[0, :, :].flux_boundary = \
+            espressomd.electrokinetics.FluxBoundary([0, 0, 0])
+        ekspecies[-1, :, :].flux_boundary = \
+            espressomd.electrokinetics.FluxBoundary([0, 0, 0])
+
+        left_slice = ekspecies[1, :, :]
+        left_slice.density = 1.0
+        left_slice.density_boundary = espressomd.electrokinetics.DensityBoundary(
+            self.INLET_CONCENTRATION)
+
+        right_slice = ekspecies[-2, :, :]
+        right_slice.density_boundary = espressomd.electrokinetics.DensityBoundary(
+            self.OUTLET_CONCENTRATION)
+
+        self.system.integrator.run(self.TIME)
+
+        effective_boxl = (lattice.shape[0] - 3) * self.AGRID
+        domain_positions = np.arange(
+            lattice.shape[0] - 2,
+            dtype=np.float64) * self.AGRID
+
+        measured_values = ekspecies[1:-1, 1, 1].density.squeeze()
+
+        slope = (self.OUTLET_CONCENTRATION -
+                 self.INLET_CONCENTRATION) / effective_boxl
+        offset = self.INLET_CONCENTRATION
+        analytic_values = slope * domain_positions + offset
+
+        np.testing.assert_almost_equal(
+            measured_values,
+            analytic_values,
+            decimal_precision)
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/ek_fixedflux.py b/testsuite/python/ek_fixedflux.py
new file mode 100644
index 00000000000..8275d0b9d91
--- /dev/null
+++ b/testsuite/python/ek_fixedflux.py
@@ -0,0 +1,119 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import numpy as np
+import unittest as ut
+import unittest_decorators as utx
+
+import espressomd
+import espressomd.shapes
+import espressomd.electrokinetics
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class EKFixedFlux(ut.TestCase):
+    BOX_L = 2.5
+    AGRID = 0.5
+    DENSITY = 1
+    DIFFUSION_COEFFICIENT = 0.1
+    TIMESTEPS = 40
+    TAU = 0.25
+
+    INFLOW_FLUX = 0.1
+
+    system = espressomd.System(box_l=[BOX_L, BOX_L, BOX_L])
+    system.time_step = 1.0
+    system.cell_system.skin = 0.4
+
+    def tearDown(self) -> None:
+        self.system.ekcontainer.clear()
+
+    def test_inflow_single(self):
+        self.detail_test_inflow(single_precision=True)
+
+    def test_inflow_double(self):
+        self.detail_test_inflow(single_precision=False)
+
+    def detail_test_inflow(self, single_precision: bool):
+        """
+        Testing the EK fixed flux boundaries to test the fixed inflow into a non-periodic box.
+        """
+
+        decimal_precision: int = 5 if single_precision else 7
+
+        lattice = espressomd.electrokinetics.LatticeWalberla(
+            n_ghost_layers=1, agrid=self.AGRID)
+
+        ekspecies = espressomd.electrokinetics.EKSpecies(
+            lattice=lattice, density=0.0, diffusion=self.DIFFUSION_COEFFICIENT,
+            valency=0.0, advection=False, friction_coupling=False,
+            single_precision=single_precision, tau=self.TAU)
+
+        eksolver = espressomd.electrokinetics.EKNone(lattice=lattice)
+
+        self.system.ekcontainer.tau = self.TAU
+        self.system.ekcontainer.solver = eksolver
+        self.system.ekcontainer.add(ekspecies)
+
+        ekspecies[1:-1, 1:-1, 1:-1].density = self.DENSITY
+
+        ekspecies[:, :, 0].flux_boundary = \
+            espressomd.electrokinetics.FluxBoundary([0, 0, 0])
+        ekspecies[:, :, -1].flux_boundary = \
+            espressomd.electrokinetics.FluxBoundary([0, 0, 0])
+        ekspecies[:, 0, :].flux_boundary = \
+            espressomd.electrokinetics.FluxBoundary([0, 0, 0])
+        ekspecies[:, -1, :].flux_boundary = \
+            espressomd.electrokinetics.FluxBoundary([0, 0, 0])
+        ekspecies[0, :, :].flux_boundary = \
+            espressomd.electrokinetics.FluxBoundary([0, 0, 0])
+        ekspecies[-1, :, :].flux_boundary = \
+            espressomd.electrokinetics.FluxBoundary([0, 0, 0])
+
+        # set fixed flux in +z-direction
+        ekspecies[:, :, -1].flux_boundary = espressomd.electrokinetics.FluxBoundary(
+            [0, 0, -self.INFLOW_FLUX])
+        additional_center_flux = 3 * self.INFLOW_FLUX
+        midpoint = int(lattice.shape[0] // 2)
+        ekspecies[midpoint, midpoint, -1].flux_boundary = \
+            espressomd.electrokinetics.FluxBoundary(
+                [0, 0, -self.INFLOW_FLUX - additional_center_flux])
+
+        # check density before integration
+        expected_initial_density = self.DENSITY * np.prod(lattice.shape - 2)
+
+        np.testing.assert_almost_equal(
+            actual=np.sum(ekspecies[1:-1, 1:-1, 1:-1].density),
+            desired=expected_initial_density, decimal=decimal_precision)
+
+        self.system.integrator.run(self.TIMESTEPS)
+
+        # check that density has pushed into domain
+        inflow_area = np.prod(lattice.shape[:2] - 2)
+        expected_end_density = expected_initial_density + \
+            (self.INFLOW_FLUX * inflow_area + additional_center_flux) * \
+            self.TIMESTEPS * self.TAU / self.AGRID
+
+        np.testing.assert_almost_equal(
+            actual=np.sum(ekspecies[1:-1, 1:-1, 1:-1].density),
+            desired=expected_end_density, decimal=decimal_precision)
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/ek_indexed_reactions.py b/testsuite/python/ek_indexed_reactions.py
new file mode 100644
index 00000000000..c47db731b06
--- /dev/null
+++ b/testsuite/python/ek_indexed_reactions.py
@@ -0,0 +1,167 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import numpy as np
+import unittest as ut
+import unittest_decorators as utx
+
+import espressomd
+import espressomd.lb
+import espressomd.electrokinetics
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class EKReaction(ut.TestCase):
+    AGRID = 1.32
+    BOX_L = np.asarray([22., 2., 2.]) * AGRID
+    PADDING = 1
+    WIDTH = BOX_L[0] - 2 * PADDING * AGRID
+    INITIAL_DENSITIES = [1.7, 1.3]
+    DIFFUSION_COEFFICIENTS = np.array([0.4, 0.2])
+    REACTION_RATES = np.array([5e-3, 8e-3])
+    TIME = 7000
+    TAU = 1.88
+
+    system = espressomd.System(box_l=BOX_L)
+    system.time_step = TAU
+    system.cell_system.skin = 0.4
+
+    def tearDown(self) -> None:
+        self.system.ekcontainer.clear()
+        self.system.ekreactions.clear()
+
+    def analytic_density_profiles(
+            self, width, reaction_rates, diffusion_coefficients, initial_densities, agrid):
+        rezipr_diff = 1 / \
+            diffusion_coefficients[0] + 1 / diffusion_coefficients[1]
+        rezipr_rate = 1 / reaction_rates[0] + 1 / reaction_rates[1]
+        actual_width = width - agrid
+        slopes = sum(initial_densities) / (diffusion_coefficients *
+                                           (rezipr_rate + actual_width / 2 * rezipr_diff))
+        midvalues = sum(initial_densities) / (reaction_rates * (rezipr_rate +
+                                                                actual_width / 2 * rezipr_diff)) + actual_width / 2 * slopes
+
+        x = np.linspace(-actual_width / 2, actual_width /
+                        2, int(width / agrid))
+        values_a = slopes[0] * x + midvalues[0]
+        values_b = -slopes[1] * x + midvalues[1]
+        return values_a, values_b
+
+    def test_reaction_single(self):
+        self.detail_test_reaction(single_precision=True)
+
+    def test_reaction_double(self):
+        self.detail_test_reaction(single_precision=False)
+
+    def detail_test_reaction(self, single_precision: bool):
+
+        lattice = espressomd.electrokinetics.LatticeWalberla(
+            n_ghost_layers=1, agrid=self.AGRID)
+
+        eksolver = espressomd.electrokinetics.EKNone(lattice=lattice)
+
+        self.system.ekcontainer.tau = self.TAU
+
+        self.system.ekcontainer.solver = eksolver
+
+        species_A = espressomd.electrokinetics.EKSpecies(
+            lattice=lattice, density=self.INITIAL_DENSITIES[0],
+            diffusion=self.DIFFUSION_COEFFICIENTS[0], valency=0.0,
+            advection=False, friction_coupling=False,
+            single_precision=single_precision, tau=self.TAU)
+        self.system.ekcontainer.add(species_A)
+
+        species_B = espressomd.electrokinetics.EKSpecies(
+            lattice=lattice, density=self.INITIAL_DENSITIES[1],
+            diffusion=self.DIFFUSION_COEFFICIENTS[1], valency=0.0,
+            advection=False, friction_coupling=False,
+            single_precision=single_precision, tau=self.TAU)
+        self.system.ekcontainer.add(species_B)
+
+        coeffs_left = [-1.0, 1.0]
+        reactants_left = [
+            espressomd.electrokinetics.EKReactant(
+                ekspecies=species_A,
+                stoech_coeff=coeffs_left[0],
+                order=1.0),
+            espressomd.electrokinetics.EKReactant(
+                ekspecies=species_B,
+                stoech_coeff=coeffs_left[1],
+                order=0.0)]
+
+        reaction_left = espressomd.electrokinetics.EKIndexedReaction(
+            reactants=reactants_left, coefficient=self.REACTION_RATES[0],
+            lattice=lattice, tau=self.TAU)
+        reaction_left[1, :, :] = True
+
+        coeffs_right = [1.0, -1.0]
+        reactants_right = [
+            espressomd.electrokinetics.EKReactant(
+                ekspecies=species_A,
+                stoech_coeff=coeffs_right[0],
+                order=0.0),
+            espressomd.electrokinetics.EKReactant(
+                ekspecies=species_B,
+                stoech_coeff=coeffs_right[1],
+                order=1.0)]
+
+        reaction_right = espressomd.electrokinetics.EKIndexedReaction(
+            reactants=reactants_right, coefficient=self.REACTION_RATES[1],
+            lattice=lattice, tau=self.TAU)
+        reaction_right[-2, :, :] = True
+
+        self.system.ekreactions.add(reaction_left)
+        self.system.ekreactions.add(reaction_right)
+
+        wall_left = espressomd.shapes.Wall(
+            normal=[1, 0, 0], dist=self.PADDING * self.AGRID)
+        wall_right = espressomd.shapes.Wall(
+            normal=[-1, 0, 0], dist=-self.BOX_L[0] + self.PADDING * self.AGRID)
+        for obj in (wall_left, wall_right):
+            species_A.add_boundary_from_shape(
+                obj, [0.0, 0.0, 0.0], espressomd.electrokinetics.FluxBoundary)
+            species_B.add_boundary_from_shape(
+                obj, [0.0, 0.0, 0.0], espressomd.electrokinetics.FluxBoundary)
+
+        self.system.integrator.run(self.TIME)
+
+        density_profile = np.zeros((2, int(self.WIDTH / self.AGRID)))
+        for x in range(int(self.WIDTH / self.AGRID)):
+            density_profile[0, x] = np.mean(
+                species_A[x + self.PADDING, :, :].density)
+            density_profile[1, x] = np.mean(
+                species_B[x + self.PADDING, :, :].density)
+
+        analytic_density_profile = np.zeros((2, int(self.WIDTH / self.AGRID)))
+        analytic_density_profile[0], analytic_density_profile[1] = \
+            self.analytic_density_profiles(self.WIDTH,
+                                           self.REACTION_RATES,
+                                           self.DIFFUSION_COEFFICIENTS,
+                                           self.INITIAL_DENSITIES,
+                                           self.AGRID)
+
+        np.testing.assert_allclose(
+            density_profile,
+            analytic_density_profile,
+            rtol=self.REACTION_RATES[0],
+            atol=0)
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/ek_interface.py b/testsuite/python/ek_interface.py
new file mode 100644
index 00000000000..0ce7352927f
--- /dev/null
+++ b/testsuite/python/ek_interface.py
@@ -0,0 +1,433 @@
+#
+# Copyright (C) 2010-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import sys
+import numpy as np
+import itertools
+import unittest as ut
+import unittest_decorators as utx
+
+import espressomd
+import espressomd.lb
+import espressomd.electrokinetics
+
+
+class EKTest:
+
+    """
+    Basic tests for the electrokinetics implementation
+
+    """
+    system = espressomd.System(box_l=3 * [6.0])
+    np.random.seed(1)
+    params = {"tau": 0.01, "agrid": 0.5}
+    ek_species_params = {"kT": 1.5,
+                         "density": 0.85,
+                         "valency": 0.0,
+                         "diffusion": 0.1,
+                         "advection": False,
+                         "friction_coupling": False,
+                         "ext_efield": [0.1, 0.2, 0.3],
+                         "tau": params["tau"]}
+
+    system.periodicity = [True, True, True]
+    system.time_step = params["tau"]
+    system.cell_system.skin = 1.0
+
+    def setUp(self):
+        self.lattice = self.ek_lattice_class(
+            n_ghost_layers=1, agrid=self.params["agrid"])
+        ek_solver = espressomd.electrokinetics.EKNone(lattice=self.lattice)
+        self.system.ekcontainer.solver = ek_solver
+        self.system.ekcontainer.tau = self.system.time_step
+
+    def tearDown(self):
+        self.system.actors.clear()
+        self.system.ekcontainer.clear()
+        self.system.part.clear()
+        self.system.thermostat.turn_off()
+        self.system.time_step = self.params["tau"]
+        self.system.ekcontainer.solver = None
+
+    def make_default_ek_species(self):
+        return self.ek_species_class(
+            lattice=self.lattice, **self.ek_params, **self.ek_species_params)
+
+    def test_ek_container_poisson_solver(self):
+        ekcontainer = self.system.ekcontainer
+        ekcontainer.solver = None
+        self.assertFalse(ekcontainer.call_method("is_poisson_solver_set"))
+        self.assertIsNone(ekcontainer.solver)
+        ek_solver = espressomd.electrokinetics.EKNone(lattice=self.lattice)
+        self.assertFalse(ekcontainer.call_method("is_poisson_solver_set"))
+        ekcontainer.solver = ek_solver
+        self.assertTrue(ekcontainer.call_method("is_poisson_solver_set"))
+        self.assertIsInstance(self.system.ekcontainer.solver,
+                              espressomd.electrokinetics.EKNone)
+
+    def test_ek_species(self):
+        # inactive species
+        ek_species = self.make_default_ek_species()
+        self.check_ek_species_properties(ek_species)
+
+        self.assertAlmostEqual(
+            self.system.ekcontainer.tau,
+            self.system.time_step,
+            delta=self.atol)
+
+        # activated species
+        ek_species = self.make_default_ek_species()
+        self.system.ekcontainer.add(ek_species)
+        self.check_ek_species_properties(ek_species)
+
+        # deactivated species
+        ek_species = self.make_default_ek_species()
+        self.system.ekcontainer.add(ek_species)
+        self.system.ekcontainer.remove(ek_species)
+        self.check_ek_species_properties(ek_species)
+
+        # reactive species
+        ek_species = self.make_default_ek_species()
+        espressomd.electrokinetics.EKReactant(
+            ekspecies=ek_species, stoech_coeff=-2.0, order=2.0)
+        self.check_ek_species_properties(ek_species)
+
+    def check_ek_species_properties(self, species):
+        agrid = self.params["agrid"]
+        # check getters
+        self.assertEqual(species.lattice.n_ghost_layers, 1)
+        self.assertAlmostEqual(species.lattice.agrid, agrid, delta=self.atol)
+        self.assertAlmostEqual(species.diffusion, 0.1, delta=self.atol)
+        self.assertAlmostEqual(species.valency, 0.0, delta=self.atol)
+        self.assertAlmostEqual(species.kT, 1.5, delta=self.atol)
+        np.testing.assert_allclose(
+            np.copy(species.ext_efield), [0.1, 0.2, 0.3], atol=self.atol)
+        self.assertFalse(species.advection)
+        self.assertFalse(species.friction_coupling)
+        self.assertEqual(
+            species.single_precision,
+            self.ek_params["single_precision"])
+        # check setters
+        species.diffusion = 0.2
+        species.valency = 0.3
+        species.kT = 0.4
+        ext_f = [0.01, 0.02, 0.03]
+        species.ext_efield = ext_f
+        species.advection = True
+        species.friction_coupling = True
+        self.assertAlmostEqual(species.diffusion, 0.2, delta=self.atol)
+        self.assertAlmostEqual(species.valency, 0.3, delta=self.atol)
+        self.assertAlmostEqual(species.kT, 0.4, delta=self.atol)
+        np.testing.assert_allclose(
+            np.copy(species.ext_efield), ext_f, atol=self.atol)
+        self.assertTrue(species.advection)
+        self.assertTrue(species.friction_coupling)
+        # check node getters/setters
+        self.assertAlmostEqual(species[0, 0, 0].density, 0.85, delta=self.atol)
+        species[0, 0, 0].density = 0.90
+        self.assertAlmostEqual(species[0, 0, 0].density, 0.90, delta=self.atol)
+        with self.assertRaises(RuntimeError):
+            species[0, 0, 0].density = [1, 2]
+        with self.assertRaises(TypeError):
+            species[0, 1].density = 1.
+        # check boundary conditions
+        node = species[1, 1, 1]
+        self.assertIsNone(node.density_boundary)
+        self.assertIsNone(node.flux_boundary)
+        self.assertFalse(node.is_boundary)
+        node.flux_boundary = espressomd.electrokinetics.FluxBoundary(
+            [1., 2., 3.])
+        self.assertIsInstance(
+            node.flux_boundary,
+            espressomd.electrokinetics.FluxBoundary)
+        np.testing.assert_allclose(
+            np.copy(node.flux_boundary.flux), [1., 2., 3.], atol=self.atol)
+        self.assertTrue(node.is_boundary)
+        node.density_boundary = espressomd.electrokinetics.DensityBoundary(4.5)
+        self.assertIsInstance(
+            node.density_boundary,
+            espressomd.electrokinetics.DensityBoundary)
+        np.testing.assert_allclose(
+            np.copy(node.density_boundary.density), 4.5, atol=self.atol)
+        self.assertTrue(node.is_boundary)
+        node.density_boundary = None
+        self.assertTrue(node.is_boundary)
+        node.flux_boundary = None
+        self.assertFalse(node.is_boundary)
+        self.assertIsNone(node.density_boundary)
+        self.assertIsNone(node.flux_boundary)
+        with self.assertRaisesRegex(TypeError, "must be an instance of DensityBoundary or None"):
+            node.density_boundary = 4.6
+        with self.assertRaisesRegex(TypeError, "must be an instance of FluxBoundary or None"):
+            node.flux_boundary = 4.6
+
+    @utx.skipIfMissingFeatures(["WALBERLA_FFT"])
+    def test_ek_fft_solver(self):
+        ek_solver = espressomd.electrokinetics.EKFFT(
+            lattice=self.lattice, permittivity=0.01,
+            single_precision=self.ek_params["single_precision"])
+        self.assertEqual(
+            ek_solver.single_precision,
+            self.ek_params["single_precision"])
+        self.assertAlmostEqual(ek_solver.permittivity, 0.01, delta=self.atol)
+        ek_solver.permittivity = 0.05
+        self.assertAlmostEqual(ek_solver.permittivity, 0.05, delta=self.atol)
+
+        self.system.ekcontainer.solver = ek_solver
+        self.assertTrue(
+            self.system.ekcontainer.call_method("is_poisson_solver_set"))
+        self.assertIsInstance(self.system.ekcontainer.solver,
+                              espressomd.electrokinetics.EKFFT)
+
+    def test_ek_none_solver(self):
+        ek_solver = espressomd.electrokinetics.EKNone(
+            lattice=self.lattice,
+            single_precision=self.ek_params["single_precision"])
+        self.assertEqual(
+            ek_solver.single_precision,
+            self.ek_params["single_precision"])
+
+        self.system.ekcontainer.solver = ek_solver
+        self.assertTrue(
+            self.system.ekcontainer.call_method("is_poisson_solver_set"))
+        self.assertIsInstance(self.system.ekcontainer.solver,
+                              espressomd.electrokinetics.EKNone)
+
+    def test_ek_solver_exceptions(self):
+        ek_solver = self.system.ekcontainer.solver
+        ek_species = self.make_default_ek_species()
+        self.system.ekcontainer.add(ek_species)
+        incompatible_lattice = self.ek_lattice_class(
+            n_ghost_layers=2, agrid=self.params["agrid"])
+        incompatible_ek_species = self.ek_species_class(
+            lattice=incompatible_lattice, **self.ek_params, **self.ek_species_params)
+        with self.assertRaisesRegex(RuntimeError, "EKSpecies lattice incompatible with existing Poisson solver lattice"):
+            self.system.ekcontainer.add(incompatible_ek_species)
+        with self.assertRaisesRegex(RuntimeError, "EKSpecies lattice incompatible with existing EKSpecies lattice"):
+            self.system.ekcontainer.solver = None
+            self.system.ekcontainer.add(incompatible_ek_species)
+        with self.assertRaisesRegex(ValueError, "Parameter 'tau' is required when container isn't empty"):
+            self.system.ekcontainer.tau = None
+        with self.assertRaisesRegex(RuntimeError, "Poisson solver lattice incompatible with existing EKSpecies lattice"):
+            self.system.ekcontainer.clear()
+            self.system.ekcontainer.add(incompatible_ek_species)
+            self.system.ekcontainer.solver = ek_solver
+        with self.assertRaisesRegex(ValueError, "Parameter 'tau' must be > 0"):
+            self.system.ekcontainer.tau = 0.
+        self.assertAlmostEqual(
+            self.system.ekcontainer.tau, self.system.time_step, delta=1e-7)
+        self.system.ekcontainer.clear()
+        self.system.ekcontainer.tau = None
+        self.assertIsNone(self.system.ekcontainer.tau)
+
+    def test_ek_reactants(self):
+        ek_species = self.make_default_ek_species()
+        ek_reactant = espressomd.electrokinetics.EKReactant(
+            ekspecies=ek_species, stoech_coeff=-2.0, order=2.0)
+        self.assertAlmostEqual(ek_reactant.stoech_coeff, -2.0, delta=self.atol)
+        self.assertAlmostEqual(ek_reactant.order, 2.0, delta=self.atol)
+        ek_reactant.stoech_coeff = 1.0
+        self.assertAlmostEqual(ek_reactant.stoech_coeff, 1.0, delta=self.atol)
+
+        with self.assertRaisesRegex(RuntimeError, f"(Parameter|Property) 'order' is read-only"):
+            ek_reactant.order = 1.5
+
+    def test_ek_indexed_reactions(self):
+        ek_species = self.make_default_ek_species()
+        ek_reactant = espressomd.electrokinetics.EKReactant(
+            ekspecies=ek_species, stoech_coeff=-2.0, order=2.0)
+        ek_reaction = espressomd.electrokinetics.EKIndexedReaction(
+            reactants=[ek_reactant], coefficient=1.5, lattice=self.lattice, tau=self.params["tau"])
+        self.assertAlmostEqual(ek_reaction.coefficient, 1.5, delta=self.atol)
+        ek_reaction.coefficient = 0.5
+        self.assertAlmostEqual(ek_reaction.coefficient, 0.5, delta=self.atol)
+        # boundaries
+        self.assertFalse(ek_reaction[1, 1, 1])
+        ek_reaction[1, 1, 1] = True
+        self.assertTrue(ek_reaction[1, 1, 1])
+        ek_reaction.remove_node_from_index([1, 1, 1])
+        self.assertFalse(ek_reaction[1, 1, 1])
+        ek_reaction.add_node_to_index([1, 1, 1])
+        self.assertTrue(ek_reaction[1, 1, 1])
+
+    def test_grid_index(self):
+        ek_species = self.make_default_ek_species()
+        ek_reactant = espressomd.electrokinetics.EKReactant(
+            ekspecies=ek_species, stoech_coeff=-2.0, order=2.0)
+        ek_reaction = espressomd.electrokinetics.EKIndexedReaction(
+            reactants=[ek_reactant], coefficient=1.5, lattice=self.lattice, tau=self.params["tau"])
+        # check ranges and out-of-bounds access
+        shape = np.around(self.system.box_l / self.params["agrid"]).astype(int)
+        for i in range(3):
+            n = [0, 0, 0]
+            n[i] -= shape[i]
+            ek_reaction[n[0], n[1], n[2]] = True
+            self.assertTrue(ek_reaction[0, 0, 0])
+            self.assertEqual(ek_reaction[tuple(n)], ek_reaction[0, 0, 0])
+            self.assertEqual(ek_species[tuple(n)], ek_species[0, 0, 0])
+            for offset in (shape[i] + 1, -(shape[i] + 1)):
+                n = [0, 0, 0]
+                n[i] += offset
+                err_msg = rf"provided index \[{str(n)[1:-1]}\] is out of range for shape \[{str(list(shape))[1:-1]}\]"
+                with self.assertRaisesRegex(IndexError, err_msg):
+                    ek_reaction[tuple(n)]
+                with self.assertRaisesRegex(IndexError, err_msg):
+                    ek_species[tuple(n)]
+        # node index
+        node = ek_species[1, 2, 3]
+        with self.assertRaisesRegex(RuntimeError, "Parameter 'index' is read-only"):
+            node.index = [2, 4, 6]
+        np.testing.assert_array_equal(node.index, [1, 2, 3])
+        retval = node.call_method("override_index", index=[2, 4, 6])
+        self.assertEqual(retval, 0)
+        np.testing.assert_array_equal(node.index, [2, 4, 6])
+        retval = node.call_method("override_index", index=[0, 0, shape[2]])
+        self.assertEqual(retval, 1)
+        np.testing.assert_array_equal(node.index, [2, 4, 6])
+        np.testing.assert_array_equal(ek_species[-1, -1, -1].index, shape - 1)
+
+    def test_runtime_exceptions(self):
+        # set up a valid species
+        ek_species = self.make_default_ek_species()
+        ek_species.kT = 0.
+        self.system.ekcontainer.add(ek_species)
+        self.system.integrator.run(1)
+
+        print("\nTesting EK runtime error messages:", file=sys.stderr)
+        sys.stderr.flush()
+
+        # check exceptions without EK Poisson solver
+        with self.assertRaisesRegex(Exception, "EK requires a Poisson solver"):
+            self.system.ekcontainer.solver = None
+            self.system.integrator.run(1)
+        ek_solver = espressomd.electrokinetics.EKNone(lattice=self.lattice)
+        self.system.ekcontainer.solver = ek_solver
+
+        # check exceptions without LB force field
+        with self.assertRaisesRegex(Exception, "friction coupling enabled but no force field accessible"):
+            ek_species.friction_coupling = True
+            ek_species.advection = False
+            self.system.integrator.run(1)
+
+        # check exceptions without LB velocity field
+        with self.assertRaisesRegex(Exception, "advection enabled but no velocity field accessible"):
+            ek_species.friction_coupling = False
+            ek_species.advection = True
+            self.system.integrator.run(1)
+
+        # non-diffusive species don't trigger exceptions due to early exit
+        ek_species.friction_coupling = True
+        ek_species.advection = True
+        ek_species.diffusion = 0.
+        self.system.integrator.run(1)
+
+        # check exceptions with an incompatible LB time step
+        with self.assertRaisesRegex(Exception, "LB and EK are active but with different time steps"):
+            lb = self.lb_fluid_class(
+                lattice=self.lattice, density=0.5, kinematic_viscosity=3.,
+                tau=2. * self.params["tau"], **self.lb_params)
+            self.system.actors.add(lb)
+            self.system.integrator.run(1)
+
+        print("End of EK runtime error messages", file=sys.stderr)
+        sys.stderr.flush()
+
+        # reset global variable fluid_step
+        self.system.ekcontainer.clear()
+        self.system.integrator.run(1)
+
+    def test_ek_bulk_reactions(self):
+        ek_species = self.make_default_ek_species()
+        ek_reactant = espressomd.electrokinetics.EKReactant(
+            ekspecies=ek_species, stoech_coeff=-2.0, order=2.0)
+        ek_reaction = espressomd.electrokinetics.EKBulkReaction(
+            reactants=[ek_reactant], coefficient=1.5, lattice=self.lattice, tau=self.params["tau"])
+        self.assertAlmostEqual(ek_reaction.coefficient, 1.5, delta=self.atol)
+        ek_reaction.coefficient = 0.5
+        self.assertAlmostEqual(ek_reaction.coefficient, 0.5, delta=self.atol)
+
+    def test_raise_if_read_only(self):
+        ek_species = self.make_default_ek_species()
+        for key in {"lattice", "shape", "single_precision"}:
+            with self.assertRaisesRegex(RuntimeError, f"(Parameter|Property) '{key}' is read-only"):
+                setattr(ek_species, key, 0)
+
+    def test_ctor_exceptions(self):
+        def make_kwargs(**kwargs):
+            ek_kwargs = {"lattice": self.lattice}
+            ek_kwargs.update(self.ek_species_params)
+            ek_kwargs.update(self.ek_params)
+            ek_kwargs.update(kwargs)
+            return ek_kwargs
+
+        with self.assertRaisesRegex(ValueError, "Parameter 'tau' must be > 0"):
+            self.ek_species_class(**make_kwargs(tau=0.))
+        with self.assertRaisesRegex(ValueError, "Parameter 'density' must be >= 0"):
+            self.ek_species_class(**make_kwargs(density=-1.))
+        with self.assertRaisesRegex(ValueError, "Parameter 'kT' must be >= 0"):
+            self.ek_species_class(**make_kwargs(kT=-1.))
+
+    def test_bool_operations_on_node(self):
+        ekspecies = self.make_default_ek_species()
+        # test __eq()__ where a node is equal to itself and not equal to any
+        # other node
+        assert ekspecies[0, 0, 0] == ekspecies[0, 0, 0]
+        shape = np.around(self.system.box_l / self.params["agrid"]).astype(int)
+        nodes = [
+            ekspecies[ijk] for ijk in itertools.product(
+                range(shape[0]), range(shape[1]), range(shape[2]))]
+        nodes.remove(ekspecies[0, 0, 0])
+        assert all(ekspecies[0, 0, 0] != node for node in nodes)
+        # test __hash()__ intercept to identify nodes based on index rather
+        # than name. set() constructor runs hash()
+        subset1, subset2 = nodes[:-10], nodes[-10:]
+        assert len(set(subset1 + subset1)) == len(subset1)
+        assert len(set(subset1 + subset2)) == len(subset1) + len(subset2)
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class EKTestWalberla(EKTest, ut.TestCase):
+
+    """Test for the Walberla implementation of the EK in double-precision."""
+
+    lb_fluid_class = espressomd.lb.LBFluidWalberla
+    ek_lattice_class = espressomd.electrokinetics.LatticeWalberla
+    ek_species_class = espressomd.electrokinetics.EKSpecies
+    ek_params = {"single_precision": False}
+    lb_params = {"single_precision": False}
+    atol = 1e-10
+    rtol = 1e-7
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class EKTestWalberlaSinglePrecision(EKTest, ut.TestCase):
+
+    """Test for the Walberla implementation of the EK in single-precision."""
+
+    lb_fluid_class = espressomd.lb.LBFluidWalberla
+    ek_lattice_class = espressomd.electrokinetics.LatticeWalberla
+    ek_species_class = espressomd.electrokinetics.EKSpecies
+    ek_params = {"single_precision": True}
+    lb_params = {"single_precision": True}
+    atol = 1e-7
+    rtol = 5e-5
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/ek_noflux.py b/testsuite/python/ek_noflux.py
new file mode 100644
index 00000000000..35ab7784b9c
--- /dev/null
+++ b/testsuite/python/ek_noflux.py
@@ -0,0 +1,106 @@
+#
+# Copyright (C) 2022-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import numpy as np
+import unittest as ut
+import unittest_decorators as utx
+
+import espressomd
+import espressomd.shapes
+import espressomd.electrokinetics
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class EKNoFlux(ut.TestCase):
+    BOX_L = 15.
+    AGRID = 1.0
+    DENSITY = 1
+    DIFFUSION_COEFFICIENT = 0.1
+    TIME = 50
+    RADIUS = 5.
+
+    system = espressomd.System(box_l=[BOX_L, BOX_L, BOX_L])
+    system.time_step = 1.0
+    system.cell_system.skin = 0.4
+
+    def tearDown(self) -> None:
+        self.system.ekcontainer.clear()
+
+    def test_noflux_single(self):
+        self.detail_test_noflux(single_precision=True)
+
+    def test_noflux_double(self):
+        self.detail_test_noflux(single_precision=False)
+
+    def detail_test_noflux(self, single_precision: bool):
+        """
+        Testing the EK noflux boundaries to not leak density outside of a sphere.
+        """
+
+        decimal_precision: int = 7 if single_precision else 10
+
+        lattice = espressomd.electrokinetics.LatticeWalberla(
+            n_ghost_layers=1, agrid=self.AGRID)
+
+        ekspecies = espressomd.electrokinetics.EKSpecies(
+            lattice=lattice, density=0.0, diffusion=self.DIFFUSION_COEFFICIENT,
+            valency=0.0, advection=False, friction_coupling=False,
+            single_precision=single_precision, tau=1.0)
+
+        eksolver = espressomd.electrokinetics.EKNone(lattice=lattice)
+
+        self.system.ekcontainer.tau = 1.0
+        self.system.ekcontainer.solver = eksolver
+        self.system.ekcontainer.add(ekspecies)
+
+        center = np.asarray(self.system.box_l / 2, dtype=int)
+
+        ekspecies[center[0], center[1], center[2]].density = self.DENSITY
+
+        sphere = espressomd.shapes.Sphere(
+            center=self.system.box_l / 2,
+            radius=self.RADIUS,
+            direction=-1)
+        ekspecies.add_boundary_from_shape(
+            sphere, [0, 0, 0], espressomd.electrokinetics.FluxBoundary)
+
+        positions = np.empty((*self.system.box_l.astype(int), 3))
+        positions[..., 2], positions[..., 1], positions[..., 0] = np.meshgrid(
+            *map(lambda x: np.arange(0, x) - x / 2, self.system.box_l))
+        positions += 0.5
+
+        self.system.integrator.run(self.TIME)
+
+        simulated_density = np.copy(ekspecies[:, :, :].density)
+
+        # check that the density is conserved globally
+        np.testing.assert_almost_equal(
+            np.sum(simulated_density), self.DENSITY, decimal_precision)
+
+        domain_density = simulated_density[np.logical_not(
+            ekspecies[:, :, :].is_boundary)]
+        # check that the density is kept constant inside the sphere
+        np.testing.assert_almost_equal(
+            np.sum(domain_density), self.DENSITY, decimal_precision)
+        np.testing.assert_array_less(
+            0., domain_density, "EK density array contains negative densities!")
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/ek_slice.py b/testsuite/python/ek_slice.py
new file mode 100644
index 00000000000..30c16bf6e86
--- /dev/null
+++ b/testsuite/python/ek_slice.py
@@ -0,0 +1,161 @@
+#
+# Copyright (C) 2010-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import itertools
+import numpy as np
+import unittest as ut
+import unittest_decorators as utx
+
+import espressomd
+import espressomd.electrokinetics
+
+
+@utx.skipIfMissingFeatures("WALBERLA")
+class Test(ut.TestCase):
+
+    """This simple test first writes random numbers and then reads them
+    to same slices of LB nodes and compares if the results are the same,
+    shape-wise and value-wise.
+    """
+
+    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
+    system.time_step = .01
+    system.cell_system.skin = 0.1
+    ek_species_params = {"kT": 1.5,
+                         "density": 0.85,
+                         "valency": 0.0,
+                         "diffusion": 0.1,
+                         "advection": False,
+                         "friction_coupling": False,
+                         "tau": 1.0}
+    np.random.seed(seed=42)
+
+    @classmethod
+    def setUpClass(cls):
+        cls.lattice = espressomd.electrokinetics.LatticeWalberla(
+            agrid=1., n_ghost_layers=1)
+        cls.ek_species = espressomd.electrokinetics.EKSpecies(
+            lattice=cls.lattice,
+            single_precision=False,
+            **cls.ek_species_params)
+
+    def test_slicing(self):
+        ek_species = self.ek_species
+
+        # array locked
+        array = ek_species[1:-1:1, 5, 3:6].density
+        with self.assertRaisesRegex(ValueError, "ESPResSo array properties return non-writable arrays"):
+            array[0, 0, 0] = 5.
+
+        input_dens = np.random.rand(8, 3) + 1.
+        ek_species[1:-1, 5, 3:6].density = input_dens
+        output_dens = ek_species[1:-1, 5, 3:6].density
+        np.testing.assert_array_almost_equal(np.copy(output_dens), input_dens)
+
+        # density broadcast (with type conversion from int to double)
+        ek_species[:, :, 0].density = 2
+        np.testing.assert_array_almost_equal(
+            np.copy(ek_species[:, :, 0].density), 2.)
+
+        # flux boundary on slice
+        output_boundary_shape = ek_species[1:, 1:, 1:].flux_boundary.shape
+        should_boundary_shape = (9, 9, 9)
+        np.testing.assert_array_equal(
+            output_boundary_shape, should_boundary_shape)
+
+        with self.assertRaisesRegex(TypeError, "Parameter 'values' must be an array_like of FluxBoundary or None"):
+            ek_species[1:, 1:, 1:].flux_boundary = np.zeros(
+                should_boundary_shape)
+        with self.assertRaisesRegex(TypeError, "Parameter 'values' must be an array_like of FluxBoundary or None"):
+            ek_species[1:, 1:, 1:].flux_boundary = np.array(
+                [None, [1, 2, 3]], dtype=object)
+
+        flux_ref = espressomd.electrokinetics.FluxBoundary([1e-6, 2e-6, 3e-6])
+        ek_species[1:2, 1:, 0].flux_boundary = flux_ref
+        ek_species[1:2, 2:, 0].flux_boundary = None
+        for flux in ek_species[1:2, 1, 0].flux_boundary.flatten():
+            np.testing.assert_array_almost_equal(
+                flux.flux, flux_ref.flux)
+        for flux in ek_species[1:2, 2:, 0:2].flux_boundary.flatten():
+            self.assertIsNone(flux)
+
+        # density boundary on slice
+        output_boundary_shape = ek_species[0:-1, 1:, 1:].density_boundary.shape
+        should_boundary_shape = (9, 9, 9)
+        np.testing.assert_array_equal(
+            output_boundary_shape, should_boundary_shape)
+
+        with self.assertRaisesRegex(TypeError, "Parameter 'values' must be an array_like of DensityBoundary or None"):
+            ek_species[1:, 1:, 1:].density_boundary = np.zeros(
+                should_boundary_shape)
+        with self.assertRaisesRegex(TypeError, "Parameter 'values' must be an array_like of DensityBoundary or None"):
+            ek_species[1:, 1:, 1:].density_boundary = np.array(
+                [None, 1.], dtype=object)
+
+        dens_ref = espressomd.electrokinetics.DensityBoundary(1e-6)
+        ek_species[2:3, 1:, 0].density_boundary = dens_ref
+        ek_species[2:3, 2:, 0].density_boundary = None
+        for dens in ek_species[2:3, 1, 0].density_boundary.flatten():
+            np.testing.assert_array_almost_equal(
+                dens.density, dens_ref.density)
+        for dens in ek_species[2:3, 2:, 0:2].density_boundary.flatten():
+            self.assertIsNone(dens)
+
+        # is_boundary on slice
+        output_boundary_shape = ek_species[1:, 1:, 1:].is_boundary.shape
+        should_boundary_shape = (9, 9, 9)
+        np.testing.assert_array_equal(
+            output_boundary_shape, should_boundary_shape)
+        np.testing.assert_array_equal(
+            np.copy(ek_species[1:3, 1:2, 0].is_boundary), True)
+        np.testing.assert_array_equal(
+            np.copy(ek_species[3:, 2:, 0:2].is_boundary), False)
+
+        with self.assertRaisesRegex(RuntimeError, "Property 'is_boundary' is read-only"):
+            ek_species[1:, 1:, 1:].is_boundary = np.zeros(
+                should_boundary_shape)
+
+        # access out of bounds
+        i = ek_species.shape[2] + 10
+        ek_slice = ek_species[1, 2, i:i + 10]
+        self.assertEqual(ek_slice.density.shape, (0,))
+        self.assertIsInstance(ek_slice.density.dtype, object)
+        with self.assertRaisesRegex(AttributeError, "Cannot set properties of an empty 'EKSpeciesSlice' object"):
+            ek_slice.density = [1., 2., 3.]
+
+        # other exceptions
+        with self.assertRaisesRegex(RuntimeError, "Unknown EK property 'unknown'"):
+            ek_species[:, :, :].call_method("get_value_shape", name="unknown")
+
+    def test_iterator(self):
+        ekslice_handle = self.ek_species[:, :, :]
+        # arrange node indices using class methods
+        ek_indices = [np.arange(self.ek_species.shape[i]) for i in range(3)]
+        arranged_indices = list(itertools.product(*ek_indices))
+        # arrange node indices using __iter__() enforced conversion
+        iterator_indices = [node.index for node in ekslice_handle]
+        # check the results correspond pairwise. order is implicitly preserved.
+        np.testing.assert_array_equal(arranged_indices, iterator_indices)
+        # use __eq()__ method form EKSpeciesNode()
+        assert all([x == y for x, y in zip(
+            arranged_indices, iterator_indices)])
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/engine_lb.py b/testsuite/python/engine_lb.py
index 2261a37fc16..20951362118 100644
--- a/testsuite/python/engine_lb.py
+++ b/testsuite/python/engine_lb.py
@@ -22,6 +22,7 @@
 import unittest as ut
 import unittest_decorators as utx
 import numpy as np
+import tests_common
 
 
 class SwimmerTest():
@@ -29,8 +30,8 @@ class SwimmerTest():
     system.cell_system.skin = 1
     system.time_step = 1e-2
     LB_params = {'agrid': 1,
-                 'dens': 1.1,
-                 'visc': 1.2,
+                 'density': 1.1,
+                 'kinematic_viscosity': 1.2,
                  'kT': 0,
                  'tau': system.time_step}
     gamma = 0.3
@@ -74,7 +75,7 @@ def add_all_types_of_swimmers(
 
     def setUp(self):
         self.set_cellsystem()
-        self.lbf = self.lb_class(**self.LB_params)
+        self.lbf = self.lb_class(**self.LB_params, **self.lb_params)
         self.system.actors.add(self.lbf)
         self.system.thermostat.set_lb(LB_fluid=self.lbf, gamma=self.gamma)
 
@@ -87,13 +88,32 @@ def test_momentum_conservation(self):
         """friction as well as 'active' forces apply to particles
         and to the fluid, so total momentum is conserved
         """
+        if self.lbf.get_params().get("single_precision", False):
+            self.skipTest('Momentum is not conserved on single precision')
+
         self.add_all_types_of_swimmers(rotation=False)
-        self.system.integrator.run(20, reuse_forces=True)
+
+        # Comments by Christoph Lohrmann from #3514:
+        # - why I used `reuse_forces=True` : If I don't use it, `force_calc()`
+        #   is called the first time without LB-coupling. That means no friction
+        #   for any swimmer and no additional force for the `v_swim` type swimmers.
+        #   The active force for the `f_swim` swimmers gets added anyway because
+        #   it is not derived from the coupling to LB. With `reuse_forces` at
+        #   least both types are treated the same.
+        # - Therefore, in the first halfstep, the active forces on the particles
+        #   are missing. This creates the first half of the missing momentum.
+        # - The LB fluid is always ahead by a half step (as checked by
+        #   `test_ext_force_density()` in `lb.py`). It is also not affected by
+        #   the `reuse_forces` in the first halfstep because `force_calc()`
+        #   with coupling is called in the main integration loop before
+        #   `lb_lbfluid_propagate()`
+        # - => in total, the fluid momentum is ahead by a full time step
+        self.system.integrator.run(100, reuse_forces=True)
         tot_mom = self.system.analysis.linear_momentum(include_particles=True,
                                                        include_lbfluid=True)
-        # compensate half-step offset between force calculation and LB-update
+        # compensate offset between force calculation and LB-update
         for part in self.system.part:
-            tot_mom += part.f * self.system.time_step / 2.
+            tot_mom += part.f * self.system.time_step
 
         np.testing.assert_allclose(tot_mom, 3 * [0.], atol=self.tol)
 
@@ -114,7 +134,7 @@ def test_particle_forces(self):
                 0.5 * self.system.time_step * swimmer.f / swimmer.mass
             # for friction coupling, the old fluid at the new position is used
             v_fluid = self.lbf.get_interpolated_velocity(
-                swimmer.pos + self.system.time_step * v_swimmer)
+                pos=swimmer.pos + self.system.time_step * v_swimmer)
             force = -self.gamma * (v_swimmer - v_fluid) + \
                 f_swim * director + self.gamma * v_swim * director
 
@@ -122,53 +142,110 @@ def test_particle_forces(self):
             np.testing.assert_allclose(
                 np.copy(swimmer.f), force, atol=self.tol)
 
+    def test_fluid_force(self):
+        """ forces on particles are already checked (self.test_particle_forces)
+        total force on the fluid matches (self.test_momentum_conservation)
+        only thing left to check is the location of the fluid force.
+        """
+        f_swim = 0.11
+        dip_length = 2 * self.LB_params['agrid']
+
+        sw0_pos = np.array([3.8, 1.1, 1.1])
+        sw1_pos = np.array([1.8, 4.1, 4.1])
+        sw0 = self.system.part.add(pos=sw0_pos, quat=np.sqrt([.5, 0, .5, 0]),
+                                   mass=0.9, rotation=3 * [False],
+                                   swimming={"mode": "pusher", "f_swim": f_swim,
+                                             "dipole_length": dip_length})
+        sw1 = self.system.part.add(pos=sw1_pos, quat=np.sqrt([.5, 0, .5, 0]),
+                                   mass=0.7, rotation=3 * [False],
+                                   swimming={"mode": "puller", "f_swim": f_swim,
+                                             "dipole_length": dip_length})
+
+        self.system.integrator.run(2)
+
+        for sw in [sw0, sw1]:
+            push_pull = -1 if sw.swimming['mode'] == 'pusher' else 1
+            sw_source_pos = sw.pos + self.system.time_step * \
+                sw.v + push_pull * dip_length * sw.director
+            # fold into box
+            sw_source_pos -= np.floor(sw_source_pos /
+                                      self.system.box_l) * np.array(self.system.box_l)
+            sw_source_nodes = tests_common.get_lb_nodes_around_pos(
+                sw_source_pos, self.lbf)
+            sw_source_forces = np.array(
+                [n.last_applied_force for n in sw_source_nodes])
+            np.testing.assert_allclose(
+                np.sum(sw_source_forces, axis=0),
+                -f_swim * np.array(sw.director), atol=self.tol)
+
 
-@utx.skipIfMissingFeatures(["ENGINE", "ROTATIONAL_INERTIA", "MASS"])
-class SwimmerTestRegularCPU(SwimmerTest, ut.TestCase):
+@utx.skipIfMissingFeatures(
+    ["ENGINE", "ROTATIONAL_INERTIA", "MASS", "WALBERLA"])
+class SwimmerTestDomDecWalberla(SwimmerTest, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
     tol = 1e-10
 
     def set_cellsystem(self):
         self.system.cell_system.set_regular_decomposition()
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(["ENGINE", "ROTATIONAL_INERTIA", "MASS"])
-class SwimmerTestRegularGPU(SwimmerTest, ut.TestCase):
+@utx.skipIfMissingFeatures(
+    ["ENGINE", "ROTATIONAL_INERTIA", "MASS", "WALBERLA"])
+class SwimmerTestDomDecWalberlaSinglePrecision(SwimmerTest, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluidGPU
-    tol = 1e-5
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    tol = 1e-10
 
     def set_cellsystem(self):
         self.system.cell_system.set_regular_decomposition()
 
 
-@utx.skipIfMissingFeatures(["ENGINE", "ROTATIONAL_INERTIA", "MASS"])
-class SwimmerTestNSquareCPU(SwimmerTest, ut.TestCase):
+@utx.skipIfMissingFeatures(
+    ["ENGINE", "ROTATIONAL_INERTIA", "MASS", "WALBERLA"])
+class SwimmerTestNSquareWalberla(SwimmerTest, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
     tol = 1e-10
 
     def set_cellsystem(self):
         self.system.cell_system.set_n_square()
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(["ENGINE", "ROTATIONAL_INERTIA", "MASS"])
-class SwimmerTestNSquareGPU(SwimmerTest, ut.TestCase):
+@utx.skipIfMissingFeatures(
+    ["ENGINE", "ROTATIONAL_INERTIA", "MASS", "WALBERLA"])
+class SwimmerTestNSquareWalberlaSinglePrecision(SwimmerTest, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluidGPU
-    tol = 1e-5
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    tol = 1e-10
 
     def set_cellsystem(self):
         self.system.cell_system.set_n_square()
 
 
-@utx.skipIfMissingFeatures(["ENGINE", "ROTATIONAL_INERTIA", "MASS"])
-class SwimmerTestHybrid0CPU(SwimmerTest, ut.TestCase):
+@utx.skipIfMissingFeatures(
+    ["ENGINE", "ROTATIONAL_INERTIA", "MASS", "WALBERLA"])
+class SwimmerTestHybrid0CPUWalberla(SwimmerTest, ut.TestCase):
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+    tol = 1e-10
+
+    def set_cellsystem(self):
+        self.system.cell_system.set_hybrid_decomposition(
+            n_square_types={0}, cutoff_regular=1)
+
 
-    lb_class = espressomd.lb.LBFluid
+@utx.skipIfMissingFeatures(
+    ["ENGINE", "ROTATIONAL_INERTIA", "MASS", "WALBERLA"])
+class SwimmerTestHybrid0CPUWalberlaSinglePrecision(SwimmerTest, ut.TestCase):
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
     tol = 1e-10
 
     def set_cellsystem(self):
@@ -176,10 +253,12 @@ def set_cellsystem(self):
             n_square_types={0}, cutoff_regular=1)
 
 
-@utx.skipIfMissingFeatures(["ENGINE", "ROTATIONAL_INERTIA", "MASS"])
-class SwimmerTestHybrid1CPU(SwimmerTest, ut.TestCase):
+@utx.skipIfMissingFeatures(
+    ["ENGINE", "ROTATIONAL_INERTIA", "MASS", "WALBERLA"])
+class SwimmerTestHybrid1CPUWalberla(SwimmerTest, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
     tol = 1e-10
 
     def set_cellsystem(self):
@@ -187,16 +266,17 @@ def set_cellsystem(self):
             n_square_types={1}, cutoff_regular=1)
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(["ENGINE", "ROTATIONAL_INERTIA", "MASS"])
-class SwimmerTestHybrid0GPU(SwimmerTest, ut.TestCase):
+@utx.skipIfMissingFeatures(
+    ["ENGINE", "ROTATIONAL_INERTIA", "MASS", "WALBERLA"])
+class SwimmerTestHybrid1CPUWalberlaSinglePrecision(SwimmerTest, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluidGPU
-    tol = 1e-5
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    tol = 1e-10
 
     def set_cellsystem(self):
         self.system.cell_system.set_hybrid_decomposition(
-            n_square_types={0}, cutoff_regular=1)
+            n_square_types={1}, cutoff_regular=1)
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/h5md.py b/testsuite/python/h5md.py
index 807b569d9e3..6fdbe90fd51 100644
--- a/testsuite/python/h5md.py
+++ b/testsuite/python/h5md.py
@@ -121,6 +121,7 @@ def test_opening(self):
     # doesn't alway work in parallel: https://github.com/h5py/h5py/issues/736
     @ut.skipIf(n_nodes > 1, "only runs for 1 MPI rank")
     def test_appending(self):
+        import time
         # write one frame to the file
         temp_file = self.temp_path / 'appending.h5'
         h5 = espressomd.io.writer.h5md.H5md(file_path=str(temp_file))
@@ -135,6 +136,11 @@ def test_appending(self):
         # check both frames are identical to the reference trajectory
         with h5py.File(temp_file, 'r') as cur:
             def predicate(cur, key):
+                """
+                Check dataset against reference values. Read operations on the
+                cursor are throttled to avoid triggering an address overflow.
+                """
+                time.sleep(0.01)  # introduce a delay between successive reads
                 np.testing.assert_allclose(cur[key], self.py_file[key])
             for key in ('position', 'image', 'velocity', 'force',
                         'id', 'species', 'mass', 'charge'):
diff --git a/testsuite/python/lattice.py b/testsuite/python/lattice.py
new file mode 100644
index 00000000000..71badb328b6
--- /dev/null
+++ b/testsuite/python/lattice.py
@@ -0,0 +1,69 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+import unittest as ut
+import unittest_decorators as utx
+
+import espressomd
+import espressomd.lb
+import numpy as np
+
+
+@utx.skipIfMissingFeatures("WALBERLA")
+class Test(ut.TestCase):
+
+    """
+    Basic tests of the block forest.
+
+    """
+    system = espressomd.System(box_l=[12., 4., 4.])
+
+    def test_interface(self):
+        LatticeWalberla = espressomd.lb.LatticeWalberla
+
+        # check getters
+        for n_ghost_layers in range(10):
+            obj = LatticeWalberla(agrid=1., n_ghost_layers=n_ghost_layers)
+            self.assertEqual(obj.n_ghost_layers, n_ghost_layers)
+        for agrid in (0.5, 1., 2.):
+            obj = LatticeWalberla(agrid=agrid, n_ghost_layers=1)
+            self.assertEqual(obj.agrid, agrid)
+            target_shape = np.asarray(self.system.box_l, dtype=int) / obj.agrid
+            np.testing.assert_array_equal(obj.shape, target_shape)
+
+        # check exception mechanism
+        obj = LatticeWalberla(agrid=1., n_ghost_layers=1)
+        with self.assertRaisesRegex(RuntimeError, "Parameter 'agrid' is read-only"):
+            obj.agrid = 2.
+        with self.assertRaisesRegex(RuntimeError, "Parameter 'n_ghost_layers' is read-only"):
+            obj.n_ghost_layers = 2
+        with self.assertRaisesRegex(RuntimeError, "Parameter 'n_ghost_layers' is missing"):
+            LatticeWalberla(agrid=1.)
+        with self.assertRaisesRegex(ValueError, "Parameter 'n_ghost_layers' must be >= 0"):
+            LatticeWalberla(agrid=1., n_ghost_layers=-1)
+        with self.assertRaisesRegex(ValueError, "Parameter 'agrid' must be > 0"):
+            LatticeWalberla(agrid=0., n_ghost_layers=1)
+        with self.assertRaisesRegex(ValueError, "Parameter 'agrid' must be > 0"):
+            LatticeWalberla(agrid=-1., n_ghost_layers=1)
+        with self.assertRaisesRegex(ValueError, "Parameter 'shape' must be derived from espressomd.shapes.Shape"):
+            obj = LatticeWalberla(agrid=1., n_ghost_layers=1)
+            next(obj.get_node_indices_inside_shape(10))
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/lattice_vtk.py b/testsuite/python/lattice_vtk.py
new file mode 100644
index 00000000000..0e20ddaa6e6
--- /dev/null
+++ b/testsuite/python/lattice_vtk.py
@@ -0,0 +1,371 @@
+#
+# Copyright (C) 2010-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import unittest as ut
+import unittest_decorators as utx
+
+import os
+import pathlib
+import tempfile
+import contextlib
+import numpy as np
+
+import espressomd
+import espressomd.lb
+import espressomd.shapes
+import espressomd.electrokinetics
+
+with contextlib.suppress(ImportError):
+    import espressomd.io.vtk
+
+
+class TestVTK:
+    system = espressomd.System(box_l=[6, 7, 5])
+    system.time_step = 0.1
+    system.cell_system.skin = 0.4
+
+    def setUp(self):
+        self.lattice = self.lattice_class(n_ghost_layers=1, agrid=0.5)
+        self.actor = self.add_actor()
+
+    def tearDown(self):
+        self.clear_actors()
+
+    @ut.skipIf(system.cell_system.get_state()["n_nodes"] > 4,
+               "this test is slow on more than 4 MPI ranks")
+    def test_exceptions(self):
+        label_invalid_obs = f"test_vtk_{self.vtk_id}_invalid_obs"
+        error_msg = rf"Only the following VTK observables are supported: \[{repr(sorted(self.valid_obs))[1:-1]}\], got 'dens'"
+        with self.assertRaisesRegex(ValueError, error_msg):
+            self.vtk_class(
+                identifier=label_invalid_obs, delta_N=0, observables=["dens"])
+        vtk_manual_id = f"test_vtk_{self.vtk_id}_manual"
+        vtk_auto_id = f"test_vtk_{self.vtk_id}_auto"
+        vtk_manual = self.vtk_class(
+            identifier=vtk_manual_id, delta_N=0, observables=["density"])
+        vtk_auto = self.vtk_class(
+            identifier=vtk_auto_id, delta_N=1, observables=["density"])
+        self.actor.add_vtk_writer(vtk=vtk_manual)
+        self.actor.add_vtk_writer(vtk=vtk_auto)
+        with self.assertRaisesRegex(RuntimeError, "Automatic VTK callbacks cannot be triggered manually"):
+            vtk_auto.write()
+        with self.assertRaisesRegex(RuntimeError, "Manual VTK callbacks cannot be disabled"):
+            vtk_manual.disable()
+        with self.assertRaisesRegex(RuntimeError, "Manual VTK callbacks cannot be enabled"):
+            vtk_manual.enable()
+        with self.assertRaisesRegex(RuntimeError, "already exists"):
+            self.actor.add_vtk_writer(vtk=self.vtk_class(
+                identifier=vtk_manual_id, delta_N=0, observables=[]))
+        with self.assertRaisesRegex(RuntimeError, "already attached to this lattice"):
+            self.actor.add_vtk_writer(vtk=self.actor.vtk_writers[0])
+        with self.assertRaisesRegex(RuntimeError, "not attached to this lattice"):
+            self.actor.remove_vtk_writer(vtk=self.vtk_class(
+                identifier=vtk_manual_id, delta_N=0, observables=[]))
+        with self.assertRaisesRegex(RuntimeError, "Cannot attach VTK object to multiple lattices"):
+            self.make_actor().add_vtk_writer(vtk=vtk_manual)
+        with self.assertRaisesRegex(RuntimeError, "Detached VTK objects cannot be attached again"):
+            self.actor.remove_vtk_writer(vtk=vtk_manual)
+            self.actor.add_vtk_writer(vtk=vtk_manual)
+        with self.assertRaisesRegex(ValueError, "Parameter 'delta_N' must be >= 0"):
+            self.vtk_class(identifier="a", delta_N=-1, observables=[])
+        with self.assertRaisesRegex(ValueError, "Parameter 'identifier' cannot be empty"):
+            self.vtk_class(identifier="", delta_N=0, observables=[])
+        with self.assertRaisesRegex(ValueError, "cannot be a filepath"):
+            self.vtk_class(
+                identifier=f"test{os.sep}test", delta_N=0, observables=[])
+
+        # can still use VTK when the actor has been cleared but not deleted
+        label_cleared = f"test_vtk_{self.vtk_id}_cleared"
+        vtk_cleared = self.vtk_class(
+            identifier=label_cleared, observables=["density"])
+        self.actor.add_vtk_writer(vtk=vtk_cleared)
+        self.clear_actors()
+        vtk_cleared.write()
+
+        # cannot use VTK when no lattice is attached to it
+        label_unattached = f"test_vtk_{self.vtk_id}_unattached"
+        label_unattached = self.vtk_class(
+            identifier=label_unattached, observables=[])
+        with self.assertRaisesRegex(RuntimeError, "This VTK object isn't attached to a lattice"):
+            label_unattached.write()
+
+
+class TestLBVTK(TestVTK):
+
+    valid_obs = ["density", "velocity_vector", "pressure_tensor"]
+
+    def make_actor(self):
+        return self.lb_class(
+            lattice=self.lattice, tau=0.1, density=1., kinematic_viscosity=1.,
+            ext_force_density=[0., 0.03, 0.], **self.lb_params)
+
+    def add_actor(self):
+        self.lbf = self.make_actor()
+        self.system.actors.add(self.lbf)
+        return self.lbf
+
+    def clear_actors(self):
+        self.system.actors.clear()
+
+    @utx.skipIfMissingModules("espressomd.io.vtk")
+    def test_vtk(self):
+        """
+        Check VTK files. Keep in mind the VTK module writes in single-precision.
+        """
+        dist = 1.5 * self.lattice.agrid
+        actor = self.lbf
+        actor.add_boundary_from_shape(
+            espressomd.shapes.Wall(normal=[1, 0, 0], dist=dist))
+        actor.add_boundary_from_shape(
+            espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-(self.system.box_l[0] - dist)))
+
+        n_steps = 10
+        shape = tuple(actor.shape)
+        shape = (shape[0] - 4, *shape[1:])
+        vtk_reader = espressomd.io.vtk.VTKReader()
+        label_density = "density"
+        label_velocity = "velocity_vector"
+        label_pressure = "pressure_tensor"
+
+        with tempfile.TemporaryDirectory() as tmp_directory:
+            path_vtk_root = pathlib.Path(tmp_directory)
+            label_vtk_end = f"test_vtk_{self.vtk_id}_end"
+            label_vtk_continuous = f"test_vtk_{self.vtk_id}_continuous"
+            path_vtk_end = path_vtk_root / label_vtk_end / "simulation_step_0.vtu"
+            path_vtk_continuous = [
+                path_vtk_root / label_vtk_continuous / f"simulation_step_{i}.vtu" for i in range(n_steps)]
+            filepaths = [path_vtk_end] + path_vtk_continuous
+
+            # write VTK files
+            vtk_obs = list(self.valid_obs)
+            vtk_obj = self.vtk_class(
+                identifier=label_vtk_continuous, delta_N=1, observables=vtk_obs,
+                base_folder=str(path_vtk_root))
+            actor.add_vtk_writer(vtk=vtk_obj)
+            vtk_obj.disable()
+            vtk_obj.enable()
+            self.system.integrator.run(n_steps)
+            vtk_obj = self.vtk_class(
+                identifier=label_vtk_end, delta_N=0, observables=vtk_obs,
+                base_folder=str(path_vtk_root))
+            actor.add_vtk_writer(vtk=vtk_obj)
+            vtk_obj.write()
+            self.assertEqual(sorted(vtk_obj.observables), sorted(vtk_obs))
+            self.assertEqual(vtk_obj.valid_observables(), set(self.valid_obs))
+
+            # check VTK files exist
+            for filepath in filepaths:
+                self.assertTrue(
+                    filepath.exists(),
+                    f"VTK file \"{filepath}\" not written to disk")
+            for filepath in [path_vtk_end.parent.with_suffix(".pvd"),
+                             path_vtk_continuous[0].parent.with_suffix(".pvd")]:
+                self.assertTrue(
+                    filepath.exists(),
+                    f"VTK summary file \"{filepath}\" not written to disk")
+
+            # check velocity profile is symmetric at all time steps
+            for filepath in filepaths:
+                vtk_velocity = vtk_reader.parse(filepath)[label_velocity]
+                v_profile = np.mean(
+                    np.linalg.norm(vtk_velocity, axis=-1),
+                    axis=(1, 2))
+                np.testing.assert_allclose(
+                    v_profile, v_profile[::-1], rtol=5e-5, atol=0.)
+
+            # check scalar pressure is symmetric at all time steps
+            for filepath in filepaths:
+                vtk_pressure = vtk_reader.parse(filepath)[label_pressure]
+                vtk_pressure = vtk_pressure.reshape(shape + (3, 3))
+                p_profile = np.mean(
+                    np.trace(vtk_pressure, axis1=-2, axis2=-1),
+                    axis=(1, 2))
+                np.testing.assert_allclose(
+                    p_profile, p_profile[::-1], rtol=5e-5, atol=0.)
+
+            # read VTK output of final time step
+            last_frames = []
+            for filepath in (path_vtk_end, path_vtk_continuous[-1]):
+                grids = vtk_reader.parse(filepath)
+                last_frames.append((
+                    grids[label_density],
+                    grids[label_velocity],
+                    grids[label_pressure].reshape(shape + (3, 3)),
+                ))
+
+            # check VTK output is identical in both continuous and manual mode
+            for i in range(len(last_frames[0])):
+                np.testing.assert_allclose(last_frames[0][i],
+                                           last_frames[1][i], atol=1e-10)
+
+            # check VTK values match node values in the final time step
+            lb_density = np.copy(self.lbf[2:-2, :, :].density)
+            lb_velocity = np.copy(self.lbf[2:-2, :, :].velocity)
+            lb_pressure = np.copy(self.lbf[2:-2, :, :].pressure_tensor)
+
+            for vtk_density, vtk_velocity, vtk_pressure in last_frames:
+                np.testing.assert_allclose(
+                    vtk_density, lb_density, rtol=1e-10, atol=0.)
+                np.testing.assert_allclose(
+                    vtk_velocity, lb_velocity, rtol=1e-7, atol=0.)
+                np.testing.assert_allclose(
+                    vtk_pressure, lb_pressure, rtol=1e-6, atol=0.)
+
+
+class TestEKVTK(TestVTK):
+
+    valid_obs = ["density"]
+
+    def make_actor(self):
+        return self.ek_class(
+            lattice=self.lattice, density=1., diffusion=0.1, valency=0.,
+            advection=False, friction_coupling=False, tau=0.1, **self.ek_params)
+
+    def add_actor(self):
+        self.solver = self.ek_solver(lattice=self.lattice)
+        self.species = self.make_actor()
+        self.system.ekcontainer.tau = 0.1
+        self.system.ekcontainer.solver = self.solver
+        self.system.ekcontainer.add(self.species)
+        return self.species
+
+    def clear_actors(self):
+        self.system.ekcontainer.clear()
+
+    @utx.skipIfMissingModules("espressomd.io.vtk")
+    def test_vtk(self):
+        """
+        Check VTK files. Keep in mind the VTK module writes in single-precision.
+        """
+        dist = 1.5 * self.lattice.agrid
+        actor = self.species
+        actor.add_boundary_from_shape(
+            shape=espressomd.shapes.Wall(normal=[1, 0, 0], dist=dist),
+            value=0.0, boundary_type=espressomd.electrokinetics.DensityBoundary)
+        actor.add_boundary_from_shape(
+            shape=espressomd.shapes.Wall(
+                normal=[-1, 0, 0], dist=-(self.system.box_l[0] - dist)),
+            value=0.0, boundary_type=espressomd.electrokinetics.DensityBoundary)
+
+        n_steps = 100
+        shape = tuple(self.lattice.shape)
+        shape = (shape[0] - 4, *shape[1:])
+        vtk_reader = espressomd.io.vtk.VTKReader()
+        label_density = "density"
+
+        with tempfile.TemporaryDirectory() as tmp_directory:
+            path_vtk_root = pathlib.Path(tmp_directory)
+            label_vtk_end = f"test_vtk_{self.vtk_id}_end"
+            label_vtk_continuous = f"test_vtk_{self.vtk_id}_continuous"
+            path_vtk_end = path_vtk_root / label_vtk_end / "simulation_step_0.vtu"
+            path_vtk_continuous = [
+                path_vtk_root / label_vtk_continuous / f"simulation_step_{i}.vtu" for i in range(n_steps)]
+            filepaths = [path_vtk_end] + path_vtk_continuous
+
+            # write VTK files
+            vtk_obs = list(self.valid_obs)
+            vtk_obj = self.vtk_class(
+                identifier=label_vtk_continuous, delta_N=1, observables=vtk_obs,
+                base_folder=str(path_vtk_root))
+            actor.add_vtk_writer(vtk=vtk_obj)
+            vtk_obj.disable()
+            vtk_obj.enable()
+            self.system.integrator.run(n_steps)
+            vtk_obj = self.vtk_class(
+                identifier=label_vtk_end, delta_N=0, observables=vtk_obs,
+                base_folder=str(path_vtk_root))
+            actor.add_vtk_writer(vtk=vtk_obj)
+            vtk_obj.write()
+            self.assertEqual(sorted(vtk_obj.observables), sorted(vtk_obs))
+            self.assertEqual(vtk_obj.valid_observables(), set(self.valid_obs))
+
+            # check VTK files exist
+            for filepath in filepaths:
+                self.assertTrue(
+                    filepath.exists(),
+                    f"VTK file \"{filepath}\" not written to disk")
+            for filepath in [path_vtk_end.parent.with_suffix(".pvd"),
+                             path_vtk_continuous[0].parent.with_suffix(".pvd")]:
+                self.assertTrue(
+                    filepath.exists(),
+                    f"VTK summary file \"{filepath}\" not written to disk")
+
+            # read VTK output of final time step
+            last_frames = []
+            for filepath in (path_vtk_end, path_vtk_continuous[-1]):
+                grids = vtk_reader.parse(filepath)
+                last_frames.append(grids[label_density])
+
+            # check VTK output is identical in both continuous and manual mode
+            for i in range(len(last_frames[0])):
+                np.testing.assert_allclose(last_frames[0][i],
+                                           last_frames[1][i], atol=1e-10)
+
+            # check VTK values match node values in the final time step
+            ek_density = np.copy(actor[2:-2, :, :].density)
+
+            for vtk_density in last_frames:
+                np.testing.assert_allclose(
+                    vtk_density, ek_density, rtol=5e-7)
+
+        self.assertEqual(len(actor.vtk_writers), 2)
+        actor.clear_vtk_writers()
+        self.assertEqual(len(actor.vtk_writers), 0)
+
+
+@utx.skipIfMissingFeatures("WALBERLA")
+class LBWalberlaWrite(TestLBVTK, ut.TestCase):
+    vtk_class = espressomd.lb.VTKOutput
+    lattice_class = espressomd.lb.LatticeWalberla
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+    vtk_id = "lb_double_precision"
+
+
+@utx.skipIfMissingFeatures("WALBERLA")
+class LBWalberlaWriteSinglePrecision(TestLBVTK, ut.TestCase):
+    vtk_class = espressomd.lb.VTKOutput
+    lattice_class = espressomd.lb.LatticeWalberla
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    vtk_id = "lb_single_precision"
+
+
+@utx.skipIfMissingFeatures("WALBERLA")
+class EKWalberlaWrite(TestEKVTK, ut.TestCase):
+    vtk_class = espressomd.electrokinetics.VTKOutput
+    lattice_class = espressomd.electrokinetics.LatticeWalberla
+    ek_class = espressomd.electrokinetics.EKSpecies
+    ek_solver = espressomd.electrokinetics.EKNone
+    ek_params = {"single_precision": False}
+    vtk_id = "ek_double_precision"
+
+
+@utx.skipIfMissingFeatures("WALBERLA")
+class EKWalberlaWriteSinglePrecision(TestEKVTK, ut.TestCase):
+    vtk_class = espressomd.electrokinetics.VTKOutput
+    lattice_class = espressomd.electrokinetics.LatticeWalberla
+    ek_class = espressomd.electrokinetics.EKSpecies
+    ek_solver = espressomd.electrokinetics.EKNone
+    ek_params = {"single_precision": True}
+    vtk_id = "ek_single_precision"
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/lb.py b/testsuite/python/lb.py
index bc32ae7b99b..858f7fb5479 100644
--- a/testsuite/python/lb.py
+++ b/testsuite/python/lb.py
@@ -21,13 +21,16 @@
 import unittest_decorators as utx
 import numpy as np
 import itertools
+
 import espressomd
 import espressomd.lb
+import espressomd.utils
 import espressomd.observables
-import sys
+import espressomd.electrostatics
+import tests_common
 
 
-class TestLB:
+class LBTest:
 
     """
     Basic tests of the lattice-Boltzmann implementation
@@ -40,17 +43,14 @@ class TestLB:
     """
     system = espressomd.System(box_l=3 * [6.0])
     np.random.seed(1)
-    params = {'time_step': 0.01,
-              'tau': 0.01,
+    gamma = 2.0
+    params = {'tau': 0.01,
               'agrid': 0.5,
-              'dens': 0.85,
-              'viscosity': 3.0,
-              'friction': 2.0,
-              'temp': 1.5,
-              'gamma': 1.5}
+              'density': 0.85,
+              'kinematic_viscosity': 3.0}
 
     system.periodicity = [True, True, True]
-    system.time_step = params['time_step']
+    system.time_step = params['tau']
     system.cell_system.skin = 1.0
     interpolation = False
 
@@ -58,152 +58,236 @@ def tearDown(self):
         self.system.actors.clear()
         self.system.part.clear()
         self.system.thermostat.turn_off()
-        self.system.box_l = 3 * [6.0]
-        self.system.time_step = self.params['time_step']
+        self.system.time_step = self.params['tau']
 
     def test_properties(self):
-        lbf = self.lb_class(
-            kT=1.0, seed=42, visc=self.params['viscosity'],
-            dens=self.params['dens'],
-            agrid=self.params['agrid'],
-            tau=self.system.time_step)
+        # inactive actor
+        lbf = self.lb_class(kT=1.0, seed=42, **self.params, **self.lb_params)
+        self.assertFalse(lbf.is_active)
+        self.check_properties(lbf)
+
+        # activated actor
+        lbf = self.lb_class(kT=1.0, seed=42, **self.params, **self.lb_params)
+        self.system.actors.add(lbf)
+        self.system.thermostat.set_lb(LB_fluid=lbf, seed=1)
+        self.assertTrue(lbf.is_active)
+        self.check_properties(lbf)
+        self.system.actors.remove(lbf)
+
+        # deactivated actor
+        lbf = self.lb_class(kT=1.0, seed=42, **self.params, **self.lb_params)
         self.system.actors.add(lbf)
-        with self.assertRaises(ValueError):
-            lbf.tau = -0.1
-        self.assertAlmostEqual(lbf.tau, self.system.time_step, delta=self.atol)
-        with self.assertRaises(ValueError):
-            lbf.density = -0.1
-        lbf.density = 1.0
-        with self.assertRaises(ValueError):
-            lbf.viscosity = -0.1
-        lbf.density = 2.4
-        with self.assertRaises(ValueError):
-            lbf.density = -2.4
-        self.assertAlmostEqual(lbf.density, 2.4, delta=self.atol)
-        lbf.seed = 56
+        self.system.thermostat.set_lb(LB_fluid=lbf, seed=1)
+        self.system.actors.remove(lbf)
+        self.assertFalse(lbf.is_active)
+        self.check_properties(lbf)
+
+    def check_properties(self, lbf):
+        agrid = self.params["agrid"]
+        tau = self.system.time_step
+        # check LB object
+        self.assertAlmostEqual(lbf.tau, tau, delta=self.atol)
+        self.assertAlmostEqual(lbf.agrid, agrid, delta=self.atol)
+        self.assertAlmostEqual(lbf.kinematic_viscosity, 3., delta=self.atol)
+        self.assertAlmostEqual(lbf.density, 0.85, delta=self.atol)
+        self.assertAlmostEqual(lbf.kT, 1.0, delta=self.atol)
+        self.assertEqual(lbf.seed, 42)
+        self.assertEqual(
+            lbf.single_precision,
+            self.lb_params["single_precision"])
+        np.testing.assert_allclose(
+            np.copy(lbf.ext_force_density), [0., 0., 0.], atol=self.atol)
+        lbf.kinematic_viscosity = 2.
+        self.assertAlmostEqual(lbf.kinematic_viscosity, 2., delta=self.atol)
+        ext_f = [0.01, 0.02, 0.03]
+        lbf.ext_force_density = ext_f
+        np.testing.assert_allclose(
+            np.copy(lbf.ext_force_density), ext_f, atol=self.atol)
+        self.assertEqual(lbf.rng_state, 0)
         self.system.integrator.run(1)
-        self.assertEqual(lbf.seed, 57)
-        lbf.tau = 0.2
-        self.assertAlmostEqual(lbf.tau, 0.2, delta=self.atol)
-        with self.assertRaises(ValueError):
-            lbf.set_params(bulk_visc=-1.2)
-        lbf.set_params(bulk_visc=1.2)
-        self.assertAlmostEqual(
-            lbf.get_params()['bulk_visc'], 1.2, delta=self.atol)
-        with self.assertRaises(ValueError):
-            lbf.set_params(gamma_odd=1.3)
-        lbf.set_params(gamma_odd=0.3)
-        self.assertAlmostEqual(
-            lbf.get_params()['gamma_odd'], 0.3, delta=self.atol)
-        with self.assertRaises(ValueError):
-            lbf.set_params(gamma_even=1.4)
-        lbf.set_params(gamma_even=0.4)
-        self.assertAlmostEqual(
-            lbf.get_params()['gamma_even'], 0.4, delta=self.atol)
+        self.assertEqual(lbf.rng_state, int(lbf.is_active))
+        lbf.rng_state = 56
+        self.system.integrator.run(1)
+        self.assertEqual(lbf.rng_state, 56 + int(lbf.is_active))
+        self.assertAlmostEqual(lbf.tau, tau, delta=self.atol)
+        self.assertAlmostEqual(lbf.agrid, agrid, delta=self.atol)
+        self.assertAlmostEqual(lbf.kT, 1.0, delta=self.atol)
+        self.assertEqual(lbf.seed, 42)
+        self.assertEqual(
+            lbf.single_precision,
+            self.lb_params["single_precision"])
+        lbf.kinematic_viscosity = 3.
+        self.assertAlmostEqual(lbf.kinematic_viscosity, 3., delta=self.atol)
+        ext_force_density = [0.02, 0.05, 0.07]
+        lbf.ext_force_density = ext_force_density
+        np.testing.assert_allclose(np.copy(lbf.ext_force_density),
+                                   ext_force_density, atol=self.atol)
+        # check node getters/setters
         lbf[0, 0, 0].velocity = [1, 2, 3]
         np.testing.assert_allclose(
             np.copy(lbf[0, 0, 0].velocity), [1, 2, 3], atol=self.atol)
-        with self.assertRaises(Exception):
+        with self.assertRaises(RuntimeError):
             lbf[0, 0, 0].velocity = [1, 2]
-        with self.assertRaises(Exception):
+        with self.assertRaises(TypeError):
             lbf[0, 1].velocity = [1, 2, 3]
+        node = lbf[0, 0, 0]
+        self.assertIsNone(node.boundary)
+        self.assertIsNone(node.boundary_force)
+        vbb_ref = espressomd.lb.VelocityBounceBack([1e-6, 2e-6, 3e-6])
+        node.boundary = vbb_ref
+        np.testing.assert_allclose(
+            np.copy(node.boundary.velocity), np.copy(vbb_ref.velocity),
+            atol=self.atol)
+        with self.assertRaisesRegex(TypeError, "Parameter 'value' must be an instance of VelocityBounceBack or None"):
+            node.boundary = vbb_ref.velocity
+        # TODO WALBERLA: remove next line (no-op to get code coverage) once
+        # the boundary force getter is implemented from the waLBerla side
+        self.assertEqual(len(node.boundary_force), 3)
+        # momentum update: check density conservation when velocity changes,
+        # and velocity conservation when density changes
+        node = lbf[1, 2, 3]
+        density_old = node.density
+        density_new = 0.5
+        velocity_old = node.velocity
+        velocity_new = [0.01, 0.02, 0.03]
+        node.velocity = velocity_new
+        np.testing.assert_allclose(np.copy(node.density),
+                                   np.copy(density_old), atol=self.atol)
+        np.testing.assert_allclose(np.copy(node.velocity),
+                                   np.copy(velocity_new), atol=self.atol)
+        node.density = density_new
+        np.testing.assert_allclose(np.copy(node.density),
+                                   np.copy(density_new), atol=self.atol)
+        np.testing.assert_allclose(np.copy(node.velocity),
+                                   np.copy(velocity_new), atol=self.atol)
+        node.density = density_old
+        node.velocity = velocity_old
+
+    def test_raise_if_read_only(self):
+        lbf = self.lb_class(**self.params, **self.lb_params)
+        for key in {'agrid', 'tau', 'density', 'kT', 'single_precision',
+                    'shape', 'pressure_tensor', 'seed', 'is_active'}:
+            with self.assertRaisesRegex(RuntimeError, f"(Parameter|Property) '{key}' is read-only"):
+                setattr(lbf, key, 0)
+
+    def test_ctor_exceptions(self):
+        def make_kwargs(**kwargs):
+            lb_kwargs = {}
+            lb_kwargs.update(self.params)
+            lb_kwargs.update(self.lb_params)
+            lb_kwargs.update(kwargs)
+            return lb_kwargs
+
+        with self.assertRaisesRegex(ValueError, "Parameter 'agrid' must be > 0"):
+            self.lb_class(**make_kwargs(agrid=0.))
+        with self.assertRaisesRegex(ValueError, "Parameter 'agrid' must be > 0"):
+            self.lb_class(**make_kwargs(agrid=-1.))
+        with self.assertRaisesRegex(ValueError, "Parameter 'tau' must be > 0"):
+            self.lb_class(**make_kwargs(tau=0.))
+        with self.assertRaisesRegex(ValueError, "Parameter 'density' must be > 0"):
+            self.lb_class(**make_kwargs(density=0.))
+        with self.assertRaisesRegex(ValueError, "Parameter 'kinematic_viscosity' must be >= 0"):
+            self.lb_class(**make_kwargs(kinematic_viscosity=-1.))
+        with self.assertRaisesRegex(ValueError, "Parameter 'kT' must be >= 0"):
+            self.lb_class(**make_kwargs(kT=-1., seed=42))
+        with self.assertRaisesRegex(ValueError, "Parameter 'seed' must be >= 0"):
+            self.lb_class(**make_kwargs(kT=0., seed=-42))
+        with self.assertRaisesRegex(RuntimeError, "Cannot add a second LB instance"):
+            lbf = self.lb_class(**make_kwargs())
+            self.system.actors.add(lbf)
+            lbf.call_method("activate")
 
-    def test_raise_if_not_active(self):
-        class MockLBFluid(self.lb_class):
-            '''LB class mock that ignores runtime errors from agrid and tau.'''
-            @property
-            def agrid(self):
-                return 1.
-
-            @agrid.setter
-            def agrid(self, value):
-                pass
-
-            @property
-            def tau(self):
-                return 0.01
-
-            @tau.setter
-            def tau(self, value):
-                pass
-
-        self.check_raise_if_not_active(self.lb_class, False)
-        self.check_raise_if_not_active(MockLBFluid, True)
-
-    def check_raise_if_not_active(self, lb_class, mock):
-        lbf = lb_class(visc=1.0, dens=1.0, agrid=1.0, tau=0.1)
-
-        # check exceptions from LB actor
-        with self.assertRaises(RuntimeError):
-            lbf.density
-        with self.assertRaises(RuntimeError):
-            lbf.density = 0.2
-        with self.assertRaises(RuntimeError):
-            lbf.viscosity
-        with self.assertRaises(RuntimeError):
-            lbf.viscosity = 0.2
-        with self.assertRaises(RuntimeError):
-            lbf.bulk_viscosity
-        with self.assertRaises(RuntimeError):
-            lbf.bulk_viscosity = 0.2
-        with self.assertRaises(RuntimeError):
-            lbf.seed
-        with self.assertRaises(RuntimeError):
-            lbf.seed = 2
-        with self.assertRaises(RuntimeError):
-            lbf.kT
-        with self.assertRaises(RuntimeError):
-            lbf.kT = 2
-        with self.assertRaises(RuntimeError):
-            lbf.shape
-        if not mock:
-            with self.assertRaises(RuntimeError):
-                lbf.agrid
-            with self.assertRaises(RuntimeError):
-                lbf.agrid = 0.2
-            with self.assertRaises(RuntimeError):
-                lbf.tau
-            with self.assertRaises(RuntimeError):
-                lbf.tau = 0.01
-        with self.assertRaises(RuntimeError):
-            lbf.pressure_tensor
-        with self.assertRaises(NotImplementedError):
-            lbf.pressure_tensor = np.eye(3, 3)
-        with self.assertRaises(RuntimeError):
-            lbf.ext_force_density
-        with self.assertRaises(RuntimeError):
-            lbf.ext_force_density = [1, 1, 1]
-        with self.assertRaises(RuntimeError):
-            lbf.get_interpolated_velocity([0, 0, 0])
-
+    def test_node_exceptions(self):
+        lbf = self.lb_class(**self.params, **self.lb_params)
+        self.system.actors.add(lbf)
+        lb_node = lbf[0, 0, 0]
         # check exceptions from LB node
+        with self.assertRaisesRegex(RuntimeError, "Property 'boundary_force' is read-only"):
+            lb_node.boundary_force = [1, 2, 3]
+        with self.assertRaisesRegex(RuntimeError, "Property 'pressure_tensor' is read-only"):
+            lb_node.pressure_tensor = np.eye(3, 3)
+        with self.assertRaisesRegex(RuntimeError, "Property 'is_boundary' is read-only"):
+            lb_node.is_boundary = True
+        with self.assertRaisesRegex(NotImplementedError, "Cannot serialize LB fluid node objects"):
+            lb_node.__reduce__()
+        # check property types
+        array_locked = espressomd.utils.array_locked
+        self.assertIsInstance(lb_node.pressure_tensor, array_locked)
+        # self.assertIsInstance(lb_node.boundary_force, array_locked) # TODO
+        self.assertIsInstance(lb_node.velocity, array_locked)
+        self.assertIsInstance(lb_node.last_applied_force, array_locked)
+
+    def test_slice_exceptions(self):
+        lbf = self.lb_class(**self.params, **self.lb_params)
         self.system.actors.add(lbf)
-        node = lbf[0, 0, 0]
-        self.system.actors.remove(lbf)
-        with self.assertRaises(RuntimeError):
-            node.density
-        with self.assertRaises(RuntimeError):
-            node.density = 1.
-        with self.assertRaises(RuntimeError):
-            node.velocity
-        with self.assertRaises(RuntimeError):
-            node.velocity = [1, 1, 1]
-        with self.assertRaises(RuntimeError):
-            node.pressure_tensor
-        with self.assertRaises(NotImplementedError):
-            node.pressure_tensor = np.eye(3, 3)
-        with self.assertRaises(RuntimeError):
-            node.pressure_tensor_neq
-        with self.assertRaises(NotImplementedError):
-            node.pressure_tensor_neq = np.eye(3, 3)
-        with self.assertRaises(RuntimeError):
-            node.boundary
-        with self.assertRaises(NotImplementedError):
-            node.boundary = 1
-        with self.assertRaises(RuntimeError):
-            node.population
-        with self.assertRaises(RuntimeError):
-            node.population = np.zeros(19)
+        lb_slice = lbf[:, :, :]
+        # check exceptions from LB slice
+        with self.assertRaisesRegex(RuntimeError, "Property 'boundary_force' is read-only"):
+            lb_slice.boundary_force = [1, 2, 3]
+        with self.assertRaisesRegex(RuntimeError, "Property 'pressure_tensor' is read-only"):
+            lb_slice.pressure_tensor = np.eye(3, 3)
+        with self.assertRaisesRegex(RuntimeError, "Property 'is_boundary' is read-only"):
+            lb_slice.is_boundary = True
+        with self.assertRaisesRegex(NotImplementedError, 'Cannot serialize LB fluid slice objects'):
+            lb_slice.__reduce__()
+        with self.assertRaisesRegex(RuntimeError, "Unknown fluid property 'unknown'"):
+            lb_slice.call_method("get_value_shape", name="unknown")
+        # check property types
+        array_locked = espressomd.utils.array_locked
+        self.assertIsInstance(lb_slice.pressure_tensor, array_locked)
+        # self.assertIsInstance(lb_slice.boundary_force, array_locked) # TODO
+        self.assertIsInstance(lb_slice.velocity, array_locked)
+        self.assertIsInstance(lb_slice.last_applied_force, array_locked)
+        # check exceptions from python slices
+        with self.assertRaisesRegex(NotImplementedError, "Slices with step != 1 are not supported"):
+            lbf[:10:2, :, :]
+        with self.assertRaisesRegex(NotImplementedError, "Tuple-based indexing is not supported"):
+            lbf[:2, (0, 1), (0, 1)]
+        with self.assertRaisesRegex(AttributeError, "Cannot set properties of an empty .+ object"):
+            lbf[0:1, 0:1, 0:0].density = []
+
+    def test_lb_slice_set_get(self):
+        lbf = self.lb_class(**self.params, **self.lb_params)
+        self.system.actors.add(lbf)
+        ref_density = 1. + np.arange(np.prod(lbf.shape)).reshape(lbf.shape)
+        lbf[:, :, :].density = ref_density
+        densities = np.copy(lbf[:, :, :].density)
+        np.testing.assert_allclose(densities, ref_density, rtol=1e-5)
+        self.assertIsNone(lbf[:1, 0, 0].boundary[0])
+
+        # prepare various slicing operations
+        slices = []
+        for i in range(3):
+            slices.append([
+                slice(lbf.shape[i]), slice(0, lbf.shape[i]), slice(1, -1),
+                slice(0, 0), slice(5, 1), slice(0, -lbf.shape[i] + 1),
+                slice(-lbf.shape[i], None), slice(2, lbf.shape[i] - 1), 1])
+
+        # check gettters
+        for subset in itertools.product(*slices):
+            # skip indexing without any slice
+            if not any(isinstance(item, slice) for item in subset):
+                continue
+            np.testing.assert_allclose(
+                np.copy(lbf[subset].density), ref_density[subset], rtol=1e-5)
+
+        # check settters
+        for subset in itertools.product(*slices):
+            # skip indexing without any slice and skip slices with zero length
+            if not any(isinstance(item, slice) for item in subset) or any(
+                    isinstance(s, slice) and (s.start or 0) >= (s.stop or 0) for s in subset):
+                continue
+            lbf[:, :, :].density = ref_density
+            lbf[subset].density = -lbf[subset].density
+            densities = np.copy(lbf[:, :, :].density)
+            np.testing.assert_allclose(
+                densities[subset], -ref_density[subset], rtol=1e-5)
+            densities[subset] *= -1.
+            np.testing.assert_allclose(densities, ref_density, rtol=1e-5)
+
+        # empty slices
+        self.assertEqual(lbf[5:2, 0, 0].pressure_tensor.shape, (0, 3, 3))
+        self.assertEqual(lbf[5:2, 0:0, -1:-1].velocity.shape, (0, 0, 0, 3))
 
     def test_pressure_tensor_observable(self):
         """
@@ -212,44 +296,44 @@ def test_pressure_tensor_observable(self):
 
         """
         system = self.system
-        self.n_col_part = 1000
+        n_col_part = 1000
         system.part.add(
-            pos=np.random.random((self.n_col_part, 3)) * self.system.box_l[0],
-            v=np.random.random((self.n_col_part, 3)))
+            pos=np.random.random((n_col_part, 3)) * self.system.box_l[0],
+            v=np.random.random((n_col_part, 3)))
         system.thermostat.turn_off()
 
-        lbf = self.lb_class(
-            visc=self.params['viscosity'],
-            dens=self.params['dens'],
-            agrid=self.params['agrid'],
-            tau=system.time_step,
-            kT=1, ext_force_density=[0, 0, 0], seed=1)
+        lbf = self.lb_class(kT=1., seed=1, ext_force_density=[0, 0, 0],
+                            **self.params, **self.lb_params)
         system.actors.add(lbf)
         system.thermostat.set_lb(LB_fluid=lbf, seed=1)
         system.integrator.run(10)
-        pressure_tensor = np.zeros((3, 3))
-        agrid = self.params["agrid"]
-        for n in lbf.nodes():
-            pressure_tensor += n.pressure_tensor
-
-        pressure_tensor /= system.volume() / agrid**3
+        pressure_tensor = np.copy(
+            np.mean(lbf[:, :, :].pressure_tensor, axis=(0, 1, 2)))
 
         obs = espressomd.observables.LBFluidPressureTensor()
         obs_pressure_tensor = obs.calculate()
         np.testing.assert_allclose(
-            pressure_tensor, obs_pressure_tensor, atol=1E-7)
+            pressure_tensor, obs_pressure_tensor,
+            atol=self.atol, rtol=self.rtol)
         np.testing.assert_allclose(
-            np.copy(lbf.pressure_tensor), obs_pressure_tensor, atol=1E-10)
+            np.copy(lbf.pressure_tensor), obs_pressure_tensor,
+            atol=1e-12, rtol=1e-12)
+
+        self.assertIsInstance(
+            lbf[0, 0, 0].pressure_tensor, espressomd.utils.array_locked)
+        self.assertIsInstance(
+            lbf.pressure_tensor,
+            espressomd.utils.array_locked)
+        system.actors.remove(lbf)
+        with self.assertRaisesRegex(RuntimeError, 'LB not activated'):
+            obs.calculate()
 
     def test_lb_node_set_get(self):
-        lbf = self.lb_class(
-            kT=0.0,
-            visc=self.params['viscosity'],
-            dens=self.params['dens'],
-            agrid=self.params['agrid'],
-            tau=self.system.time_step,
-            ext_force_density=[0, 0, 0])
+        lbf = self.lb_class(kT=0.0, ext_force_density=[0, 0, 0], **self.params,
+                            **self.lb_params)
         self.system.actors.add(lbf)
+        self.assertAlmostEqual(
+            lbf[0, 0, 0].density, self.params['density'], delta=1e-4)
 
         shape_ref = np.copy(self.system.box_l) / self.params['agrid']
         np.testing.assert_array_equal(lbf.shape, shape_ref.astype(int))
@@ -264,65 +348,71 @@ def test_lb_node_set_get(self):
 
         self.assertEqual(lbf[3, 2, 1].index, (3, 2, 1))
         ext_force_density = [0.1, 0.2, 1.2]
+        last_applied_force = [0.2, 0.4, 0.6]
         lbf.ext_force_density = ext_force_density
-        lbf[1, 2, 3].velocity = v_fluid
+        node = lbf[1, 2, 3]
+        node.velocity = v_fluid
+        node.last_applied_force = last_applied_force
+        np.testing.assert_allclose(np.copy(node.velocity), v_fluid, atol=1e-4)
         np.testing.assert_allclose(
-            np.copy(lbf[1, 2, 3].velocity), v_fluid, atol=1e-4)
+            np.copy(node.last_applied_force), last_applied_force, atol=1e-4)
         np.testing.assert_allclose(
             np.copy(lbf.ext_force_density), ext_force_density, atol=1e-4)
 
+        self.assertEqual(lbf.kT, 0.0)
+        self.assertIsNone(lbf.rng_state)
+        with self.assertRaisesRegex(RuntimeError, "This LB instance is unthermalized"):
+            lbf.rng_state = 5
+        with self.assertRaisesRegex(ValueError, "Parameter 'rng_state' must be >= 0"):
+            lbf.rng_state = -5
+
     def test_parameter_change_without_seed(self):
-        lbf = self.lb_class(
-            visc=self.params['viscosity'],
-            dens=self.params['dens'],
-            agrid=self.params['agrid'],
-            tau=self.system.time_step,
-            ext_force_density=[0, 0, 0],
-            kT=1.0,
-            seed=42)
+        lbf = self.lb_class(kT=1.0, seed=42, **self.params, **self.lb_params)
         self.system.actors.add(lbf)
         self.system.thermostat.set_lb(LB_fluid=lbf, seed=23, gamma=2.0)
         self.system.thermostat.set_lb(LB_fluid=lbf, gamma=3.0)
+        actor = espressomd.electrostatics.DH(prefactor=1., kappa=1., r_cut=1.)
+        with self.assertRaisesRegex(Exception, "Temperature change not supported by LB"):
+            self.system.thermostat.turn_off()
+        with self.assertRaisesRegex(Exception, "Time step change not supported by LB"):
+            self.system.time_step /= 2.
+        with self.assertRaisesRegex(RuntimeError, "LB does not currently support handling changes of the MD cell geometry"):
+            self.system.actors.add(actor)
+        self.assertEqual(len(self.system.actors), 1)
 
     def test_grid_index(self):
-        lbf = self.lb_class(
-            visc=self.params['viscosity'],
-            dens=self.params['dens'],
-            agrid=self.params['agrid'],
-            tau=self.system.time_step,
-            ext_force_density=[0, 0, 0])
+        lbf = self.lb_class(**self.params, **self.lb_params)
         self.system.actors.add(lbf)
-        out_of_bounds = int(max(self.system.box_l) / self.params['agrid']) + 1
-        with self.assertRaises(ValueError):
-            lbf[out_of_bounds, 0, 0].velocity
-        with self.assertRaises(ValueError):
-            lbf[0, out_of_bounds, 0].velocity
-        with self.assertRaises(ValueError):
-            lbf[0, 0, out_of_bounds].velocity
-        # resize system
-        self.system.box_l = self.system.box_l + 1.
-        shape_ref = np.copy(self.system.box_l) / self.params['agrid']
-        np.testing.assert_array_equal(lbf.shape, shape_ref.astype(int))
+        # check ranges and out-of-bounds access
+        shape = lbf.shape
+        for i in range(3):
+            n = [0, 0, 0]
+            n[i] -= shape[i]
+            lbf[n[0], n[1], n[2]].velocity
+            self.assertEqual(lbf[tuple(n)], lbf[0, 0, 0])
+            for offset in (shape[i] + 1, -(shape[i] + 1)):
+                n = [0, 0, 0]
+                n[i] += offset
+                err_msg = rf"provided index \[{str(n)[1:-1]}\] is out of range for shape \[{str(list(shape))[1:-1]}\]"
+                with self.assertRaisesRegex(IndexError, err_msg):
+                    lbf[tuple(n)].velocity
+        # node index
+        node = lbf[1, 2, 3]
+        with self.assertRaisesRegex(RuntimeError, "Parameter 'index' is read-only"):
+            node.index = [2, 4, 6]
+        np.testing.assert_array_equal(node.index, [1, 2, 3])
         np.testing.assert_array_equal(
-            np.copy(lbf[out_of_bounds, 0, 0].velocity), 0.)
+            lbf[-1, -1, -1].index, np.array(shape) - 1)
 
     def test_incompatible_agrid(self):
         """
         LB lattice initialization must raise an exception when either box_l or
         local_box_l aren't integer multiples of agrid.
         """
-        lbf = self.lb_class(
-            visc=self.params['viscosity'],
-            dens=self.params['dens'],
-            agrid=self.params['agrid'] + 1e-6,
-            tau=self.system.time_step,
-            ext_force_density=[0, 0, 0])
-        print("\nTesting LB error messages:", file=sys.stderr)
-        sys.stderr.flush()
-        with self.assertRaises(Exception):
-            self.system.actors.add(lbf)
-        print("End of LB error messages", file=sys.stderr)
-        sys.stderr.flush()
+        with self.assertRaisesRegex(RuntimeError, "Box length not commensurate with agrid"):
+            params = self.params.copy()
+            params['agrid'] += 1e-6
+            self.lb_class(**params, **self.lb_params)
 
     def test_agrid_rounding(self):
         """Tests agrid*n ~= box_l for a case where rounding down is needed"""
@@ -334,25 +424,23 @@ def test_agrid_rounding(self):
         lj_sig = 1.0
         l = (n_part * 4. / 3. * np.pi * (lj_sig / 2.)**3 / phi)**(1. / 3.)
         system.box_l = [l] * 3 * np.array(system.cell_system.node_grid)
-        system.actors.add(self.lb_class(agrid=l / 31, dens=1,
-                                        visc=1, kT=0, tau=system.time_step))
+        lbf = self.lb_class(agrid=l / 31, density=1, kinematic_viscosity=1, kT=0,
+                            tau=system.time_step, **self.lb_params)
+        system.actors.add(lbf)
         system.integrator.run(steps=1)
         system.actors.clear()
         system.box_l = old_l
 
     def test_bool_operations_on_node(self):
-        lbf = self.lb_class(
-            kT=1.0, seed=42, visc=self.params['viscosity'],
-            dens=self.params['dens'],
-            agrid=self.params['agrid'],
-            tau=self.system.time_step)
+        lbf = self.lb_class(kT=1.0, seed=42, **self.params, **self.lb_params)
         self.system.actors.add(lbf)
         # test __eq()__ where a node is equal to itself and not equal to any
         # other node
         assert lbf[0, 0, 0] == lbf[0, 0, 0]
-        x, y, z = range(int(self.system.box_l[0])), range(
-            int(self.system.box_l[1])), range(int(self.system.box_l[2]))
-        nodes = [lbf[i, j, k] for i, j, k in itertools.product(x, y, z)]
+        shape = np.around(self.system.box_l / self.params["agrid"]).astype(int)
+        nodes = [
+            lbf[ijk] for ijk in itertools.product(
+                range(shape[0]), range(shape[1]), range(shape[2]))]
         nodes.remove(lbf[0, 0, 0])
         assert all(lbf[0, 0, 0] != node for node in nodes)
         # test __hash()__ intercept to identify nodes based on index rather
@@ -363,48 +451,162 @@ def test_bool_operations_on_node(self):
 
     @utx.skipIfMissingFeatures("EXTERNAL_FORCES")
     def test_viscous_coupling(self):
-        v_part = np.array([1, 2, 3])
-        v_fluid = np.array([1.2, 4.3, 0.2])
-        lbf = self.lb_class(
-            visc=self.params['viscosity'],
-            dens=self.params['dens'],
-            agrid=self.params['agrid'],
-            tau=self.system.time_step,
-            ext_force_density=[0, 0, 0])
+        lbf = self.lb_class(**self.params, **self.lb_params)
         self.system.actors.add(lbf)
-        if self.interpolation:
-            lbf.set_interpolation_order("quadratic")
-        self.system.thermostat.set_lb(
-            LB_fluid=lbf, seed=3, gamma=self.params['friction'])
-        p = self.system.part.add(
-            pos=[0.5 * self.params['agrid']] * 3, v=v_part, fix=3 * [True])
-        lbf[0, 0, 0].velocity = v_fluid
-        if self.interpolation:
-            v_fluid = lbf.get_interpolated_velocity(p.pos)
+        self.system.thermostat.set_lb(LB_fluid=lbf, seed=3, gamma=self.gamma)
+
+        # Random velocities
+        lbf[:, :, :].velocity = np.random.random((*lbf.shape, 3))
+        # Test several particle positions
+        for pos in ([0, 0, 0], self.system.box_l, self.system.box_l / 2,
+                    self.system.box_l / 2 - self.params['agrid'] / 2):
+            p = self.system.part.add(pos=pos, v=[1, 2, 3])
+            v_part = np.copy(p.v)
+
+            # In the first time step after a system change, LB coupling forces
+            # are ignored. Hence, the coupling position is shifted
+            coupling_pos = p.pos + self.system.time_step * p.v
+            v_fluid = np.copy(lbf.get_interpolated_velocity(pos=coupling_pos))
+            # Nodes to which forces will be interpolated
+            lb_nodes = tests_common.get_lb_nodes_around_pos(coupling_pos, lbf)
+
+            self.system.integrator.run(1)
+            # Check friction force
+            np.testing.assert_allclose(
+                np.copy(p.f), -self.gamma * (v_part - v_fluid), atol=1E-10)
+
+            # check particle/fluid force balance
+            applied_forces = np.array([n.last_applied_force for n in lb_nodes])
+            np.testing.assert_allclose(
+                np.sum(applied_forces, axis=0), -np.copy(p.f), atol=1E-10)
+
+            # Check that last_applied_force gets cleared
+            p.remove()
+            self.system.integrator.run(1)
+            applied_forces = np.array([n.last_applied_force for n in lb_nodes])
+            np.testing.assert_allclose(
+                np.sum(applied_forces, axis=0), [0, 0, 0])
+
+    def test_viscous_coupling_pairs(self):
+        lbf = self.lb_class(**self.params, **self.lb_params)
+        self.system.actors.add(lbf)
+        self.system.thermostat.set_lb(LB_fluid=lbf, seed=3, gamma=self.gamma)
+
+        # Random velocities
+        lbf[:, :, :].velocity = np.random.random((*lbf.shape, 3))
+        # Test several particle positions
+        agrid = self.params['agrid']
+        offset = -0.99 * np.array((agrid, agrid, agrid))
+        for pos in ([agrid / 2, agrid / 2, agrid / 2], self.system.box_l, self.system.box_l / 2,
+                    self.system.box_l / 2 - self.params['agrid'] / 2):
+            p1 = self.system.part.add(pos=pos, v=[1, 2, 3])
+            p2 = self.system.part.add(pos=pos + offset, v=[-2, 1, 0.3])
+
+            v_part1 = p1.v
+            v_part2 = p2.v
+            # In the first time step after a system change, LB coupling forces
+            # are ignored. Hence, the coupling position is shifted
+            coupling_pos1 = p1.pos + self.system.time_step * p1.v
+            coupling_pos2 = p2.pos + self.system.time_step * p2.v
+
+            v_fluid1 = lbf.get_interpolated_velocity(pos=coupling_pos1)
+            v_fluid2 = lbf.get_interpolated_velocity(pos=coupling_pos2)
+            # Nodes to which forces will be interpolated
+            lb_nodes1 = tests_common.get_lb_nodes_around_pos(
+                coupling_pos1, lbf)
+            lb_nodes2 = tests_common.get_lb_nodes_around_pos(
+                coupling_pos2, lbf)
+
+            all_coupling_nodes = [lbf[index] for index in set(
+                [n.index for n in (lb_nodes1 + lb_nodes2)])]
+            self.system.integrator.run(1)
+            # Check friction force
+            np.testing.assert_allclose(
+                np.copy(p1.f), -self.gamma * (v_part1 - v_fluid1), atol=1E-10)
+            np.testing.assert_allclose(
+                np.copy(p2.f), -self.gamma * (v_part2 - v_fluid2), atol=1E-10)
+
+            # check particle/fluid force balance
+            applied_forces = np.array(
+                [n.last_applied_force for n in all_coupling_nodes])
+            np.testing.assert_allclose(
+                np.sum(applied_forces, axis=0), -np.copy(p1.f) - np.copy(p2.f), atol=1E-10)
+
+            # Check that last_applied_force gets cleared
+            self.system.part.clear()
+            self.system.integrator.run(1)
+            applied_forces = np.array(
+                [n.last_applied_force for n in all_coupling_nodes])
+            np.testing.assert_allclose(
+                np.sum(applied_forces, axis=0), [0, 0, 0])
+
+    def test_thermalization_force_balance(self):
+        system = self.system
+
+        system.part.add(pos=np.random.random((1000, 3)) * system.box_l)
+        if espressomd.has_features("MASS"):
+            system.part.all().mass = 0.1 + np.random.random(len(system.part))
+
+        lbf = self.lb_class(kT=1.5, seed=4, **self.params, **self.lb_params)
+        system.actors.add(lbf)
+        system.thermostat.set_lb(LB_fluid=lbf, seed=3, gamma=self.gamma)
+
+        for _ in range(20):
+            system.integrator.run(1)
+            particle_force = np.sum(system.part.all().f, axis=0)
+            fluid_force = np.copy(
+                np.sum(lbf[:, :, :].last_applied_force, axis=(0, 1, 2)))
+            np.testing.assert_allclose(
+                particle_force, -fluid_force, rtol=self.rtol)
+
+    def test_force_interpolation(self):
+        lbf = self.lb_class(**self.params, **self.lb_params)
+
+        self.system.actors.add(lbf)
+        self.system.thermostat.set_lb(LB_fluid=lbf, seed=3, gamma=self.gamma)
+
+        position = np.array([1., 2., 3.])
+        position_lb_units = position / lbf.agrid
+        force = np.array([4., -5., 6.])
+        lbf.add_force_at_pos(pos=position, force=force)
+
         self.system.integrator.run(1)
-        np.testing.assert_allclose(
-            np.copy(p.f), -self.params['friction'] * (v_part - v_fluid), atol=1E-6)
+
+        # the force should be split equally across the 8 nearest vertices
+        n_couplings = 0
+        for n in lbf[:, :, :]:
+            if np.sum(np.abs(n.last_applied_force)):
+                fluid_force = np.copy(n.last_applied_force)
+                np.testing.assert_allclose(fluid_force, force / 8.)
+                distance = np.linalg.norm(n.index - position_lb_units)
+                self.assertLessEqual(int(np.round(distance**2)), 3)
+                n_couplings += 1
+        self.assertEqual(n_couplings, 8)
 
     @utx.skipIfMissingFeatures("EXTERNAL_FORCES")
     def test_ext_force_density(self):
         ext_force_density = [2.3, 1.2, 0.1]
-        lbf = self.lb_class(
-            visc=self.params['viscosity'],
-            dens=self.params['dens'],
-            agrid=self.params['agrid'],
-            tau=self.system.time_step,
-            ext_force_density=ext_force_density)
+        lbf = self.lb_class(ext_force_density=ext_force_density, **self.params,
+                            **self.lb_params)
         self.system.actors.add(lbf)
-        n_time_steps = 5
+        n_time_steps = 1
         self.system.integrator.run(n_time_steps)
         # ext_force_density is a force density, therefore v = ext_force_density
         # / dens * tau * (n_time_steps + 0.5)
         fluid_velocity = np.array(ext_force_density) * self.system.time_step * (
-            n_time_steps + 0.5) / self.params['dens']
-        for n in lbf.nodes():
+            n_time_steps + 0.5) / self.params['density']
+        # Check global linear momentum = density * volume * velocity
+        rtol = self.rtol
+        if hasattr(lbf, "single_precision") and lbf.single_precision:
+            rtol *= 10.
+        np.testing.assert_allclose(
+            np.copy(self.system.analysis.linear_momentum()),
+            fluid_velocity * self.params['density'] * self.system.volume(),
+            rtol=rtol)
+        # Check node velocities
+        for node_velocity in lbf[:, :, :].velocity.reshape((-1, 3)):
             np.testing.assert_allclose(
-                np.copy(n.velocity), fluid_velocity, atol=1E-6,
-                err_msg=f"Fluid node velocity not as expected on node {n.index}")
+                node_velocity, fluid_velocity, atol=1E-6)
 
     @utx.skipIfMissingFeatures("EXTERNAL_FORCES")
     def test_unequal_time_step(self):
@@ -414,68 +616,86 @@ def test_unequal_time_step(self):
         where particles don't move.
 
         """
-        p = self.system.part.add(pos=[0.1, 0.2, 0.3], fix=3 * [True])
-        ext_force_density = [2.3, 1.2, 0.1]
-        lbf = self.lb_class(
-            visc=self.params['viscosity'],
-            dens=self.params['dens'],
-            agrid=self.params['agrid'],
-            tau=self.params['time_step'],
-            ext_force_density=ext_force_density,
-            kT=0.)
-        sim_time = 100 * self.params['time_step']
+        p = self.system.part.add(pos=[0.1, 0.2, 0.3], fix=[True, True, True])
+        base_params = {}
+        base_params.update(
+            ext_force_density=[2.3, 1.2, 0.1],
+            kinematic_viscosity=self.params['kinematic_viscosity'],
+            density=self.params['density'],
+            agrid=self.params['agrid'])
+
+        def params_with_tau(tau):
+            params = base_params.copy()
+            params.update(tau=tau)
+            return params
+
+        lbf = self.lb_class(**params_with_tau(self.system.time_step),
+                            **self.lb_params)
+        sim_time = 100 * self.params['tau']
         self.system.actors.add(lbf)
         self.system.thermostat.set_lb(LB_fluid=lbf, gamma=0.1)
         self.system.integrator.run(
             int(round(sim_time / self.system.time_step)))
         probe_pos = np.array(self.system.box_l) / 2.
-        v1 = np.copy(lbf.get_interpolated_velocity(probe_pos))
+        v1 = np.copy(lbf.get_interpolated_velocity(pos=probe_pos))
         f1 = np.copy(p.f)
         self.system.actors.clear()
         # get fresh LBfluid and change time steps
-        lbf = self.lb_class(
-            visc=self.params['viscosity'],
-            dens=self.params['dens'],
-            agrid=self.params['agrid'],
-            tau=self.params['time_step'],
-            ext_force_density=ext_force_density)
-        self.system.actors.add(lbf)
-        self.system.thermostat.set_lb(LB_fluid=lbf, gamma=0.1)
-        # illegal time_step/ tau combinations
-        with self.assertRaises(ValueError):
-            lbf.tau = 0.5 * self.system.time_step
-        with self.assertRaises(ValueError):
-            lbf.tau = 1.1 * self.system.time_step
-        with self.assertRaises(ValueError):
-            self.system.time_step = 2. * lbf.get_params()["tau"]
-        with self.assertRaises(ValueError):
+        with self.assertRaises(Exception):
+            self.system.actors.add(
+                self.lb_class(**params_with_tau(0.5 * self.system.time_step),
+                              **self.lb_params))
+        self.system.actors.clear()
+        with self.assertRaises(Exception):
+            self.system.actors.add(
+                self.lb_class(**params_with_tau(1.1 * self.system.time_step),
+                              **self.lb_params))
+        self.system.actors.clear()
+
+        self.system.actors.add(
+            self.lb_class(**params_with_tau(self.system.time_step),
+                          **self.lb_params))
+
+        with self.assertRaisesRegex(ValueError, r"LB tau \(0\.0100[0-9]+\) must be >= MD time_step \(0\.0200[0-9]+\)"):
+            self.system.time_step = 2.0 * lbf.get_params()["tau"]
+        with self.assertRaisesRegex(ValueError, r"LB tau \(0\.0100[0-9]+\) must be an integer multiple of the MD time_step \(0\.0080[0-9]+\)"):
             self.system.time_step = 0.8 * lbf.get_params()["tau"]
-        lbf.tau = self.params['time_step']
-        self.system.time_step = 0.5 * self.params['time_step']
+
+        self.system.actors.clear()
+        self.system.time_step = 0.5 * self.params['tau']
+        lbf = self.lb_class(**params_with_tau(self.system.time_step),
+                            **self.lb_params)
+        self.system.actors.add(lbf)
         self.system.integrator.run(
             int(round(sim_time / self.system.time_step)))
-        self.system.time_step = self.params['time_step']
-        v2 = np.copy(lbf.get_interpolated_velocity(probe_pos))
+        v2 = np.copy(lbf.get_interpolated_velocity(pos=probe_pos))
         f2 = np.copy(p.f)
-        np.testing.assert_allclose(v1, v2, rtol=1e-5)
-        np.testing.assert_allclose(f1, f2, rtol=1e-5)
+        np.testing.assert_allclose(v1, v2, rtol=1e-2)
+        np.testing.assert_allclose(f1, f2, rtol=1e-2)
+
 
+@utx.skipIfMissingFeatures("WALBERLA")
+class LBTestWalberlaDoublePrecision(LBTest, ut.TestCase):
 
-class TestLBCPU(TestLB, ut.TestCase):
-    lb_class = espressomd.lb.LBFluid
+    """Test for the Walberla implementation of the LB in double-precision."""
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_lattice_class = espressomd.lb.LatticeWalberla
+    lb_params = {"single_precision": False}
     atol = 1e-10
+    rtol = 1e-7
 
 
-@utx.skipIfMissingGPU()
-class TestLBGPU(TestLB, ut.TestCase):
-    lb_class = espressomd.lb.LBFluidGPU
-    atol = 1e-7
+@utx.skipIfMissingFeatures("WALBERLA")
+class LBTestWalberlaSinglePrecision(LBTest, ut.TestCase):
 
-    @utx.skipIfMissingFeatures("EXTERNAL_FORCES")
-    def test_viscous_coupling_higher_order_interpolation(self):
-        self.interpolation = True
-        self.test_viscous_coupling()
-        self.interpolation = False
+    """Test for the Walberla implementation of the LB in single-precision."""
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_lattice_class = espressomd.lb.LatticeWalberla
+    lb_params = {"single_precision": True}
+    atol = 1e-7
+    rtol = 5e-5
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/lb_boundary.py b/testsuite/python/lb_boundary.py
index ae41eb045ce..5f1bf61dd83 100644
--- a/testsuite/python/lb_boundary.py
+++ b/testsuite/python/lb_boundary.py
@@ -22,121 +22,99 @@
 import espressomd
 import espressomd.lb
 import espressomd.shapes
-import espressomd.lbboundaries
-import itertools
 import numpy as np
 
 
 class LBBoundariesBase:
-    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
+    system = espressomd.System(box_l=[10.0, 5.0, 5.0])
     system.cell_system.skin = 0.1
 
     wall_shape1 = espressomd.shapes.Wall(normal=[1., 0., 0.], dist=2.5)
     wall_shape2 = espressomd.shapes.Wall(normal=[-1., 0., 0.], dist=-7.5)
 
     def setUp(self):
-        self.lbf = self.lb_class(visc=1.0, dens=1.0, agrid=0.5, tau=1.0)
+        self.lbf = self.lb_class(
+            kinematic_viscosity=1.0, density=1.0, agrid=0.5, tau=1.0,
+            **self.lb_params)
         self.system.actors.add(self.lbf)
 
     def tearDown(self):
-        self.system.lbboundaries.clear()
         self.system.actors.clear()
 
-    def test_add(self):
-        boundary = espressomd.lbboundaries.LBBoundary(shape=self.wall_shape1)
+    def check_boundary_flags(self, slip_velocity1, slip_velocity2):
+        def vbb2vel(values):
+            velocities = np.empty((*values.shape, 3), dtype=float)
+            for index in np.ndindex(*values.shape):
+                velocities[index] = values[index].velocity
+            return velocities
+        lbb1 = self.lbf[:5, :, :]
+        lbb2 = self.lbf[15:, :, :]
+        lbb3 = self.lbf[5:15, :, :]
+        ref_velocity1 = np.tile(slip_velocity1, [5, 10, 10, 1])
+        ref_velocity2 = np.tile(slip_velocity2, [5, 10, 10, 1])
+        np.testing.assert_equal(np.copy(lbb1.is_boundary), True)
+        np.testing.assert_equal(np.copy(lbb2.is_boundary), True)
+        np.testing.assert_equal(np.copy(lbb3.is_boundary), False)
+        np.testing.assert_allclose(np.copy(lbb1.velocity), ref_velocity1)
+        np.testing.assert_allclose(np.copy(lbb2.velocity), ref_velocity2)
+        np.testing.assert_allclose(vbb2vel(lbb1.boundary), ref_velocity1)
+        np.testing.assert_allclose(vbb2vel(lbb2.boundary), ref_velocity2)
+        self.assertTrue(self.lbf[4, 0, 0].is_boundary)
+        self.assertFalse(self.lbf[5, 0, 0].is_boundary)
+        self.assertFalse(self.lbf[14, 0, 0].is_boundary)
+        self.assertTrue(self.lbf[15, 0, 0].is_boundary)
+        self.lbf.clear_boundaries()
+        np.testing.assert_equal(np.copy(self.lbf[:, :, :].is_boundary), False)
 
-        self.system.lbboundaries.add(boundary)
-        self.assertEqual(boundary, self.system.lbboundaries[0])
-
-    def test_remove(self):
-        lbb = self.system.lbboundaries
-
-        b1 = lbb.add(
-            espressomd.lbboundaries.LBBoundary(shape=self.wall_shape1))
-        b2 = lbb.add(
-            espressomd.lbboundaries.LBBoundary(shape=self.wall_shape1))
-
-        lbb.remove(b1)
-
-        self.assertNotIn(b1, lbb)
-        self.assertIn(b2, lbb)
-
-    def test_size(self):
-        lbb = self.system.lbboundaries
-        self.assertEqual(lbb.size(), 0)
-
-        lbb.add(espressomd.lbboundaries.LBBoundary(shape=self.wall_shape1))
-        self.assertEqual(lbb.size(), 1)
-
-        lbb.add(espressomd.lbboundaries.LBBoundary(shape=self.wall_shape1))
-        self.assertEqual(lbb.size(), 2)
-
-    def test_getters(self):
-        boundary = espressomd.lbboundaries.LBBoundary(shape=self.wall_shape1)
-        with self.assertRaisesRegex(RuntimeError, "You probably tried to get the force of an lbboundary that was not added to system.lbboundaries"):
-            boundary.get_force()
-        self.system.lbboundaries.add(boundary)
-        np.testing.assert_equal(np.copy(boundary.get_force()), [0., 0., 0.])
-        self.assertIsNone(boundary.call_method('unknown'))
-
-    def test_empty(self):
-        lbb = self.system.lbboundaries
-        self.assertTrue(lbb.empty())
-
-        lbb.add(espressomd.lbboundaries.LBBoundary(shape=self.wall_shape1))
-        self.assertFalse(lbb.empty())
-
-    def test_clear(self):
-        lbb = self.system.lbboundaries
-
-        lbb.add(espressomd.lbboundaries.LBBoundary(shape=self.wall_shape1))
-        lbb.add(espressomd.lbboundaries.LBBoundary(shape=self.wall_shape1))
-
-        lbb.clear()
-
-        self.assertTrue(lbb.empty())
-
-    def check_boundary_flags(self, boundarynumbers):
-        rng = range(20)
-
-        for i in itertools.product(range(0, 5), rng, rng):
-            self.assertEqual(self.lbf[i].boundary, boundarynumbers[0])
+    def test_boundary_flags(self):
+        slip_velocity1 = 1e-3 * np.array([1., 2., 3.])
+        slip_velocity2 = 1e-3 * np.array([4., 5., 6.])
+        value_shape = tuple(self.lbf.shape) + (3,)
+        slip_velocity2_all = slip_velocity2 * np.ones(value_shape)
+        self.lbf.add_boundary_from_shape(self.wall_shape1, slip_velocity1)
+        self.lbf.add_boundary_from_shape(self.wall_shape2, slip_velocity2_all)
+        self.check_boundary_flags(slip_velocity1, slip_velocity2)
 
-        for i in itertools.product(range(5, 15), rng, rng):
-            self.assertEqual(self.lbf[i].boundary, boundarynumbers[1])
+    def test_union(self):
+        union = espressomd.shapes.Union()
+        union.add([self.wall_shape1, self.wall_shape2])
 
-        for i in itertools.product(range(15, 20), rng, rng):
-            self.assertEqual(self.lbf[i].boundary, boundarynumbers[2])
+        slip_velocity = 1e-3 * np.array([1., 2., 3.])
+        self.lbf.add_boundary_from_shape(union, slip_velocity)
+        self.check_boundary_flags(slip_velocity, slip_velocity)
 
-        self.system.lbboundaries.clear()
-        for i in itertools.product(rng, rng, rng):
-            self.assertEqual(self.lbf[i].boundary, 0)
+    def test_exceptions(self):
+        with self.assertRaisesRegex(TypeError, "Parameter 'boundary_type' must be a subclass of VelocityBounceBack"):
+            self.lbf.add_boundary_from_shape(
+                shape=self.wall_shape1, velocity=[0., 0., 0.],
+                boundary_type=self.lb_class)
+        with self.assertRaisesRegex(ValueError, "expected an espressomd.shapes.Shape"):
+            self.lbf.add_boundary_from_shape(
+                shape=self.lbf, velocity=[0., 0., 0.],
+                boundary_type=espressomd.lb.VelocityBounceBack)
+        with self.assertRaisesRegex(ValueError, r"Cannot process velocity value grid of shape \(4,\)"):
+            self.lbf.add_boundary_from_shape(
+                shape=self.wall_shape1, velocity=[0., 0., 0., 0.],
+                boundary_type=espressomd.lb.VelocityBounceBack)
+        self.lbf.add_boundary_from_shape(self.wall_shape1, [0., 0., 0.])
 
-    def test_boundary_flags(self):
-        lbb = self.system.lbboundaries
 
-        lbb.add(espressomd.lbboundaries.LBBoundary(shape=self.wall_shape1))
-        lbb.add(espressomd.lbboundaries.LBBoundary(shape=self.wall_shape2))
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBBoundariesWalberlaDoublePrecision(LBBoundariesBase, ut.TestCase):
 
-        self.check_boundary_flags([1, 0, 2])
+    """Test for the Walberla implementation of the LB in double-precision."""
 
-    def test_union(self):
-        union = espressomd.shapes.Union()
-        union.add([self.wall_shape1, self.wall_shape2])
-        self.system.lbboundaries.add(
-            espressomd.lbboundaries.LBBoundary(shape=union))
-        self.check_boundary_flags([1, 0, 1])
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
 
 
-@utx.skipIfMissingFeatures(["LB_BOUNDARIES"])
-class LBBoundariesCPU(LBBoundariesBase, ut.TestCase):
-    lb_class = espressomd.lb.LBFluid
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBBoundariesWalberlaSinglePrecision(LBBoundariesBase, ut.TestCase):
 
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(["LB_BOUNDARIES_GPU"])
-class LBBoundariesGPU(LBBoundariesBase, ut.TestCase):
-    lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/lb_boundary_velocity.py b/testsuite/python/lb_boundary_velocity.py
index 8bb37402e47..c5374abd1eb 100644
--- a/testsuite/python/lb_boundary_velocity.py
+++ b/testsuite/python/lb_boundary_velocity.py
@@ -18,45 +18,318 @@
 #
 
 import espressomd.lb
-import espressomd.lbboundaries
 import espressomd.shapes
 import unittest as ut
 import unittest_decorators as utx
+import numpy as np
+import itertools
 
 
-@utx.skipIfMissingFeatures(["LB_BOUNDARIES"])
+@utx.skipIfMissingFeatures(["WALBERLA"])
 class LBBoundaryVelocityTest(ut.TestCase):
-
-    """Test slip velocity of boundaries.
-
-       In this simple test, a wall with slip velocity is
-       added and the fluid is checked if it has the same velocity.
+    """
+    Various tests to check the interaction of lb velocity boundary conditions and the fluid
     """
 
-    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
-    system.time_step = .5
+    lb_params = {'agrid': 0.6,
+                 'density': 0.5,
+                 'kinematic_viscosity': 3.2,
+                 'tau': 0.7}
+    system = espressomd.System(box_l=3 * [8 * lb_params['agrid']])
+    system.time_step = lb_params['tau']
     system.cell_system.skin = 0.1
 
-    def test(self):
-        system = self.system
+    def tearDown(self):
+        self.system.actors.clear()
+
+    def setUp(self):
+        self.lb_fluid = espressomd.lb.LBFluidWalberla(**self.lb_params)
+        self.system.actors.add(self.lb_fluid)
+
+    def check_wall_slip(self, v_boundary, atol):
+        """
+        Check that the fluid adopts the velocity set by the boundary conditions.
+        """
+
+        agrid = self.lb_params['agrid']
+        wall_shape_left = espressomd.shapes.Wall(normal=[1, 0, 0], dist=agrid)
+        wall_shape_right = espressomd.shapes.Wall(
+            normal=[-1, 0, 0], dist=-(self.system.box_l[0] - agrid))
+        for shape in [wall_shape_left, wall_shape_right]:
+            self.lb_fluid.add_boundary_from_shape(shape, v_boundary)
 
-        lb_fluid = espressomd.lb.LBFluid(
-            agrid=2.0, dens=.5, visc=3.0, tau=0.5)
-        system.actors.add(lb_fluid)
+        # fluid in contact with moving boundary adopts same velocity
+        self.system.integrator.run(200)
+        v_fluid = np.copy(self.lb_fluid[2, 1, 3].velocity)
+        np.testing.assert_allclose(v_fluid, v_boundary, atol=atol)
 
+        # velocity in the middle needs to propagate first
+        self.system.integrator.run(200)
+        v_fluid = np.copy(self.lb_fluid[2, 1, 3].velocity)
+        np.testing.assert_allclose(v_fluid, v_boundary, atol=atol)
+
+    def test_wall_slip_parallel(self):
+        v_boundary = [0, 0, 0.07]
+        self.check_wall_slip(v_boundary, 2e-4)
+
+    def test_wall_slip_nonparallel(self):
         v_boundary = [0.03, 0.02, 0.01]
+        self.check_wall_slip(v_boundary, 5e-4)
+
+    def test_boundary_readout(self):
+        """
+        Test the read part of the boundary property of lb nodes.
+        """
+        v_boundary = [0.03, 0.05, 0.07]
+        wall_shape = espressomd.shapes.Wall(
+            normal=[1, 0, 0], dist=self.lb_params['agrid'])
+        self.lb_fluid.add_boundary_from_shape(wall_shape, v_boundary)
+
+        # check non_boundary node
+        bound_cond = self.lb_fluid[4, 4, 4].boundary
+        self.assertIsNone(bound_cond)
+
+        # on boundary
+        bound_cond = self.lb_fluid[0, 2, 4].boundary
+        np.testing.assert_array_almost_equal(bound_cond.velocity, v_boundary)
+
+        # TODO WALBERLA boundary_force
+
+    def test_velocity_bounce_back_class(self):
+        """
+        Test setters and getters of :class:`espressomd.lb.VelocityBounceBack`
+        """
+        with self.assertRaises(ValueError):
+            bound_cond = espressomd.lb.VelocityBounceBack([1, 2])
+        v = [1, 2, 17.4]
+        bound_cond = espressomd.lb.VelocityBounceBack(v)
+        np.testing.assert_array_almost_equal(bound_cond.velocity, v)
+
+    def test_boundary_setting(self):
+        """
+        Test setting and un-setting individual lb boundary nodes.
+        """
+        v_boundary = [0.02, 0.01, 0.03]
+        bound_cond = espressomd.lb.VelocityBounceBack(v_boundary)
+
+        with self.assertRaises(TypeError):
+            self.lb_fluid[1, 2, 3].boundary = 17
+        with self.assertRaises(TypeError):
+            self.lb_fluid[1, 2, 3].boundary = np.array([1, 2, 3])
+
+        self.lb_fluid[1, 2, 3].boundary = bound_cond
+        np.testing.assert_array_almost_equal(
+            self.lb_fluid[1, 2, 3].boundary.velocity, bound_cond.velocity)
+
+        self.lb_fluid[1, 2, 3].boundary = None
+        self.assertIsNone(self.lb_fluid[1, 2, 3].boundary)
+
+    def test_nodes_inside_shape_line(self):
+        """
+        Test if the ``get_nodes_inside_shape`` method correctly identifies
+        the grid points inside a line.
+        """
+        agrid = self.lb_params['agrid']
+        cyl = espressomd.shapes.Cylinder(center=agrid * np.array([1.5, 2.5, 5.5]),
+                                         axis=[0, 0, 1],
+                                         length=2.1 * agrid,
+                                         radius=0.5 * agrid)
+        nodes_in_boundary = self.lb_fluid.get_nodes_inside_shape(cyl)
+        idxs_in_boundary = set(tuple(node.index) for node in nodes_in_boundary)
+
+        idx_ref = {(1, 2, 5), (1, 2, 4), (1, 2, 6)}
+        self.assertSetEqual(idxs_in_boundary, idx_ref)
+
+    def test_nodes_inside_shape_cylinder(self):
+        """
+        Test if the ``get_nodes_inside_shape`` method correctly identifies
+        the grid points inside a cylinder.
+        """
+        agrid = self.lb_params['agrid']
+        cyl = espressomd.shapes.Cylinder(center=agrid * np.array([1.5, 1.5, 0.5]),
+                                         axis=[0, 0, 1],
+                                         length=2.0 * self.system.box_l[2],
+                                         radius=2.0 * agrid)
+        nodes_in_boundary = self.lb_fluid.get_nodes_inside_shape(cyl)
+        idxs_in_boundary = list(tuple(node.index)
+                                for node in nodes_in_boundary)
+
+        for node in idxs_in_boundary:
+            self.assertIn(node[0], [0, 1, 2])
+            self.assertIn(node[1], [0, 1, 2])
+            self.assertIn(node[2], np.arange(8))
+
+    def test_nodes_inside_shape_cube(self):
+        """
+        Test if the ``get_nodes_inside_shape`` method correctly identifies
+        the grid points inside a cube.
+        """
+        agrid = self.lb_params['agrid']
+        prism = espressomd.shapes.Rhomboid(a=2 * agrid * np.array([1, 0, 0]),
+                                           b=3 * agrid * np.array([0, 1, 0]),
+                                           c=4 * agrid * np.array([0, 0, 1]),
+                                           corner=agrid * np.array([1, 1, 1]),
+                                           direction=1)
+        nodes_in_boundary = self.lb_fluid.get_nodes_inside_shape(prism)
+        idxs_in_boundary = set(tuple(node.index) for node in nodes_in_boundary)
+
+        idx_ref = set(itertools.product(range(1, 3), range(1, 4), range(1, 5)))
+        self.assertSetEqual(idxs_in_boundary, idx_ref)
+
+    def test_shape_bitmask(self):
+        """
+        Test if the ``get_shape_bitmask`` method correctly identifies the grid
+        points inside a shape and matches the LB ``is_boundary`` property.
+        """
+        def get_masks(shape):
+            """
+            Get the shape mask and the LB boundary mask.
+            """
+            self.lb_fluid.add_boundary_from_shape(shape)
+            lb_bitmask = np.copy(self.lb_fluid[:, :, :].is_boundary)
+            shape_bitmask = self.lb_fluid.get_shape_bitmask(shape)
+            self.lb_fluid.clear_boundaries()
+            return lb_bitmask.astype(int), shape_bitmask.astype(int)
+
+        agrid = self.lb_params['agrid']
+
+        # check a prism
+        for nudge_corner in (0.5 + 1e-6, 1.0, 1.5 - 1e-6):
+            shape = espressomd.shapes.Rhomboid(
+                a=2 * agrid * np.array([1, 0, 0]),
+                b=3 * agrid * np.array([0, 1, 0]),
+                c=4 * agrid * np.array([0, 0, 1]),
+                corner=agrid * nudge_corner * np.array([1, 1, 1]),
+                direction=1)
+            lb_bitmask, shape_bitmask = get_masks(shape)
+            np.testing.assert_array_equal(shape_bitmask, lb_bitmask)
+            np.testing.assert_array_equal(shape_bitmask[1:3, 1:4, 1:5], 1)
+            shape_bitmask[1:3, 1:4, 1:5] = 0
+            np.testing.assert_array_equal(shape_bitmask, 0)
+
+        # check a sphere
+        for nudge_radius in (-0.1, -0.01, 0., 0.01, 0.1):
+            for nudge_center in ([0.1, 0., 0.], [0., 0.15, 0.20]):
+                shape = espressomd.shapes.Sphere(
+                    center=4 * agrid * np.array([1, 1, 1]) + nudge_center,
+                    radius=3 * agrid + nudge_radius)
+                lb_bitmask, shape_bitmask = get_masks(shape)
+                np.testing.assert_array_equal(shape_bitmask, lb_bitmask)
+
+    def test_edge_detection_x(self):
+        self.check_edge_detection(0)
+
+    def test_edge_detection_y(self):
+        self.check_edge_detection(1)
+
+    def test_edge_detection_z(self):
+        self.check_edge_detection(2)
+
+    def check_edge_detection(self, axis):
+        """
+        Test if the ``edge_detection`` method correctly identifies the grid
+        points on the surface of a cube and on the surface of a square
+        column (finite or infinite, periodic or aperiodic).
+        """
+        def get_surface_indices(mask, periodicity):
+            idx = espressomd.lb.edge_detection(mask, periodicity)
+            return set(map(tuple, idx))
+
+        def roll_product(a, b, c):
+            """
+            Calculate ``itertools.product`` of 3 objects that are rolled.
+            """
+            collection = np.array([list(a), list(b), list(c)], dtype=object)
+            return itertools.product(*np.roll(collection, axis))
+
+        def create_column_shape_roll(lengths, corner):
+            """
+            Create a prism with lengths and corner that are rolled.
+            """
+            lengths = np.roll(lengths, axis)
+            corner = np.roll(corner, axis)
+            return espressomd.shapes.Rhomboid(
+                a=lengths[0] * agrid * np.array([1, 0, 0]),
+                b=lengths[1] * agrid * np.array([0, 1, 0]),
+                c=lengths[2] * agrid * np.array([0, 0, 1]),
+                corner=agrid * corner,
+                direction=1)
+
+        agrid = self.lb_params['agrid']
+        periodic = np.roll([True, True, True], axis)
+        aperiodic = np.roll([False, False, False], axis)
+
+        # check a simple cube
+        cube = create_column_shape_roll([4, 4, 4], [1, 1, 1])
+        self.lb_fluid.add_boundary_from_shape(cube)
+        cube_mask = np.copy(self.lb_fluid[:, :, :].is_boundary.astype(bool))
+        idx_ref = set(roll_product(range(1, 5), range(1, 5), range(1, 5)))
+        for item in roll_product(range(2, 4), range(2, 4), range(2, 4)):
+            idx_ref.remove(item)
+
+        idxs_on_surface = get_surface_indices(cube_mask, periodic)
+        self.assertSetEqual(idxs_on_surface, idx_ref)
+
+        self.lb_fluid.clear_boundaries()
+
+        # create an infinite square column
+        col = create_column_shape_roll([8, 4, 4], [0, 1, 1])
+        self.lb_fluid.add_boundary_from_shape(col)
+        col_mask = np.copy(self.lb_fluid[:, :, :].is_boundary.astype(bool))
+        idx_ref = set(roll_product(range(0, 8), range(1, 5), range(1, 5)))
+        for item in roll_product(range(0, 8), range(2, 4), range(2, 4)):
+            idx_ref.remove(item)
+
+        # with periodicity: check neither ends are in contact with fluid
+        idxs_on_surface = get_surface_indices(col_mask, periodic)
+        self.assertSetEqual(idxs_on_surface, idx_ref)
+
+        # without periodicity: check neither ends are in contact with fluid
+        idxs_on_surface = get_surface_indices(col_mask, aperiodic)
+        self.assertSetEqual(idxs_on_surface, idx_ref)
+
+        self.lb_fluid.clear_boundaries()
+
+        # create a finite square column; both ends of the columns are in
+        # contact with a thin slice of fluid
+        col = create_column_shape_roll([7, 4, 4], [0, 1, 1])
+        self.lb_fluid.add_boundary_from_shape(col)
+        col_mask = np.copy(self.lb_fluid[:, :, :].is_boundary.astype(bool))
+        idx_ref = set(roll_product(range(0, 7), range(1, 5), range(1, 5)))
 
-        wall_shape = espressomd.shapes.Wall(normal=[1, 2, 3], dist=0.5)
-        wall = espressomd.lbboundaries.LBBoundary(
-            shape=wall_shape, velocity=v_boundary)
-        system.lbboundaries.add(wall)
+        # with periodicity: check both ends are in contact with fluid
+        for item in roll_product(range(1, 6), range(2, 4), range(2, 4)):
+            idx_ref.remove(item)
+        idxs_on_surface = get_surface_indices(col_mask, periodic)
+        self.assertSetEqual(idxs_on_surface, idx_ref)
 
-        system.integrator.run(2000)
+        # without periodicity: check one end of the column is no longer in
+        # contact with the fluid
+        for item in roll_product(range(0, 1), range(2, 4), range(2, 4)):
+            idx_ref.remove(item)
+        idxs_on_surface = get_surface_indices(col_mask, aperiodic)
+        self.assertSetEqual(idxs_on_surface, idx_ref)
 
-        v_fluid = lb_fluid[1, 0, 0].velocity
-        self.assertAlmostEqual(v_fluid[0], v_boundary[0], places=3)
-        self.assertAlmostEqual(v_fluid[1], v_boundary[1], places=3)
-        self.assertAlmostEqual(v_fluid[2], v_boundary[2], places=3)
+    def test_calc_cylinder_tangential_vectors(self):
+        """
+        Test the ``calc_cylinder_tangential_vectors`` method.
+        """
+        agrid = 1.
+        offset = 0.5
+        center = np.array(3 * [offset])
+        node_indices = np.array([[0, 0, 0],
+                                 [2, 0, 0],
+                                 [0, 2, 0],
+                                 [-2, 0, 0],
+                                 [0, -2, 0]])
+        ref_tangents = np.array([[0, 0, 0],
+                                 [0, 1, 0],
+                                 [-1, 0, 0],
+                                 [0, -1, 0],
+                                 [1, 0, 0]])
+        tangents = espressomd.lb.calc_cylinder_tangential_vectors(
+            center, agrid, offset, node_indices)
+        np.testing.assert_array_almost_equal(tangents, ref_tangents)
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/lb_boundary_volume_force.py b/testsuite/python/lb_boundary_volume_force.py
index ca3cfdb9f21..26192c73a34 100644
--- a/testsuite/python/lb_boundary_volume_force.py
+++ b/testsuite/python/lb_boundary_volume_force.py
@@ -22,10 +22,7 @@
 import numpy as np
 
 import espressomd.lb
-import espressomd.lbboundaries
 import espressomd.shapes
-import tests_common
-
 
 AGRID = 0.5
 EXT_FORCE = np.array([-.01, 0.02, 0.03])
@@ -33,8 +30,8 @@
 DENS = 1.5
 TIME_STEP = 0.05
 LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': VISC,
+             'density': DENS,
+             'kinematic_viscosity': VISC,
              'tau': TIME_STEP,
              'ext_force_density': EXT_FORCE}
 
@@ -50,11 +47,10 @@ class LBBoundaryForceCommon:
     system.cell_system.skin = 0.4 * AGRID
 
     def setUp(self):
-        self.lbf = self.lb_class(**LB_PARAMS)
+        self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
         self.system.actors.add(self.lbf)
 
     def tearDown(self):
-        self.system.lbboundaries.clear()
         self.system.actors.clear()
 
     def test(self):
@@ -67,44 +63,52 @@ def test(self):
         wall_shape1 = espressomd.shapes.Wall(normal=[1, 0, 0], dist=AGRID)
         wall_shape2 = espressomd.shapes.Wall(
             normal=[-1, 0, 0], dist=-(self.system.box_l[0] - AGRID))
-        wall1 = espressomd.lbboundaries.LBBoundary(shape=wall_shape1)
-        wall2 = espressomd.lbboundaries.LBBoundary(shape=wall_shape2)
 
-        self.system.lbboundaries.add(wall1)
-        self.system.lbboundaries.add(wall2)
-        fluid_nodes = tests_common.count_fluid_nodes(self.lbf)
+        fluid_nodes = np.sum(np.logical_not(
+            self.lbf[:, :, :].is_boundary).astype(int))
+        self.lbf.add_boundary_from_shape(wall_shape1)
+        self.lbf.add_boundary_from_shape(wall_shape2)
+
+        # TODO WALBERLA: (#4381)
+        self.skipTest("boundary forces not implemented at the moment")
 
         self.system.integrator.run(20)
         diff = float("inf")
         old_val = float("inf")
         while diff > 0.002:
             self.system.integrator.run(10)
-            new_val = wall1.get_force()[0]
+            new_val = self.lbf.boundary['wall1'].get_force()[0]
             diff = abs(new_val - old_val)
             old_val = new_val
 
         expected_force = fluid_nodes * AGRID**3 * \
             np.copy(self.lbf.ext_force_density)
-        measured_force = np.array(wall1.get_force()) + \
-            np.array(wall2.get_force())
-        np.testing.assert_allclose(measured_force, expected_force, atol=2E-2)
+        measured_force = np.array(self.lbf.boundary['wall1'].get_force()) + \
+            np.array(self.lbf.boundary['wall2'].get_force())
+        # TODO WALBERLA: the force converges to 90% of the expected force
+        np.testing.assert_allclose(
+            measured_force,
+            expected_force * 0.9,
+            atol=1E-10)
 
 
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES', 'EXTERNAL_FORCES'])
-class LBCPUBoundaryForce(LBBoundaryForceCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBBoundaryForceWalberla(LBBoundaryForceCommon, ut.TestCase):
 
-    """Test for the CPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in double-precision."""
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES_GPU', 'EXTERNAL_FORCES'])
-class LBGPUBoundaryForce(LBBoundaryForceCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBBoundaryForceWalberlaSinglePrecision(
+        LBBoundaryForceCommon, ut.TestCase):
 
-    """Test for the GPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-    lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
 
 
 if __name__ == '__main__':
diff --git a/testsuite/python/lb_buoyancy_force.py b/testsuite/python/lb_buoyancy_force.py
index d24fa3f53ed..6e0f6b3e55e 100644
--- a/testsuite/python/lb_buoyancy_force.py
+++ b/testsuite/python/lb_buoyancy_force.py
@@ -18,14 +18,11 @@
 #
 
 import espressomd
-import espressomd.lbboundaries
 import espressomd.shapes
 import unittest as ut
 import unittest_decorators as utx
 import numpy as np
 
-import tests_common
-
 # Define the LB Parameters
 TIME_STEP = 0.01
 AGRID = 0.5
@@ -35,8 +32,8 @@
 BOX_SIZE = 18 * AGRID
 
 LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': KVISC,
+             'density': DENS,
+             'kinematic_viscosity': KVISC,
              'tau': TIME_STEP,
              'ext_force_density': [0, DENS * G, 0]}
 # System setup
@@ -54,31 +51,26 @@ class LBBuoyancy:
     system.cell_system.skin = 0.01
 
     def setUp(self):
-        self.lbf = self.lb_class(**LB_PARAMS)
+        self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
         self.system.actors.add(self.lbf)
 
     def tearDown(self):
         self.system.actors.clear()
-        self.system.lbboundaries.clear()
 
     def test(self):
         # Setup walls
         for i in range(3):
             n = np.zeros(3)
             n[i] = 1
-            self.system.lbboundaries.add(espressomd.lbboundaries.LBBoundary(
-                                         shape=espressomd.shapes.Wall(
-                                             normal=-n, dist=-(self.system.box_l[i] - AGRID))))
-
-            self.system.lbboundaries.add(espressomd.lbboundaries.LBBoundary(
-                                         shape=espressomd.shapes.Wall(
-                                             normal=n, dist=AGRID)))
+            self.lbf.add_boundary_from_shape(espressomd.shapes.Wall(
+                normal=-n, dist=-(self.system.box_l[i] - AGRID)))
+            self.lbf.add_boundary_from_shape(
+                espressomd.shapes.Wall(normal=n, dist=AGRID))
 
         # setup sphere without slip in the middle
-        sphere = espressomd.lbboundaries.LBBoundary(shape=espressomd.shapes.Sphere(
-            radius=RADIUS, center=self.system.box_l / 2, direction=1))
-
-        self.system.lbboundaries.add(sphere)
+        sphere_shape = espressomd.shapes.Sphere(
+            radius=RADIUS, center=self.system.box_l / 2, direction=1)
+        self.lbf.add_boundary_from_shape(sphere_shape)
 
         sphere_volume = 4. / 3. * np.pi * RADIUS**3
 
@@ -87,17 +79,18 @@ def test(self):
         self.system.integrator.run(100)
         while True:
             self.system.integrator.run(10)
-            force = np.linalg.norm(sphere.get_force())
+            force = np.linalg.norm(self.lbf.boundary['sphere'].get_force())
             if np.linalg.norm(force - last_force) < 0.01:
                 break
             last_force = force
 
         # Check force balance
         boundary_force = np.zeros(3)
-        for b in self.system.lbboundaries:
+        for b in self.lbf.boundary:
             boundary_force += b.get_force()
 
-        fluid_nodes = tests_common.count_fluid_nodes(self.lbf)
+        fluid_nodes = np.sum(np.logical_not(
+            self.lbf[:, :, :].is_boundary).astype(int))
         fluid_volume = fluid_nodes * AGRID**3
         applied_force = fluid_volume * np.array(LB_PARAMS['ext_force_density'])
 
@@ -110,19 +103,26 @@ def test(self):
         expected_force = np.array(
             [0, -sphere_volume * DENS * G, 0])
         np.testing.assert_allclose(
-            np.copy(sphere.get_force()), expected_force,
+            np.copy(self.lbf.boundary['sphere'].get_force()), expected_force,
             atol=np.linalg.norm(expected_force) * 0.02)
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(["LB_BOUNDARIES_GPU", "EXTERNAL_FORCES"])
-class LBGPUBuoyancy(LBBuoyancy, ut.TestCase):
-    lb_class = espressomd.lb.LBFluidGPU
+@utx.skipIfMissingFeatures(["EXTERNAL_FORCES", "WALBERLA"])
+class LBBuoyancyWalberla(LBBuoyancy, ut.TestCase):
+
+    """Test for the Walberla implementation of the LB in double-precision."""
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+
+
+@utx.skipIfMissingFeatures(["EXTERNAL_FORCES", "WALBERLA"])
+class LBBuoyancyWalberlaSinglePrecision(LBBuoyancy, ut.TestCase):
 
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-@utx.skipIfMissingFeatures(["LB_BOUNDARIES", "EXTERNAL_FORCES"])
-class LBCPUBuoyancy(LBBuoyancy, ut.TestCase):
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/lb_circular_couette.py b/testsuite/python/lb_circular_couette.py
index fecf7c5d130..fb90a055a20 100644
--- a/testsuite/python/lb_circular_couette.py
+++ b/testsuite/python/lb_circular_couette.py
@@ -21,34 +21,15 @@
 import unittest_decorators as utx
 import numpy as np
 
-import espressomd.math
 import espressomd.lb
-import espressomd.lbboundaries
-import espressomd.observables
 import espressomd.shapes
-import espressomd.accumulators
+import espressomd.observables
+import espressomd.math
+
 
 AGRID = .5
-VISC = 2.7
-DENS = 1.7
 TIME_STEP = 0.1
-BOX_L = 16.0
-EFFECTIVE_RADIUS = BOX_L / 2.0 - 1.0
-LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': VISC,
-             'tau': TIME_STEP}
-
-OBS_PARAMS = {'n_r_bins': 12,
-              'n_phi_bins': 1,
-              'n_z_bins': 1,
-              'min_r': 1.0,
-              'min_phi': -np.pi,
-              'min_z': 0.0,
-              'max_r': EFFECTIVE_RADIUS,
-              'max_phi': np.pi,
-              'max_z': BOX_L / 2.,
-              'sampling_density': 1.0}
+GRID_SIZE = np.array([63, 63, 4])
 
 
 def taylor_couette(v1, v2, r1, r2):
@@ -61,72 +42,43 @@ def taylor_couette(v1, v2, r1, r2):
     return a, b
 
 
+@utx.skipIfMissingFeatures(["WALBERLA"])
 class LBCircularCouetteCommon:
 
-    """
-    Check the lattice-Boltzmann velocity-driven flow in a cylindrical
-    constraint by comparing to the analytical solution.
-    """
-
-    system = espressomd.System(box_l=[BOX_L, BOX_L, BOX_L / 2.])
+    system = espressomd.System(box_l=(GRID_SIZE + [1, 1, 0]) * AGRID)
     system.time_step = TIME_STEP
-    system.cell_system.skin = 0.4 * AGRID
-    params = {'axis': [0, 0, 1],
-              'orientation': [1, 0, 0]}
+    system.cell_system.skin = 0.1
+    system.periodicity = [False, False, True]
 
     def tearDown(self):
         self.system.actors.clear()
-        self.system.lbboundaries.clear()
 
     def test_taylor_couette_flow(self):
         """
-        Rotate a shell filled with fluid with a non-rotating rod at the center.
-        The solution to the Navier-Stokes equation, assuming an infinite rod,
-        is the Taylor-Couette equation.
+        Rotate a rod in a cavity filled with fluid. The solution to the
+        Navier-Stokes equation, assuming an infinite rod, is the
+        Taylor-Couette equation.
         """
 
-        # disable periodicity except in the flow direction
-        self.system.periodicity = np.logical_not(self.params['axis'])
-        lbf = self.lb_class(**LB_PARAMS)
-        self.system.actors.add(lbf)
-
-        # create an outer cylinder that is rotating; this is achieved by
-        # creating an octagon with a slip velocity parallel to each face
-        sc = np.cos(np.pi / 4.)
-        normals = [
-            [-1, 0, 0],
-            [0, -1, 0],
-            [1, 0, 0],
-            [0, 1, 0],
-            [-sc, sc, 0],
-            [sc, -sc, 0],
-            [sc, sc, 0],
-            [-sc, -sc, 0],
-        ]
-        dists = [
-            2. * AGRID - BOX_L,
-            2. * AGRID - BOX_L,
-            2. * AGRID,
-            2. * AGRID,
-            2. * AGRID - BOX_L / 2.,
-            2. * AGRID - BOX_L / 2.,
-            2. * AGRID + BOX_L * (np.sqrt(2.) - 1.) / 2.,
-            2. * AGRID - BOX_L * (1. + (np.sqrt(2.) - 1.) / 2.),
-        ]
-        # outer cylinder with tangential slip velocity
-        slip_vel = 0.01
-        for normal, dist in zip(normals, dists):
-            self.system.lbboundaries.add(espressomd.lbboundaries.LBBoundary(
-                shape=espressomd.shapes.Wall(normal=normal, dist=dist),
-                velocity=slip_vel * np.cross(normal, self.params['axis'])))
-        # inner cylinder without slip velocity
-        self.system.lbboundaries.add(espressomd.lbboundaries.LBBoundary(
-            shape=espressomd.shapes.Cylinder(
-                center=self.system.box_l / 2.0, axis=self.params['axis'],
-                direction=1, radius=1., length=BOX_L * 1.5)))
+        system = self.system
+        lb_fluid = espressomd.lb.LBFluidWalberla(
+            agrid=AGRID, density=0.5, kinematic_viscosity=3.2,
+            tau=system.time_step)
+        system.actors.add(lb_fluid)
+
+        # set up two cylinders
+        cyl_center = AGRID * (GRID_SIZE // 2 + 0.5) * [1, 1, 0]
+        cyl1 = espressomd.shapes.Cylinder(
+            center=cyl_center, axis=[0, 0, 1], length=3 * system.box_l[2],
+            radius=8.1 * AGRID, direction=1)
+        cyl2 = espressomd.shapes.Cylinder(
+            center=cyl_center, axis=[0, 0, 1], length=3 * system.box_l[2],
+            radius=30.1 * AGRID, direction=-1)
+        lb_fluid.add_boundary_from_shape(cyl1)
+        lb_fluid.add_boundary_from_shape(cyl2)
 
         # the system needs to be fully symmetric
-        mask = np.copy(lbf[:, :, :].boundary.astype(bool))
+        mask = np.copy(lb_fluid[:63, :63, :].is_boundary.astype(int))
         np.testing.assert_array_equal(mask, np.flip(mask, axis=0))
         np.testing.assert_array_equal(mask, np.flip(mask, axis=1))
         np.testing.assert_array_equal(mask, np.flip(mask, axis=2))
@@ -137,58 +89,82 @@ def test_taylor_couette_flow(self):
         np.testing.assert_array_equal(mask[:, 0, :], 1)
         np.testing.assert_array_equal(mask[:, -1, :], 1)
 
-        ctp = espressomd.math.CylindricalTransformationParameters(
-            center=[BOX_L / 2.0, BOX_L / 2.0, 0.0],
-            axis=self.params['axis'],
-            orientation=self.params['orientation'])
-        local_obs_params = OBS_PARAMS.copy()
-        local_obs_params['transform_params'] = ctp
-        obs = espressomd.observables.CylindricalLBVelocityProfile(
-            **local_obs_params)
-
-        # simulate until profile converges
-        mid_indices = [int((EFFECTIVE_RADIUS / AGRID) / 2) - 2,
-                       int((BOX_L / AGRID) / 2), int((BOX_L / 2. / AGRID) / 2)]
-        diff = float("inf")
-        old_val = lbf[mid_indices].velocity[1]
-        while diff > 1e-6:
-            self.system.integrator.run(10)
-            new_val = lbf[mid_indices].velocity[1]
-            diff = abs(new_val - old_val)
-            old_val = new_val
-
-        r = obs.bin_centers()[:, :, :, 0].reshape(-1)
-        v_r, v_phi, v_z = np.copy(obs.calculate()).reshape([-1, 3]).T
+        # add tangential slip velocity to the inner cylinder
+        slip_vel = 0.01
+        surface_nodes = espressomd.lb.edge_detection(
+            lb_fluid.get_shape_bitmask(cyl1), system.periodicity)
+        tangents = espressomd.lb.calc_cylinder_tangential_vectors(
+            cyl1.center, AGRID, 0.5, surface_nodes)
+        for node, tangent in zip(surface_nodes, tangents):
+            lb_fluid[node].boundary = espressomd.lb.VelocityBounceBack(
+                slip_vel * tangent)
+
+        # add observable for the fluid velocity in cylindrical coordinates
+        cyl_transform_params = espressomd.math.CylindricalTransformationParameters(
+            center=cyl_center, axis=[0, 0, 1], orientation=[1, 0, 0])
+        observable = espressomd.observables.CylindricalLBVelocityProfile(
+            transform_params=cyl_transform_params,
+            n_r_bins=GRID_SIZE[0] // 2,
+            n_phi_bins=1,
+            n_z_bins=1,
+            min_r=0.0,
+            max_r=system.box_l[0] / 2,
+            min_phi=0.,
+            max_phi=2 * np.pi,
+            min_z=0.,
+            max_z=+system.box_l[2],
+            axis=[0.0, 0.0, 1.0],
+            sampling_density=1
+        )
+
+        # equilibrate the fluid and sample velocities
+        obs_data_baseline = observable.calculate()
+        system.integrator.run(200)
+        obs_data = observable.calculate()
+        profile_r = np.copy(observable.bin_centers()).reshape([-1, 3])[:, 0]
+        profile_v = np.copy(obs_data - obs_data_baseline).reshape([-1, 3])
+        v_r, v_phi, v_z = profile_v.T
 
         # check velocity is zero for the radial and axial components
-        np.testing.assert_allclose(v_r, 0., atol=1e-6)
-        np.testing.assert_allclose(v_z, 0., atol=1e-8)
+        np.testing.assert_allclose(v_r, 0., atol=1e-4)
+        np.testing.assert_allclose(v_z, 0., atol=1e-6)
+
+        # check azimuthal velocity is zero inside boundary
+        np.testing.assert_allclose(v_phi[:7], 0., atol=1e-7)
+
+        # check azimuthal velocity in the linear regime
+        self.assertGreater(v_phi[7], v_phi[6])
+        self.assertGreater(v_phi[8], v_phi[7])
+        self.assertGreater(v_phi[9], v_phi[8])
 
         # check azimuthal velocity in the Couette regime
-        a_ref, b_ref = taylor_couette(
-            0.0, slip_vel, 1., BOX_L / 2. - 2. * AGRID)
+        r = profile_r[10:-1]
+        v_phi = v_phi[10:-1]
+        a_ref, b_ref = taylor_couette(slip_vel, 0.0, cyl1.radius, cyl2.radius)
         v_phi_ref = a_ref * r + b_ref / r
         v_phi_drift = np.mean(v_phi) - np.mean(v_phi_ref)
-        np.testing.assert_allclose(v_phi_drift, 0., atol=5e-4)
-        np.testing.assert_allclose(v_phi - v_phi_drift, v_phi_ref, atol=1e-3)
+        np.testing.assert_allclose(v_phi_drift, 0., atol=1.2e-4)
+        np.testing.assert_allclose(v_phi - v_phi_drift, v_phi_ref, atol=1e-4)
 
 
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES'])
-class LBCPUCircularCouette(LBCircularCouetteCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBCircularCouetteWalberla(LBCircularCouetteCommon, ut.TestCase):
 
-    """Test for the CPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in double-precision."""
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES_GPU'])
-class LBGPUCircularCouette(LBCircularCouetteCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBCircularCouetteWalberlaSinglePrecision(
+        LBCircularCouetteCommon, ut.TestCase):
 
-    """Test for the GPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-    lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     ut.main()
diff --git a/testsuite/python/lb_electrohydrodynamics.py b/testsuite/python/lb_electrohydrodynamics.py
index e2e4f736e5a..500fb7f4ec6 100644
--- a/testsuite/python/lb_electrohydrodynamics.py
+++ b/testsuite/python/lb_electrohydrodynamics.py
@@ -24,7 +24,7 @@
 import numpy as np
 
 
-@utx.skipIfMissingFeatures(["LB_ELECTROHYDRODYNAMICS"])
+@utx.skipIfMissingFeatures(["WALBERLA", "LB_ELECTROHYDRODYNAMICS"])
 class LBEHTest(ut.TestCase):
     system = espressomd.System(box_l=[6.0, 6.0, 6.0])
 
@@ -34,7 +34,7 @@ def setUp(self):
                        'tau': 0.02,
                        'agrid': 0.5,
                        'dens': 0.85,
-                       'viscosity': 30.0,
+                       'kinematic_viscosity': 30.0,
                        'friction': 3.0,
                        'temp': 0.0,
                        'skin': 0.2,
@@ -44,9 +44,9 @@ def setUp(self):
         system.time_step = self.params['time_step']
         system.cell_system.skin = self.params['skin']
 
-        lbf = espressomd.lb.LBFluid(
-            visc=self.params['viscosity'],
-            dens=self.params['dens'],
+        lbf = espressomd.lb.LBFluidWalberla(
+            kinematic_viscosity=self.params['kinematic_viscosity'],
+            density=self.params['dens'],
             agrid=self.params['agrid'],
             tau=system.time_step,
             kT=self.params['temp']
@@ -71,7 +71,7 @@ def test(self):
 
         system.integrator.run(steps=500)
 
-        np.testing.assert_allclose(v_term, np.copy(p.v), atol=1e-5)
+        np.testing.assert_allclose(v_term, np.copy(p.v), atol=5e-5)
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/lb_get_u_at_pos.py b/testsuite/python/lb_get_u_at_pos.py
deleted file mode 100644
index b12af6087e1..00000000000
--- a/testsuite/python/lb_get_u_at_pos.py
+++ /dev/null
@@ -1,91 +0,0 @@
-#
-# Copyright (C) 2010-2022 The ESPResSo project
-#
-# This file is part of ESPResSo.
-#
-# ESPResSo is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# ESPResSo is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-import sys
-import unittest as ut
-import unittest_decorators as utx
-import numpy as np
-import numpy.testing
-import espressomd
-import espressomd.lb
-
-
-@utx.skipIfMissingGPU()
-class TestLBGetUAtPos(ut.TestCase):
-
-    """
-    Check velocities at particle positions are sorted by ``id`` and
-    quantitatively correct (only LB GPU).
-
-    """
-    @classmethod
-    def setUpClass(cls):
-        cls.params = {
-            'tau': 0.01,
-            'agrid': 0.5,
-            'box_l': [12.0, 12.0, 12.0],
-            'dens': 0.85,
-            'viscosity': 30.0,
-            'friction': 2.0,
-            'gamma': 1.5
-        }
-        cls.system = espressomd.System(box_l=[1.0, 1.0, 1.0])
-        cls.system.box_l = cls.params['box_l']
-        cls.system.cell_system.skin = 0.4
-        cls.system.time_step = 0.01
-        cls.n_nodes_per_dim = int(cls.system.box_l[0] / cls.params['agrid'])
-        for p in range(cls.n_nodes_per_dim):
-            # Set particles exactly between two LB nodes in x direction.
-            cls.system.part.add(pos=[(p + 1) * cls.params['agrid'],
-                                     0.5 * cls.params['agrid'],
-                                     0.5 * cls.params['agrid']])
-        cls.lb_fluid = espressomd.lb.LBFluidGPU(
-            visc=cls.params['viscosity'],
-            dens=cls.params['dens'],
-            agrid=cls.params['agrid'],
-            tau=cls.params['tau'],
-        )
-        cls.system.actors.add(cls.lb_fluid)
-        cls.vels = np.zeros((cls.n_nodes_per_dim, 3))
-        cls.vels[:, 0] = np.arange(cls.n_nodes_per_dim, dtype=float)
-        cls.interpolated_vels = cls.vels.copy()
-        cls.interpolated_vels[:, 0] += 0.5
-        for n in range(cls.n_nodes_per_dim):
-            cls.lb_fluid[n, 0, 0].velocity = cls.vels[n, :]
-        cls.system.integrator.run(0)
-
-    def test_get_u_at_pos(self):
-        """
-        Test if linear interpolated velocities are equal to the velocities at
-        the particle positions. This test uses the two-point coupling under
-        the hood.
-
-        """
-        numpy.testing.assert_allclose(
-            self.interpolated_vels[:-1],
-            self.lb_fluid.get_interpolated_fluid_velocity_at_positions(
-                self.system.part.all().pos, False)[:-1],
-            atol=1e-4)
-
-
-if __name__ == "__main__":
-    suite = ut.TestSuite()
-    suite.addTests(ut.TestLoader().loadTestsFromTestCase(TestLBGetUAtPos))
-    result = ut.TextTestRunner(verbosity=4).run(suite)
-    sys.exit(not result.wasSuccessful())
diff --git a/testsuite/python/lb_interpolation.py b/testsuite/python/lb_interpolation.py
index ac4b42c2c2b..cee0fa86601 100644
--- a/testsuite/python/lb_interpolation.py
+++ b/testsuite/python/lb_interpolation.py
@@ -21,7 +21,6 @@
 import unittest_decorators as utx
 import numpy as np
 import itertools
-import sys
 
 import espressomd
 import espressomd.shapes
@@ -36,11 +35,11 @@
 TIME_STEP = TAU
 LB_PARAMETERS = {
     'agrid': AGRID,
-    'visc': VISC,
-    'dens': DENS,
+    'kinematic_viscosity': VISC,
+    'density': DENS,
     'tau': TAU
 }
-V_BOUNDARY = 0.6
+V_BOUNDARY = 0.2
 
 
 def velocity_profile(x):
@@ -58,11 +57,10 @@ class LBInterpolation:
     system.time_step = TIME_STEP
 
     def setUp(self):
-        self.lbf = self.lb_class(**LB_PARAMETERS)
+        self.lbf = self.lb_class(**LB_PARAMETERS, **self.lb_params)
         self.system.actors.add(self.lbf)
 
     def tearDown(self):
-        self.system.lbboundaries.clear()
         self.system.actors.clear()
 
     def set_boundaries(self, velocity):
@@ -71,10 +69,8 @@ def set_boundaries(self, velocity):
             normal=[1, 0, 0], dist=AGRID)
         wall_shape2 = espressomd.shapes.Wall(
             normal=[-1, 0, 0], dist=-(BOX_L - AGRID))
-        self.system.lbboundaries.add(
-            espressomd.lbboundaries.LBBoundary(shape=wall_shape1))
-        self.system.lbboundaries.add(
-            espressomd.lbboundaries.LBBoundary(shape=wall_shape2, velocity=velocity))
+        self.lbf.add_boundary_from_shape(wall_shape1)
+        self.lbf.add_boundary_from_shape(wall_shape2, velocity)
 
     def test_interpolated_velocity(self):
         """
@@ -88,20 +84,19 @@ def test_interpolated_velocity(self):
         # box_l[0]-agrid/2.
         np.testing.assert_allclose(
             np.copy(self.lbf.get_interpolated_velocity(
-                [self.system.box_l[0] - AGRID / 2, 0, 0])),
+                pos=[self.system.box_l[0] - AGRID / 2, 0, 0])),
             np.array([0, 0, V_BOUNDARY]))
 
         # Check interpolated velocity involving boundary and neighboring node.
         # The boundary node index is lbf.shape[0]-1, so -2 refers to the
         # node in front of the boundary.
         node_next_to_boundary = self.lbf[self.lbf.shape[0] - 2, 0, 0]
+        pos_at_boundary = [BOX_L - AGRID, 0, 0]
         # The midpoint between the boundary and that node is box_l - agrid.
         np.testing.assert_allclose(
-            np.copy(self.lbf.get_interpolated_velocity(
-                [self.system.box_l[0] - AGRID, 0, 0])),
-            0.5 * (np.array([0, 0, V_BOUNDARY]) +
-                   node_next_to_boundary.velocity),
-            atol=1e-7)
+            np.copy(self.lbf.get_interpolated_velocity(pos=pos_at_boundary)),
+            ([0, 0, V_BOUNDARY] + np.copy(node_next_to_boundary.velocity)) / 2.,
+            atol=1e-4)
 
         # Bulk
         for pos in itertools.product(
@@ -109,37 +104,44 @@ def test_interpolated_velocity(self):
                 np.arange(0.5 * AGRID, BOX_L, AGRID),
                 np.arange(0.5 * AGRID, BOX_L, AGRID)):
             np.testing.assert_allclose(
-                self.lbf.get_interpolated_velocity(pos)[2],
-                velocity_profile(pos[0]), atol=5e-5)
+                self.lbf.get_interpolated_velocity(pos=pos)[2],
+                velocity_profile(pos[0]), atol=1e-3)
 
     def test_mach_limit_check(self):
         """
         Assert that the Mach number check fires an exception.
 
         """
-        max_vel = 0.21 * AGRID / TAU
-        print("Begin: Test error generation")
-        sys.stdout.flush()
-        sys.stderr.flush()
-        with self.assertRaises(Exception):
-            self.set_boundaries([0.0, 0.0, max_vel])
-            self.system.integrator.run(1)
-        sys.stdout.flush()
-        sys.stderr.flush()
-        print("End: Test error generation")
+        max_vel = 1.1 * self.lbf.mach_limit() * AGRID / TAU
+        vbb = espressomd.lb.VelocityBounceBack([0, 0, max_vel])
+        error_msg = 'Slip velocity exceeds Mach 0.35'
+
+        with self.assertRaisesRegex(ValueError, error_msg):
+            self.lbf[0, 0, 0].boundary = vbb
+        self.assertIsNone(self.lbf[0, 0, 0].boundary)
+
+        with self.assertRaisesRegex(ValueError, error_msg):
+            shape = espressomd.shapes.Wall(normal=[1, 0, 0], dist=AGRID)
+            self.lbf.add_boundary_from_shape(shape, vbb.velocity)
+        self.assertIsNone(self.lbf[0, 0, 0].boundary)
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBInterpolationWalberla(LBInterpolation, ut.TestCase):
 
+    """Test for the Walberla implementation of the LB in double-precision."""
 
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES'])
-class LBInterpolationCPU(LBInterpolation, ut.TestCase):
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
 
-    lb_class = espressomd.lb.LBFluid
 
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBInterpolationWalberlaSinglePrecision(LBInterpolation, ut.TestCase):
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES_GPU'])
-class LBInterpolationGPU(LBInterpolation, ut.TestCase):
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-    lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/lb_lees_edwards.py b/testsuite/python/lb_lees_edwards.py
new file mode 100644
index 00000000000..055e5ba9ee2
--- /dev/null
+++ b/testsuite/python/lb_lees_edwards.py
@@ -0,0 +1,333 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+import espressomd
+import espressomd.lb
+import espressomd.lees_edwards
+
+import unittest as ut
+import unittest_decorators as utx
+import numpy as np
+import itertools
+
+
+system = espressomd.System(box_l=[17, 17, 1])
+system.cell_system.skin = 0.1
+system.time_step = 0.01
+
+
+class LBContextManager:
+    """
+    Add an LB actor and remove it from the actor list at the end.
+    """
+
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs
+
+    def __enter__(self):
+        self.lbf = espressomd.lb.LBFluidWalberla(
+            agrid=1., density=1., kinematic_viscosity=1., tau=system.time_step, **self.kwargs)
+        system.actors.add(self.lbf)
+        system.thermostat.set_lb(LB_fluid=self.lbf, gamma=1.0)
+        return self.lbf
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        system.actors.remove(self.lbf)
+        system.thermostat.turn_off()
+
+
+class LEContextManager:
+    """
+    Add a Lees-Edwards linear shear boundary and remove it at the end.
+    """
+
+    def __init__(self, shear_direction, shear_plane_normal, offset):
+        protocol = espressomd.lees_edwards.LinearShear(
+            shear_velocity=0., initial_pos_offset=offset, time_0=0.)
+        self.initialize = lambda: system.lees_edwards.set_boundary_conditions(
+            shear_direction=shear_direction,
+            shear_plane_normal=shear_plane_normal,
+            protocol=protocol)
+
+    def __enter__(self):
+        self.initialize()
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        system.lees_edwards.protocol = espressomd.lees_edwards.Off()
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBLeesEdwards(ut.TestCase):
+
+    """
+    Check that velocities interpolated from spatially fixed particles wrap
+    around shear boundaries with the correct offset. A two-dimensional LB
+    grid is used for simplicity.
+
+    """
+
+    def setUp(self):
+        system.lees_edwards.set_boundary_conditions(
+            shear_direction="x", shear_plane_normal="y",
+            protocol=espressomd.lees_edwards.Off())
+
+    def tearDown(self):
+        system.actors.clear()
+        system.thermostat.turn_off()
+        system.part.clear()
+
+    def sample_lb_velocities(self, lbf):
+        profiles = []
+        for _ in range(5):
+            system.integrator.run(2)
+            vel_grid = lbf[:, :, :].velocity[:, :, 0, :]
+            profiles.append(np.linalg.norm(vel_grid, axis=2))
+        return profiles
+
+    def check_profile(self, profile, stencil,
+                      nodes_shifted, nodes_unshifted, tol):
+        profile = np.copy(profile) / np.max(profile)
+        for node in nodes_unshifted:
+            self.assertAlmostEqual(profile[stencil[node]], 1.0, delta=tol)
+        for node in nodes_shifted:
+            ref = profile[stencil['C']]
+            self.assertAlmostEqual(profile[stencil[node]], ref, delta=tol)
+            node += '~'
+            ref = 1.0 - ref
+            self.assertAlmostEqual(profile[stencil[node]], ref, delta=tol)
+
+    def test_velocity_shift_from_particles(self):
+        """
+        Place particles at the center and borders of a square (a cuboid LB
+        grid with thickness 1 in the z-axis). Particles are fixed in space
+        and apply a force on the fluid. The velocity vectors of particle
+        pairs that are in contact across a periodic boundary are aligned,
+        such that their contribution to the interpolation is constructive,
+        i.e. at time = 0 the velocity of a LB cell containing a particle
+        is 100%, and at time = 10 it is still 100% (70% from the particle,
+        30% from the neighboring particle across the periodic boundary).
+
+        Below is a diagram of the time evolution of such a system.
+        The magnitude of the interpolated velocity is initially 5,
+        and decreases to 4 at the next time step, with a magnitude
+        of 1 for neighboring LB cells. LB cells at the boundaries
+        remain at velocity = 5 because they gain 1 unit from the
+        periodic images.
+
+        .. code-block:: none
+
+            +-------------+        +-------------+
+            |      5      |        |     151     |
+            |             |        |      1      |
+            |             |        | 1    1    1 |
+            | 5    5    5 |  --->  | 51  141  15 |
+            |             |        | 1    1    1 |
+            |             |        |      1      |
+            |      5      |        |     151     |
+            +-------------+        +-------------+
+
+
+        When Lees-Edwards boundary conditions are present, contributions
+        to the interpolation are no longer constructive across the shear
+        boundary due to the shear offset.
+
+        Below is a diagram of the time evolution of such a system,
+        where the shear plane normal is the y-axis and the shear
+        direction is the x-axis with an offset of 3 agrid:
+
+        .. code-block:: none
+
+            +-------------+        +-------------+
+            |      5      |        |     141 1   |
+            |             |        |      1      |
+            |             |        | 1    1    1 |
+            | 5    5    5 |  --->  | 51  141  15 |
+            |             |        | 1    1    1 |
+            |             |        |      1      |
+            |      5      |        |   1 141     |
+            +-------------+        +-------------+
+
+
+        The interpolated velocity at the shear boundary is equal to
+        the interpolated velocity of a particle moving diagonally.
+        The central particle is moving diagonally and is used as a
+        reference.
+
+        """
+        tol = 0.012
+
+        # stencil for D2Q8
+        stencil_D2Q8 = {'S': (8, 0), 'W': (0, 8), 'N': (8, 16), 'E': (16, 8),
+                        'C': (8, 8)}
+
+        # place particles at the square edges and at the center of the square
+        for x, y in stencil_D2Q8.values():
+            v = np.array([y == 8, x == 8, 0], dtype=float)
+            v /= np.linalg.norm(v)
+            system.part.add(pos=[x + 0.5, y + 0.5, 0.5], v=v, fix=3 * [True])
+
+        # without Lees-Edwards, velocities remain unaffected
+        with LBContextManager() as lbf:
+            for profile in self.sample_lb_velocities(lbf):
+                self.check_profile(profile, stencil_D2Q8, '', 'SNWE', tol)
+
+        # with Lees-Edwards and no offset, velocities remain unaffected
+        with LEContextManager('x', 'y', 0):
+            with LBContextManager() as lbf:
+                for profile in self.sample_lb_velocities(lbf):
+                    self.check_profile(profile, stencil_D2Q8, '', 'SNWE', tol)
+
+        le_offset = 6
+
+        # North and South are sheared horizontally
+        with LEContextManager('x', 'y', le_offset):
+            stencil = {'N~': (8 - le_offset, 0),
+                       'S~': (8 + le_offset, 16),
+                       **stencil_D2Q8}
+            with LBContextManager() as lbf:
+                for profile in self.sample_lb_velocities(lbf):
+                    self.check_profile(profile, stencil, 'SN', 'WE', tol)
+
+        # TODO: re-enable this check once LB can be sheared in any direction
+#        # East and West are sheared vertically
+#        with LEContextManager('y', 'x', le_offset):
+#            stencil = {'E~': (0, 8 - le_offset),
+#                       'W~': (16, 8 + le_offset),
+#                       **stencil_D2Q8}
+#            with LBContextManager() as lbf:
+#                for profile in self.sample_lb_velocities(lbf):
+#                    self.check_profile(profile, stencil, 'WE', 'SN', tol)
+
+    def test_velocity_shift_from_fluid_impulse(self):
+        """
+        Same test as ``test_velocity_shift_from_particles``, but the particle
+        force on the fluid is simulated by manually changing the velocity of
+        fluid nodes directly. The velocity is applied one agrid away from the
+        shear boundary (at x=1), since velocities stored in the shear boundary
+        at x=0 are copied to x=h without any offset.
+
+        """
+        tol = 0.08
+
+        # stencil for D2Q8
+        stencil_D2Q8 = {'S': (8, 1), 'W': (1, 8), 'N': (8, 15), 'E': (15, 8),
+                        'C': (8, 8)}
+
+        def create_impulse(lbf, stencil):
+            # add velocities at the square edges and at the center
+            for x, y in stencil.values():
+                v = np.array([y == 8, x == 8, 0], dtype=float)
+                v /= np.linalg.norm(v)
+                lbf[x, y, 0].velocity = -0.05 * v
+
+        # without Lees-Edwards, velocities remain unaffected
+        with LBContextManager() as lbf:
+            create_impulse(lbf, stencil_D2Q8)
+            for profile in self.sample_lb_velocities(lbf):
+                self.check_profile(profile, stencil_D2Q8, '', 'SNWE', tol)
+
+        # with Lees-Edwards and no offset, velocities remain unaffected
+        with LEContextManager('x', 'y', 0):
+            with LBContextManager() as lbf:
+                create_impulse(lbf, stencil_D2Q8)
+                for profile in self.sample_lb_velocities(lbf):
+                    self.check_profile(profile, stencil_D2Q8, '', 'SNWE', tol)
+
+        le_offset = 6
+
+        # North and South are sheared horizontally
+        with LEContextManager('x', 'y', le_offset):
+            stencil = {'N~': (8 - le_offset, 1),
+                       'S~': (8 + le_offset, 15),
+                       **stencil_D2Q8}
+            with LBContextManager() as lbf:
+                create_impulse(lbf, stencil_D2Q8)
+                for profile in self.sample_lb_velocities(lbf):
+                    self.check_profile(profile, stencil, 'SN', 'WE', tol)
+
+        # TODO: re-enable this check once LB can be sheared in any direction
+#        # East and West are sheared vertically
+#        with LEContextManager('y', 'x', le_offset):
+#            stencil = {'E~': (1, 8 - le_offset),
+#                       'W~': (15, 8 + le_offset),
+#                       **stencil_D2Q8}
+#            with LBContextManager() as lbf:
+#                create_impulse(lbf, stencil_D2Q8)
+#                for profile in self.sample_lb_velocities(lbf):
+#                    self.check_profile(profile, stencil, 'WE', 'SN', tol)
+
+    def test_lebc_mismatch(self):
+        """
+        Check that MD LEbc and LB LEbc always agree.
+        """
+        err_msg = "MD and LB Lees-Edwards boundary conditions disagree"
+        # LEbc must be set before instantiating LB
+        with self.assertRaisesRegex(RuntimeError, err_msg):
+            with LBContextManager() as lbf:
+                LEContextManager('y', 'x', 1.).initialize()
+        # when a LB actor with LEbc is active, the MD LEbc shear directions
+        # are immutable
+        with LEContextManager('x', 'y', 1.):
+            with LBContextManager() as lbf:
+                with self.assertRaisesRegex(RuntimeError, err_msg):
+                    system.lees_edwards.protocol = None
+                with self.assertRaisesRegex(RuntimeError, err_msg):
+                    system.lees_edwards.set_boundary_conditions(
+                        shear_direction="z", shear_plane_normal="y",
+                        protocol=espressomd.lees_edwards.Off())
+                self.assertEqual(system.lees_edwards.shear_direction, "x")
+                self.assertEqual(system.lees_edwards.shear_plane_normal, "y")
+        # when de-activating and later re-activating a LB actor with LEbc,
+        # the MD LEbc must have the same shear directions
+        with self.assertRaisesRegex(Exception, err_msg):
+            with LEContextManager('z', 'y', 1.):
+                system.actors.add(lbf)
+        self.assertEqual(len(system.actors), 0)
+        # LB only implements shear_plane_normal="y"
+        err_msg = "Lees-Edwards LB only supports shear_plane_normal=\"y\""
+        for shear_dir, shear_plane_normal in itertools.product("xyz", "xz"):
+            if shear_dir != shear_plane_normal:
+                with self.assertRaisesRegex(ValueError, err_msg):
+                    with LEContextManager(shear_dir, shear_plane_normal, 1.):
+                        system.actors.add(espressomd.lb.LBFluidWalberla(
+                            agrid=1., density=1., kinematic_viscosity=1.,
+                            tau=system.time_step))
+                self.assertEqual(len(system.actors), 0)
+        # while LB and MD LEbc must agree on the shear directions,
+        # the offset can change
+        with LEContextManager('x', 'y', -1.):
+            system.actors.add(lbf)
+            system.actors.clear()
+        # no thermalization
+        with self.assertRaisesRegex(RuntimeError, "Lees-Edwards LB doesn't support thermalization"):
+            with LEContextManager('x', 'y', 1.):
+                system.actors.add(espressomd.lb.LBFluidWalberla(
+                    agrid=1., density=1., kinematic_viscosity=1., kT=1., seed=42,
+                    tau=system.time_step))
+        self.assertEqual(len(system.actors), 0)
+
+        with self.assertRaisesRegex(ValueError, "Lees-Edwards sweep is implemented for a ghost layer of thickness 1"):
+            lattice = espressomd.lb.LatticeWalberla(agrid=1., n_ghost_layers=2)
+            with LEContextManager('x', 'y', 1.):
+                system.actors.add(espressomd.lb.LBFluidWalberla(
+                    lattice=lattice, density=1., kinematic_viscosity=1.,
+                    tau=system.time_step))
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/lb_lees_edwards_particle_coupling.py b/testsuite/python/lb_lees_edwards_particle_coupling.py
new file mode 100644
index 00000000000..373d2eca250
--- /dev/null
+++ b/testsuite/python/lb_lees_edwards_particle_coupling.py
@@ -0,0 +1,94 @@
+#
+# Copyright (C) 2013-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+import unittest as ut
+import espressomd.lees_edwards as lees_edwards
+import espressomd
+import espressomd.lb
+import numpy as np
+import unittest_decorators as utx
+
+
+@utx.skipIfMissingFeatures("WALBERLA")
+class LBLeesEdwardsParticleCoupling(ut.TestCase):
+    def test(self):
+        system = espressomd.System(box_l=[10, 10, 10])
+
+        system.time_step = 1
+        system.cell_system.skin = 0.1
+        system.cell_system.set_n_square()
+
+        offset = 1
+        idx = int(offset)
+        protocol = lees_edwards.LinearShear(
+            shear_velocity=0., initial_pos_offset=offset, time_0=0.)
+        system.lees_edwards.set_boundary_conditions(
+            shear_direction="x", shear_plane_normal="y", protocol=protocol)
+
+        lbf = espressomd.lb.LBFluidWalberla(
+            agrid=1., density=1., kinematic_viscosity=1., tau=system.time_step)
+        system.actors.add(lbf)
+        system.thermostat.set_lb(LB_fluid=lbf, seed=123, gamma=1)
+
+        pos = [system.box_l[0] / 2., 0., system.box_l[0] / 2.]
+        p = system.part.add(pos=pos)
+        v0 = np.array([1, 2, 3])
+        mid_x = lbf.shape[0] // 2
+        mid_z = lbf.shape[2] // 2
+
+        upper_y = lbf.shape[1] - 1
+        nodes = [lbf[mid_x - 1, 0, mid_z],
+                 lbf[mid_x, 0, mid_z - 1],
+                 lbf[mid_x - 1, 0, mid_z],
+                 lbf[mid_x, 0, mid_z],
+                 lbf[mid_x - 1 + idx, upper_y, mid_z],
+                 lbf[mid_x + idx, upper_y, mid_z - 1],
+                 lbf[mid_x - 1 + idx, upper_y, mid_z],
+                 lbf[mid_x + idx, upper_y, mid_z]]
+        for n in nodes:
+            n.velocity = v0
+
+        system.integrator.run(1)
+        lb_forces = np.array([n.last_applied_force for n in nodes])
+        lb_force = np.sum(lb_forces, axis=0)
+        np.testing.assert_allclose(lb_force, -np.copy(p.f))
+        for f in lb_forces:
+            np.testing.assert_allclose(f, lb_forces[0])
+
+        lbf[:, :, :].velocity = [0, 0, 0]
+
+        lower_nodes = nodes[:4]
+        upper_nodes = nodes[4:]
+        for n in lower_nodes:
+            n.velocity = v0
+        for n in upper_nodes:
+            n.velocity = - v0
+        p.update(dict(pos=pos, v=np.zeros(3)))
+        np.testing.assert_allclose(
+            np.copy(lbf.get_interpolated_velocity(pos=pos)),
+            np.zeros(3))
+        system.integrator.run(1)
+        np.testing.assert_allclose(np.copy(p.pos), pos)
+        np.testing.assert_allclose(np.copy(p.f), np.zeros(3))
+        for n in nodes:
+            np.testing.assert_allclose(
+                np.copy(n.last_applied_force), np.zeros(3))
+
+
+if __name__ == '__main__':
+    ut.main()
diff --git a/testsuite/python/lb_density.py b/testsuite/python/lb_mass_conservation.py
similarity index 74%
rename from testsuite/python/lb_density.py
rename to testsuite/python/lb_mass_conservation.py
index 11db96f3103..22dcbb6a5a1 100644
--- a/testsuite/python/lb_density.py
+++ b/testsuite/python/lb_mass_conservation.py
@@ -30,8 +30,8 @@
 DENS = 1.7
 TIME_STEP = 0.01
 LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': VISC,
+             'density': DENS,
+             'kinematic_viscosity': VISC,
              'tau': TIME_STEP,
              'kT': KT,
              'seed': 23}
@@ -46,7 +46,7 @@ class LBMassCommon:
     system.cell_system.skin = 0.4 * AGRID
 
     def setUp(self):
-        self.lbf = self.lb_class(**LB_PARAMS)
+        self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
         self.system.actors.add(self.lbf)
         self.system.thermostat.set_lb(LB_fluid=self.lbf, seed=3, gamma=2.0)
 
@@ -66,21 +66,24 @@ def test_mass_conservation(self):
         np.testing.assert_array_less(result[:, 1], 0.015)
 
 
-class LBCPUMass(LBMassCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBMassWalberlaDoublePrecision(LBMassCommon, ut.TestCase):
 
-    """Test for the CPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in double-precision."""
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
     atol = 1e-10
 
 
-@utx.skipIfMissingGPU()
-class LBGPUMass(LBMassCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBMassWalberlaSinglePrecision(LBMassCommon, ut.TestCase):
 
-    """Test for the GPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-    lb_class = espressomd.lb.LBFluidGPU
-    atol = 3e-7
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    atol = 5e-7
 
 
 if __name__ == '__main__':
diff --git a/testsuite/python/lb_momentum_conservation.py b/testsuite/python/lb_momentum_conservation.py
index ffddccbe27d..0abb38b5030 100644
--- a/testsuite/python/lb_momentum_conservation.py
+++ b/testsuite/python/lb_momentum_conservation.py
@@ -18,40 +18,44 @@
 #
 
 import espressomd
+import espressomd.lb
 import unittest as ut
 import unittest_decorators as utx
 import numpy as np
 
 # Define the LB parameters
-TIME_STEP = 0.1
-AGRID = 1.0
-KVISC = 5
-DENS = 1
-BOX_SIZE = 6 * AGRID
-F = 1. / BOX_SIZE**3
-GAMMA = 15
+TIME_STEP = 0.008
+AGRID = .4
+GRID_SIZE = 6
+KVISC = 4
+DENS = 2.3
+F = 5.5 / GRID_SIZE**3
+GAMMA = 1
+
 
 LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': KVISC,
+             'density': DENS,
+             'kinematic_viscosity': KVISC,
              'tau': TIME_STEP,
-             'ext_force_density': [0, F, 0]}
+             'ext_force_density': np.array([-.7 * F, .9 * F, .8 * F])}
 
 
-class Momentum(object):
+class TestLBMomentumConservation:
     """
     Tests momentum conservation for an LB coupled to a particle, where opposing
     forces are applied to LB and particle. The test should uncover issues
     with boundary and ghost layer handling.
 
     """
-    system = espressomd.System(box_l=[BOX_SIZE] * 3)
+
+    system = espressomd.System(box_l=[GRID_SIZE * AGRID] * 3)
     system.time_step = TIME_STEP
     system.cell_system.skin = 0.01
+    n_nodes = system.cell_system.get_state()["n_nodes"]
 
     def setUp(self):
         self.set_cellsystem()
-        self.lbf = self.lb_class(**LB_PARAMS)
+        self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
 
     def tearDown(self):
         self.system.actors.clear()
@@ -61,109 +65,142 @@ def tearDown(self):
     def test(self):
         self.system.actors.add(self.lbf)
         self.system.thermostat.set_lb(LB_fluid=self.lbf, gamma=GAMMA, seed=1)
-        applied_force = self.system.volume() * np.array(
-            LB_PARAMS['ext_force_density'])
-        p = self.system.part.add(
-            pos=(0, 0, 0), ext_force=-applied_force, v=[.1, .2, .3])
+        np.testing.assert_allclose(
+            self.lbf.ext_force_density,
+            LB_PARAMS["ext_force_density"])
 
-        # Reach steady state
-        self.system.integrator.run(500)
-        v_final = np.copy(p.v)
-        momentum = self.system.analysis.linear_momentum()
+        # Initial momentum before integration = 0
+        mom_tol = 1E-4 if self.lbf.single_precision else 1E-12
+        np.testing.assert_allclose(
+            self.system.analysis.linear_momentum(), [0., 0., 0.], atol=mom_tol)
 
-        for _ in range(10):
-            self.system.integrator.run(50)
-            # check that momentum stays constant
-            np.testing.assert_allclose(
-                self.system.analysis.linear_momentum(), momentum, atol=2E-4)
+        ext_fluid_force = self.system.volume() * LB_PARAMS["ext_force_density"]
 
-            # Check that particle velocity is stationary
-            # up to the acceleration of 1/2 time step
-            np.testing.assert_allclose(np.copy(p.v), v_final, atol=2.2E-3)
+        p = self.system.part.add(
+            pos=self.system.box_l / 2, ext_force=-ext_fluid_force, v=[.2, .4, .6])
+        initial_momentum = np.array(self.system.analysis.linear_momentum())
+        np.testing.assert_allclose(initial_momentum, np.copy(p.v) * p.mass,
+                                   atol=mom_tol)
+        while True:
+            self.system.integrator.run(500)
+
+            measured_momentum = self.system.analysis.linear_momentum()
+            coupling_force = -(p.f - p.ext_force)
+            compensation = -TIME_STEP / 2 * coupling_force
+
+            np.testing.assert_allclose(measured_momentum + compensation,
+                                       initial_momentum, atol=self.atol)
+            if np.linalg.norm(p.f) < 0.01 \
+               and np.all(np.abs(p.pos) > 10.1 * self.system.box_l):
+                break
 
         # Make sure, the particle has crossed the periodic boundaries
-        self.assertGreater(np.amax(np.abs(v_final) * self.system.time),
-                           BOX_SIZE)
+        self.assertGreater(max(np.abs(p.v)) * self.system.time,
+                           self.system.box_l[0])
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(["EXTERNAL_FORCES"])
-class TestRegularLBGPU(Momentum, ut.TestCase):
+@ut.skipIf(TestLBMomentumConservation.n_nodes == 1,
+           "LB with regular decomposition already tested with 2 MPI ranks")
+@utx.skipIfMissingFeatures(["WALBERLA", "EXTERNAL_FORCES"])
+class TestLBMomentumConservationRegularWalberla(
+        TestLBMomentumConservation, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+    atol = 1.2e-4
 
     def set_cellsystem(self):
         self.system.cell_system.set_regular_decomposition()
 
 
-@utx.skipIfMissingFeatures(["EXTERNAL_FORCES"])
-class TestRegularLBCPU(Momentum, ut.TestCase):
+@ut.skipIf(TestLBMomentumConservation.n_nodes == 1,
+           "LB with regular decomposition already tested with 2 MPI ranks")
+@utx.skipIfMissingFeatures(["WALBERLA", "EXTERNAL_FORCES"])
+class TestLBMomentumConservationRegularWalberlaSinglePrecision(
+        TestLBMomentumConservation, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    atol = 6.5e-4
 
     def set_cellsystem(self):
         self.system.cell_system.set_regular_decomposition()
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(["EXTERNAL_FORCES"])
-class TestNSquareLBGPU(Momentum, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA", "EXTERNAL_FORCES"])
+class TestLBCPUMomentumConservationHybridNSquareWalberla(
+        TestLBMomentumConservation, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+    atol = 1.2e-4
 
     def set_cellsystem(self):
-        self.system.cell_system.set_n_square()
+        self.system.cell_system.set_hybrid_decomposition(
+            n_square_types={0}, cutoff_regular=1)
 
 
-@utx.skipIfMissingFeatures(["EXTERNAL_FORCES"])
-class TestNSquareLBCPU(Momentum, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA", "EXTERNAL_FORCES"])
+class TestLBCPUMomentumConservationHybridNSquareWalberlaSinglePrecision(
+        TestLBMomentumConservation, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    atol = 6.5e-4
 
     def set_cellsystem(self):
-        self.system.cell_system.set_n_square()
+        self.system.cell_system.set_hybrid_decomposition(
+            n_square_types={0}, cutoff_regular=1)
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(["EXTERNAL_FORCES"])
-class TestHybrid0LBGPU(Momentum, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA", "EXTERNAL_FORCES"])
+class TestLBCPUMomentumConservationHybridRegularWalberla(
+        TestLBMomentumConservation, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+    atol = 1.2e-4
 
     def set_cellsystem(self):
         self.system.cell_system.set_hybrid_decomposition(
-            n_square_types={0}, cutoff_regular=1)
+            n_square_types={1}, cutoff_regular=1)
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(["EXTERNAL_FORCES"])
-class TestHybrid1LBGPU(Momentum, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA", "EXTERNAL_FORCES"])
+class TestLBCPUMomentumConservationHybridRegularWalberlaSinglePrecision(
+        TestLBMomentumConservation, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    atol = 6.5e-4
 
     def set_cellsystem(self):
         self.system.cell_system.set_hybrid_decomposition(
             n_square_types={1}, cutoff_regular=1)
 
 
-@utx.skipIfMissingFeatures(["EXTERNAL_FORCES"])
-class TestHybrid0LBCPU(Momentum, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA", "EXTERNAL_FORCES"])
+class TestLBMomentumConservationNSquareWalberla(
+        TestLBMomentumConservation, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+    atol = 1.2e-4
 
     def set_cellsystem(self):
-        self.system.cell_system.set_hybrid_decomposition(
-            n_square_types={0}, cutoff_regular=1)
+        self.system.cell_system.set_n_square()
 
 
-@utx.skipIfMissingFeatures(["EXTERNAL_FORCES"])
-class TestHybrid1LBCPU(Momentum, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA", "EXTERNAL_FORCES"])
+class TestLBMomentumConservationNSquareWalberlaSinglePrecision(
+        TestLBMomentumConservation, ut.TestCase):
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    atol = 6.5e-4
 
     def set_cellsystem(self):
-        self.system.cell_system.set_hybrid_decomposition(
-            n_square_types={1}, cutoff_regular=1)
+        self.system.cell_system.set_n_square()
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/lb_planar_couette.py b/testsuite/python/lb_planar_couette.py
new file mode 100644
index 00000000000..34ed3b08da1
--- /dev/null
+++ b/testsuite/python/lb_planar_couette.py
@@ -0,0 +1,135 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import espressomd.lb
+import espressomd.lees_edwards
+
+import unittest as ut
+import unittest_decorators as utx
+import numpy as np
+
+
+def analytical(x, t, nu, v, h, k_max):
+    """
+    Analytical solution with Fourier series of the Navier-Stokes equation.
+
+    Parameters
+    ----------
+    x : :obj:`float`
+        Height within the channel
+    t : :obj:`float`
+        Time since the start up of the shear flow
+    nu: :obj:`float`
+        Kinematic kinematic_viscosity
+    v: :obj:`float`
+        Shearing velocity
+    h : :obj:`float`
+        Distance between shear planes
+    k_max : :obj:`int`
+        Upper limit of sums for sinus series
+
+    """
+    u = x / h - 0.5
+    for k in np.arange(1, k_max + 1):
+        wave = 2 * np.pi * k / h
+        u += np.exp(-nu * wave ** 2 * t) * np.sin(wave * x) / (np.pi * k)
+    return v * u
+
+
+LB_PARAMS = {'agrid': 1.,
+             'density': 1.,
+             'kinematic_viscosity': 1. / 6.,
+             'tau': 1.}
+
+
+class LBCouetteFlowCommon:
+
+    system = espressomd.System(box_l=[64, 64, 1])
+    system.time_step = LB_PARAMS['tau']
+    system.cell_system.skin = 0.1
+    system.cell_system.set_n_square()
+
+    def setUp(self):
+        self.system.time = 0.
+
+    def tearDown(self):
+        self.system.actors.clear()
+        self.system.lees_edwards = espressomd.lees_edwards.LeesEdwards()
+
+    def check_profile(self, u_getter, **kwargs):
+        system = self.system
+        system.box_l = [64, 1, 64]
+        if "x" not in kwargs.values():
+            system.box_l = [1, 64, 64]
+        elif "z" not in kwargs.values():
+            system.box_l = [64, 64, 1]
+        h = np.max(system.box_l)
+        shear_velocity = 0.05
+        k_max = 100
+
+        protocol = espressomd.lees_edwards.LinearShear(
+            shear_velocity=shear_velocity, initial_pos_offset=0., time_0=0.)
+        system.lees_edwards.set_boundary_conditions(
+            protocol=protocol, **kwargs)
+
+        lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
+        system.actors.add(lbf)
+
+        # warmup
+        system.integrator.run(8)
+
+        # sampling
+        for i in range(4, 9):
+            steps = (2**i - 2**(i - 1))
+            system.integrator.run(steps)
+            pos = np.linspace(0.5, 63.5, 64)
+            u_ref = analytical(pos, system.time - 1., lbf.kinematic_viscosity,
+                               shear_velocity, h, k_max)
+            u_lbf = np.copy(u_getter(lbf).reshape([-1]))
+            np.testing.assert_allclose(u_lbf, u_ref, atol=1e-4, rtol=0.)
+
+    def test_profile_xy(self):
+        self.check_profile(lambda lbf: lbf[5, :, 0].velocity[:, 0],
+                           shear_direction="x", shear_plane_normal="y")
+
+    def test_profile_zy(self):
+        self.check_profile(lambda lbf: lbf[0, :, 5].velocity[:, 0],
+                           shear_direction="z", shear_plane_normal="y")
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBCouetteFlowWalberla(LBCouetteFlowCommon, ut.TestCase):
+
+    """Test for the Walberla implementation of the LB in double-precision."""
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBCouetteFlowWalberlaSinglePrecision(LBCouetteFlowCommon, ut.TestCase):
+
+    """Test for the Walberla implementation of the LB in single-precision."""
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+
+
+if __name__ == '__main__':
+    ut.main()
diff --git a/testsuite/python/lb_poiseuille.py b/testsuite/python/lb_poiseuille.py
index 4d1a756eb16..bb3caf9f728 100644
--- a/testsuite/python/lb_poiseuille.py
+++ b/testsuite/python/lb_poiseuille.py
@@ -22,33 +22,23 @@
 import numpy as np
 
 import espressomd.lb
-import espressomd.lbboundaries
-import espressomd.electrokinetics
 import espressomd.shapes
 
 AGRID = .25
 EXT_FORCE = .1
-VISC = 2.7
+KINEMATIC_VISC = 2.7
 DENS = 1.7
-TIME_STEP = 0.1
+TIME_STEP = 0.07
 LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': VISC,
+             'density': DENS,
+             'kinematic_viscosity': KINEMATIC_VISC,
              'tau': TIME_STEP,
              'ext_force_density': [0.0, 0.0, EXT_FORCE]}
 
-EK_PARAMS = {'agrid': AGRID,
-             'lb_density': DENS,
-             'viscosity': VISC,
-             'ext_force_density': [0.0, 0.0, EXT_FORCE],
-             'friction': 0.,
-             'T': 1,
-             'prefactor': 0.}
-
 
 def poiseuille_flow(z, H, ext_force_density, dyn_visc):
     """
-    Analytical solution for plane Poiseuille flow.
+    Analytical solution for planar Poiseuille flow.
 
     Parameters
     ----------
@@ -76,28 +66,31 @@ class LBPoiseuilleCommon:
     system.time_step = TIME_STEP
     system.cell_system.skin = 0.4 * AGRID
 
+    def setUp(self):
+        self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
+        self.system.actors.add(self.lbf)
+
+    def tearDown(self):
+        self.system.actors.clear()
+
     def prepare(self):
         """
         Integrate the LB fluid until steady state is reached within a certain
         accuracy.
 
         """
-        self.system.actors.clear()
-        self.system.actors.add(self.lbf)
         wall_shape1 = espressomd.shapes.Wall(normal=[1, 0, 0], dist=AGRID)
         wall_shape2 = espressomd.shapes.Wall(
             normal=[-1, 0, 0], dist=-(self.system.box_l[0] - AGRID))
-        wall1 = espressomd.lbboundaries.LBBoundary(shape=wall_shape1)
-        wall2 = espressomd.lbboundaries.LBBoundary(shape=wall_shape2)
 
-        self.system.lbboundaries.add(wall1)
-        self.system.lbboundaries.add(wall2)
+        self.lbf.add_boundary_from_shape(wall_shape1)
+        self.lbf.add_boundary_from_shape(wall_shape2)
 
         mid_indices = (self.system.box_l / AGRID / 2).astype(int)
         diff = float("inf")
         old_val = self.lbf[mid_indices].velocity[2]
         while diff > 0.005:
-            self.system.integrator.run(100)
+            self.system.integrator.run(200)
             new_val = self.lbf[mid_indices].velocity[2]
             diff = abs(new_val - old_val)
             old_val = new_val
@@ -122,85 +115,26 @@ def test_profile(self):
         v_expected = poiseuille_flow(velocities[1:-1, 0] - 0.5 * self.system.box_l[0],
                                      self.system.box_l[0] - 2.0 * AGRID,
                                      EXT_FORCE,
-                                     VISC * DENS)
-        atol = self.tolerance * AGRID / TIME_STEP
-        np.testing.assert_allclose(v_measured, v_expected, atol=atol)
-
-
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES', 'EXTERNAL_FORCES'])
-class LBCPUPoiseuille(ut.TestCase, LBPoiseuilleCommon):
-
-    """Test for the CPU implementation of the LB."""
-
-    def setUp(self):
-        self.lbf = espressomd.lb.LBFluid(**LB_PARAMS)
-        self.tolerance = 0.015
-
-
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES_GPU', 'EXTERNAL_FORCES'])
-class LBGPUPoiseuille(ut.TestCase, LBPoiseuilleCommon):
-
-    """Test for the GPU implementation of the LB."""
-
-    def setUp(self):
-        self.lbf = espressomd.lb.LBFluidGPU(**LB_PARAMS)
-        self.tolerance = 0.00015
+                                     KINEMATIC_VISC * DENS)
+        np.testing.assert_allclose(v_measured, v_expected, rtol=5E-5)
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(
-    ['LB_BOUNDARIES_GPU', "ELECTROKINETICS", "EXTERNAL_FORCES"])
-class LBEkinPoiseuille(ut.TestCase, LBPoiseuilleCommon):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBPoiseuilleWalberla(LBPoiseuilleCommon, ut.TestCase):
 
-    """Test the LB part of electrokinetics. """
+    """Test for the Walberla implementation of the LB in double-precision."""
 
-    def setUp(self):
-        self.lbf = espressomd.electrokinetics.Electrokinetics(**EK_PARAMS)
-        species = espressomd.electrokinetics.Species(
-            density=0., D=1., valency=0.)
-        self.lbf.add_species(species)
-        self.tolerance = 0.00015
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES_GPU', 'EXTERNAL_FORCES'])
-class LBGPUPoiseuilleInterpolation(ut.TestCase, LBPoiseuilleCommon):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBPoiseuilleWalberlaSinglePrecision(LBPoiseuilleCommon, ut.TestCase):
 
-    """Test for the higher order interpolation scheme of the LB."""
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-    def setUp(self):
-        self.lbf = espressomd.lb.LBFluidGPU(**LB_PARAMS)
-        self.lbf.set_interpolation_order("quadratic")
-        self.tolerance = 0.015
-
-    def test_profile(self):
-        """
-        Compare against analytical function by calculating the RMSD.
-
-        """
-        self.prepare()
-        velocities = np.zeros((50, 2))
-        x_values = np.linspace(2 * AGRID, self.system.box_l[0] - 2 * AGRID, 50)
-
-        cnt = 0
-        for x in x_values:
-            v_tmp = []
-            for y in range(int(self.system.box_l[1] + 1)):
-                for z in range(int(self.system.box_l[2] + 1)):
-                    v_tmp.append(
-                        self.lbf.get_interpolated_velocity([x, y * AGRID, z * AGRID])[2])
-            velocities[cnt, 1] = np.mean(np.array(v_tmp))
-            velocities[cnt, 0] = x
-            cnt += 1
-
-        v_expected = poiseuille_flow(x_values - 0.5 * self.system.box_l[0],
-                                     self.system.box_l[0] - 2.0 * AGRID,
-                                     EXT_FORCE,
-                                     VISC * DENS)
-        v_measured = velocities[:, 1]
-        atol = self.tolerance * AGRID / TIME_STEP
-        np.testing.assert_allclose(v_measured, v_expected, atol=atol)
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
 
 
 if __name__ == '__main__':
diff --git a/testsuite/python/lb_poiseuille_cylinder.py b/testsuite/python/lb_poiseuille_cylinder.py
index 06129bb9d02..c3a95ebab9f 100644
--- a/testsuite/python/lb_poiseuille_cylinder.py
+++ b/testsuite/python/lb_poiseuille_cylinder.py
@@ -23,20 +23,19 @@
 
 import espressomd.math
 import espressomd.lb
-import espressomd.lbboundaries
 import espressomd.observables
 import espressomd.shapes
 
 AGRID = .5
 EXT_FORCE = .1
-VISC = 2.7
+KINEMATIC_VISC = 2.7
 DENS = 1.7
-TIME_STEP = 0.1
+TIME_STEP = 0.05
 BOX_L = 8.0
 EFFECTIVE_RADIUS = BOX_L / 2.0 - 1.0
 LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': VISC,
+             'density': DENS,
+             'kinematic_viscosity': KINEMATIC_VISC,
              'tau': TIME_STEP}
 
 OBS_PARAMS = {'n_r_bins': 6,
@@ -85,7 +84,6 @@ class LBPoiseuilleCommon:
 
     def tearDown(self):
         self.system.actors.clear()
-        self.system.lbboundaries.clear()
 
     def prepare(self):
         """
@@ -99,21 +97,20 @@ def prepare(self):
         local_lb_params = LB_PARAMS.copy()
         local_lb_params['ext_force_density'] = np.array(
             self.params['axis']) * EXT_FORCE
-        self.lbf = self.lb_class(**local_lb_params)
+        self.lbf = self.lb_class(**local_lb_params, **self.lb_params)
         self.system.actors.add(self.lbf)
 
         cylinder_shape = espressomd.shapes.Cylinder(
             center=self.system.box_l / 2.0, axis=self.params['axis'],
             direction=-1, radius=EFFECTIVE_RADIUS, length=BOX_L * 1.5)
-        cylinder = espressomd.lbboundaries.LBBoundary(shape=cylinder_shape)
-        self.system.lbboundaries.add(cylinder)
+        self.lbf.add_boundary_from_shape(cylinder_shape)
 
         # simulate until profile converges
         mid_indices = 3 * [int((BOX_L / AGRID) / 2)]
         diff = float("inf")
         old_val = self.lbf[mid_indices].velocity[2]
-        while diff > 0.001:
-            self.system.integrator.run(1)
+        while diff > 1E-5:
+            self.system.integrator.run(5)
             new_val = self.lbf[mid_indices].velocity[
                 np.nonzero(self.params['axis'])[0]]
             diff = abs(new_val - old_val)
@@ -143,8 +140,10 @@ def compare_to_analytical(self):
             positions[1:-1] - BOX_L / 2.0,
             EFFECTIVE_RADIUS,
             EXT_FORCE,
-            VISC * DENS)
-        np.testing.assert_allclose(v_measured, v_expected, atol=0.02, rtol=0.)
+            KINEMATIC_VISC * DENS)
+        f_half_correction = 0.5 * self.system.time_step * EXT_FORCE * AGRID**3 / DENS
+        np.testing.assert_allclose(v_measured + f_half_correction,
+                                   v_expected, atol=0.01, rtol=0.)
 
     def check_observable(self):
         if self.params['axis'] == [1, 0, 0]:
@@ -168,13 +167,15 @@ def check_observable(self):
             r,
             EFFECTIVE_RADIUS,
             EXT_FORCE,
-            VISC * DENS)
+            KINEMATIC_VISC * DENS)
         v_r, v_phi, v_z = np.copy(obs.calculate()).reshape([-1, 3]).T
         # check velocity is zero for the radial and azimuthal components
         np.testing.assert_allclose(v_r, 0., atol=1e-4, rtol=0.)
         np.testing.assert_allclose(v_phi, 0., atol=1e-4, rtol=0.)
         # check velocity is correct in the axial component
-        np.testing.assert_allclose(v_z, v_expected, atol=2.6e-3, rtol=0.)
+        f_half_correction = 0.5 * self.system.time_step * EXT_FORCE * AGRID**3 / DENS
+        np.testing.assert_allclose(v_z + f_half_correction,
+                                   v_expected, atol=3.6e-3, rtol=0.)
 
     def test_x(self):
         self.params['axis'] = [1, 0, 0]
@@ -195,21 +196,22 @@ def test_z(self):
         self.check_observable()
 
 
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES'])
-class LBCPUPoiseuille(LBPoiseuilleCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBPoiseuilleWalberla(LBPoiseuilleCommon, ut.TestCase):
 
-    """Test for the CPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in double-precision."""
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES_GPU'])
-class LBGPUPoiseuille(LBPoiseuilleCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBPoiseuilleWalberlaSinglePrecision(LBPoiseuilleCommon, ut.TestCase):
 
-    """Test for the GPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-    lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
 
 
 if __name__ == '__main__':
diff --git a/testsuite/python/lb_pressure_tensor.py b/testsuite/python/lb_pressure_tensor.py
index 9d603bc44ca..2d4ebc27f71 100644
--- a/testsuite/python/lb_pressure_tensor.py
+++ b/testsuite/python/lb_pressure_tensor.py
@@ -22,38 +22,37 @@
 
 import espressomd
 import espressomd.lb
-import scipy.optimize
+#import scipy.optimize
 
-AGRID = .5
 N_CELLS = 12
-TAU = 0.002
-SEED = 1
-DENS = 2.4
-VISC = 1.8
-KT = 0.8
 
 
 class TestLBPressureTensor:
-    """Tests that the thermalized LB pressure auto correlation function
+    """
+    Test that the thermalized LB pressure auto correlation function
     is consistent with the chosen viscosity
     """
 
-    system = espressomd.System(box_l=[AGRID * N_CELLS] * 3)
-
-    system.time_step = TAU
+    params = {'tau': 0.002,
+              'agrid': 0.5,
+              'density': 2.4,
+              'kinematic_viscosity': 1.8,
+              'kT': 0.8,
+              'seed': 2}
+    system = espressomd.System(box_l=[params["agrid"] * N_CELLS] * 3)
+    system.time_step = params["tau"]
     system.cell_system.skin = 0
 
     def tearDown(self):
         self.system.actors.clear()
         self.system.thermostat.turn_off()
 
-    def sample_pressure_tensor(self):
+    def setUp(self):
         # Setup
         system = self.system
-        lb = self.lb_class(agrid=AGRID, dens=DENS, visc=VISC,
-                           tau=TAU, kT=KT, seed=SEED)
-        system.actors.add(lb)
-        system.thermostat.set_lb(LB_fluid=lb, seed=SEED + 1)
+        self.lbf = self.lb_class(**self.params, **self.lb_params)
+        system.actors.add(self.lbf)
+        system.thermostat.set_lb(LB_fluid=self.lbf, seed=42)
 
         # Warmup
         system.integrator.run(500)
@@ -64,13 +63,13 @@ def sample_pressure_tensor(self):
         self.p_node1 = np.zeros((self.steps, 3, 3))
 
         # Define two sample nodes, at the corner and in the center
-        node0 = lb[0, 0, 0]
-        node1 = lb[3 * [N_CELLS // 2]]
+        node0 = self.lbf[0, 0, 0]
+        node1 = self.lbf[3 * [N_CELLS // 2]]
 
         for i in range(self.steps):
             self.p_node0[i] = node0.pressure_tensor
             self.p_node1[i] = node1.pressure_tensor
-            self.p_global[i] = lb.pressure_tensor
+            self.p_global[i] = self.lbf.pressure_tensor
 
             system.integrator.run(2)
 
@@ -93,25 +92,27 @@ def test_averages(self):
         # Sound speed for D3Q19 in LB lattice units
         c_s_lb = np.sqrt(1 / 3)
         # And in MD units
-        c_s = c_s_lb * AGRID / TAU
+        c_s = c_s_lb * self.lbf.agrid / self.system.time_step
 
         # Test time average of pressure tensor against expectation ...
         # eq. (19) in ladd01a (https://doi.org/10.1023/A:1010414013942):
         # Pi_eq = rho c_s^2 I + rho u * u = rho c_s^2 I + 2 / V (m u^2 / 2),
         # with 3x3-identity matrix I . Equipartition: m u^2 / 2 = kT /2,
         # Pi_eq = rho c_s^2 I + kT / V
-        p_avg_expected = np.diag(3 * [DENS * c_s**2 + KT / AGRID**3])
+        p_avg_expected = np.diag(
+            3 * [self.lbf.density * c_s**2 + self.lbf.kT / self.lbf.agrid**3])
+        # TODO WALBERLA: remove tolerance adjustments in diagonal terms
 
         # ... globally,
         self.assert_allclose_matrix(
             np.mean(self.p_global, axis=0),
-            p_avg_expected, atol_diag=c_s_lb**2 / 5, atol_offdiag=c_s_lb**2 / 9)
+            p_avg_expected, atol_diag=c_s_lb**2 * 2, atol_offdiag=c_s_lb**2 / 9)
 
         # ... for two nodes.
         for time_series in [self.p_node0, self.p_node1]:
             self.assert_allclose_matrix(
                 np.mean(time_series, axis=0),
-                p_avg_expected, atol_diag=c_s_lb**2 * 10, atol_offdiag=c_s_lb**2 * 6)
+                p_avg_expected, atol_diag=c_s_lb**2 * 250, atol_offdiag=c_s_lb**2 * 6)
 
         # Test that <sigma_[i!=j]> ~=0 and sigma_[ij]==sigma_[ji] ...
         tol_global = 4 / np.sqrt(self.steps)
@@ -141,27 +142,32 @@ def test_averages(self):
                 self.assertAlmostEqual(avg_ij, 0., delta=tol_node)
 
 
+@utx.skipIfMissingFeatures("WALBERLA")
 class TestLBPressureTensorCPU(TestLBPressureTensor, ut.TestCase):
 
-    def setUp(self):
-        self.lb_class = espressomd.lb.LBFluid
-        self.steps = 5000
-        self.sample_pressure_tensor()
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+    steps = 8000
 
 
+# TODO WALBERLA
+"""
+@utx.skipIfMissingFeatures("WALBERLA")
 @utx.skipIfMissingGPU()
 class TestLBPressureTensorGPU(TestLBPressureTensor, ut.TestCase):
 
-    def setUp(self):
-        self.lb_class = espressomd.lb.LBFluidGPU
-        self.steps = 50000
-        self.sample_pressure_tensor()
+    lb_class = espressomd.lb.LBFluidWalberlaGPU
+    lb_params = {"single_precision": True}
+    steps = 50000
 
     def test_gk_viscosity(self):
         # Check that stress auto correlation matches dynamic viscosity
         # eta = V/kT integral (stress acf), e.g., eq. (5) in Cui et. et al
         # (https://doi.org/10.1080/00268979609484542).
         # Cannot be run for CPU with sufficient statistics without CI timeout.
+        dyn_visc = self.params["kinematic_viscosity"] * self.params["density"]
+        tau = self.params["tau"]
+        kT = self.params["kT"]
         all_viscs = []
         for i in range(3):
             for j in range(i + 1, 3):
@@ -173,9 +179,9 @@ def test_gk_viscosity(self):
                 acf = tmp[len(tmp) // 2:] / self.steps
 
                 # integrate first part numerically, fit exponential to tail
-                t_max_fit = 50 * TAU
-                ts = np.arange(0, t_max_fit, 2 * TAU)
-                numeric_integral = np.trapz(acf[:len(ts)], dx=2 * TAU)
+                t_max_fit = 50 * tau
+                ts = np.arange(0, t_max_fit, 2 * tau)
+                numeric_integral = np.trapz(acf[:len(ts)], dx=2 * self.params["tau"])
 
                 # fit tail
                 def f(x, a, b): return a * np.exp(-b * x)
@@ -185,15 +191,16 @@ def f(x, a, b): return a * np.exp(-b * x)
 
                 integral = numeric_integral + tail
 
-                measured_visc = integral * self.system.volume() / KT
+                measured_visc = integral * self.system.volume() / kT
 
                 self.assertAlmostEqual(
-                    measured_visc, VISC * DENS, delta=VISC * DENS * .15)
+                    measured_visc, dyn_visc, delta=dyn_visc * .15)
                 all_viscs.append(measured_visc)
 
         # Check average over xy, xz and yz against tighter limit
         self.assertAlmostEqual(np.average(all_viscs),
-                               VISC * DENS, delta=VISC * DENS * .07)
+                               dyn_visc, delta=dyn_visc * .07)
+"""
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/lb_shear.py b/testsuite/python/lb_shear.py
index 67d737f5cd6..fcb95eb16c3 100644
--- a/testsuite/python/lb_shear.py
+++ b/testsuite/python/lb_shear.py
@@ -22,16 +22,8 @@
 import numpy as np
 
 import espressomd.lb
-import espressomd.lbboundaries
 import espressomd.shapes
 
-"""
-Check the lattice-Boltzmann lid-driven shear flow in a slab system
-by comparing to the analytical solution.
-
-"""
-
-
 AGRID = 0.6
 VISC = 5.2
 DENS = 2.3
@@ -44,8 +36,8 @@
 SHEAR_VELOCITY = 0.3
 
 LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': VISC,
+             'density': DENS,
+             'kinematic_viscosity': VISC,
              'tau': TIME_STEP
              }
 
@@ -61,7 +53,7 @@ def shear_flow(x, t, nu, v, h, k_max):
     t : :obj:`float`
         Time since start of the shearing.
     nu : :obj:`float`
-        Kinematic viscosity.
+        Kinematic kinematic_viscosity.
     v : :obj:`float`
         Shear rate.
     h : :obj:`float`
@@ -84,40 +76,47 @@ def shear_flow(x, t, nu, v, h, k_max):
 
 class LBShearCommon:
 
-    """Base class of the test that holds the test logic."""
+    """
+    Check the lattice-Boltzmann lid-driven shear flow in a slab system
+    by comparing to the analytical solution.
+    """
     system = espressomd.System(box_l=[H + 2. * AGRID, W, W])
     system.time_step = TIME_STEP
     system.cell_system.skin = 0.4 * AGRID
 
+    def setUp(self):
+        self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
+
+    def tearDown(self):
+        self.system.actors.clear()
+
     def check_profile(self, shear_plane_normal, shear_direction):
         """
         Integrate the LB fluid and regularly compare with
         the exact solution.
-
         """
-        self.system.lbboundaries.clear()
-        self.system.actors.clear()
+        self.tearDown()
         self.system.box_l = np.max(
             ((W, W, W), shear_plane_normal * (H + 2 * AGRID)), 0)
-
-        self.lbf = self.lb_class(**LB_PARAMS)
+        self.setUp()
         self.system.actors.add(self.lbf)
+        self.lbf.clear_boundaries()
 
         wall_shape1 = espressomd.shapes.Wall(
             normal=shear_plane_normal, dist=AGRID)
         wall_shape2 = espressomd.shapes.Wall(
             normal=-1.0 * shear_plane_normal, dist=-(H + AGRID))
-        wall1 = espressomd.lbboundaries.LBBoundary(
-            shape=wall_shape1, velocity=-.5 * SHEAR_VELOCITY * shear_direction)
-        wall2 = espressomd.lbboundaries.LBBoundary(
-            shape=wall_shape2, velocity=.5 * SHEAR_VELOCITY * shear_direction)
 
-        self.system.lbboundaries.add(wall1)
-        self.system.lbboundaries.add(wall2)
+        self.lbf.add_boundary_from_shape(
+            wall_shape1, velocity=-.5 * SHEAR_VELOCITY * shear_direction)
+        self.lbf.add_boundary_from_shape(
+            wall_shape2, velocity=.5 * SHEAR_VELOCITY * shear_direction)
 
         t0 = self.system.time
         sample_points = int(H / AGRID - 1)
 
+        # warmup
+        self.system.integrator.run(40)
         for _ in range(9):
             self.system.integrator.run(20)
 
@@ -149,22 +148,27 @@ def check_profile(self, shear_plane_normal, shear_direction):
         # defined as \sigma = -p 1 + \mu [\nabla * u + (\nabla * u)^T]
         # where 'p' is the static pressure, '\mu' is the dynamic viscosity,
         # '*' denotes the outer product and 'u' is the velocity field
+        # NOTE: the so called stress property of the fluid is actually the
+        # pressure tensor not the viscous stress tensor!
         shear_rate = SHEAR_VELOCITY / H
-        dynamic_viscosity = self.lbf.viscosity * self.lbf.density
+        dynamic_viscosity = self.lbf.kinematic_viscosity * DENS
         p_expected = p_eq * np.identity(3) - dynamic_viscosity * shear_rate * (
-            np.outer(shear_plane_normal, shear_direction) + np.transpose(np.outer(shear_plane_normal, shear_direction)))
-        for n in (2, 3, 4), (3, 4, 2), (5, 4, 3):
+            np.outer(shear_plane_normal, shear_direction) +
+            np.transpose(np.outer(shear_plane_normal, shear_direction)))
+        for n in [(2, 3, 4), (3, 4, 2), (5, 4, 3)]:
             node_pressure_tensor = np.copy(
                 self.lbf[n[0], n[1], n[2]].pressure_tensor)
-            np.testing.assert_allclose(node_pressure_tensor,
-                                       p_expected, atol=1E-5, rtol=5E-3)
-
-        np.testing.assert_allclose(
-            np.copy(wall1.get_force()),
-            -np.copy(wall2.get_force()),
-            atol=1E-4)
-        np.testing.assert_allclose(np.dot(np.copy(wall1.get_force()), shear_direction),
-                                   SHEAR_VELOCITY / H * W**2 * dynamic_viscosity, atol=2E-4)
+            np.testing.assert_allclose(node_pressure_tensor, p_expected,
+                                       atol=self.atol, rtol=self.rtol)
+
+        # TODO: boundary forces not implemented yet
+#        np.testing.assert_allclose(
+#            np.copy(wall1.get_force()),
+#            -np.copy(wall2.get_force()),
+#            atol=1E-4)
+#        np.testing.assert_allclose(
+#            np.dot(np.copy(wall1.get_force()), shear_direction),
+#            SHEAR_VELOCITY / H * W**2 * dynamic_viscosity, atol=2E-4)
 
     def test(self):
         x = np.array((1, 0, 0), dtype=int)
@@ -178,23 +182,26 @@ def test(self):
         self.check_profile(y, -z)
 
 
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES'])
-class LBCPUShear(ut.TestCase, LBShearCommon):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBShearWalberla(LBShearCommon, ut.TestCase):
 
-    """Test for the CPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in double-precision."""
 
-    def setUp(self):
-        self.lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+    atol = 5e-5
+    rtol = 5e-4
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES_GPU'])
-class LBGPUShear(ut.TestCase, LBShearCommon):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBShearWalberlaSinglePrecision(LBShearCommon, ut.TestCase):
 
-    """Test for the GPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-    def setUp(self):
-        self.lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    atol = 5e-5
+    rtol = 5e-3
 
 
 if __name__ == '__main__':
diff --git a/testsuite/python/lb_slice.py b/testsuite/python/lb_slice.py
index f1ce1985738..48b52a7a150 100644
--- a/testsuite/python/lb_slice.py
+++ b/testsuite/python/lb_slice.py
@@ -19,11 +19,12 @@
 
 import espressomd.lb
 import unittest as ut
+import unittest_decorators as utx
 import numpy as np
 import itertools
 
 
-class LBSliceTest(ut.TestCase):
+class LBTest:
 
     """This simple test first writes random numbers and then reads them
     to same slices of LB nodes and compares if the results are the same,
@@ -31,48 +32,50 @@ class LBSliceTest(ut.TestCase):
     """
 
     system = espressomd.System(box_l=[10.0, 10.0, 10.0])
-    system.time_step = .01
+    system.time_step = 0.01
     system.cell_system.skin = 0.1
     np.random.seed(seed=42)
 
-    @classmethod
-    def setUpClass(cls):
-        cls.lb_fluid = espressomd.lb.LBFluid(
-            agrid=1.0, dens=1., visc=1., tau=0.01)
-        cls.system.actors.add(cls.lb_fluid)
+    def setUp(self):
+        self.lb_fluid = self.lb_class(
+            agrid=1., density=1., kinematic_viscosity=1.,
+            tau=self.system.time_step, **self.lb_params)
+        self.system.actors.add(self.lb_fluid)
+
+    def tearDown(self):
+        self.system.actors.clear()
 
     def test_slicing(self):
         lb_fluid = self.lb_fluid
 
         # array locked
-        array = lb_fluid[1:-1:2, 5, 3:6:2].velocity
+        array = lb_fluid[1:-1:1, 5, 3:6].velocity
         with self.assertRaisesRegex(ValueError, "ESPResSo array properties return non-writable arrays"):
             array[0, 0, 0, 1] = 5.
 
+        # density broadcast (with type conversion from int to double)
+        lb_fluid[:, :, 0].density = 2
+        np.testing.assert_array_almost_equal(
+            np.copy(lb_fluid[:, :, 0].density), 2.)
+
         # velocity on test slice [:-1, :-1, -1]
         input_vel = np.random.rand(9, 9, 9, 3)
         lb_fluid[:-1, :-1, :-1].velocity = input_vel
         output_vel = lb_fluid[:-1, :-1, :-1].velocity
         np.testing.assert_array_almost_equal(input_vel, np.copy(output_vel))
 
-        with self.assertRaisesRegex(ValueError, r"Input-dimensions of velocity array \(9, 9, 9, 2\) does not match slice dimensions \(9, 9, 9, 3\)"):
+        with self.assertRaisesRegex(ValueError, r"Input-dimensions of 'velocity' array \(9, 9, 9, 2\) does not match slice dimensions \(9, 9, 9, 3\)"):
             lb_fluid[:-1, :-1, :-1].velocity = input_vel[:, :, :, :2]
 
-        # velocity broadcast
+        # velocity broadcast (with type conversion from int to double)
         lb_fluid[:, :, 0].velocity = [1, 2, 3]
         np.testing.assert_array_almost_equal(
-            np.copy(lb_fluid[:, :, 0].velocity), 10 * [10 * [[[1, 2, 3]]]])
+            np.copy(lb_fluid[:, :, 0].velocity), 10 * [10 * [[1, 2, 3]]])
 
-        # density on test slice [1:-1:2, 5, 3:6:2]
-        input_dens = np.random.rand(4, 1, 2)
-        lb_fluid[1:-1:2, 5, 3:6:2].density = input_dens
-        output_dens = lb_fluid[1:-1:2, 5, 3:6:2].density
-        np.testing.assert_array_almost_equal(input_dens, np.copy(output_dens))
-
-        # density broadcast
-        lb_fluid[:, :, 0].density = 1.2
-        np.testing.assert_array_almost_equal(
-            np.copy(lb_fluid[:, :, 0].density), 1.2)
+        input_dens = np.random.rand(8, 3) + 1.
+        lb_fluid[1:-1, 5, 3:6].density = input_dens
+        output_dens = lb_fluid[1:-1, 5, 3:6].density
+        np.testing.assert_array_almost_equal(np.copy(output_dens), input_dens)
 
         # population on test slice [:, :, :]
         input_pop = np.random.rand(10, 10, 10, 19)
@@ -80,66 +83,102 @@ def test_slicing(self):
         output_pop = lb_fluid[:, :, :].population
         np.testing.assert_array_almost_equal(input_pop, np.copy(output_pop))
 
-        with self.assertRaisesRegex(ValueError, r"Input-dimensions of population array \(10, 10, 10, 5\) does not match slice dimensions \(10, 10, 10, 19\)"):
+        with self.assertRaisesRegex(ValueError, r"Input-dimensions of 'population' array \(10, 10, 10, 5\) does not match slice dimensions \(10, 10, 10, 19\)"):
             lb_fluid[:, :, :].population = input_pop[:, :, :, :5]
 
         # pressure tensor on test slice [3, 6, 2:5]
         output_pressure_shape = lb_fluid[3, 6, 2:5].pressure_tensor.shape
-        should_pressure_shape = (1, 1, 3, 3, 3)
-        np.testing.assert_array_almost_equal(
+        should_pressure_shape = (3, 3, 3)
+        np.testing.assert_array_equal(
             output_pressure_shape, should_pressure_shape)
 
-        with self.assertRaises(NotImplementedError):
+        with self.assertRaisesRegex(RuntimeError, "Property 'pressure_tensor' is read-only"):
             lb_fluid[3, 6, 2:5].pressure_tensor = np.zeros(
                 should_pressure_shape)
 
-        # pressure tensor neq on test slice [3, 6, 2:10]
-        output_pressure_neq_shape = lb_fluid[3:5,
-                                             6:7,
-                                             2:10].pressure_tensor_neq.shape
-        should_pressure_neq_shape = (2, 1, 8, 3, 3)
-        np.testing.assert_array_almost_equal(
-            output_pressure_neq_shape, should_pressure_neq_shape)
-
-        with self.assertRaises(NotImplementedError):
-            lb_fluid[3:5, 6:7, 2:10].pressure_tensor_neq = np.zeros(
-                output_pressure_neq_shape)
-
-        # index on test slice [1, 1:5, 6:]
-        output_index_shape = lb_fluid[1, 1:5, 6:].index.shape
-        should_index_shape = (1, 4, 4, 3)
-        np.testing.assert_array_almost_equal(
-            output_index_shape, should_index_shape)
-
-        with self.assertRaisesRegex(AttributeError, "attribute 'index' of 'espressomd.lb.LBFluidRoutines' objects is not writable"):
-            lb_fluid[1, 1:5, 6:].index = np.zeros(output_index_shape)
-
-        # boundary on test slice [1:, 1:, 1:]
-        if espressomd.has_features('LB_BOUNDARIES'):
-            output_boundary_shape = lb_fluid[1:, 1:, 1:].boundary.shape
-            should_boundary_shape = (9, 9, 9)
+        # boundary velocity on test slice [1:, 1:, 1:]
+        output_boundary_shape = lb_fluid[1:, 1:, 1:].boundary.shape
+        should_boundary_shape = (9, 9, 9)
+        np.testing.assert_array_equal(
+            output_boundary_shape, should_boundary_shape)
+
+        with self.assertRaisesRegex(TypeError, "Parameter 'values' must be an array_like of VelocityBounceBack or None"):
+            lb_fluid[1:, 1:, 1:].boundary = np.zeros(should_boundary_shape)
+        with self.assertRaisesRegex(TypeError, "Parameter 'values' must be an array_like of VelocityBounceBack or None"):
+            lb_fluid[1:, 1:, 1:].boundary = np.array(
+                [None, [1, 2, 3]], dtype=object)
+
+        vbb_ref = espressomd.lb.VelocityBounceBack([1e-6, 2e-6, 3e-6])
+        lb_fluid[1:2, 1:, 0].boundary = vbb_ref
+        lb_fluid[1:2, 2:, 0].boundary = None
+        for vbb in lb_fluid[1:2, 1, 0].boundary.flatten():
             np.testing.assert_array_almost_equal(
-                output_boundary_shape, should_boundary_shape)
+                vbb.velocity, vbb_ref.velocity)
+        for vbb in lb_fluid[1:2, 2, 0:2].boundary.flatten():
+            self.assertIsNone(vbb)
+
+        # is_boundary on test slice [1:, 1:, 1:]
+        output_boundary_shape = lb_fluid[1:, 1:, 1:].is_boundary.shape
+        should_boundary_shape = (9, 9, 9)
+        np.testing.assert_array_equal(
+            output_boundary_shape, should_boundary_shape)
+
+        with self.assertRaisesRegex(RuntimeError, "Property 'is_boundary' is read-only"):
+            lb_fluid[1:, 1:, 1:].is_boundary = np.zeros(should_boundary_shape)
+
+        # last_applied_force on test slice [:-1, :-1, -1]
+        input_laf = np.random.rand(9, 9, 9, 3)
+        lb_fluid[:-1, :-1, :-1].last_applied_force = input_laf
+        output_laf = lb_fluid[:-1, :-1, :-1].last_applied_force
+        np.testing.assert_array_almost_equal(input_laf, np.copy(output_laf))
+
+        # last_applied_force broadcast
+        lb_fluid[:, :, 0].last_applied_force = [1, 2, 3]
+        np.testing.assert_array_almost_equal(
+            np.copy(lb_fluid[:, :, 0].last_applied_force),
+            10 * [10 * [[1, 2, 3]]])
 
-            with self.assertRaises(NotImplementedError):
-                lb_fluid[1:, 1:, 1:].boundary = np.zeros(
-                    should_boundary_shape)
+        # access out of bounds
+        i = lb_fluid.shape[2] + 10
+        lb_slice = lb_fluid[1, 2, i:i + 10]
+        self.assertEqual(lb_slice.density.shape, (0,))
+        self.assertIsInstance(lb_slice.density.dtype, object)
+        with self.assertRaisesRegex(AttributeError, "Cannot set properties of an empty 'LBFluidSliceWalberla' object"):
+            lb_slice.density = [1., 2., 3.]
 
     def test_iterator(self):
         lbslice_handle = self.lb_fluid[:, :, :]
         # arrange node indices using class methods
-        i_handle, j_handle, k_handle = lbslice_handle.x_indices, lbslice_handle.y_indices, lbslice_handle.z_indices
-        arranged_indices = [
-            (x, y, z) for (
-                x, y, z) in itertools.product(
-                i_handle, j_handle, k_handle)]
+        lb_indices = [np.arange(self.lb_fluid.shape[i]) for i in range(3)]
+        arranged_indices = list(itertools.product(*lb_indices))
         # arrange node indices using __iter__() enforced conversion
-        iterator_indices = [x.index for x in lbslice_handle]
+        iterator_indices = [node.index for node in lbslice_handle]
         # check the results correspond pairwise. order is implicitly preserved.
-        # uses __eq()__ method form LBFluidRoutines()
+        np.testing.assert_array_equal(arranged_indices, iterator_indices)
+        # use __eq()__ method form LBFluidRoutines()
         assert all([x == y for x, y in zip(
             arranged_indices, iterator_indices)])
 
 
+@utx.skipIfMissingFeatures("WALBERLA")
+class LBTestWalberlaDoublePrecisionCPU(LBTest, ut.TestCase):
+
+    """Test for the Walberla implementation of the LB in single-precision."""
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_lattice_class = espressomd.lb.LatticeWalberla
+    lb_params = {"single_precision": False}
+
+
+@utx.skipIfMissingFeatures("WALBERLA")
+class LBTestWalberlaSinglePrecisionCPU(LBTest, ut.TestCase):
+
+    """Test for the Walberla implementation of the LB in single-precision."""
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_lattice_class = espressomd.lb.LatticeWalberla
+    lb_params = {"single_precision": True}
+
+
 if __name__ == "__main__":
     ut.main()
diff --git a/testsuite/python/lb_stats.py b/testsuite/python/lb_stats.py
index 42f04f1df6d..1687cae6220 100644
--- a/testsuite/python/lb_stats.py
+++ b/testsuite/python/lb_stats.py
@@ -36,15 +36,15 @@ class TestLB:
     params = {'tau': 0.01,
               'agrid': 0.5,
               'dens': 0.85,
-              'viscosity': 3.0,
+              'kinematic_viscosity': 3.0,
               'friction': 2.0,
               'temp': 1.5,
               'gamma': 1.5}
 
     system.periodicity = [True, True, True]
     system.time_step = 0.01
-    system.cell_system.skin = 1.0
-    dof = 3.
+    system.cell_system.skin = 0
+    n_nodes = system.cell_system.get_state()["n_nodes"]
 
     def tearDown(self):
         self.system.actors.clear()
@@ -65,8 +65,8 @@ def test_mass_momentum_thermostat(self):
 
         self.lbf = self.lb_class(
             kT=self.params['temp'],
-            visc=self.params['viscosity'],
-            dens=self.params['dens'],
+            kinematic_viscosity=self.params['kinematic_viscosity'],
+            density=self.params['dens'],
             agrid=self.params['agrid'],
             tau=self.system.time_step,
             ext_force_density=[0, 0, 0], seed=4)
@@ -100,10 +100,10 @@ def test_mass_momentum_thermostat(self):
             fluid_temp = 0.0
 
             # Go over lb lattice
-            for lb_node in self.lbf.nodes():
-                dens = lb_node.density
-                fluid_mass += dens
-                fluid_temp += np.sum(np.copy(lb_node.velocity)**2) * dens
+            nodes_dens = self.lbf[:, :, :].density
+            nodes_vel = np.sum(np.square(self.lbf[:, :, :].velocity), axis=3)
+            fluid_mass += np.sum(nodes_dens)
+            fluid_temp += np.sum(np.multiply(nodes_dens, nodes_vel))
 
             # Normalize
             fluid_mass /= np.product(self.lbf.shape)
@@ -111,23 +111,19 @@ def test_mass_momentum_thermostat(self):
                 3. * np.product(self.lbf.shape)**2)
 
             # check mass conversation
-            self.assertAlmostEqual(fluid_mass, self.params["dens"],
-                                   delta=self.params["mass_prec_per_node"])
+            self.assertAlmostEqual(fluid_mass, self.params["dens"], delta=1E-9)
 
             # check momentum conservation
-            # NOTE: this particle momentum prediction is due to the missing f/2 part in the
-            #       LB fluid.
-            particle_momentum = np.sum(
-                [p.mass * p.v + 0.5 * p.f * self.system.time_step for p in self.system.part], axis=0)
-            fluid_momentum = self.system.analysis.linear_momentum(
-                include_particles=False, include_lbfluid=True)
-            np.testing.assert_allclose(
-                particle_momentum + fluid_momentum, self.tot_mom,
-                atol=self.params['mom_prec'])
-
-            # Calc particle temperature
-            e = self.system.analysis.energy()
-            temp_particle = 2.0 / self.dof * e["kinetic"] / self.n_col_part
+            momentum = self.system.analysis.linear_momentum()
+            f_2_correction = np.sum(
+                self.system.part.all().f,
+                axis=0) * self.system.time_step
+
+            np.testing.assert_allclose(momentum + f_2_correction, self.tot_mom,
+                                       atol=1E-10)
+
+            temp_particle = np.average(
+                [np.average(p.mass * p.v**2) for p in self.system.part])
 
             # Update lists
             all_temp_particle.append(temp_particle)
@@ -138,7 +134,7 @@ def test_mass_momentum_thermostat(self):
         #   scale=np.std(all_temp_particle,ddof=1))[1] - self.params["temp"]
         # temp_prec_fluid = scipy.stats.norm.interval(0.95, loc=self.params["temp"],
         #   scale=np.std(all_temp_fluid,ddof=1))[1] -self.params["temp"]
-        temp_prec_particle = 0.06 * self.params["temp"]
+        temp_prec_particle = 0.05 * self.params["temp"]
         temp_prec_fluid = 0.05 * self.params["temp"]
 
         self.assertAlmostEqual(
@@ -147,58 +143,47 @@ def test_mass_momentum_thermostat(self):
             np.mean(all_temp_particle), self.params["temp"], delta=temp_prec_particle)
 
 
-class TestRegularLBCPU(TestLB, ut.TestCase):
+@ut.skipIf(TestLB.n_nodes == 1,
+           "LB with regular decomposition already tested with 2 MPI ranks")
+@utx.skipIfMissingFeatures("WALBERLA")
+class TestRegularLBWalberla(TestLB, ut.TestCase):
 
-    def setUp(self):
-        self.system.cell_system.set_regular_decomposition()
-        self.lb_class = espressomd.lb.LBFluid
-        self.params.update({"mom_prec": 1E-9, "mass_prec_per_node": 5E-8})
+    """Test for the Walberla implementation of the LB in double-precision."""
 
-
-@utx.skipIfMissingGPU()
-class TestRegularLBGPU(TestLB, ut.TestCase):
+    lb_class = espressomd.lb.LBFluidWalberla
 
     def setUp(self):
         self.system.cell_system.set_regular_decomposition()
-        self.lb_class = espressomd.lb.LBFluidGPU
-        self.params.update({"mom_prec": 1E-3, "mass_prec_per_node": 1E-5})
-
-
-class TestNSquareLBCPU(TestLB, ut.TestCase):
 
-    def setUp(self):
-        self.system.cell_system.set_hybrid_decomposition(
-            n_square_types={0}, cutoff_regular=0)
-        self.lb_class = espressomd.lb.LBFluid
-        self.params.update({"mom_prec": 1E-9, "mass_prec_per_node": 5E-8})
 
+@utx.skipIfMissingFeatures("WALBERLA")
+class TestNSquareLBWalberla(TestLB, ut.TestCase):
 
-@utx.skipIfMissingGPU()
-class TestNSquareLBGPU(TestLB, ut.TestCase):
+    lb_class = espressomd.lb.LBFluidWalberla
 
     def setUp(self):
-        self.system.cell_system.set_hybrid_decomposition(
-            n_square_types={1}, cutoff_regular=0)
-        self.lb_class = espressomd.lb.LBFluidGPU
-        self.params.update({"mom_prec": 1E-3, "mass_prec_per_node": 1E-5})
+        self.system.cell_system.set_n_square()
 
 
-class TestHybrid0LBCPU(TestLB, ut.TestCase):
+@utx.skipIfMissingFeatures("WALBERLA")
+class TestHybrid0LBWalberla(TestLB, ut.TestCase):
+
+    lb_class = espressomd.lb.LBFluidWalberla
 
     def setUp(self):
         self.system.cell_system.set_hybrid_decomposition(
             n_square_types={0}, cutoff_regular=0)
-        self.lb_class = espressomd.lb.LBFluid
         self.params.update({"mom_prec": 1E-9, "mass_prec_per_node": 5E-8})
 
 
-@utx.skipIfMissingGPU()
-class TestHybrid1LBGPU(TestLB, ut.TestCase):
+@utx.skipIfMissingFeatures("WALBERLA")
+class TestHybrid1LBWalberla(TestLB, ut.TestCase):
+
+    lb_class = espressomd.lb.LBFluidWalberla
 
     def setUp(self):
         self.system.cell_system.set_hybrid_decomposition(
             n_square_types={1}, cutoff_regular=0)
-        self.lb_class = espressomd.lb.LBFluidGPU
         self.params.update({"mom_prec": 1E-3, "mass_prec_per_node": 1E-5})
 
 
diff --git a/testsuite/python/lb_stokes_sphere.py b/testsuite/python/lb_stokes_sphere.py
index a5bc8742b95..dc33a9a7bfe 100644
--- a/testsuite/python/lb_stokes_sphere.py
+++ b/testsuite/python/lb_stokes_sphere.py
@@ -18,7 +18,6 @@
 #
 
 import espressomd
-import espressomd.lbboundaries
 import espressomd.shapes
 import unittest as ut
 import unittest_decorators as utx
@@ -30,8 +29,8 @@
 KVISC = 6
 DENS = 2.3
 LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': KVISC,
+             'density': DENS,
+             'kinematic_viscosity': KVISC,
              'tau': TIME_STEP}
 # System setup
 radius = 7 * AGRID
@@ -60,36 +59,32 @@ class Stokes:
     system.cell_system.skin = 0.01
 
     def setUp(self):
-        self.lbf = self.lb_class(**LB_PARAMS)
+        self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
         self.system.actors.add(self.lbf)
         self.system.thermostat.set_lb(LB_fluid=self.lbf, gamma=1.0)
 
     def tearDown(self):
         self.system.actors.clear()
-        self.system.lbboundaries.clear()
         self.system.thermostat.turn_off()
 
     def test_stokes(self):
         # Setup walls
-        walls = [None] * 4
-        walls[0] = espressomd.lbboundaries.LBBoundary(shape=espressomd.shapes.Wall(
-            normal=[-1, 0, 0], dist=-(1 + box_width)), velocity=v)
-        walls[1] = espressomd.lbboundaries.LBBoundary(shape=espressomd.shapes.Wall(
-            normal=[1, 0, 0], dist=1), velocity=v)
-        walls[2] = espressomd.lbboundaries.LBBoundary(shape=espressomd.shapes.Wall(
-            normal=[0, -1, 0], dist=-(1 + box_width)), velocity=v)
-        walls[3] = espressomd.lbboundaries.LBBoundary(shape=espressomd.shapes.Wall(
-            normal=[0, 1, 0], dist=1), velocity=v)
-
-        for wall in walls:
-            self.system.lbboundaries.add(wall)
+        wall_shapes = [None] * 4
+        wall_shapes[0] = espressomd.shapes.Wall(
+            normal=[-1, 0, 0], dist=-(1 + box_width))
+        wall_shapes[1] = espressomd.shapes.Wall(normal=[1, 0, 0], dist=1)
+        wall_shapes[2] = espressomd.shapes.Wall(
+            normal=[0, -1, 0], dist=-(1 + box_width))
+        wall_shapes[3] = espressomd.shapes.Wall(normal=[0, 1, 0], dist=1)
+
+        for wall_shape in wall_shapes:
+            self.lbf.add_boundary_from_shape(wall_shape)
 
         # setup sphere without slip in the middle
-        sphere = espressomd.lbboundaries.LBBoundary(shape=espressomd.shapes.Sphere(
-            radius=radius, center=[real_width / 2] * 2 + [box_length / 2],
-            direction=1))
+        sphere_shape = espressomd.shapes.Sphere(
+            radius=radius, center=[real_width / 2] * 2 + [box_length / 2], direction=1)
 
-        self.system.lbboundaries.add(sphere)
+        self.lbf.add_boundary_from_shape(sphere_shape)
 
         def size(vector):
             tmp = 0
@@ -98,17 +93,17 @@ def size(vector):
             return np.sqrt(tmp)
 
         last_force = -1000.
-        dynamic_viscosity = self.lbf.viscosity * self.lbf.density
+        dynamic_viscosity = self.lbf.viscosity * DENS
         stokes_force = 6 * np.pi * dynamic_viscosity * radius * size(v)
         self.system.integrator.run(50)
         while True:
             self.system.integrator.run(3)
-            force = np.linalg.norm(sphere.get_force())
+            force = np.linalg.norm(self.lbf.boundary['sphere'].get_force())
             if np.abs(last_force - force) < 0.01 * stokes_force:
                 break
             last_force = force
 
-        force = np.copy(sphere.get_force())
+        force = np.copy(self.lbf.boundary['sphere'].get_force())
         np.testing.assert_allclose(
             force,
             [0, 0, stokes_force],
@@ -116,15 +111,22 @@ def size(vector):
             atol=stokes_force * 0.03)
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES_GPU', 'EXTERNAL_FORCES'])
-class LBGPUStokes(Stokes, ut.TestCase):
-    lb_class = espressomd.lb.LBFluidGPU
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class StokesWalberla(Stokes, ut.TestCase):
+
+    """Test for the Walberla implementation of the LB in double-precision."""
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class StokesWalberlaSinglePrecision(Stokes, ut.TestCase):
 
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES', 'EXTERNAL_FORCES'])
-class LBCPUStokes(Stokes, ut.TestCase):
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/lb_streaming.py b/testsuite/python/lb_streaming.py
index b63e7f57051..3281ffc1768 100644
--- a/testsuite/python/lb_streaming.py
+++ b/testsuite/python/lb_streaming.py
@@ -33,99 +33,129 @@
 AGRID = 0.5
 TAU = 0.1
 VISC = 1e18
-BULK_VISC = VISC
 VELOCITY_VECTORS = np.array([
     [0, 0, 0],
-    [1, 0, 0],
-    [-1, 0, 0],
     [0, 1, 0],
     [0, -1, 0],
+    [-1, 0, 0],
+    [1, 0, 0],
     [0, 0, 1],
     [0, 0, -1],
+    [-1, 1, 0],
     [1, 1, 0],
     [-1, -1, 0],
     [1, -1, 0],
-    [-1, 1, 0],
-    [1, 0, 1],
-    [-1, 0, -1],
-    [1, 0, -1],
-    [-1, 0, 1],
     [0, 1, 1],
-    [0, -1, -1],
+    [0, -1, 1],
+    [-1, 0, 1],
+    [1, 0, 1],
     [0, 1, -1],
-    [0, -1, 1]])
+    [0, -1, -1],
+    [-1, 0, -1],
+    [1, 0, -1]])
+# populations after streaming and relaxation using parameters omega_odd = 2
+# and omega_bulk = omega_even = omega_shear = 0
+REFERENCE_POPULATIONS = np.array([
+    1,
+    2 / 3,
+    4 + 1 / 3,
+    3 + 1 / 3,
+    5 + 2 / 3,
+    1 + 1 / 3,
+    11 + 2 / 3,
+    9,
+    9 + 2 / 3,
+    9 + 1 / 3,
+    10,
+    13,
+    14 + 1 / 3,
+    15 + 1 / 3,
+    16,
+    14 + 2 / 3,
+    16,
+    17,
+    17 + 2 / 3])
 LB_PARAMETERS = {
     'agrid': AGRID,
-    'visc': VISC,
-    'bulk_visc': BULK_VISC,
+    'kinematic_viscosity': VISC,
     'tau': TAU,
-    'dens': 1.0,
-    'gamma_odd': 1.0,
-    'gamma_even': 1.0
+    'density': 1.0,
 }
 
 
 class LBStreamingCommon:
 
     """
-    Check the streaming step of the LB fluid implementation by setting all populations
-    to zero except one. Relaxation is suppressed by choosing appropriate parameters.
+    Check the streaming and relaxation steps of the LB fluid implementation by
+    setting all populations to zero except for one cell.
 
     """
-    system = espressomd.System(box_l=[3.0] * 3)
-    system.cell_system.skin = 0.4 * AGRID
+    system = espressomd.System(box_l=[3., 2., 2.])
+    system.cell_system.skin = 0.1 * AGRID
     system.time_step = TAU
-    grid = np.array(system.box_l / AGRID, dtype=int)
 
     def setUp(self):
-        self.lbf = self.lb_class(**LB_PARAMETERS)
+        self.lbf = self.lb_class(**LB_PARAMETERS, **self.lb_params)
         self.system.actors.add(self.lbf)
 
     def tearDown(self):
         self.system.actors.clear()
 
-    def reset_fluid_populations(self):
-        """Set all populations to 0.0.
-
-        """
-        for i in itertools.product(range(self.grid[0]), range(
-                self.grid[1]), range(self.grid[2])):
-            self.lbf[i].population = np.zeros(19)
-
-    def set_fluid_populations(self, grid_index):
-        """Set the population of direction n_v of grid_index to n_v+1.
+    def test_population_streaming(self):
+        pop_default = np.zeros(19) + 1e-10
+        pop_source = np.arange(1, 20, dtype=float)
+        grid = np.array(self.system.box_l / AGRID, dtype=int)
 
-        """
-        pop = np.arange(1, 20)
-        self.lbf[grid_index].population = pop
+        # reset fluid populations
+        for i in itertools.product(
+                range(grid[0]), range(grid[1]), range(grid[2])):
+            self.lbf[i].population = pop_default
 
-    def test_population_streaming(self):
-        self.reset_fluid_populations()
+        # check streaming
         for grid_index in itertools.product(
-                range(self.grid[0]), range(self.grid[1]), range(self.grid[2])):
-            self.set_fluid_populations(grid_index)
+                range(grid[0]), range(grid[1]), range(grid[2])):
+            self.lbf[grid_index].population = pop_source
             self.system.integrator.run(1)
             for n_v in range(19):
                 target_node_index = np.mod(
-                    grid_index + VELOCITY_VECTORS[n_v], self.grid)
-                np.testing.assert_almost_equal(
-                    self.lbf[target_node_index].population[n_v], float(n_v + 1))
-                self.lbf[target_node_index].population = np.zeros(19)
+                    grid_index + VELOCITY_VECTORS[n_v], grid)
+                np.testing.assert_allclose(
+                    self.lbf[target_node_index].population[n_v],
+                    REFERENCE_POPULATIONS[n_v], rtol=self.rtol,
+                    err_msg=f"streaming is incorrect in direction {VELOCITY_VECTORS[n_v]} from cell at {grid_index}")
+                self.lbf[target_node_index].population = pop_default
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBStreamingWalberla(LBStreamingCommon, ut.TestCase):
+
+    """Test for the Walberla implementation of the LB in double-precision."""
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+    rtol = 1e-10
 
 
-class LBCPU(LBStreamingCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBStreamingWalberlaSinglePrecision(LBStreamingCommon, ut.TestCase):
 
-    """Test for the CPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    rtol = 1e-5
 
 
-@utx.skipIfMissingGPU()
-class LBGPU(LBStreamingCommon, ut.TestCase):
+# TODO WALBERLA
+# @utx.skipIfMissingGPU()
+# @utx.skipIfMissingFeatures(["WALBERLA"])
+# class LBGPU(LBStreamingCommon, ut.TestCase):
 
-    """Test for the GPU implementation of the LB."""
+#    """Test for the Walberla implementation of the LB on the GPU."""
 
-    lb_class = espressomd.lb.LBFluidGPU
+#    lb_class = espressomd.lb.LBFluidWalberlaGPU
+#    lb_params = {}
+#    rtol = 1e-7
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/lb_switch.py b/testsuite/python/lb_switch.py
deleted file mode 100644
index 728dd908f57..00000000000
--- a/testsuite/python/lb_switch.py
+++ /dev/null
@@ -1,93 +0,0 @@
-#
-# Copyright (C) 2010-2022 The ESPResSo project
-#
-# This file is part of ESPResSo.
-#
-# ESPResSo is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# ESPResSo is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-import unittest as ut
-import unittest_decorators as utx
-import numpy as np
-import espressomd
-import espressomd.lb
-import itertools
-
-
-@utx.skipIfMissingFeatures(["EXTERNAL_FORCES"])
-class LBSwitchActor(ut.TestCase):
-    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
-
-    system.time_step = 0.01
-    system.cell_system.skin = 0.1
-
-    def switch_test(self, GPU=False):
-        system = self.system
-        system.actors.clear()
-        p = system.part.add(pos=[1., 1., 1.], v=[1., 0, 0], fix=3 * [True])
-
-        lb_fluid_params = {'agrid': 2.0, 'dens': 1.0, 'visc': 1.0, 'tau': 0.03}
-        friction_1 = 1.5
-        friction_2 = 4.0
-
-        if GPU:
-            lb_fluid_1 = espressomd.lb.LBFluidGPU(**lb_fluid_params)
-            lb_fluid_2 = espressomd.lb.LBFluidGPU(**lb_fluid_params)
-        else:
-            lb_fluid_1 = espressomd.lb.LBFluid(**lb_fluid_params)
-            lb_fluid_2 = espressomd.lb.LBFluid(**lb_fluid_params)
-
-        system.actors.add(lb_fluid_1)
-        system.thermostat.set_lb(LB_fluid=lb_fluid_1, gamma=friction_1)
-
-        system.integrator.run(1)
-
-        force_on_part = -friction_1 * np.copy(p.v)
-
-        np.testing.assert_allclose(np.copy(p.f), force_on_part)
-
-        system.integrator.run(100)
-        self.assertNotAlmostEqual(lb_fluid_1[3, 3, 3].velocity[0], 0.0)
-
-        system.actors.remove(lb_fluid_1)
-
-        p.v = [1, 0, 0]
-        system.integrator.run(0)
-
-        np.testing.assert_allclose(np.copy(p.f), 0.0)
-
-        system.actors.add(lb_fluid_2)
-        system.thermostat.set_lb(LB_fluid=lb_fluid_2, gamma=friction_2)
-
-        for pid in itertools.product(range(5), repeat=3):
-            np.testing.assert_allclose(
-                np.copy(lb_fluid_2[pid].velocity), np.zeros((3,)))
-
-        p.v = [1, 0, 0]
-
-        system.integrator.run(1)
-
-        np.testing.assert_allclose(
-            np.copy(p.f), [-friction_2, 0.0, 0.0])
-
-    def test_CPU_LB(self):
-        self.switch_test()
-
-    @utx.skipIfMissingGPU()
-    def test_GPU_LB(self):
-        self.switch_test(GPU=True)
-
-
-if __name__ == "__main__":
-    ut.main()
diff --git a/testsuite/python/lb_thermo_virtual.py b/testsuite/python/lb_thermo_virtual.py
index d4c5fad07ee..5ce1474bd2a 100644
--- a/testsuite/python/lb_thermo_virtual.py
+++ b/testsuite/python/lb_thermo_virtual.py
@@ -18,7 +18,6 @@
 #
 
 import espressomd.lb
-import espressomd.lbboundaries
 import espressomd.shapes
 import unittest as ut
 import unittest_decorators as utx
@@ -48,7 +47,7 @@ def tearDown(self):
     def check_virtual(self, fluid_class):
         s = self.system
         lb_fluid = fluid_class(
-            agrid=1.0, dens=1.0, visc=1.0, tau=1.0, kT=0.0)
+            agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=1.0)
         s.actors.add(lb_fluid)
 
         virtual = s.part.add(pos=[0, 0, 0], virtual=True, v=[1, 0, 0])
@@ -71,7 +70,7 @@ def check_virtual(self, fluid_class):
 
         s.actors.remove(lb_fluid)
         lb_fluid = fluid_class(
-            agrid=1.0, dens=1.0, visc=1.0, tau=1.0)
+            agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=1.0)
         s.actors.add(lb_fluid)
         s.thermostat.set_lb(LB_fluid=lb_fluid, gamma=1.0)
         virtual.pos = physical.pos
@@ -85,12 +84,9 @@ def check_virtual(self, fluid_class):
         np.testing.assert_almost_equal(np.copy(physical.f), [-1, 0, 0])
         np.testing.assert_almost_equal(np.copy(virtual.f), [-1, 0, 0])
 
-    def test_lb_cpu(self):
-        self.check_virtual(espressomd.lb.LBFluid)
-
-    @utx.skipIfMissingGPU()
-    def test_lb_gpu(self):
-        self.check_virtual(espressomd.lb.LBFluidGPU)
+    @utx.skipIfMissingFeatures(["WALBERLA"])
+    def test_lb_walberla(self):
+        self.check_virtual(espressomd.lb.LBFluidWalberla)
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/lb_thermostat.py b/testsuite/python/lb_thermostat.py
index 3a1965ff09e..33b3df81e00 100644
--- a/testsuite/python/lb_thermostat.py
+++ b/testsuite/python/lb_thermostat.py
@@ -29,14 +29,16 @@
 distribution.
 """
 
-KT = 0.25
-AGRID = 2.5
-VISC = 2.7
+KT = 0.9
+AGRID = 0.8
+node_volume = AGRID**3
+VISC = 6
 DENS = 1.7
-TIME_STEP = 0.05
+TIME_STEP = 0.005
+GAMMA = 2
 LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': VISC,
+             'density': DENS,
+             'kinematic_viscosity': VISC,
              'tau': TIME_STEP,
              'kT': KT,
              'seed': 123}
@@ -46,50 +48,91 @@ class LBThermostatCommon(thermostats_common.ThermostatsCommon):
 
     """Check the LB thermostat."""
 
-    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
+    system = espressomd.System(box_l=[AGRID * 12] * 3)
     system.time_step = TIME_STEP
     system.cell_system.skin = 0.4 * AGRID
-    np.random.seed(42)
+    np.random.seed(41)
 
     def setUp(self):
-        self.lbf = self.lb_class(**LB_PARAMS)
+        self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
         self.system.actors.add(self.lbf)
+        self.system.thermostat.set_lb(LB_fluid=self.lbf, seed=5, gamma=GAMMA)
 
     def tearDown(self):
         self.system.actors.clear()
         self.system.thermostat.turn_off()
 
-    def test_velocity_distribution(self):
+    def get_lb_momentum(self, lbf):
+        nodes_den = lbf[:, :, :].density
+        nodes_vel = np.sum(np.square(lbf[:, :, :].velocity), axis=3)
+        return np.multiply(nodes_den, nodes_vel)
+
+    def test_fluid(self):
+        self.system.integrator.run(100)
+        fluid_temps = []
+        for _ in range(100):
+            lb_momentum = self.get_lb_momentum(self.lbf)
+            fluid_temps.append(np.average(lb_momentum) * node_volume)
+            self.system.integrator.run(3)
+
+        fluid_temp = np.average(fluid_temps) / 3
+        self.assertAlmostEqual(fluid_temp, KT, delta=0.05)
+
+    def test_with_particles(self):
         self.system.part.add(
             pos=np.random.random((100, 3)) * self.system.box_l)
-        self.system.thermostat.set_lb(LB_fluid=self.lbf, seed=5, gamma=5.0)
-        self.system.integrator.run(20)
+        self.system.integrator.run(120)
         N = len(self.system.part)
         loops = 250
-        v_stored = np.zeros((loops, N, 3))
+        v_particles = np.zeros((loops, N, 3))
+        fluid_temps = []
+
         for i in range(loops):
             self.system.integrator.run(3)
-            v_stored[i] = self.system.part.all().v
-        minmax = 5
+            if i % 10 == 0:
+                lb_momentum = self.get_lb_momentum(self.lbf)
+                fluid_temps.append(np.average(lb_momentum) * node_volume)
+            v_particles[i] = self.system.part.all().v
+        fluid_temp = np.average(fluid_temps) / 3.
+
+        np.testing.assert_allclose(np.average(v_particles), 0, atol=0.033)
+        np.testing.assert_allclose(np.var(v_particles), KT, atol=0.033)
+
+        minmax = 3
         n_bins = 7
-        error_tol = 0.01
+        error_tol = 0.016
         self.check_velocity_distribution(
-            v_stored.reshape((-1, 3)), minmax, n_bins, error_tol, KT)
+            v_particles.reshape((-1, 3)), minmax, n_bins, error_tol, KT)
 
+        np.testing.assert_allclose(fluid_temp, KT, atol=5e-3)
 
-class LBCPUThermostat(LBThermostatCommon, ut.TestCase):
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBWalberlaThermostat(LBThermostatCommon, ut.TestCase):
 
     """Test for the CPU implementation of the LB."""
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBWalberlaThermostatSinglePrecision(LBThermostatCommon, ut.TestCase):
+
+    """Test for the CPU implementation of the LB in single-precision."""
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
 
 
-@utx.skipIfMissingGPU()
-class LBGPUThermostat(LBThermostatCommon, ut.TestCase):
+# @utx.skipIfMissingGPU()
+# @utx.skipIfMissingFeatures(["WALBERLA"])
+# class LBWalberlaGPUThermostat(LBThermostatCommon, ut.TestCase):
 
-    """Test for the GPU implementation of the LB."""
+#    """Test for the GPU implementation of the LB."""
 
-    lb_class = espressomd.lb.LBFluidGPU
+#    lb_class = espressomd.lb.LBFluidWalberlaGPU
+#    lb_params = {"single_precision": True}
 
 
 if __name__ == '__main__':
diff --git a/testsuite/python/lb_vtk.py b/testsuite/python/lb_vtk.py
deleted file mode 100644
index f06265bc6b5..00000000000
--- a/testsuite/python/lb_vtk.py
+++ /dev/null
@@ -1,209 +0,0 @@
-#
-# Copyright (C) 2010-2022 The ESPResSo project
-#
-# This file is part of ESPResSo.
-#
-# ESPResSo is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# ESPResSo is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-import unittest as ut
-import unittest_decorators as utx
-
-import pathlib
-import tempfile
-import contextlib
-import numpy as np
-
-with contextlib.suppress(ImportError):
-    import vtk
-    import vtk.util.numpy_support
-
-import espressomd
-import espressomd.lb
-if espressomd.has_features('LB_BOUNDARIES'):
-    import espressomd.lbboundaries
-    import espressomd.shapes
-
-
-class TestLBWrite:
-    system = espressomd.System(box_l=[10, 11, 12])
-    system.time_step = 0.01
-    system.cell_system.skin = 0.4
-
-    def tearDown(self):
-        self.system.actors.clear()
-        self.system.thermostat.turn_off()
-
-    def set_lbf(self):
-        # setup LB system
-        lbf = self.lb_class(
-            kT=1, agrid=1.0, dens=1.0, visc=1.0, tau=0.1, seed=42,
-            ext_force_density=[0, 0.03, 0])
-        self.system.actors.add(lbf)
-        if espressomd.has_features('LB_BOUNDARIES'):
-            self.system.lbboundaries.add(espressomd.lbboundaries.LBBoundary(
-                shape=espressomd.shapes.Wall(normal=[1, 0, 0], dist=1.5)))
-            self.system.lbboundaries.add(espressomd.lbboundaries.LBBoundary(
-                shape=espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-8.5)))
-        return lbf
-
-    def parse_vtk(self, filepath, name, shape):
-        reader = vtk.vtkStructuredPointsReader()
-        reader.SetFileName(str(filepath))
-        reader.ReadAllVectorsOn()
-        reader.ReadAllScalarsOn()
-        reader.Update()
-
-        data = reader.GetOutput()
-        points = data.GetPointData()
-
-        return vtk.util.numpy_support.vtk_to_numpy(
-            points.GetArray(name)).reshape(shape, order='F')
-
-    def test_vtk(self):
-        '''
-        Check VTK files.
-        '''
-
-        with tempfile.TemporaryDirectory() as tmp_directory:
-            path_vtk_root = pathlib.Path(tmp_directory)
-            path_vtk_boundary = path_vtk_root / 'boundary.vtk'
-            path_vtk_velocity = path_vtk_root / 'velocity.vtk'
-            path_vtk_velocity_bb = path_vtk_root / 'velocity_bb.vtk'
-            path_vtk_skip = path_vtk_root / 'skip.vtk'
-            path_vtk_invalid = path_vtk_root / 'non_existent_folder' / 'file'
-
-            shape = [10, 11, 12]
-            lbf = self.set_lbf()
-            self.system.integrator.run(100)
-
-            # write VTK files
-            with self.assertRaises(RuntimeError):
-                lbf.write_vtk_velocity(str(path_vtk_invalid))
-            with self.assertRaises(RuntimeError):
-                lbf.write_vtk_boundary(str(path_vtk_invalid))
-            lbf.write_vtk_boundary(str(path_vtk_boundary))
-            lbf.write_vtk_velocity(str(path_vtk_velocity))
-            with self.assertRaises(ValueError):
-                lbf.write_vtk_velocity(str(path_vtk_skip), 3 * [0], None)
-            with self.assertRaises(ValueError):
-                lbf.write_vtk_velocity(str(path_vtk_skip), None, 3 * [0])
-            with self.assertRaises(RuntimeError):
-                lbf.write_vtk_velocity(str(path_vtk_skip), [-2, 1, 1], 3 * [1])
-            with self.assertRaises(RuntimeError):
-                lbf.write_vtk_velocity(str(path_vtk_skip), 3 * [0], [1, 2, 16])
-            with self.assertRaises(ValueError):
-                lbf.write_vtk_velocity(str(path_vtk_skip), [1, 1], 3 * [1])
-            with self.assertRaises(ValueError):
-                lbf.write_vtk_velocity(
-                    str(path_vtk_skip), 3 * [1], np.array([2, 3]))
-            bb1, bb2 = ([1, 2, 3], [9, 10, 11])
-            lbf.write_vtk_velocity(str(path_vtk_velocity_bb), bb1, bb2)
-
-            # check VTK values match node values
-            node_velocity = np.zeros(shape + [3])
-            node_boundary = np.zeros(shape, dtype=int)
-            for i in range(shape[0]):
-                for j in range(shape[1]):
-                    for k in range(shape[2]):
-                        node = lbf[i, j, k]
-                        node_velocity[i, j, k] = node.velocity
-                        node_boundary[i, j, k] = node.boundary
-            node_velocity_bb = node_velocity[bb1[0]:bb2[0],
-                                             bb1[1]:bb2[1],
-                                             bb1[2]:bb2[2]]
-
-            vtk_velocity = self.parse_vtk(path_vtk_velocity, 'velocity',
-                                          node_velocity.shape)
-            np.testing.assert_allclose(vtk_velocity, node_velocity, atol=5e-7)
-
-            vtk_velocity_bb = self.parse_vtk(path_vtk_velocity_bb, 'velocity',
-                                             node_velocity_bb.shape)
-            np.testing.assert_allclose(
-                vtk_velocity_bb, node_velocity_bb, atol=5e-7)
-
-            vtk_boundary = self.parse_vtk(path_vtk_boundary, 'boundary', shape)
-            np.testing.assert_equal(vtk_boundary, node_boundary.astype(int))
-
-    def test_print(self):
-        '''
-        Check data files.
-        '''
-
-        with tempfile.TemporaryDirectory() as tmp_directory:
-            path_dat_root = pathlib.Path(tmp_directory)
-            path_dat_boundary = path_dat_root / 'boundary.vtk'
-            path_dat_velocity = path_dat_root / 'velocity.vtk'
-            path_dat_invalid = path_dat_root / 'non_existent_folder' / 'file'
-
-            shape = [10, 11, 12]
-            lbf = self.set_lbf()
-            self.system.integrator.run(100)
-
-            # write data files
-            with self.assertRaises(RuntimeError):
-                lbf.write_velocity(str(path_dat_invalid))
-            with self.assertRaises(RuntimeError):
-                lbf.write_boundary(str(path_dat_invalid))
-            lbf.write_boundary(str(path_dat_boundary))
-            lbf.write_velocity(str(path_dat_velocity))
-
-            # check data values match node values
-            node_velocity = np.zeros(shape + [3])
-            node_boundary = np.zeros(shape, dtype=int)
-            for i in range(shape[0]):
-                for j in range(shape[1]):
-                    for k in range(shape[2]):
-                        node = lbf[i, j, k]
-                        node_velocity[i, j, k] = node.velocity
-                        node_boundary[i, j, k] = node.boundary
-
-            ref_coord = np.array([
-                np.tile(np.arange(shape[0]), shape[1] * shape[2]),
-                np.tile(np.repeat(np.arange(shape[1]), shape[0]), shape[2]),
-                np.repeat(np.arange(shape[2]), shape[0] * shape[1])]).T
-
-            dat_velocity = np.loadtxt(path_dat_velocity)
-            dat_coord = (dat_velocity[:, 0:3] - 0.5).astype(int)
-            np.testing.assert_equal(dat_coord, ref_coord)
-            dat_vel = dat_velocity[:, 3:]
-            ref_vel = np.swapaxes(node_velocity, 0, 2).reshape((-1, 3))
-            np.testing.assert_allclose(dat_vel, ref_vel, atol=5e-7)
-
-            dat_boundary = np.loadtxt(path_dat_boundary)
-            dat_coord = (dat_boundary[:, 0:3] - 0.5).astype(int)
-            np.testing.assert_equal(dat_coord, ref_coord)
-            dat_bound = dat_boundary[:, 3].astype(int)
-            ref_bound = np.swapaxes(node_boundary, 0, 2).reshape(-1)
-            if isinstance(lbf, espressomd.lb.LBFluid):
-                ref_bound = (ref_bound != 0).astype(int)
-            np.testing.assert_equal(dat_bound, ref_bound)
-
-
-@utx.skipIfMissingModules("vtk")
-class TestLBWriteCPU(TestLBWrite, ut.TestCase):
-
-    def setUp(self):
-        self.lb_class = espressomd.lb.LBFluid
-
-
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingModules("vtk")
-class TestLBWriteGPU(TestLBWrite, ut.TestCase):
-
-    def setUp(self):
-        self.lb_class = espressomd.lb.LBFluidGPU
-
-
-if __name__ == '__main__':
-    ut.main()
diff --git a/testsuite/python/lees_edwards.py b/testsuite/python/lees_edwards.py
index b6cb494383c..114afb8a4cf 100644
--- a/testsuite/python/lees_edwards.py
+++ b/testsuite/python/lees_edwards.py
@@ -299,20 +299,19 @@ def test_trajectory_reconstruction(self):
         pos = system.box_l - 0.01
         vel = np.array([0, 1, 0])
         p = system.part.add(pos=pos, v=vel)
-        old_x = p.pos_folded[0]
 
+        crossing_time = system.time
         system.integrator.run(1)
-        new_x = p.pos[0]
-
         np.testing.assert_almost_equal(
             p.lees_edwards_offset, 
-            -(new_x - old_x))
+            get_lin_pos_offset(crossing_time, **params_lin))
         np.testing.assert_almost_equal(p.lees_edwards_flag, -1)
 
         system.integrator.run(1)  # no boundary crossing
         np.testing.assert_almost_equal(
             p.lees_edwards_offset, 
-            -(new_x - old_x))  # unchanged
+            get_lin_pos_offset(crossing_time, **params_lin))
+
         np.testing.assert_almost_equal(p.lees_edwards_flag, 0)
 
     @utx.skipIfMissingFeatures("EXTERNAL_FORCES")
diff --git a/testsuite/python/linear_momentum_lb.py b/testsuite/python/linear_momentum_lb.py
index b1c75753c2f..5994ffc48d8 100644
--- a/testsuite/python/linear_momentum_lb.py
+++ b/testsuite/python/linear_momentum_lb.py
@@ -26,11 +26,6 @@
 
 np.random.seed(seed=40)
 
-"""
-Check linear momentum calculation for lattice-Boltzmann.
-
-"""
-
 
 AGRID = .5
 EXT_FORCE = .1
@@ -40,21 +35,24 @@
 BOX_L = 3.0
 
 LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': VISC,
+             'density': DENS,
+             'kinematic_viscosity': VISC,
              'tau': TIME_STEP,
              'ext_force_density': [0.1, 0.2, 0.3]}
 
 
-class LinearMomentumTest:
+class LBLinearMomentum:
+
+    """
+    Check linear momentum calculation for lattice-Boltzmann.
+    """
 
-    """Base class of the test that holds the test logic."""
     system = espressomd.System(box_l=[BOX_L] * 3)
     system.time_step = TIME_STEP
     system.cell_system.skin = 0.4 * AGRID
 
     def setUp(self):
-        self.lbf = self.lb_class(**LB_PARAMS)
+        self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
         self.system.actors.add(self.lbf)
 
     def tearDown(self):
@@ -63,7 +61,6 @@ def tearDown(self):
     def test(self):
         """
         Compare direct calculation of fluid momentum with analysis function.
-
         """
         # setup random node velocities
         for index in itertools.product(
@@ -79,23 +76,24 @@ def test(self):
             linear_momentum, analyze_linear_momentum, atol=self.atol)
 
 
-@utx.skipIfMissingFeatures(['EXTERNAL_FORCES'])
-class LBCPULinearMomentum(LinearMomentumTest, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBLinearMomentumWalberla(LBLinearMomentum, ut.TestCase):
 
-    """Test for the CPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in double-precision."""
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
     atol = 1e-10
 
 
-@utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(['LB_BOUNDARIES_GPU', 'EXTERNAL_FORCES'])
-class LBGPULinearMomentum(LinearMomentumTest, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class LBLinearMomentumWalberlaSinglePrecision(LBLinearMomentum, ut.TestCase):
 
-    """Test for the GPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-    lb_class = espressomd.lb.LBFluidGPU
-    atol = 1e-5
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+    atol = 5e-6
 
 
 if __name__ == '__main__':
diff --git a/testsuite/python/observable_cylindricalLB.py b/testsuite/python/observable_cylindricalLB.py
index 501fd46c027..dd9f31637d5 100644
--- a/testsuite/python/observable_cylindricalLB.py
+++ b/testsuite/python/observable_cylindricalLB.py
@@ -38,8 +38,8 @@ class CylindricalLBObservableCommon:
     system.cell_system.skin = 0.4
 
     lb_params = {'agrid': 1.,
-                 'dens': 1.2,
-                 'visc': 2.7,
+                 'density': 1.2,
+                 'kinematic_viscosity': 2.7,
                  'tau': 0.1,
                  }
     cyl_transform_params = espressomd.math.CylindricalTransformationParameters(
@@ -64,7 +64,7 @@ class CylindricalLBObservableCommon:
     v_z = 0.03
 
     def setUp(self):
-        self.lbf = self.lb_class(**self.lb_params)
+        self.lbf = self.lb_class(**self.lb_params, **self.lb_params_extra)
         self.system.actors.add(self.lbf)
 
     def tearDown(self):
@@ -243,11 +243,6 @@ def test_cylindrical_lb_profile_interface(self):
             np.testing.assert_array_almost_equal(np.copy(ctp.__getattr__(attr_name)),
                                                  np.copy(observable.transform_params.__getattr__(attr_name)))
 
-
-class CylindricalLBObservableCPU(CylindricalLBObservableCommon, ut.TestCase):
-
-    lb_class = espressomd.lb.LBFluid
-
     def test_cylindrical_lb_flux_density_obs(self):
         """
         Check that the result from the observable (in its own frame)
@@ -265,26 +260,40 @@ def test_cylindrical_lb_flux_density_obs(self):
 
         np.testing.assert_array_almost_equal(
             np_hist_binary *
-            self.lb_params['dens'] *
+            self.lb_params['density'] *
             self.v_r,
             core_hist_fl_r)
         np.testing.assert_array_almost_equal(
             np_hist_binary *
-            self.lb_params['dens'] *
+            self.lb_params['density'] *
             self.v_phi,
             core_hist_fl_phi)
         np.testing.assert_array_almost_equal(
             np_hist_binary *
-            self.lb_params['dens'] *
+            self.lb_params['density'] *
             self.v_z,
             core_hist_fl_z)
         self.check_edges(flux_obs, np_edges)
 
 
-@utx.skipIfMissingGPU()
-class CylindricalLBObservableGPU(CylindricalLBObservableCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class CylindricalLBObservableWalberla(
+        CylindricalLBObservableCommon, ut.TestCase):
+
+    """Test for the Walberla implementation of the LB in double-precision."""
+
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params_extra = {"single_precision": False}
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class CylindricalLBObservableWalberlaSinglePrecision(
+        CylindricalLBObservableWalberla, ut.TestCase):
+
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-    lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params_extra = {"single_precision": True}
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/observable_profileLB.py b/testsuite/python/observable_profileLB.py
index 6770cea8e50..ac810b24626 100644
--- a/testsuite/python/observable_profileLB.py
+++ b/testsuite/python/observable_profileLB.py
@@ -40,8 +40,8 @@
 VISC = .7
 DENS = 1.7
 LB_PARAMS = {'agrid': AGRID,
-             'dens': DENS,
-             'visc': VISC,
+             'density': DENS,
+             'kinematic_viscosity': VISC,
              'tau': TIME_STEP
              }
 
@@ -70,7 +70,7 @@ class ObservableProfileLBCommon:
     system.cell_system.skin = 0.4 * AGRID
 
     def setUp(self):
-        self.lbf = self.lb_class(**LB_PARAMS)
+        self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
         self.system.actors.add(self.lbf)
 
     def tearDown(self):
@@ -199,19 +199,23 @@ def test_lb_profile_interface(self):
         self.assertEqual(obs.sampling_offset_z, 15)
 
 
-class LBCPU(ObservableProfileLBCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class ObservableProfileWalberla(ObservableProfileLBCommon, ut.TestCase):
 
-    """Test for the CPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in double-precision."""
 
-    lb_class = espressomd.lb.LBFluid
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
 
 
-@utx.skipIfMissingGPU()
-class LBGPU(ObservableProfileLBCommon, ut.TestCase):
+@utx.skipIfMissingFeatures(["WALBERLA"])
+class ObservableProfileWalberlaSinglePrecision(
+        ObservableProfileLBCommon, ut.TestCase):
 
-    """Test for the GPU implementation of the LB."""
+    """Test for the Walberla implementation of the LB in single-precision."""
 
-    lb_class = espressomd.lb.LBFluidGPU
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/save_checkpoint.py b/testsuite/python/save_checkpoint.py
index e7d3d07e210..81edb0a7885 100644
--- a/testsuite/python/save_checkpoint.py
+++ b/testsuite/python/save_checkpoint.py
@@ -21,6 +21,7 @@
 import unittest_generator as utg
 import numpy as np
 import pathlib
+import tempfile
 
 import espressomd
 import espressomd.checkpointing
@@ -35,7 +36,7 @@
 import espressomd.observables
 import espressomd.io.writer
 import espressomd.lb
-import espressomd.lbboundaries
+import espressomd.electrokinetics
 import espressomd.shapes
 import espressomd.constraints
 import espressomd.bond_breakage
@@ -45,7 +46,7 @@
 modes = config.get_modes()
 
 # use a box with 3 different dimensions, unless DipolarP3M is used
-system = espressomd.System(box_l=[12.0, 14.0, 16.0])
+system = espressomd.System(box_l=[12.0, 8.0, 16.0])
 if 'DP3M' in modes:
     system.box_l = 3 * [float(np.max(system.box_l))]
 system.cell_system.skin = 0.1
@@ -56,6 +57,7 @@
 system.max_oif_objects = 5
 
 # create checkpoint folder
+config.cleanup_old_checkpoint()
 checkpoint = espressomd.checkpointing.Checkpoint(
     **config.get_checkpoint_params())
 path_cpt_root = pathlib.Path(checkpoint.checkpoint_dir)
@@ -71,27 +73,43 @@
     system.lees_edwards.set_boundary_conditions(
         shear_direction="x", shear_plane_normal="y", protocol=protocol)
 
-lbf_actor = None
-if 'LB.CPU' in modes:
-    lbf_actor = espressomd.lb.LBFluid
-    has_lbb = espressomd.has_features("LB_BOUNDARIES")
-elif 'LB.GPU' in modes and espressomd.gpu_available():
-    lbf_actor = espressomd.lb.LBFluidGPU
-    has_lbb = espressomd.has_features("LB_BOUNDARIES_GPU")
-if lbf_actor:
+lbf_class = None
+lb_lattice = None
+if espressomd.has_features('WALBERLA') and 'LB.WALBERLA' in modes:
+    lbf_class = espressomd.lb.LBFluidWalberla
+    lb_lattice = espressomd.lb.LatticeWalberla(agrid=2.0, n_ghost_layers=1)
+if lbf_class:
     lbf_cpt_mode = 0 if 'LB.ASCII' in modes else 1
-    lbf = lbf_actor(agrid=0.5, visc=1.3, dens=1.5, tau=0.01, gamma_odd=0.2,
-                    gamma_even=0.3)
-    system.actors.add(lbf)
-    if 'THERM.LB' in modes:
-        system.thermostat.set_lb(LB_fluid=lbf, seed=23, gamma=2.0)
-    if has_lbb:
-        system.lbboundaries.add(espressomd.lbboundaries.LBBoundary(
-            shape=espressomd.shapes.Wall(normal=(1, 0, 0), dist=0.5), velocity=(1e-4, 1e-4, 0)))
-        system.lbboundaries.add(espressomd.lbboundaries.LBBoundary(
-            shape=espressomd.shapes.Wall(normal=(-1, 0, 0), dist=-(system.box_l[0] - 0.5)), velocity=(0, 0, 0)))
-
-p1 = system.part.add(id=0, pos=[1.0] * 3)
+    lbf = lbf_class(
+        lattice=lb_lattice, kinematic_viscosity=1.3, density=1.5, tau=0.01)
+    wall1 = espressomd.shapes.Wall(normal=(1, 0, 0), dist=1.0)
+    wall2 = espressomd.shapes.Wall(normal=(-1, 0, 0),
+                                   dist=-(system.box_l[0] - 1.0))
+    lbf.add_boundary_from_shape(wall1, (1e-4, 1e-4, 0))
+    lbf.add_boundary_from_shape(wall2, (0, 0, 0))
+
+    ek_solver = espressomd.electrokinetics.EKNone(lattice=lb_lattice)
+    ek_species = espressomd.electrokinetics.EKSpecies(
+        lattice=lb_lattice, density=1.5, kT=2.0, diffusion=0.2, valency=0.1,
+        advection=False, friction_coupling=False, ext_efield=[0.1, 0.2, 0.3],
+        single_precision=False, tau=system.time_step)
+    system.ekcontainer.solver = ek_solver
+    system.ekcontainer.tau = ek_species.tau
+    system.ekcontainer.add(ek_species)
+    ek_species.add_boundary_from_shape(
+        shape=wall1, value=1e-3 * np.array([1., 2., 3.]),
+        boundary_type=espressomd.electrokinetics.FluxBoundary)
+    ek_species.add_boundary_from_shape(
+        shape=wall2, value=1e-3 * np.array([4., 5., 6.]),
+        boundary_type=espressomd.electrokinetics.FluxBoundary)
+    ek_species.add_boundary_from_shape(
+        shape=wall1, value=1.,
+        boundary_type=espressomd.electrokinetics.DensityBoundary)
+    ek_species.add_boundary_from_shape(
+        shape=wall2, value=2.,
+        boundary_type=espressomd.electrokinetics.DensityBoundary)
+
+p1 = system.part.add(id=0, pos=[1.0, 1.0, 1.0])
 p2 = system.part.add(id=1, pos=[1.0, 1.0, 2.0])
 
 if espressomd.has_features('ELECTROSTATICS'):
@@ -103,7 +121,7 @@
     p2.dip = (7.3, 6.1, -4)
 
 if espressomd.has_features('EXCLUSIONS'):
-    system.part.add(id=2, pos=[2.0] * 3, exclusions=[0, 1])
+    system.part.add(id=2, pos=[2.0, 2.0, 2.0], exclusions=[0, 1])
 
 # place particles at the interface between 2 MPI nodes
 p3 = system.part.add(id=3, pos=system.box_l / 2.0 - 1.0, type=1)
@@ -339,22 +357,60 @@
             "p2nfft_r_cut": "11",
             "p2nfft_alpha": "0.37"}))
 
-if lbf_actor:
-    m = np.pi / 12
-    nx = int(np.round(system.box_l[0] / lbf.get_params()["agrid"]))
-    ny = int(np.round(system.box_l[1] / lbf.get_params()["agrid"]))
-    nz = int(np.round(system.box_l[2] / lbf.get_params()["agrid"]))
+if lbf_class:
+    system.actors.add(lbf)
+    if 'THERM.LB' in modes:
+        system.thermostat.set_lb(LB_fluid=lbf, seed=23, gamma=2.0)
     # Create a 3D grid with deterministic values to fill the LB fluid lattice
+    m = np.pi / 12
     grid_3D = np.fromfunction(
         lambda i, j, k: np.cos(i * m) * np.cos(j * m) * np.cos(k * m),
-        (nx, ny, nz), dtype=float)
-    for i in range(nx):
-        for j in range(ny):
-            for k in range(nz):
-                lbf[i, j, k].population = grid_3D[i, j, k] * np.arange(1, 20)
+        lbf.shape, dtype=float)
+    lbf[:, :, :].population = np.einsum(
+        'abc,d->abcd', grid_3D, np.arange(1, 20))
+    lbf[:, :, :].last_applied_force = np.einsum(
+        'abc,d->abcd', grid_3D, np.arange(1, 4))
     # save LB checkpoint file
     lbf_cpt_path = path_cpt_root / "lb.cpt"
     lbf.save_checkpoint(str(lbf_cpt_path), lbf_cpt_mode)
+    # save EK checkpoint file
+    ek_species[:, :, :].density = grid_3D
+    ek_cpt_path = path_cpt_root / "ek.cpt"
+    ek_species.save_checkpoint(str(ek_cpt_path), lbf_cpt_mode)
+    # setup VTK folder
+    vtk_suffix = config.test_name
+    vtk_root = pathlib.Path("vtk_out")
+    # create LB VTK callbacks
+    lb_vtk_auto_id = f"auto_lb_{vtk_suffix}"
+    lb_vtk_manual_id = f"manual_lb_{vtk_suffix}"
+    config.recursive_unlink(vtk_root / lb_vtk_auto_id)
+    config.recursive_unlink(vtk_root / lb_vtk_manual_id)
+    lb_vtk_auto = espressomd.lb.VTKOutput(
+        identifier=lb_vtk_auto_id, delta_N=1,
+        observables=('density', 'velocity_vector'), base_folder=str(vtk_root))
+    lbf.add_vtk_writer(vtk=lb_vtk_auto)
+    lb_vtk_auto.disable()
+    lb_vtk_manual = espressomd.lb.VTKOutput(
+        identifier=lb_vtk_manual_id, delta_N=0,
+        observables=('density',), base_folder=str(vtk_root))
+    lbf.add_vtk_writer(vtk=lb_vtk_manual)
+    lb_vtk_manual.write()
+    # create EK VTK callbacks
+    ek_vtk_auto_id = f"auto_ek_{vtk_suffix}"
+    ek_vtk_manual_id = f"manual_ek_{vtk_suffix}"
+    config.recursive_unlink(vtk_root / ek_vtk_auto_id)
+    config.recursive_unlink(vtk_root / ek_vtk_manual_id)
+    ek_vtk_auto = espressomd.electrokinetics.VTKOutput(
+        identifier=ek_vtk_auto_id,
+        observables=('density',), delta_N=1, base_folder=str(vtk_root))
+    ek_species.add_vtk_writer(vtk=ek_vtk_auto)
+    ek_vtk_auto.disable()
+    ek_vtk_manual = espressomd.electrokinetics.VTKOutput(
+        identifier=ek_vtk_manual_id,
+        observables=('density',), delta_N=0, base_folder=str(vtk_root))
+    ek_species.add_vtk_writer(vtk=ek_vtk_manual)
+    ek_vtk_manual.write()
+
 
 # set various properties
 p8 = system.part.add(id=8, pos=[2.0] * 3 + system.box_l)
@@ -383,9 +439,9 @@
         p3.gamma_rot = 2. * gamma
 if espressomd.has_features('ENGINE'):
     p3.swimming = {"f_swim": 0.03}
-if espressomd.has_features('ENGINE') and lbf_actor:
+if espressomd.has_features('ENGINE') and lbf_class:
     p4.swimming = {"v_swim": 0.02, "mode": "puller", "dipole_length": 1.}
-if espressomd.has_features('LB_ELECTROHYDRODYNAMICS') and lbf_actor:
+if espressomd.has_features('LB_ELECTROHYDRODYNAMICS') and lbf_class:
     p8.mu_E = [-0.1, 0.2, -0.3]
 
 # h5md output
@@ -418,7 +474,7 @@ def test_checkpointing(self):
         self.assertTrue(checkpoint_filepath.is_file(),
                         "checkpoint file not created")
 
-        if lbf_actor:
+        if lbf_class:
             self.assertTrue(lbf_cpt_path.is_file(),
                             "LB checkpoint file not created")
 
@@ -427,7 +483,7 @@ def test_checkpointing(self):
             local_obj = "local"  # pylint: disable=unused-variable
             checkpoint.register("local_obj")
 
-    @ut.skipIf(lbf_actor is None, "Skipping test due to missing mode.")
+    @ut.skipIf(lbf_class is None, "Skipping test due to missing mode.")
     def test_lb_checkpointing_exceptions(self):
         '''
         Check the LB checkpointing exception mechanism. Write corrupted
@@ -435,12 +491,21 @@ def test_lb_checkpointing_exceptions(self):
         '''
 
         # check exception mechanism
-        with self.assertRaisesRegex(RuntimeError, 'could not open file'):
-            invalid_path = lbf_cpt_path.parent / "unknown_dir" / "lb.cpt"
+        lbf_cpt_root = lbf_cpt_path.parent
+        with self.assertRaisesRegex(RuntimeError, "could not open file"):
+            invalid_path = lbf_cpt_root / "unknown_dir" / "lb.cpt"
             lbf.save_checkpoint(str(invalid_path), lbf_cpt_mode)
+        with self.assertRaisesRegex(RuntimeError, "unit test error"):
+            lbf.save_checkpoint(str(lbf_cpt_root / "lb_err.cpt"), -1)
+        with self.assertRaisesRegex(RuntimeError, "could not write to"):
+            lbf.save_checkpoint(str(lbf_cpt_root / "lb_err.cpt"), -2)
+        with self.assertRaisesRegex(ValueError, "Unknown mode -3"):
+            lbf.save_checkpoint(str(lbf_cpt_root / "lb_err.cpt"), -3)
+        with self.assertRaisesRegex(ValueError, "Unknown mode 2"):
+            lbf.save_checkpoint(str(lbf_cpt_root / "lb_err.cpt"), 2)
+
+        # deactivate LB actor
         system.actors.remove(lbf)
-        with self.assertRaisesRegex(RuntimeError, 'one needs to have already initialized the LB fluid'):
-            lbf.load_checkpoint(str(lbf_cpt_path), lbf_cpt_mode)
 
         # read the valid LB checkpoint file
         lbf_cpt_data = lbf_cpt_path.read_bytes()
@@ -452,14 +517,69 @@ def test_lb_checkpointing_exceptions(self):
         with open(cpt_path.format("-extra-data"), "wb") as f:
             f.write(lbf_cpt_data + lbf_cpt_data[-8:])
         if lbf_cpt_mode == 0:
-            boxsize, data = lbf_cpt_data.split(b"\n", 1)
+            boxsize, popsize, data = lbf_cpt_data.split(b"\n", 2)
             # write checkpoint file with incorrectly formatted data
             with open(cpt_path.format("-wrong-format"), "wb") as f:
-                f.write(boxsize + b"\ntext string\n" + data)
+                f.write(boxsize + b"\n" + popsize + b"\ntext string\n" + data)
+            # write checkpoint file with different box dimensions
+            with open(cpt_path.format("-wrong-boxdim"), "wb") as f:
+                f.write(b"2" + boxsize + b"\n" + popsize + b"\n" + data)
+            # write checkpoint file with different population size
+            with open(cpt_path.format("-wrong-popsize"), "wb") as f:
+                f.write(boxsize + b"\n" + b"2" + popsize + b"\n" + data)
+
+    @ut.skipIf(lbf_class is None, "Skipping test due to missing mode.")
+    def test_ek_checkpointing_exceptions(self):
+        '''
+        Check the EK checkpointing exception mechanism. Write corrupted
+        EK checkpoint files that will be tested in ``test_checkpoint.py``.
+        '''
+
+        # check exception mechanism
+        ek_cpt_root = ek_cpt_path.parent
+        with self.assertRaisesRegex(RuntimeError, "could not open file"):
+            invalid_path = ek_cpt_root / "unknown_dir" / "ek.cpt"
+            ek_species.save_checkpoint(str(invalid_path), lbf_cpt_mode)
+        with self.assertRaisesRegex(RuntimeError, "unit test error"):
+            ek_species.save_checkpoint(str(ek_cpt_root / "ek_err.cpt"), -1)
+        with self.assertRaisesRegex(RuntimeError, "could not write to"):
+            ek_species.save_checkpoint(str(ek_cpt_root / "ek_err.cpt"), -2)
+        with self.assertRaisesRegex(ValueError, "Unknown mode -3"):
+            ek_species.save_checkpoint(str(ek_cpt_root / "ek_err.cpt"), -3)
+        with self.assertRaisesRegex(ValueError, "Unknown mode 2"):
+            ek_species.save_checkpoint(str(ek_cpt_root / "ek_err.cpt"), 2)
+
+        # read the valid EK checkpoint file
+        ek_cpt_data = ek_cpt_path.read_bytes()
+        cpt_path = str(path_cpt_root / "ek") + "{}.cpt"
+        # write checkpoint file with missing data
+        with open(cpt_path.format("-missing-data"), "wb") as f:
+            f.write(ek_cpt_data[:len(ek_cpt_data) // 2])
+        # write checkpoint file with extra data
+        with open(cpt_path.format("-extra-data"), "wb") as f:
+            f.write(ek_cpt_data + ek_cpt_data[-8:])
+        if lbf_cpt_mode == 0:
+            boxsize, data = ek_cpt_data.split(b"\n", 1)
+            # write checkpoint file with incorrectly formatted data
+            with open(cpt_path.format("-wrong-format"), "wb") as f:
+                f.write(boxsize + b"\n" + b"\ntext string\n" + data)
             # write checkpoint file with different box dimensions
             with open(cpt_path.format("-wrong-boxdim"), "wb") as f:
                 f.write(b"2" + boxsize + b"\n" + data)
 
+    def test_generator_recursive_unlink(self):
+        with tempfile.TemporaryDirectory() as tmp_directory:
+            root = pathlib.Path(tmp_directory)
+            tree = root / "level1" / "level2"
+            tree.mkdir(parents=True, exist_ok=False)
+            for dirname in root.iterdir():
+                filepath = dirname / "file"
+                filepath.write_text("")
+            config.recursive_unlink(root)
+            for path in root.iterdir():
+                self.assertTrue(path.is_dir(),
+                                f"Path '{path}' should be a folder")
+
     def test_reaction_methods_sanity_check(self):
         with self.assertRaisesRegex(RuntimeError, "Reaction methods do not support checkpointing"):
             widom = espressomd.reaction_methods.WidomInsertion(kT=1, seed=1)
diff --git a/testsuite/python/test_checkpoint.py b/testsuite/python/test_checkpoint.py
index 18e3088e050..a9f08895afd 100644
--- a/testsuite/python/test_checkpoint.py
+++ b/testsuite/python/test_checkpoint.py
@@ -24,6 +24,7 @@
 import numpy as np
 import contextlib
 import pathlib
+import sys
 
 import espressomd
 import espressomd.checkpointing
@@ -35,6 +36,10 @@
 import espressomd.integrate
 import espressomd.shapes
 import espressomd.constraints
+import espressomd.lb
+import espressomd.electrokinetics
+with contextlib.suppress(ImportError):
+    import espressomd.io.vtk
 
 with contextlib.suppress(ImportError):
     import h5py  # h5py has to be imported *after* espressomd (MPI)
@@ -42,11 +47,9 @@
 config = utg.TestGenerator()
 is_gpu_available = espressomd.gpu_available()
 modes = config.get_modes()
-has_lb_mode = 'LB.CPU' in modes or 'LB.GPU' in modes and is_gpu_available
+has_lb_mode = ('LB.WALBERLA' in modes and espressomd.has_features('WALBERLA')
+               and ('LB.CPU' in modes or 'LB.GPU' in modes and is_gpu_available))
 has_p3m_mode = 'P3M.CPU' in modes or 'P3M.GPU' in modes and is_gpu_available
-has_lbb = ('LB.CPU' in modes and espressomd.has_features("LB_BOUNDARIES") or
-           'LB.GPU' in modes and espressomd.has_features("LB_BOUNDARIES_GPU")
-           and espressomd.gpu_available())
 
 
 class CheckpointTest(ut.TestCase):
@@ -58,7 +61,7 @@ class CheckpointTest(ut.TestCase):
 
     @classmethod
     def setUpClass(cls):
-        cls.ref_box_l = np.array([12.0, 14.0, 16.0])
+        cls.ref_box_l = np.array([12.0, 8.0, 16.0])
         if 'DP3M' in modes:
             cls.ref_box_l = np.array([16.0, 16.0, 16.0])
         cls.ref_periodicity = np.array([True, True, True])
@@ -80,24 +83,18 @@ def test_get_active_actor_of_type(self):
         with self.assertRaisesRegex(AssertionError, "system doesn't have an actor of type Wall"):
             self.get_active_actor_of_type(espressomd.shapes.Wall)
 
-    @ut.skipIf(not has_lb_mode, "Skipping test due to missing mode.")
+    @utx.skipIfMissingFeatures(["WALBERLA"])
+    @ut.skipIf(not has_lb_mode, "Skipping test due to missing LB feature.")
     def test_lb_fluid(self):
-        '''
-        Check serialization of the LB fluid. The checkpoint file only stores
-        population information, therefore calling ``lbf.load_checkpoint()``
-        erases all LBBoundaries information but doesn't remove the objects
-        contained in ``system.lbboundaries``. A callback should re-introduce
-        the LB boundary flag after LB populations are reloaded.
-        '''
-        lbf = self.get_active_actor_of_type(
-            espressomd.lb.HydrodynamicInteraction)
+        lbf = self.get_active_actor_of_type(espressomd.lb.LBFluidWalberla)
         cpt_mode = 0 if 'LB.ASCII' in modes else 1
         cpt_root = pathlib.Path(self.checkpoint.checkpoint_dir)
         cpt_path = str(cpt_root / "lb") + "{}.cpt"
 
-        if has_lbb:
-            # LB boundaries must be correct before LB populations are loaded
-            self.check_lb_boundaries()
+        # LB boundaries are loaded at the same time as LB populations
+        np.testing.assert_equal(np.copy(lbf[:, :, :].velocity), 0.)
+        np.testing.assert_equal(
+            np.copy(lbf[:, :, :].is_boundary.astype(int)), 0)
 
         # check exception mechanism with corrupted LB checkpoint files
         with self.assertRaisesRegex(RuntimeError, 'EOF found'):
@@ -109,12 +106,15 @@ def test_lb_fluid(self):
                 lbf.load_checkpoint(cpt_path.format("-wrong-format"), cpt_mode)
             with self.assertRaisesRegex(RuntimeError, 'grid dimensions mismatch'):
                 lbf.load_checkpoint(cpt_path.format("-wrong-boxdim"), cpt_mode)
+            with self.assertRaisesRegex(RuntimeError, 'population size mismatch'):
+                lbf.load_checkpoint(
+                    cpt_path.format("-wrong-popsize"), cpt_mode)
         with self.assertRaisesRegex(RuntimeError, 'could not open file'):
             lbf.load_checkpoint(cpt_path.format("-unknown"), cpt_mode)
 
         # load the valid LB checkpoint file
         lbf.load_checkpoint(cpt_path.format(""), cpt_mode)
-        precision = 9 if "LB.CPU" in modes else 5
+        precision = 8 if "LB.WALBERLA" in modes else 5
         m = np.pi / 12
         nx = lbf.shape[0]
         ny = lbf.shape[1]
@@ -129,16 +129,238 @@ def test_lb_fluid(self):
                         np.copy(lbf[i, j, k].population),
                         grid_3D[i, j, k] * np.arange(1, 20),
                         decimal=precision)
+                    np.testing.assert_almost_equal(
+                        np.copy(lbf[i, j, k].last_applied_force),
+                        grid_3D[i, j, k] * np.arange(1, 4),
+                        decimal=precision)
         state = lbf.get_params()
-        reference = {'agrid': 0.5, 'visc': 1.3, 'dens': 1.5, 'tau': 0.01,
-                     'gamma_odd': 0.2, 'gamma_even': 0.3}
+        reference = {
+            "agrid": 2.0,
+            "kinematic_viscosity": 1.3,
+            "density": 1.5,
+            "tau": 0.01}
+        for key in reference:
+            self.assertIn(key, state)
+            np.testing.assert_allclose(np.copy(state[key]), reference[key],
+                                       atol=1E-7, err_msg=f"{key} differs")
+        self.assertTrue(lbf.is_active)
+        self.assertFalse(lbf.single_precision)
+
+        # check boundary objects
+        slip_velocity1 = np.array([1e-4, 1e-4, 0.])
+        slip_velocity2 = np.array([0., 0., 0.])
+        # check boundary flag
+        np.testing.assert_equal(
+            np.copy(lbf[0, :, :].is_boundary.astype(int)), 1)
+        np.testing.assert_equal(
+            np.copy(lbf[-1, :, :].is_boundary.astype(int)), 1)
+        np.testing.assert_equal(
+            np.copy(lbf[1:-1, :, :].is_boundary.astype(int)), 0)
+        # check boundary conditions
+        for node in lbf[0, :, :]:
+            np.testing.assert_allclose(np.copy(node.velocity), slip_velocity1)
+        for node in lbf[-1, :, :]:
+            np.testing.assert_allclose(np.copy(node.velocity), slip_velocity2)
+        for node in lbf[2, :, :]:
+            np.testing.assert_allclose(np.copy(node.velocity), 0.)
+        # remove boundaries
+        lbf.clear_boundaries()
+        np.testing.assert_equal(
+            np.copy(lbf[:, :, :].is_boundary.astype(int)), 0)
+
+    @utx.skipIfMissingFeatures(["WALBERLA"])
+    @ut.skipIf(not has_lb_mode, "Skipping test due to missing EK feature.")
+    def test_ek_species(self):
+        cpt_mode = 0 if 'LB.ASCII' in modes else 1
+        cpt_root = pathlib.Path(self.checkpoint.checkpoint_dir)
+        cpt_path = str(cpt_root / "ek") + "{}.cpt"
+
+        self.assertEqual(len(system.ekcontainer), 1)
+        ek_species = system.ekcontainer[0]
+        self.assertTrue(
+            system.ekcontainer.call_method("is_poisson_solver_set"))
+        self.assertAlmostEqual(system.ekcontainer.tau, system.time_step,
+                               delta=1e-7)
+        self.assertIsInstance(system.ekcontainer.solver,
+                              espressomd.electrokinetics.EKNone)
+
+        # check exception mechanism with corrupted LB checkpoint files
+        with self.assertRaisesRegex(RuntimeError, 'EOF found'):
+            ek_species.load_checkpoint(
+                cpt_path.format("-missing-data"), cpt_mode)
+        with self.assertRaisesRegex(RuntimeError, 'extra data found, expected EOF'):
+            ek_species.load_checkpoint(
+                cpt_path.format("-extra-data"), cpt_mode)
+        if cpt_mode == 0:
+            with self.assertRaisesRegex(RuntimeError, 'incorrectly formatted data'):
+                ek_species.load_checkpoint(
+                    cpt_path.format("-wrong-format"), cpt_mode)
+            with self.assertRaisesRegex(RuntimeError, 'grid dimensions mismatch'):
+                ek_species.load_checkpoint(
+                    cpt_path.format("-wrong-boxdim"), cpt_mode)
+        with self.assertRaisesRegex(RuntimeError, 'could not open file'):
+            ek_species.load_checkpoint(cpt_path.format("-unknown"), cpt_mode)
+
+        ek_species.load_checkpoint(cpt_path.format(""), cpt_mode)
+
+        precision = 8 if "LB.WALBERLA" in modes else 5
+        m = np.pi / 12
+        nx = ek_species.lattice.shape[0]
+        ny = ek_species.lattice.shape[1]
+        nz = ek_species.lattice.shape[2]
+        grid_3D = np.fromfunction(
+            lambda i, j, k: np.cos(i * m) * np.cos(j * m) * np.cos(k * m),
+            (nx, ny, nz), dtype=float)
+        for i in range(nx):
+            for j in range(ny):
+                for k in range(nz):
+                    np.testing.assert_almost_equal(
+                        np.copy(ek_species[i, j, k].density),
+                        grid_3D[i, j, k], decimal=precision)
+
+        state = ek_species.get_params()
+        reference = {
+            "density": 1.5,
+            "diffusion": 0.2,
+            "kT": 2.0,
+            "valency": 0.1,
+            "ext_efield": [0.1, 0.2, 0.3],
+            "advection": False,
+            "friction_coupling": False,
+            "tau": 0.01}
         for key in reference:
             self.assertIn(key, state)
-            self.assertAlmostEqual(reference[key], state[key], delta=1E-7)
+            np.testing.assert_allclose(np.copy(state[key]), reference[key],
+                                       atol=1E-7, err_msg=f"{key} differs")
+        # self.assertFalse(ek_species.is_active)
+        self.assertFalse(ek_species.single_precision)
 
-        if has_lbb:
-            # LB boundaries must be correct after LB populations are loaded
-            self.check_lb_boundaries(remove_boundaries=True)
+        def generator(value, shape):
+            value_grid = np.tile(value, shape)
+            if value_grid.shape[-1] == 1:
+                value_grid = np.squeeze(value_grid, axis=-1)
+            return value_grid
+
+        # check boundary objects
+        dens1 = 1.
+        dens2 = 2.
+        flux1 = 1e-3 * np.array([1., 2., 3.])
+        flux2 = 1e-3 * np.array([4., 5., 6.])
+        boundaries = [("density", dens1, dens2), ("flux", flux1, flux2)]
+        for attr, value1, value2 in boundaries:
+            accessor = np.vectorize(
+                lambda obj: np.copy(getattr(obj, attr)),
+                signature=f"()->({'n' if attr == 'flux' else ''})")
+            slice1 = ek_species[0, :, :]
+            slice2 = ek_species[-1, :, :]
+            slice3 = ek_species[1:-1, :, :]
+            # check boundary flag
+
+            np.testing.assert_equal(np.copy(slice1.is_boundary), True)
+            np.testing.assert_equal(np.copy(slice2.is_boundary), True)
+            np.testing.assert_equal(np.copy(slice3.is_boundary), False)
+            # check boundary conditions
+            field = f"{attr}_boundary"
+            shape = list(ek_species.shape)[-2:] + [1]
+            np.testing.assert_allclose(
+                accessor(np.copy(getattr(slice1, field))),
+                generator(value1, shape))
+            np.testing.assert_allclose(
+                accessor(np.copy(getattr(slice2, field))),
+                generator(value2, shape))
+
+        ek_species.clear_density_boundaries()
+        ek_species.clear_flux_boundaries()
+        np.testing.assert_equal(
+            np.copy(ek_species[:, :, :].is_boundary), False)
+
+    @utx.skipIfMissingFeatures(["WALBERLA"])
+    @ut.skipIf(not has_lb_mode, "Skipping test due to missing LB feature.")
+    def test_lb_vtk(self):
+        lbf = self.get_active_actor_of_type(espressomd.lb.LBFluidWalberla)
+        self.assertEqual(len(lbf.vtk_writers), 2)
+        vtk_suffix = config.test_name
+        key_auto = f"vtk_out/auto_lb_{vtk_suffix}"
+        vtk_auto = lbf.vtk_writers[0]
+        self.assertIsInstance(vtk_auto, espressomd.lb.VTKOutput)
+        self.assertEqual(vtk_auto.vtk_uid, key_auto)
+        self.assertEqual(vtk_auto.delta_N, 1)
+        self.assertFalse(vtk_auto.enabled)
+        self.assertEqual(set(vtk_auto.observables),
+                         {"density", "velocity_vector"})
+        self.assertIn(
+            f"write to '{key_auto}' every 1 LB steps (disabled)>", repr(vtk_auto))
+        key_manual = f"vtk_out/manual_lb_{vtk_suffix}"
+        vtk_manual = lbf.vtk_writers[1]
+        self.assertIsInstance(vtk_manual, espressomd.lb.VTKOutput)
+        self.assertEqual(vtk_manual.vtk_uid, key_manual)
+        self.assertEqual(vtk_manual.delta_N, 0)
+        self.assertEqual(set(vtk_manual.observables), {"density"})
+        self.assertIn(f"write to '{key_manual}' on demand>", repr(vtk_manual))
+        # check file numbering when resuming VTK write operations
+        vtk_root = pathlib.Path("vtk_out") / f"manual_lb_{vtk_suffix}"
+        filename = "simulation_step_{}.vtu"
+        self.assertTrue((vtk_root / filename.format(0)).exists())
+        self.assertFalse((vtk_root / filename.format(1)).exists())
+        self.assertFalse((vtk_root / filename.format(2)).exists())
+        # check VTK objects are still synchronized with their LB objects
+        old_density = lbf[0, 0, 0].density
+        new_density = 1.5 * old_density
+        lbf[0, 0, 0].density = new_density
+        vtk_manual.write()
+        lbf[0, 0, 0].density = old_density
+        self.assertTrue((vtk_root / filename.format(0)).exists())
+        self.assertTrue((vtk_root / filename.format(1)).exists())
+        self.assertFalse((vtk_root / filename.format(2)).exists())
+        if "espressomd.io.vtk" in sys.modules:
+            vtk_reader = espressomd.io.vtk.VTKReader()
+            vtk_data = vtk_reader.parse(vtk_root / filename.format(1))
+            lb_density = vtk_data["density"]
+            self.assertAlmostEqual(
+                lb_density[0, 0, 0], new_density, delta=1e-5)
+
+    @utx.skipIfMissingFeatures(["WALBERLA"])
+    @ut.skipIf(not has_lb_mode, "Skipping test due to missing EK feature.")
+    def test_ek_vtk(self):
+        ek_species = system.ekcontainer[0]
+        vtk_suffix = config.test_name
+        key_auto = f"vtk_out/auto_ek_{vtk_suffix}"
+        vtk_auto = ek_species.vtk_writers[0]
+        self.assertIsInstance(vtk_auto, espressomd.electrokinetics.VTKOutput)
+        self.assertEqual(vtk_auto.vtk_uid, key_auto)
+        self.assertEqual(vtk_auto.delta_N, 1)
+        self.assertFalse(vtk_auto.enabled)
+        self.assertEqual(set(vtk_auto.observables), {"density"})
+        self.assertIn(
+            f"write to '{key_auto}' every 1 EK steps (disabled)>", repr(vtk_auto))
+        key_manual = f"vtk_out/manual_ek_{vtk_suffix}"
+        vtk_manual = ek_species.vtk_writers[1]
+        self.assertIsInstance(vtk_manual, espressomd.electrokinetics.VTKOutput)
+        self.assertEqual(vtk_manual.vtk_uid, key_manual)
+        self.assertEqual(vtk_manual.delta_N, 0)
+        self.assertEqual(set(vtk_manual.observables), {"density"})
+        self.assertIn(f"write to '{key_manual}' on demand>", repr(vtk_manual))
+        # check file numbering when resuming VTK write operations
+        vtk_root = pathlib.Path("vtk_out") / f"manual_ek_{vtk_suffix}"
+        filename = "simulation_step_{}.vtu"
+        self.assertTrue((vtk_root / filename.format(0)).exists())
+        self.assertFalse((vtk_root / filename.format(1)).exists())
+        self.assertFalse((vtk_root / filename.format(2)).exists())
+        # check VTK objects are still synchronized with their EK objects
+        old_density = ek_species[0, 0, 0].density
+        new_density = 1.5 * old_density
+        ek_species[0, 0, 0].density = new_density
+        vtk_manual.write()
+        ek_species[0, 0, 0].density = old_density
+        self.assertTrue((vtk_root / filename.format(0)).exists())
+        self.assertTrue((vtk_root / filename.format(1)).exists())
+        self.assertFalse((vtk_root / filename.format(2)).exists())
+        if "espressomd.io.vtk" in sys.modules:
+            vtk_reader = espressomd.io.vtk.VTKReader()
+            vtk_data = vtk_reader.parse(vtk_root / filename.format(1))
+            ek_density = vtk_data["density"]
+            self.assertAlmostEqual(
+                ek_density[0, 0, 0], new_density, delta=1e-5)
 
     def test_system_variables(self):
         cell_system_params = system.cell_system.get_state()
@@ -282,16 +504,15 @@ def test_shape_based_constraints_serialization(self):
             np.copy(p3.f), -np.copy(p4.f), rtol=1e-4)
         self.assertGreater(np.linalg.norm(np.copy(p3.f) - old_force), 1e6)
 
+    @utx.skipIfMissingFeatures(["WALBERLA"])
+    @ut.skipIf(not has_lb_mode, "Skipping test due to missing LB feature.")
     @ut.skipIf('THERM.LB' not in modes, 'LB thermostat not in modes')
     def test_thermostat_LB(self):
         thmst = system.thermostat.get_state()[0]
-        if 'LB.GPU' in modes and not espressomd.gpu_available():
-            self.assertEqual(thmst['type'], 'OFF')
-        else:
-            self.assertEqual(thmst['type'], 'LB')
-            # rng_counter_fluid = seed, seed is 0 because kT=0
-            self.assertEqual(thmst['rng_counter_fluid'], 0)
-            self.assertEqual(thmst['gamma'], 2.0)
+        self.assertEqual(thmst['type'], 'LB')
+        # rng_counter_fluid = seed, seed is 0 because kT=0
+        self.assertEqual(thmst['rng_counter_fluid'], 0)
+        self.assertEqual(thmst['gamma'], 2.0)
 
     @ut.skipIf('THERM.LANGEVIN' not in modes,
                'Langevin thermostat not in modes')
@@ -454,18 +675,18 @@ def test_bonded_inter(self):
         # immersed boundary bonds
         self.assertEqual(
             ibm_volcons_bond.params, {'softID': 15, 'kappaV': 0.01})
-        if 'DP3M.CPU' not in modes:
-            self.assertEqual(
-                ibm_tribend_bond.params,
-                {'kb': 2., 'theta0': 0., 'refShape': 'Initial'})
+        self.assertEqual(
+            {**ibm_tribend_bond.params, **{'theta0': 0.}},
+            {'kb': 2., 'theta0': 0., 'refShape': 'Initial'})
         self.assertEqual(
             ibm_triel_bond.params,
             {'k1': 1.1, 'k2': 1.2, 'maxDist': 1.6, 'elasticLaw': 'NeoHookean'})
         # check new bonds can be added
-        new_harmonic_bond = espressomd.interactions.HarmonicBond(r_0=0.2, k=1.)
-        system.bonded_inter.add(new_harmonic_bond)
-        bond_ids = system.bonded_inter.call_method('get_bond_ids')
-        self.assertEqual(len(bond_ids), len(system.bonded_inter))
+        if not has_lb_mode:
+            new_bond = espressomd.interactions.HarmonicBond(r_0=0.2, k=1.)
+            system.bonded_inter.add(new_bond)
+            bond_ids = system.bonded_inter.call_method('get_bond_ids')
+            self.assertEqual(len(bond_ids), len(system.bonded_inter))
 
     def test_bond_breakage_specs(self):
         # check the ObjectHandle was correctly initialized (including MPI)
@@ -691,33 +912,6 @@ def test_exclusions(self):
         self.assertEqual(list(system.part.by_id(1).exclusions), [2])
         self.assertEqual(list(system.part.by_id(2).exclusions), [0, 1])
 
-    def check_lb_boundaries(self, remove_boundaries=False):
-        # check boundary objects
-        self.assertEqual(len(system.lbboundaries), 2)
-        np.testing.assert_allclose(
-            np.copy(system.lbboundaries[0].velocity), [1e-4, 1e-4, 0])
-        np.testing.assert_allclose(
-            np.copy(system.lbboundaries[1].velocity), [0, 0, 0])
-        self.assertIsInstance(
-            system.lbboundaries[0].shape, espressomd.shapes.Wall)
-        self.assertIsInstance(
-            system.lbboundaries[1].shape, espressomd.shapes.Wall)
-
-        # check boundary flag
-        lbf = self.get_active_actor_of_type(
-            espressomd.lb.HydrodynamicInteraction)
-        np.testing.assert_equal(np.copy(lbf[0, :, :].boundary.astype(int)), 1)
-        np.testing.assert_equal(np.copy(lbf[-1, :, :].boundary.astype(int)), 2)
-        np.testing.assert_equal(
-            np.copy(lbf[1:-1, :, :].boundary.astype(int)), 0)
-
-        # remove boundaries
-        if not remove_boundaries:
-            return
-        system.lbboundaries.clear()
-        self.assertEqual(len(system.lbboundaries), 0)
-        np.testing.assert_equal(np.copy(lbf[:, :, :].boundary.astype(int)), 0)
-
     def test_constraints(self):
         n_contraints = 8
         if espressomd.has_features("ELECTROSTATICS"):
@@ -792,6 +986,7 @@ def test_constraints(self):
             self.assertAlmostEqual(wave.phi, 1.4, delta=1E-10)
 
     @utx.skipIfMissingFeatures("WCA")
+    @ut.skipIf(has_lb_mode, "LB not supported")
     @ut.skipIf("INT.SDM" in modes, "Stokesian integrator not supported")
     @ut.skipIf("INT.BD" in modes, "Brownian integrator not supported")
     @ut.skipIf("INT.SD" in modes, "Steepest descent not supported")
diff --git a/testsuite/python/tests_common.py b/testsuite/python/tests_common.py
index 6408da166a5..86f1dff7ec6 100644
--- a/testsuite/python/tests_common.py
+++ b/testsuite/python/tests_common.py
@@ -18,6 +18,7 @@
 #
 
 import pathlib
+import itertools
 import numpy as np
 
 
@@ -343,15 +344,29 @@ def lj_force(espressomd, r, epsilon, sigma, cutoff, offset=0.):
     return f
 
 
-def count_fluid_nodes(lbf):
-    """Counts the non-boundary nodes in the passed lb fluid instance."""
+def fold_index(idx, shape):
+    """Fold index into the range 0<x<shape"""
 
-    fluid_nodes = 0
-    for n in lbf.nodes():
-        if not n.boundary:
-            fluid_nodes += 1
+    res = np.copy(idx)
+    for i in range(len(idx)):
+        while res[i] >= shape[i]:
+            res[i] -= shape[i]
+        while res[i] < 0:
+            res[i] += shape[i]
+    return res
 
-    return fluid_nodes
+
+def get_lb_nodes_around_pos(pos, lbf):
+    """Returns LB node(s) relevant for interpolation around the given position"""
+
+    pos_lb_units = pos / lbf.agrid - 0.5  # relative to node centers
+    lower_left_index = np.array(np.floor(pos_lb_units), dtype=int)
+
+    nodes = []
+    for i, j, k in itertools.product([0, 1], repeat=3):
+        index = lower_left_index + np.array((i, j, k), dtype=int)
+        nodes.append(lbf[fold_index(index, lbf.shape)])
+    return nodes
 
 
 def random_dipoles(n_particles):
diff --git a/testsuite/python/thermostats_common.py b/testsuite/python/thermostats_common.py
index 212b6948804..cc2c653a0d5 100644
--- a/testsuite/python/thermostats_common.py
+++ b/testsuite/python/thermostats_common.py
@@ -43,10 +43,11 @@ def check_velocity_distribution(self, vel, minmax, n_bins, error_tol, kT):
                 vel[:, i], range=(-minmax, minmax), bins=n_bins, density=False)
             data = hist[0] / float(vel.shape[0])
             bins = hist[1]
+            expected = []
             for j in range(n_bins):
-                found = data[j]
-                expected = single_component_maxwell(bins[j], bins[j + 1], kT)
-                self.assertAlmostEqual(found, expected, delta=error_tol)
+                expected.append(single_component_maxwell(
+                    bins[j], bins[j + 1], kT))
+            np.testing.assert_allclose(data[:n_bins], expected, atol=error_tol)
 
     def test_00_verify_single_component_maxwell(self):
         """Verifies the normalization of the analytical expression."""
diff --git a/testsuite/python/unittest_generator.py b/testsuite/python/unittest_generator.py
index b399f8eac28..e2925b7d87e 100644
--- a/testsuite/python/unittest_generator.py
+++ b/testsuite/python/unittest_generator.py
@@ -136,6 +136,27 @@ def get_modes(self):
             modes.add(feature)
         return modes
 
+    @classmethod
+    def recursive_unlink(cls, root):
+        """
+        Delete files in a folder recursively but preserve the tree structure.
+        """
+        if root.exists():
+            for filepath in root.iterdir():
+                if filepath.is_file():
+                    filepath.unlink()
+
+    def cleanup_old_checkpoint(self):
+        """
+        Remove the contents of the checkpoint directory if it exists.
+        The directory itself and its subfolder structure are preserved
+        since they will typically be created soon afterwards (risk of
+        race condition on file systems with latency).
+        """
+        args = self.get_checkpoint_params()
+        root = pathlib.Path(args["checkpoint_path"]) / args["checkpoint_id"]
+        self.recursive_unlink(root)
+
     def get_checkpoint_params(self):
         """
         Generate parameters to instantiate an ESPResSo checkpoint file.
diff --git a/testsuite/python/virtual_sites_tracers.py b/testsuite/python/virtual_sites_tracers.py
index 811b51f00f1..5dcacac9cab 100644
--- a/testsuite/python/virtual_sites_tracers.py
+++ b/testsuite/python/virtual_sites_tracers.py
@@ -24,10 +24,10 @@
 
 
 @utx.skipIfMissingFeatures(
-    ['VIRTUAL_SITES_INERTIALESS_TRACERS', 'LB_BOUNDARIES'])
+    ["VIRTUAL_SITES_INERTIALESS_TRACERS", "WALBERLA"])
 class VirtualSitesTracers(VirtualSitesTracersCommon, ut.TestCase):
 
-    LBClass = espressomd.lb.LBFluid
+    LBClass = espressomd.lb.LBFluidWalberla
 
 
 if __name__ == "__main__":
diff --git a/testsuite/python/virtual_sites_tracers_common.py b/testsuite/python/virtual_sites_tracers_common.py
index eb61789db55..869f2b7a1a2 100644
--- a/testsuite/python/virtual_sites_tracers_common.py
+++ b/testsuite/python/virtual_sites_tracers_common.py
@@ -16,35 +16,37 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
+import numpy as np
+
 import espressomd
 import espressomd.lb
 import espressomd.shapes
-import espressomd.lbboundaries
 import espressomd.virtual_sites
 import espressomd.utils
 
+import tests_common
+import unittest_decorators as utx
+
 
 class VirtualSitesTracersCommon:
     box_height = 10.
     box_lw = 8.
     system = espressomd.System(box_l=(box_lw, box_lw, box_height))
-    system.time_step = 0.05
+    system.time_step = 0.08
     system.cell_system.skin = 0.1
 
     def setUp(self):
         self.system.box_l = (self.box_lw, self.box_lw, self.box_height)
 
     def tearDown(self):
-        self.system.thermostat.turn_off()
-        self.system.lbboundaries.clear()
         self.system.actors.clear()
         self.system.part.clear()
+        self.system.thermostat.turn_off()
 
-    def reset_lb(self, ext_force_density=(0, 0, 0), dir_walls=2):
-        self.system.lbboundaries.clear()
+    def set_lb(self, ext_force_density=(0, 0, 0), dir_walls=2):
         self.system.actors.clear()
         self.lbf = self.LBClass(
-            kT=0.0, agrid=1, dens=1, visc=1.8,
+            kT=0.0, agrid=1., density=1., kinematic_viscosity=1.8,
             tau=self.system.time_step, ext_force_density=ext_force_density)
         self.system.actors.add(self.lbf)
         self.system.thermostat.set_lb(
@@ -55,15 +57,12 @@ def reset_lb(self, ext_force_density=(0, 0, 0), dir_walls=2):
         # Setup boundaries
         normal = [0, 0, 0]
         normal[dir_walls] = 1
-        walls = [espressomd.lbboundaries.LBBoundary() for k in range(2)]
-        walls[0].set_params(shape=espressomd.shapes.Wall(
-            normal=normal, dist=0.5))
+        wall_shape = espressomd.shapes.Wall(normal=normal, dist=0.5)
+        self.lbf.add_boundary_from_shape(wall_shape)
         normal[dir_walls] = -1
-        walls[1].set_params(shape=espressomd.shapes.Wall(
-            normal=normal, dist=-(self.system.box_l[dir_walls] - 0.5)))
-
-        for wall in walls:
-            self.system.lbboundaries.add(wall)
+        wall_shape = espressomd.shapes.Wall(
+            normal=normal, dist=-(self.system.box_l[dir_walls] - 0.5))
+        self.lbf.add_boundary_from_shape(wall_shape)
 
         espressomd.utils.handle_errors("setup")
 
@@ -78,9 +77,57 @@ def test_aa_method_switching(self):
         self.assertIsInstance(
             self.system.virtual_sites, espressomd.virtual_sites.VirtualSitesInertialessTracers)
 
+    @utx.skipIfMissingFeatures("EXTERNAL_FORCES")
+    def test_ab_single_step(self):
+        self.set_lb()
+        self.system.part.clear()
+        self.system.virtual_sites = espressomd.virtual_sites.VirtualSitesInertialessTracers()
+
+        # Random velocities
+        self.lbf[:, :, :].velocity = np.random.random((*self.lbf.shape, 3))
+        force = [1, -2, 3]
+        # Test several particle positions
+        for pos in [[3, 2, 1], [0, 0, 0],
+                    self.system.box_l * 0.49,
+                    self.system.box_l,
+                    self.system.box_l * 0.99]:
+            p = self.system.part.add(pos=pos, ext_force=force, virtual=True)
+
+            coupling_pos = p.pos
+            # Nodes to which forces will be interpolated
+            lb_nodes = tests_common.get_lb_nodes_around_pos(
+                coupling_pos, self.lbf)
+
+            np.testing.assert_allclose(
+                [n.last_applied_force for n in lb_nodes],
+                np.zeros((len(lb_nodes), 3)))
+            self.system.integrator.run(1)
+
+            v_fluid = np.copy(
+                self.lbf.get_interpolated_velocity(
+                    pos=coupling_pos))
+
+            # Check particle velocity
+            np.testing.assert_allclose(np.copy(p.v), v_fluid)
+
+            # particle position
+            np.testing.assert_allclose(
+                np.copy(p.pos),
+                coupling_pos + v_fluid * self.system.time_step)
+
+            # check transfer of particle force to fluid
+            applied_forces = np.array([n.last_applied_force for n in lb_nodes])
+            np.testing.assert_allclose(
+                np.sum(applied_forces, axis=0), force, atol=1E-10)
+
+            # Check that last_applied_force gets cleared
+            p.remove()
+            self.system.integrator.run(1)
+            applied_forces = np.array([n.last_applied_force for n in lb_nodes])
+            np.testing.assert_allclose(
+                np.sum(applied_forces, axis=0), [0, 0, 0])
+
     def test_advection(self):
-        node_grid = self.system.cell_system.node_grid
-        lb_on_gpu = self.LBClass is espressomd.lb.LBFluidGPU
         for direction in [0, 1, 2]:
             # System setup
             system = self.system
@@ -93,7 +140,7 @@ def test_advection(self):
             box_l = 3 * [self.box_lw]
             box_l[dir_walls] = self.box_height
             system.box_l = box_l
-            self.reset_lb(ext_force_density=ext_force, dir_walls=dir_walls)
+            self.set_lb(ext_force_density=ext_force, dir_walls=dir_walls)
 
             # Establish steady state flow field
             system.integrator.run(400)
@@ -104,39 +151,23 @@ def test_advection(self):
             p = system.part.add(pos=pos_initial, virtual=True)
 
             # Perform integration
-            n_steps = 100
-            if node_grid[direction] != 1 and lb_on_gpu:
-                n_steps = 50  # with GPU, tracers must stay on MPI rank 0
             system.time = 0
             for _ in range(2):
-                system.integrator.run(n_steps)
+                system.integrator.run(100)
                 # compute expected position
-                lb_vel = self.lbf.get_interpolated_velocity(p.pos)
+                lb_vel = self.lbf.get_interpolated_velocity(pos=p.pos)
                 ref_dist = lb_vel[direction] * system.time
                 tracer_dist = p.pos[direction] - pos_initial[direction]
                 self.assertAlmostEqual(tracer_dist / ref_dist, 1., delta=0.01)
 
-            self.tearDown()
-
-    def test_zz_exceptions_with_lb(self):
-        node_grid = self.system.cell_system.node_grid
-        lb_on_gpu = self.LBClass is espressomd.lb.LBFluidGPU
-        if lb_on_gpu and sum(node_grid) != 3:
-            self.reset_lb()
-            system = self.system
-            system.virtual_sites = espressomd.virtual_sites.VirtualSitesInertialessTracers()
-            p = system.part.add(pos=(0, 0, 0), virtual=True)
-            system.integrator.run(1)
-            p.pos = (-0.5, -0.5, -0.5)
-            with self.assertRaisesRegex(Exception, "The LB GPU method cannot integrate virtual sites when more than 1 MPI ranks are used"):
-                system.integrator.run(1)
+            system.actors.clear()
 
     def test_zz_exceptions_without_lb(self):
         """Check behaviour without lb. Ignore non-virtual particles, complain on
         virtual ones.
 
         """
-        self.reset_lb()
+        self.set_lb()
         system = self.system
         system.virtual_sites = espressomd.virtual_sites.VirtualSitesInertialessTracers()
         system.actors.clear()
@@ -144,5 +175,5 @@ def test_zz_exceptions_without_lb(self):
         p = system.part.add(pos=(0, 0, 0))
         system.integrator.run(1)
         p.virtual = True
-        with self.assertRaisesRegex(Exception, "No LB method was active but virtual sites present"):
+        with self.assertRaisesRegex(Exception, "LB needs to be active for inertialess tracers"):
             system.integrator.run(1)
diff --git a/testsuite/python/virtual_sites_tracers_gpu.py b/testsuite/python/virtual_sites_tracers_gpu.py
index 57d4c6873bc..48760d920b4 100644
--- a/testsuite/python/virtual_sites_tracers_gpu.py
+++ b/testsuite/python/virtual_sites_tracers_gpu.py
@@ -24,11 +24,10 @@
 
 
 @utx.skipIfMissingGPU()
-@utx.skipIfMissingFeatures(
-    ['VIRTUAL_SITES_INERTIALESS_TRACERS', 'LB_BOUNDARIES'])
+@utx.skipIfMissingFeatures(['VIRTUAL_SITES_INERTIALESS_TRACERS'])
 class VirtualSitesTracers(VirtualSitesTracersCommon, ut.TestCase):
 
-    LBClass = espressomd.lb.LBFluidGPU
+    LBClass = espressomd.lb.LBFluidWalberlaGPU
 
 
 if __name__ == "__main__":
diff --git a/testsuite/scripts/benchmarks/test_lb.py b/testsuite/scripts/benchmarks/test_lb.py
index fd15a7cab57..0cb41efc2e5 100644
--- a/testsuite/scripts/benchmarks/test_lb.py
+++ b/testsuite/scripts/benchmarks/test_lb.py
@@ -24,7 +24,7 @@
 
 benchmark, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
     "@BENCHMARKS_DIR@/lb.py", cmd_arguments=["--particles_per_core", "80"],
-    measurement_steps=200, n_iterations=2, min_skin=0.688, max_skin=0.688)
+    measurement_steps=200, n_iterations=2)
 
 
 @skipIfMissingFeatures
diff --git a/testsuite/scripts/samples/CMakeLists.txt b/testsuite/scripts/samples/CMakeLists.txt
index 60fb63eb604..78d4a9c2675 100644
--- a/testsuite/scripts/samples/CMakeLists.txt
+++ b/testsuite/scripts/samples/CMakeLists.txt
@@ -49,7 +49,6 @@ sample_test(FILE test_diffusion_coefficient.py)
 sample_test(FILE test_dpd.py)
 sample_test(FILE test_drude_bmimpf6.py SUFFIX cpu)
 sample_test(FILE test_drude_bmimpf6.py SUFFIX gpu LABELS "gpu")
-sample_test(FILE test_ekboundaries.py LABELS "gpu")
 sample_test(FILE test_electrophoresis.py)
 sample_test(FILE test_espresso_logo.py)
 sample_test(FILE test_gibbs_ensemble.py)
@@ -58,9 +57,11 @@ if(HDF5_FOUND)
   sample_test(FILE test_h5md.py)
   sample_test(FILE test_h5md_trajectory.py)
 endif()
-sample_test(FILE test_lbf.py SUFFIX cpu)
-sample_test(FILE test_lbf.py SUFFIX gpu LABELS "gpu")
+sample_test(FILE test_lbf.py)
 sample_test(FILE test_lb_profile.py)
+sample_test(FILE test_lb_planar_couette.py)
+sample_test(FILE test_lb_circular_couette.py)
+sample_test(FILE test_lb_four_roller_mill.py)
 sample_test(FILE test_lj_liquid_distribution.py)
 sample_test(FILE test_lj_liquid.py)
 sample_test(FILE test_lj_liquid_structurefactor.py)
diff --git a/testsuite/scripts/samples/test_lb_circular_couette.py b/testsuite/scripts/samples/test_lb_circular_couette.py
new file mode 100644
index 00000000000..c91ca032424
--- /dev/null
+++ b/testsuite/scripts/samples/test_lb_circular_couette.py
@@ -0,0 +1,71 @@
+#
+# Copyright (C) 2021-2023 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import unittest as ut
+import importlib_wrapper
+import numpy as np
+import scipy.optimize
+
+sample, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
+    "@SAMPLES_DIR@/lb_circular_couette.py")
+
+
+def taylor_couette(v1, v2, r1, r2, agrid):
+    # Taylor-Couette equation
+    mu = v2 / v1
+    eta = r1 / r2
+    scale = 1. / 2**3 / agrid
+    a = scale * v1 * (mu - eta**2) / (1 - eta**2)
+    b = scale * v1 * r1**2 * (1 - mu) / (1 - eta**2)
+    return a, b
+
+
+@skipIfMissingFeatures
+class Sample(ut.TestCase):
+    system = sample.system
+
+    def test_taylor_couette_flow(self):
+        # get flow profile
+        v_r, v_phi, v_z = sample.profile_v.T
+
+        # check velocity is zero for the radial and axial components
+        np.testing.assert_allclose(v_r, 0., atol=1e-4)
+        np.testing.assert_allclose(v_z, 0., atol=1e-6)
+
+        # check azimuthal velocity is zero inside boundary
+        np.testing.assert_allclose(v_phi[:7], 0., atol=1e-7)
+
+        # check azimuthal velocity in the linear regime
+        self.assertGreater(v_phi[7], v_phi[6])
+        self.assertGreater(v_phi[8], v_phi[7])
+        self.assertGreater(v_phi[9], v_phi[8])
+
+        # check azimuthal velocity in the Couette regime
+        xdata = sample.profile_r[9:]
+        ydata = v_phi[9:]
+        a_ref, b_ref = taylor_couette(
+            sample.velocity_magnitude, 0.0, sample.cylinder_in.radius,
+            sample.cylinder_out.radius, sample.agrid)
+        (a_sim, b_sim), _ = scipy.optimize.curve_fit(
+            lambda x, a, b: a * x + b / x, xdata, ydata)
+        np.testing.assert_allclose([a_sim, b_sim], [a_ref, b_ref], atol=1e-3)
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/scripts/samples/test_ekboundaries.py b/testsuite/scripts/samples/test_lb_four_roller_mill.py
similarity index 63%
rename from testsuite/scripts/samples/test_ekboundaries.py
rename to testsuite/scripts/samples/test_lb_four_roller_mill.py
index ce55f3542e9..3366a150759 100644
--- a/testsuite/scripts/samples/test_ekboundaries.py
+++ b/testsuite/scripts/samples/test_lb_four_roller_mill.py
@@ -1,4 +1,5 @@
-# Copyright (C) 2019-2022 The ESPResSo project
+#
+# Copyright (C) 2021-2023 The ESPResSo project
 #
 # This file is part of ESPResSo.
 #
@@ -14,26 +15,25 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
 
 import unittest as ut
 import importlib_wrapper
-import pathlib
+import numpy as np
 
 sample, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
-    "@SAMPLES_DIR@/ekboundaries.py", gpu=True, n_int_cycles=50)
+    "@SAMPLES_DIR@/lb_four_roller_mill.py")
 
 
 @skipIfMissingFeatures
 class Sample(ut.TestCase):
     system = sample.system
 
-    def test_file_generation(self):
-        # test .vtk files exist
-        path_vtk_root = pathlib.Path("ek")
-        for basename in ["pos_dens_0.vtk", "pos_flux_0.vtk", "ekv_0.vtk",
-                         "neg_dens_0.vtk", "neg_flux_0.vtk", "ekb_0.vtk"]:
-            filepath = path_vtk_root / basename
-            self.assertTrue(filepath.is_file(), f"File {filepath} not created")
+    def test_flow_convergence(self):
+        vel = sample.fluid_vel
+        np.testing.assert_allclose(vel, np.flip(vel, axis=0), atol=1e-4)
+        np.testing.assert_allclose(vel, np.flip(vel, axis=1), atol=1e-4)
+        np.testing.assert_allclose(vel, np.rot90(np.rot90(vel)), atol=1e-4)
 
 
 if __name__ == "__main__":
diff --git a/src/python/espressomd/ekboundaries.py b/testsuite/scripts/samples/test_lb_planar_couette.py
similarity index 53%
rename from src/python/espressomd/ekboundaries.py
rename to testsuite/scripts/samples/test_lb_planar_couette.py
index 7389239b226..42a9136fc95 100644
--- a/src/python/espressomd/ekboundaries.py
+++ b/testsuite/scripts/samples/test_lb_planar_couette.py
@@ -1,4 +1,5 @@
-# Copyright (C) 2010-2022 The ESPResSo project
+#
+# Copyright (C) 2021-2023 The ESPResSo project
 #
 # This file is part of ESPResSo.
 #
@@ -14,26 +15,25 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
-from .script_interface import script_interface_register
-from .code_features import has_features
-import espressomd.lbboundaries
+#
 
+import unittest as ut
+import importlib_wrapper
+import numpy as np
 
-if any(has_features(i) for i in ["LB_BOUNDARIES", "LB_BOUNDARIES_GPU"]):
-    @script_interface_register
-    class EKBoundaries(espressomd.lbboundaries.LBBoundaries):
+sample, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
+    "@SAMPLES_DIR@/lb_planar_couette.py", time_breakpoints=[500],
+    pos_breakpoints=64)
 
-        """
-        Creates a set of electrokinetics boundaries.
 
-        """
-        pass
+@skipIfMissingFeatures
+class Sample(ut.TestCase):
+    system = sample.system
 
-    @script_interface_register
-    class EKBoundary(espressomd.lbboundaries.LBBoundary):
+    def test_fit(self):
+        np.testing.assert_allclose(sample.velocity_lbf,
+                                   sample.velocity_ref, atol=1e-5)
 
-        """
-        Creates a EK boundary.
 
-        """
-        pass
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/scripts/samples/test_lb_profile.py b/testsuite/scripts/samples/test_lb_profile.py
index 9c597d458c1..8d07c0bdac1 100644
--- a/testsuite/scripts/samples/test_lb_profile.py
+++ b/testsuite/scripts/samples/test_lb_profile.py
@@ -29,7 +29,7 @@ class Sample(ut.TestCase):
 
     def test_fit(self):
         np.testing.assert_allclose(sample.lb_fluid_profile[:, 0, 0, 2],
-                                   sample.expected_profile, atol=5e-2)
+                                   sample.expected_profile, atol=7e-2)
 
 
 if __name__ == "__main__":
diff --git a/testsuite/scripts/samples/test_lbf.py b/testsuite/scripts/samples/test_lbf.py
index e2451cc5e76..245f90bb875 100644
--- a/testsuite/scripts/samples/test_lbf.py
+++ b/testsuite/scripts/samples/test_lbf.py
@@ -34,7 +34,7 @@ def test_electrophoresis_gradient(self):
         gradient = np.mean(np.gradient(sample.f_list.T, axis=1), axis=1)
         self.assertAlmostEqual(gradient[0], 0.0, places=11)
         self.assertAlmostEqual(gradient[1], 0.0, places=11)
-        self.assertAlmostEqual(gradient[2], -7.78814e-7, places=11)
+        self.assertAlmostEqual(gradient[2], -7.816e-7, delta=1e-9)
 
 
 if __name__ == "__main__":
diff --git a/testsuite/scripts/samples/test_object_in_fluid__motivation.py b/testsuite/scripts/samples/test_object_in_fluid__motivation.py
index 8836577acff..a5b3777bfec 100644
--- a/testsuite/scripts/samples/test_object_in_fluid__motivation.py
+++ b/testsuite/scripts/samples/test_object_in_fluid__motivation.py
@@ -19,6 +19,7 @@
 import importlib_wrapper
 import os
 import pathlib
+import numpy as np
 
 os.chdir("@SAMPLES_DIR@/object_in_fluid")
 sample, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
@@ -38,15 +39,25 @@ def test_file_generation(self):
             "wallBottom.vtk",
             "wallBack.vtk",
             "wallFront.vtk"]
-        for i in [0, 1]:
-            for j in range(sample.maxCycle):
-                basenames.append(f"cell{i}_{j}.vtk")
+        for j in range(sample.maxCycle + 1):
+            basenames.append(f"cell0_{j}.vtk")
 
         # test .vtk files exist
         path_vtk_root = pathlib.Path("output")
         for name in basenames:
             filepath = path_vtk_root / "sim0" / name
-            self.assertTrue(filepath.is_file(), f"File {filepath} not created")
+            self.assertTrue(
+                filepath.is_file(),
+                f"File '{filepath}' not created")
+
+        # make sure we are still in the LB linear regime
+        lb_density = np.copy(sample.lbf[:, :, :].density)
+        np.testing.assert_allclose(lb_density, 1., atol=2e-3)
+
+        # verify cell momentum
+        cell_vel = np.mean(self.system.part.all().v, axis=0)
+        np.testing.assert_allclose(
+            cell_vel, [1.54e-2, 1.8e-3, 0.], rtol=1e-2, atol=1e-6)
 
 
 if __name__ == "__main__":
diff --git a/testsuite/scripts/tutorials/CMakeLists.txt b/testsuite/scripts/tutorials/CMakeLists.txt
index 8ff5258532c..e93755f95ab 100644
--- a/testsuite/scripts/tutorials/CMakeLists.txt
+++ b/testsuite/scripts/tutorials/CMakeLists.txt
@@ -50,12 +50,12 @@ tutorial_test(FILE test_lennard_jones.py)
 tutorial_test(FILE test_charged_system.py)
 tutorial_test(FILE test_langevin_dynamics.py)
 tutorial_test(FILE test_polymers.py SUFFIX rouse)
-tutorial_test(FILE test_polymers.py SUFFIX zimm LABELS "gpu")
-tutorial_test(FILE test_lattice_boltzmann_poiseuille_flow.py LABELS "gpu")
+tutorial_test(FILE test_polymers.py SUFFIX zimm)
+tutorial_test(FILE test_lattice_boltzmann_poiseuille_flow.py)
 tutorial_test(FILE test_lattice_boltzmann_sedimentation.py)
-tutorial_test(FILE test_raspberry_electrophoresis.py LABELS "gpu")
-tutorial_test(FILE test_active_matter.py LABELS "gpu")
-tutorial_test(FILE test_electrokinetics.py LABELS "gpu")
+tutorial_test(FILE test_raspberry_electrophoresis.py)
+tutorial_test(FILE test_active_matter.py)
+tutorial_test(FILE test_electrokinetics.py)
 tutorial_test(FILE test_visualization.py)
 tutorial_test(FILE test_ferrofluid_1.py)
 tutorial_test(FILE test_ferrofluid_2.py)
diff --git a/testsuite/scripts/tutorials/test_active_matter.py b/testsuite/scripts/tutorials/test_active_matter.py
index aae4c9bd5c5..53306618ac0 100644
--- a/testsuite/scripts/tutorials/test_active_matter.py
+++ b/testsuite/scripts/tutorials/test_active_matter.py
@@ -18,10 +18,10 @@
 import numpy as np
 import unittest as ut
 import importlib_wrapper
+import os
 
 tutorial, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
     "@TUTORIALS_DIR@/active_matter/active_matter.py",
-    gpu=True,
     ED_N_SAMPLING_STEPS=100000,
     RECT_N_SAMPLES=150,
     HYDRO_N_STEPS=150
@@ -74,6 +74,13 @@ def test_flow_profile(self):
         self.assertLessEqual(curl_percent[16, 16], -threshold_percent)
         self.assertLessEqual(curl_percent[18, 20], -threshold_percent)
 
+    def test_file_generation(self):
+        for name in ["position_0.vtk", "lb_velocity_0.vtu"]:
+            filepath = os.path.join(tutorial.vtk_outdir, name)
+            self.assertTrue(
+                os.path.isfile(filepath),
+                filepath + " not created")
+
 
 if __name__ == "__main__":
     ut.main()
diff --git a/testsuite/scripts/tutorials/test_electrokinetics.py b/testsuite/scripts/tutorials/test_electrokinetics.py
index 7bd51af117e..611f8dafceb 100644
--- a/testsuite/scripts/tutorials/test_electrokinetics.py
+++ b/testsuite/scripts/tutorials/test_electrokinetics.py
@@ -20,8 +20,7 @@
 import numpy as np
 
 tutorial, skipIfMissingFeatures = iw.configure_and_import(
-    "@TUTORIALS_DIR@/electrokinetics/electrokinetics.py",
-    gpu=True, integration_length=600, dt=0.5)
+    "@TUTORIALS_DIR@/electrokinetics/electrokinetics.py", integration_length=400)
 
 
 @skipIfMissingFeatures
@@ -37,13 +36,13 @@ def normalize_two_datasets(self, a, b):
         b /= scale
 
     def test_simulation(self):
-        for varname in ("density", "velocity", "pressure_xy"):
+        for varname, tol in zip(["density", "velocity"], [2, 5]):
             sim = np.array(tutorial.__dict__[varname + "_list"])
             ana = np.array(tutorial.eof_analytical.__dict__[varname + "_list"])
             self.normalize_two_datasets(sim, ana)
             accuracy = np.max(np.abs(sim - ana))
-            # expecting at most 3% deviation
-            self.assertLess(accuracy, 3.0 / 100)
+            # expecting at most a few percents deviation
+            self.assertLess(accuracy, tol / 100.)
 
 
 if __name__ == "__main__":
diff --git a/testsuite/scripts/tutorials/test_lattice_boltzmann_poiseuille_flow.py b/testsuite/scripts/tutorials/test_lattice_boltzmann_poiseuille_flow.py
index 2e4adcb5c71..7ef907ec1b7 100644
--- a/testsuite/scripts/tutorials/test_lattice_boltzmann_poiseuille_flow.py
+++ b/testsuite/scripts/tutorials/test_lattice_boltzmann_poiseuille_flow.py
@@ -21,8 +21,7 @@
 
 
 tutorial, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
-    "@TUTORIALS_DIR@/lattice_boltzmann/lattice_boltzmann_poiseuille_flow.py",
-    gpu=True)
+    "@TUTORIALS_DIR@/lattice_boltzmann/lattice_boltzmann_poiseuille_flow.py")
 
 
 @skipIfMissingFeatures
diff --git a/testsuite/scripts/tutorials/test_lattice_boltzmann_sedimentation.py b/testsuite/scripts/tutorials/test_lattice_boltzmann_sedimentation.py
index 47390a41a76..f42c2b89c97 100644
--- a/testsuite/scripts/tutorials/test_lattice_boltzmann_sedimentation.py
+++ b/testsuite/scripts/tutorials/test_lattice_boltzmann_sedimentation.py
@@ -25,7 +25,7 @@
 
 tutorial, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
     "@TUTORIALS_DIR@/lattice_boltzmann/lattice_boltzmann_sedimentation.py",
-    sampling_steps=400)
+    sampling_steps=450)
 
 
 def curl2d(flow, spacing):
@@ -45,7 +45,7 @@ class Tutorial(ut.TestCase):
 
     def test_flow_profile(self):
         # slice trajectory to keep only the flow field onset
-        flow_field = tutorial.data_flowfield[200:400, :, :, 0:2]
+        flow_field = tutorial.data_flowfield[400:450, :, :, 0:2]
         vortices = np.zeros((2, flow_field.shape[0], 2))
         for i in range(flow_field.shape[0]):
             curl = curl2d(flow_field[i], 2 * [tutorial.spacing])
@@ -53,8 +53,8 @@ def test_flow_profile(self):
             vortices[1, i] = get_peak_position(curl, np.argmin)
 
         # check flow field curl
-        ref_pos_x = [12.5, 7.5]  # LB units
-        ref_pos_y = 2 * [11.]    # LB units
+        ref_pos_x = [5.5, 13.5]  # LB units
+        ref_pos_y = 2 * [13.5]   # LB units
         for i in range(2):
             width = tutorial.n_width  # LB units
             vortex_avg_x = scipy.stats.circmean(vortices[i, :, 0], high=width)
@@ -63,8 +63,8 @@ def test_flow_profile(self):
             vortex_std_y = np.std(vortices[i, :, 1])
             self.assertAlmostEqual(vortex_avg_x, ref_pos_x[i], delta=2.)
             self.assertAlmostEqual(vortex_avg_y, ref_pos_y[i], delta=2.)
-            self.assertLess(vortex_std_x, 4.)
-            self.assertLess(vortex_std_y, 6.)
+            self.assertLess(vortex_std_x, 2.)
+            self.assertLess(vortex_std_y, 3.)
 
 
 if __name__ == "__main__":
diff --git a/testsuite/scripts/tutorials/test_polymers.py b/testsuite/scripts/tutorials/test_polymers.py
index dadbb803e78..2da7532db8c 100644
--- a/testsuite/scripts/tutorials/test_polymers.py
+++ b/testsuite/scripts/tutorials/test_polymers.py
@@ -22,7 +22,7 @@
 if '@TEST_SUFFIX@' == 'rouse':
     params = {}
 elif '@TEST_SUFFIX@' == 'zimm':
-    params = {'LOOPS': 2000, 'POLYMER_MODEL': 'Zimm', 'gpu': True}
+    params = {'LOOPS': 400, 'POLYMER_MODEL': 'Zimm'}
 
 tutorial, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
     "@TUTORIALS_DIR@/polymers/polymers.py",
@@ -43,13 +43,13 @@ def test_exponents(self):
         msg = 'The R_h exponent should be close to 0.333'
         self.assertGreater(tutorial.rh_exponent, 0.30, msg=msg)
         self.assertLess(tutorial.rh_exponent, 0.50, msg=msg)
-        np.testing.assert_allclose(tutorial.rf2_rg2_ratio, 6.0, atol=1.0,
+        np.testing.assert_allclose(tutorial.rf2_rg2_ratio, 6.0, atol=1.1,
                                    err_msg='R_F^2/R_g^2 should be close to 6.0')
 
     def test_diffusion_coefficients(self):
         # polymer diffusion
         ref_D = [0.0363, 0.0269, 0.0234]
-        np.testing.assert_allclose(tutorial.diffusion_msd, ref_D, rtol=0.15)
+        np.testing.assert_allclose(tutorial.diffusion_msd, ref_D, rtol=0.30)
         np.testing.assert_allclose(tutorial.diffusion_gk, ref_D, rtol=0.15)
         # monomer diffusion
         if tutorial.POLYMER_MODEL == 'Rouse':
diff --git a/testsuite/scripts/tutorials/test_raspberry_electrophoresis.py b/testsuite/scripts/tutorials/test_raspberry_electrophoresis.py
index 4433b1dae12..1ee362d01db 100644
--- a/testsuite/scripts/tutorials/test_raspberry_electrophoresis.py
+++ b/testsuite/scripts/tutorials/test_raspberry_electrophoresis.py
@@ -18,11 +18,10 @@
 import unittest as ut
 import importlib_wrapper
 import numpy as np
-np.random.seed(41)
 
 tutorial, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
     "@TUTORIALS_DIR@/raspberry_electrophoresis/raspberry_electrophoresis.py",
-    gpu=True, box_l=20., num_iterations=20, num_steps_per_iteration=20)
+    box_l=16., num_iterations=100, num_steps_per_iteration=80)
 
 
 @skipIfMissingFeatures