diff --git a/.codecov.yml b/.codecov.yml index 50684dece5e..7635de48e9d 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -50,3 +50,6 @@ ignore: - "doc/tutorials" - "samples" - "maintainer" + - "src/walberla_bridge/**/generated_kernels/*" + - "src/walberla_bridge/myintrin.h" + - "src/walberla_bridge/philox_rand.h" diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4217ff87b07..5e1815f5f94 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -68,7 +68,7 @@ style_doxygen: - mkdir build - cd build - cp ../maintainer/configs/maxset.hpp myconfig.hpp - - cmake .. -D ESPRESSO_BUILD_WITH_CUDA=ON -D ESPRESSO_BUILD_WITH_GSL=ON -D ESPRESSO_BUILD_WITH_HDF5=ON -D ESPRESSO_BUILD_WITH_SCAFACOS=ON -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=ON + - cmake .. -D ESPRESSO_BUILD_WITH_CUDA=ON -D ESPRESSO_BUILD_WITH_GSL=ON -D ESPRESSO_BUILD_WITH_HDF5=ON -D ESPRESSO_BUILD_WITH_SCAFACOS=ON -D ESPRESSO_BUILD_WITH_WALBERLA=ON -D ESPRESSO_BUILD_WITH_WALBERLA_FFT=ON -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=ON - sh ../maintainer/CI/dox_warnings.sh tags: - espresso @@ -87,6 +87,7 @@ default: myconfig: 'default' with_coverage: 'true' with_scafacos: 'true' + with_walberla: 'true' with_stokesian_dynamics: 'true' check_skip_long: 'true' script: @@ -94,6 +95,7 @@ default: tags: - espresso - no-cuda + - numa maxset: <<: *global_job_definition @@ -107,6 +109,7 @@ maxset: myconfig: 'maxset' with_coverage: 'true' with_scafacos: 'true' + with_walberla: 'true' with_stokesian_dynamics: 'true' check_skip_long: 'true' cmake_params: '-D ESPRESSO_TEST_NP=8' @@ -116,6 +119,7 @@ maxset: - espresso - no-cuda - numa + - avx2 no_rotation: <<: *global_job_definition @@ -169,6 +173,7 @@ clang-sanitizer: with_asan: 'true' with_ubsan: 'true' with_scafacos: 'true' + with_walberla: 'true' with_stokesian_dynamics: 'true' script: - bash maintainer/CI/build_cmake.sh @@ -210,6 +215,7 @@ cuda11-coverage: with_coverage: 'true' check_skip_long: 'true' with_scafacos: 'true' + with_walberla: 'true' with_stokesian_dynamics: 'true' 
script: - bash maintainer/CI/build_cmake.sh @@ -232,6 +238,8 @@ cuda11-maxset: test_timeout: '900' srcdir: '${CI_PROJECT_DIR}' with_scafacos: 'true' + with_walberla: 'true' + with_walberla_avx: 'true' with_stokesian_dynamics: 'true' script: - bash maintainer/CI/build_cmake.sh @@ -256,6 +264,8 @@ tutorials-samples-maxset: with_coverage: 'false' with_coverage_python: 'true' with_scafacos: 'true' + with_walberla: 'true' + with_walberla_avx: 'true' with_stokesian_dynamics: 'true' make_check_unit_tests: 'false' make_check_python: 'false' @@ -281,6 +291,8 @@ tutorials-samples-default: with_cuda: 'true' with_coverage: 'false' with_scafacos: 'true' + with_walberla: 'true' + with_walberla_avx: 'true' make_check_unit_tests: 'false' make_check_python: 'false' make_check_tutorials: 'true' @@ -306,14 +318,14 @@ tutorials-samples-empty: myconfig: 'empty' with_cuda: 'true' with_coverage: 'false' - with_scafacos: 'true' + with_scafacos: 'false' + with_walberla: 'false' make_check_unit_tests: 'false' make_check_python: 'false' make_check_tutorials: 'true' make_check_samples: 'true' make_check_benchmarks: 'true' test_timeout: '1200' - with_scafacos: 'false' script: - bash maintainer/CI/build_cmake.sh tags: @@ -334,6 +346,8 @@ tutorials-samples-no-gpu: with_cuda: 'true' with_coverage: 'false' with_scafacos: 'true' + with_walberla: 'true' + with_walberla_avx: 'true' make_check_unit_tests: 'false' make_check_python: 'false' make_check_tutorials: 'true' @@ -362,6 +376,7 @@ installation: make_check_unit_tests: 'false' make_check_python: 'false' with_scafacos: 'true' + with_walberla: 'true' with_stokesian_dynamics: 'true' srcdir: '${CI_PROJECT_DIR}' build_type: 'Release' @@ -396,6 +411,7 @@ empty: with_cuda: 'true' with_static_analysis: 'true' with_scafacos: 'false' + with_walberla: 'false' with_stokesian_dynamics: 'false' with_coverage: 'false' with_coverage_python: 'true' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c61e17f2ca3..9212797e37e 100644 --- 
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,8 +8,8 @@ repos: entry: sh maintainer/format/clang-format.sh language: system always_run: false - files: '.*\.(cpp|hpp|cu|cuh)' - exclude: '^libs/' + files: '.*\.(cpp|hpp|h|cu|cuh)' + exclude: '^libs/|^src/walberla_bridge/src/.*/generated_kernels/.*\.(cpp|cu)|^maintainer/walberla_kernels/templates/.*\.tmpl\.(cpp|hpp|h|cu|cuh)' args: ["-i", "-style=file"] - id: autopep8 diff --git a/CMakeLists.txt b/CMakeLists.txt index 70c01263d12..b81dfea64f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,7 @@ include(FeatureSummary) project(ESPResSo) include(GNUInstallDirs) include(espresso_option_enum) +include(espresso_enable_avx2_support) if(POLICY CMP0074) # make find_package() use _ROOT variables cmake_policy(SET CMP0074 NEW) @@ -92,6 +93,11 @@ option(ESPRESSO_BUILD_TESTS "Enable tests" ON) option(ESPRESSO_BUILD_WITH_SCAFACOS "Build with ScaFaCoS support" OFF) option(ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS "Build with Stokesian Dynamics" OFF) +option(ESPRESSO_BUILD_WITH_WALBERLA + "Build with waLBerla lattice-Boltzmann support" OFF) +option(ESPRESSO_BUILD_WITH_WALBERLA_AVX + "Build waLBerla lattice-Boltzmann with AVX vectorization" OFF) +option(ESPRESSO_BUILD_WITH_WALBERLA_FFT "Build waLBerla with FFT support" OFF) option(ESPRESSO_BUILD_BENCHMARKS "Enable benchmarks" OFF) option(ESPRESSO_BUILD_WITH_VALGRIND_MARKERS "Build with valgrind instrumentation markers" OFF) @@ -171,6 +177,15 @@ foreach(func_name __PRETTY_FUNCTION__ __FUNCSIG__ __FUNCTION__) endif(result${func_name}) endforeach() +# +# AVX2 support +# + +include(CheckCXXCompilerFlag) + +add_library(espresso_avx_flags INTERFACE) +add_library(espresso::avx_flags ALIAS espresso_avx_flags) + # # Interface libraries # @@ -573,6 +588,75 @@ if(ESPRESSO_BUILD_BENCHMARKS) add_subdirectory(maintainer/benchmarks) endif() +# +# waLBerla +# + +if(ESPRESSO_BUILD_WITH_WALBERLA) + # cmake-format: off + include(FetchContent) + FetchContent_Declare( + walberla 
+ GIT_REPOSITORY https://i10git.cs.fau.de/walberla/walberla.git + GIT_TAG 065ce5f311850371a97ac4766f47dbb5ca8424ba + ) + # workaround for https://gitlab.kitware.com/cmake/cmake/-/issues/21146 + if(NOT DEFINED walberla_SOURCE_DIR OR NOT EXISTS "${walberla_SOURCE_DIR}") + FetchContent_Populate(walberla) + endif() + # cmake-format: on + string(REGEX REPLACE "([/\\]walberla)-src$" "\\1-build" walberla_BINARY_DIR + "${walberla_SOURCE_DIR}") + set(WALBERLA_BUILD_TESTS off CACHE BOOL "") + set(WALBERLA_BUILD_BENCHMARKS off CACHE BOOL "") + set(WALBERLA_BUILD_TOOLS off CACHE BOOL "") + set(WALBERLA_BUILD_TUTORIALS off CACHE BOOL "") + set(WALBERLA_BUILD_SHOWCASES off CACHE BOOL "") + set(WALBERLA_BUILD_DOC off CACHE BOOL "") + set(WALBERLA_LOGLEVEL "WARNING" CACHE STRING "") + set(CMAKE_POSITION_INDEPENDENT_CODE on CACHE BOOL "") + if(ESPRESSO_BUILD_WITH_CUDA) + set(WALBERLA_BUILD_WITH_CUDA "on" CACHE BOOL "") + if(CMAKE_VERSION VERSION_LESS 3.25 OR NOT ESPRESSO_CUDA_COMPILER STREQUAL + "clang") + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 75) + endif() + endif() + endif() + if(ESPRESSO_BUILD_WITH_WALBERLA_FFT) + set(ESPRESSO_USE_WALBERLA_FFT 1) + set(WALBERLA_BUILD_WITH_FFTW on CACHE BOOL "") + else() + set(WALBERLA_BUILD_WITH_FFTW off CACHE BOOL "") + endif() + set(WALBERLA_BUILD_WITH_FASTMATH off CACHE BOOL "") + add_subdirectory("${walberla_SOURCE_DIR}" "${walberla_BINARY_DIR}") + set(WALBERLA_LIBS + walberla::core walberla::domain_decomposition walberla::blockforest + walberla::boundary walberla::field walberla::lbm walberla::timeloop + walberla::vtk) + if(ESPRESSO_BUILD_WITH_WALBERLA_FFT) + set(WALBERLA_LIBS ${WALBERLA_LIBS} walberla::fft) + endif() + if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA) + set(WALBERLA_LIBS ${WALBERLA_LIBS} walberla::cuda) + endif() + # workaround for https://gitlab.kitware.com/cmake/cmake/-/issues/21283 + foreach(target_w_namespace IN LISTS WALBERLA_LIBS) + string(REPLACE "walberla::" "" 
target_wo_namespace ${target_w_namespace}) + add_library(${target_w_namespace} ALIAS ${target_wo_namespace}) + endforeach() + if(ESPRESSO_BUILD_WITH_WALBERLA_AVX) + function(espresso_avx_flags_callback COMPILER_AVX2_FLAG) + target_compile_options( + espresso_avx_flags INTERFACE "${COMPILER_AVX2_FLAG}" + "-DESPRESSO_BUILD_WITH_AVX_KERNELS") + endfunction() + espresso_enable_avx2_support(espresso_avx_flags_callback) + endif() +endif() + # # Subdirectories # diff --git a/cmake/FindFFTW3.cmake b/cmake/FindFFTW3.cmake index f7bae67947d..a4bdae4f110 100644 --- a/cmake/FindFFTW3.cmake +++ b/cmake/FindFFTW3.cmake @@ -32,14 +32,21 @@ endif(FFTW3_INCLUDE_DIR) find_path(FFTW3_INCLUDE_DIR fftw3.h) find_library(FFTW3_LIBRARIES NAMES fftw3) +find_path(FFTW3_MPI_INCLUDE_DIR fftw3-mpi.h) +find_library(FFTW3_MPI_LIBRARIES NAMES fftw3_mpi) # handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE if all # listed variables are TRUE include(FindPackageHandleStandardArgs) find_package_handle_standard_args(FFTW3 DEFAULT_MSG FFTW3_LIBRARIES FFTW3_INCLUDE_DIR) +set(FPHSA_NAME_MISMATCHED 1) +find_package_handle_standard_args(FFTW3_MPI DEFAULT_MSG FFTW3_MPI_LIBRARIES + FFTW3_MPI_INCLUDE_DIR) +unset(FPHSA_NAME_MISMATCHED) + +mark_as_advanced(FFTW3_LIBRARIES FFTW3_INCLUDE_DIR FFTW3_MPI_LIBRARIES FFTW3_MPI_INCLUDE_DIR) -mark_as_advanced(FFTW3_LIBRARIES FFTW3_INCLUDE_DIR) if(FFTW3_FOUND AND NOT TARGET FFTW3::FFTW3) add_library(FFTW3::FFTW3 INTERFACE IMPORTED) diff --git a/cmake/espresso_cmake_config.cmakein b/cmake/espresso_cmake_config.cmakein index 884c323187b..62091119c59 100644 --- a/cmake/espresso_cmake_config.cmakein +++ b/cmake/espresso_cmake_config.cmakein @@ -13,6 +13,10 @@ #cmakedefine ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS +#cmakedefine ESPRESSO_BUILD_WITH_WALBERLA + +#cmakedefine ESPRESSO_BUILD_WITH_WALBERLA_FFT + #cmakedefine ESPRESSO_BUILD_WITH_VALGRIND_MARKERS #define PACKAGE_NAME "${PROJECT_NAME}" diff --git a/cmake/espresso_enable_avx2_support.cmake 
b/cmake/espresso_enable_avx2_support.cmake new file mode 100644 index 00000000000..3baab871465 --- /dev/null +++ b/cmake/espresso_enable_avx2_support.cmake @@ -0,0 +1,85 @@ +# +# Copyright (C) 2022-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +#
+
+# Find the compiler flag that enables AVX2 and pass it to ``callback``.
+#
+# The candidate flags "-mavx2" and "/arch:AVX2" are probed in that order with
+# check_cxx_compiler_flag(); the first accepted flag is forwarded as the only
+# argument of the command named by ``callback``. Configuration aborts with
+# FATAL_ERROR if no candidate is accepted, if the output of the compiler run
+# with -march=native does not mention AVX2 (check skipped for "/arch:AVX2"),
+# or if a small AVX2 test program cannot be compiled with the selected flag.
+#
+# check_cxx_compiler_flag() and check_cxx_source_compiles() must already be
+# available when this function is called; this file does not include the
+# modules that define them.
+function(espresso_enable_avx2_support callback)
+  set(COMPILER_AVX2_FLAG "")
+  foreach(FLAG_NAME "-mavx2" "/arch:AVX2")
+    string(REGEX REPLACE "[^0-9A-Za-z_]" "_" FLAG_VARIABLE "${FLAG_NAME}")
+    # CMake variable names are case-sensitive: the flag under test is the
+    # loop variable FLAG_NAME
+    check_cxx_compiler_flag("${FLAG_NAME}"
+                            COMPILER_HAS_${FLAG_VARIABLE}_FLAG_RESULT)
+    if(COMPILER_HAS_${FLAG_VARIABLE}_FLAG_RESULT)
+      set(COMPILER_AVX2_FLAG "${FLAG_NAME}")
+      cmake_language(CALL ${callback} "${COMPILER_AVX2_FLAG}")
+      break()
+    endif()
+  endforeach()
+  if(COMPILER_AVX2_FLAG STREQUAL "")
+    message(
+      FATAL_ERROR
+        "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION} doesn't support AVX2-specific compiler flags."
+    )
+  endif()
+  if(NOT COMPILER_AVX2_FLAG STREQUAL "/arch:AVX2")
+    # look for an AVX2 marker in the verbose output of the compiler invoked
+    # with -march=native
+    execute_process(
+      COMMAND ${CMAKE_CXX_COMPILER} -march=native -E -v - INPUT_FILE /dev/null
+      OUTPUT_VARIABLE MARCH_NATIVE_OUTPUT_STRING
+      ERROR_VARIABLE MARCH_NATIVE_OUTPUT_STRING)
+    if(NOT "${MARCH_NATIVE_OUTPUT_STRING}" MATCHES "[ \n](\\+avx2|-mavx2|-D__AVX2__)[ \n]")
+      message(
+        FATAL_ERROR
+          "AVX2 not supported on this CPU architecture according to ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}. While ESPResSo will still compile, you will trigger SIGILL when calling AVX functions."
+      )
+    endif()
+  endif()
+  # compile a small AVX2 test program with the selected flag appended to
+  # CMAKE_REQUIRED_FLAGS, then restore the previous value
+  set(CMAKE_REQUIRED_FLAGS_BACKUP "${CMAKE_REQUIRED_FLAGS}")
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${COMPILER_AVX2_FLAG}")
+  check_cxx_source_compiles(
+    "#include <immintrin.h>
+    __m256i xi_i = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+    __m256 xi_s = _mm256_set_ps(0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f);
+    __m256d xi_d = _mm256_set_pd(0.0, 1.0, 2.0, 3.0);
+    int main() {}
+    " COMPILER_HAS_AVX2_SUPPORT)
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_BACKUP}")
+  if(NOT COMPILER_HAS_AVX2_SUPPORT)
+    message(
+      FATAL_ERROR
+        "Cannot execute a simple AVX2 program compiled by ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}."
+    )
+  endif()
+endfunction()
+
diff --git a/doc/bibliography.bib b/doc/bibliography.bib index cb3b51c2821..abb48c90e6a 100644 --- a/doc/bibliography.bib +++ b/doc/bibliography.bib @@ -146,6 +146,40 @@ @Article{batle20a doi = {10.1038/s41598-020-76029-x}, } +@InProceedings{bauer19a, + author = {Bauer, Martin and H\"{o}tzer, Johannes and Ernst, Dominik and Hammer, Julian and Seiz, Marco and Hierl, Henrik and H\"{o}nig, Jan and K\"{o}stler, Harald and Wellein, Gerhard and Nestler, Britta and R\"{u}de, Ulrich}, + title = {Code Generation for Massively Parallel Phase-Field Simulations}, + year = {2019}, + publisher = {Association for Computing Machinery}, + address = {New York}, + doi = {10.1145/3295500.3356186}, + isbn = {9781450362290}, + booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis}, + location = {Denver, Colorado}, +} + +@Article{bauer21a, + author = {Martin Bauer and Sebastian Eibl and Christian Godenschwager and Nils Kohl and Michael Kuron and Christoph Rettinger and Florian Schornbaum and Christoph Schwarzmeier and Dominik Th\"{o}nnes and Harald K\"{o}stler and Ulrich R\"{u}de}, + title = {{waLBerla}: A block-structured high-performance framework for
multiphysics simulations}, + journal = {Computers \& Mathematics with Applications}, + year = {2021}, + issn = {0898-1221}, + doi = {10.1016/j.camwa.2020.01.007}, + pages = {478--501}, + volume = {81}, +} + +@Article{bauer21b, + author = {Bauer, Martin and K\"{o}stler, Harald and R\"{u}de, Ulrich}, + title = {{lbmpy}: Automatic code generation for efficient parallel lattice {Boltzmann} methods}, + journal = {Journal of Computational Science}, + volume = {49}, + pages = {101269}, + year = {2021}, + issn = {1877-7503}, + doi = {10.1016/j.jocs.2020.101269}, +} + @Article{bindgen21a, author = {Bindgen, Sebastian and Weik, Florian and Weeber, Rudolf and Koos, Erin and de Buyl, Pierre}, title = {{L}ees--{E}dwards boundary conditions for translation invariant shear flow: {I}mplementation and transport properties}, @@ -469,6 +503,17 @@ @Article{gay81a doi = {10.1063/1.441483}, } +@InProceedings{godenschwager13a, + title = {A framework for hybrid parallel flow simulations with a trillion cells in complex geometries}, + author = {Godenschwager, Christian and Schornbaum, Florian and Bauer, Martin and K{\"o}stler, Harald and R{\"u}de, Ulrich}, + booktitle = {Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis}, + year = {2013}, + publisher = {Association for Computing Machinery}, + address = {New York}, + doi = {10.1145/2503210.2503273}, + isbn = {9781450323789}, +} + @Article{gompper96a, author = {Gompper, G. and Kroll, D. M.}, title = {Random Surface Discretizations and the Renormalization of the Bending Rigidity}, @@ -845,17 +890,6 @@ @Article{reed92a publisher={AIP Publishing} } -@Article{rohm12a, - author = {Roehm, D. 
and Arnold, A.}, - title = {Lattice {B}oltzmann simulations on {GPU}s with {ESPResSo}}, - journal = {European Physical Journal Special Topics}, - year = {2012}, - volume = {210}, - number = {1}, - pages = {89--100}, - doi = {10.1140/epjst/e2012-01639-6}, -} - @Book{rubinstein03a, title = {Polymer Physics}, publisher = {Oxford University Press}, diff --git a/doc/sphinx/advanced_methods.rst b/doc/sphinx/advanced_methods.rst index c5b78c88e6e..7676d831980 100644 --- a/doc/sphinx/advanced_methods.rst +++ b/doc/sphinx/advanced_methods.rst @@ -374,7 +374,6 @@ Description of sample script .. note:: The following features are required: - ``LB_BOUNDARIES``, ``EXTERNAL_FORCES``, ``MASS``, ``SOFT_SPHERE`` @@ -459,17 +458,15 @@ Specification of fluid and movement ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :: - lbf = espressomd.lb.LBFluid(agrid=1, dens=1.0, visc=1.5, fric=1.5, - tau=time_step, ext_force_density=[0.002, 0.0, 0.0]) + lbf = espressomd.lb.LBFluidWalberla(agrid=1, density=1.0, kinematic_viscosity=1.5, + tau=time_step, ext_force_density=[0.002, 0.0, 0.0]) system.actors.add(lbf) This part of the script specifies the fluid that will get the system moving. Here ``agrid`` :math:`=\Delta x` is the spatial discretisation step, ``tau`` is the time step that will be the same as the time step -for particles, viscosity ``visc`` and density ``dens`` of the fluid are -physical parameters scaled to lattice units. ``fric`` is a -(non-physical) friction parameter that enters the fluid-object -interaction and has to be set carefully. Finally, ``ext_force_density`` sets the +for particles, kinematic viscosity ``kinematic_viscosity`` and density ``density`` of the fluid are +physical parameters scaled to lattice units, and ``ext_force_density`` sets the force-per-unit-volume vector that drives the fluid. Another option to add momentum to fluid is by specifying the velocity on the boundaries. @@ -518,12 +515,12 @@ defined as follows.
First we define the two shapes: direction=1) The ``direction=1`` determines that the fluid is on the *outside*. Next -we create boundaries for the fluid: +we mark the LB nodes within the shapes as boundaries: :: - system.lbboundaries.add(lbboundaries.LBBoundary(shape=boundary1)) - system.lbboundaries.add(lbboundaries.LBBoundary(shape=boundary2)) + lbf.add_boundary_from_shape(boundary1) + lbf.add_boundary_from_shape(boundary2) Followed by creating the constraints for cells: diff --git a/doc/sphinx/constraints.rst b/doc/sphinx/constraints.rst index b46a75f5bbb..6bc6bd13922 100644 --- a/doc/sphinx/constraints.rst +++ b/doc/sphinx/constraints.rst @@ -550,8 +550,8 @@ the exception of a planar wall. For this, there is no ``direction`` option, but the ``normal`` vector of the wall points in the direction that is considered to yield positive distances. Outside their use in constraints, shapes can also be used as a way to define LB boundary nodes. In this case, negative distances -define nodes which are part of a boundary (please refer to :ref:`Using shapes -as lattice-Boltzmann boundary`). +define nodes which are part of a boundary (please refer to :ref:`Shape-based +LB boundary conditions`). .. _External Fields: diff --git a/doc/sphinx/ek.rst b/doc/sphinx/ek.rst index 2a586c3a75d..12f75cb6aea 100644 --- a/doc/sphinx/ek.rst +++ b/doc/sphinx/ek.rst @@ -11,6 +11,12 @@ interpolated on the LB grid. In the following paragraph we briefly explain the electrokinetic model implemented in |es|, before we come to the description of the interface. +.. note:: + + Requires external features ``WALBERLA`` and optionally ``WALBERLA_FFT`` + (for the FFT-based Poisson solver), enabled with the CMake options + ``-D ESPRESSO_BUILD_WITH_WALBERLA=ON -D ESPRESSO_BUILD_WITH_WALBERLA_FFT=ON``. + .. 
_Electrokinetic equations: Electrokinetic equations @@ -129,203 +135,298 @@ The electrokinetic equations have the following properties: spectra at frequencies, high enough that they correspond to times faster than the diffusive time scales of the charged species. -.. _Setup: +.. _EK Setup: Setup ----- -.. _Initialization: +.. _EK Initialization: Initialization -~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^ -:class:`~espressomd.electrokinetics.Electrokinetics` is used to initialize -the LB fluid of the EK method:: +Here is a minimal working example:: import espressomd import espressomd.electrokinetics - system = espressomd.System(box_l=[10.0, 10.0, 10.0]) - system.time_step = 0.0 - system.cell_system.skin = 0.4 - ek = espressomd.electrokinetics.Electrokinetics(agrid=1.0, lb_density=1.0, - viscosity=1.0, ext_force_density = [1,0,0], friction=1.0, T=1.0, prefactor=1.0, - stencil='linkcentered', advection=True, fluid_coupling='friction') - system.actors.add(ek) - -.. note:: Features ``ELECTROKINETICS`` and ``CUDA`` required - -It is very similar to the lattice-Boltzmann command in set-up. -We therefore refer the reader to chapter :ref:`Lattice-Boltzmann` -for details on the implementation of LB in |es| and describe only -the major differences here. - -The first major difference with the LB implementation is that the -electrokinetics set-up is a GPU-only implementation. A CPU version -will become available in the 4.3 line of |es|. To use the electrokinetics -features it is therefore imperative that your computer contains -a CUDA-capable GPU. - -To set up a proper LB fluid using this command, one has to specify at -least the following options: ``agrid``, ``lb_density``, ``viscosity``, -``friction``, ``T``, and ``prefactor``. The other options can be -used to modify the behavior of the LB fluid. 
Note that the command does -not allow the user to set the time step parameter as is the case for the -lattice-Boltzmann command, this parameter is instead taken directly from -the value set for :attr:`~espressomd.system.System.time_step`. -The LB *mass density* is set independently from the -electrokinetic *number densities*, since the LB fluid serves only as a -medium through which hydrodynamic interactions are propagated, as will -be explained further in the next paragraph. If no ``lb_density`` is specified, then our -algorithm assumes ``lb_density= 1.0``. The two 'new' parameters are the temperature ``T`` at -which the diffusive species are simulated and the ``prefactor`` -associated with the electrostatic properties of the medium. See the -above description of the electrokinetic equations for an explanation of -the introduction of a temperature, which does not come in directly via a -thermostat that produces thermal fluctuations. - -``advection`` can be set to ``True`` or ``False``. It controls whether there should be an -advective contribution to the diffusive species' fluxes. Default is -``True``. - -``fluid_coupling`` can be set to ``"friction"`` or ``"estatics"``. -This option determines the force term acting on the fluid. -The former specifies the force term to be the -sum of the species fluxes divided by their respective mobilities while -the latter simply uses the electrostatic force density acting on all -species. Note that this switching is only possible for the ``"linkcentered"`` -stencil. For all other stencils, this choice is hardcoded. The default -is ``"friction"``. - -``es_coupling`` enables the action of the electrostatic potential due to the -electrokinetics species and charged boundaries on the MD particles. The -forces on the particles are calculated by interpolation from the -electric field which is in turn calculated from the potential via finite -differences. 
This only includes interactions between the species and -boundaries and MD particles, not between MD particles and MD particles. -To get complete electrostatic interactions a particles Coulomb method -like Ewald or P3M has to be activated too. - -The fluctuation of the EK species can be turned on by the flag ``fluctuations``. -This adds a white-noise term to the fluxes. The amplitude of this noise term -can be controlled by ``fluctuation_amplitude``. To circumvent that these fluctuations -lead to negative densities, they are modified by a smoothed Heaviside function, -which decreases the magnitude of the fluctuation for densities close to 0. -By default the fluctuations are turned off. - -Another difference with LB is that EK parameters are immutables, -and the EK object cannot be checkpointed. + + system = espressomd.System(box_l=3 * [6.0]) + system.time_step = 0.01 + system.cell_system.skin = 1.0 + + ek_lattice = espressomd.electrokinetics.LatticeWalberla(agrid=0.5, n_ghost_layers=1) + ek_solver = espressomd.electrokinetics.EKNone(lattice=ek_lattice) + system.ekcontainer.solver = ek_solver + system.ekcontainer.tau = system.time_step + +where ``system.ekcontainer`` is the EK system, ``ek_solver`` is the Poisson +solver (here ``EKNone`` doesn't actually solve the electrostatic field, but +instead imposes a zero field), and ``ek_lattice`` contains the grid parameters. +In this setup, the EK system doesn't contain any species. The following +sections will show how to add species that can diffuse, advect, react and/or +electrostatically interact. An EK system can be set up at the same time as a +LB system. .. 
_Diffusive species: Diffusive species -~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^ :: - species = espressomd.electrokinetics.Species(density=density, D=D, valency=valency, - ext_force_density=ext_force) + ek_species = espressomd.electrokinetics.EKSpecies( + lattice=ek_lattice, + single_precision=False, + kT=1.0, + density=0.85, + valency=0.0, + diffusion=0.1, + advection=False, + friction_coupling=False, + ext_efield=[0., 0., 0.] + ) -:class:`~espressomd.electrokinetics.Species` is used to initialize a diffusive species. Here the -options specify: the number density ``density``, the diffusion coefficient ``D``, the -valency of the particles of that species ``valency``, and an optional external -(electric) force which is applied to the diffusive species. As mentioned -before, the LB density is completely decoupled from the electrokinetic -densities. This has the advantage that greater freedom can be achieved -in matching the internal parameters to an experimental system. Moreover, -it is possible to choose parameters for which the LB is more stable. -The species can be added to a LB fluid:: +:class:`~espressomd.electrokinetics.EKSpecies` is used to initialize a diffusive +species. Here the options specify: the electrokinetic *number densities* +``density`` (independent from the LB ``density``), the diffusion coefficient +``diffusion``, the valency of the particles of that species ``valency``, +the optional external (electric) force ``ext_efield`` which is applied to +the diffusive species, the thermal energy ``kT`` for thermal fluctuations, +``friction_coupling`` to enable coupling of the diffusive species to the +LB fluid force and ``advection`` to add an advective contribution to the +diffusive species' fluxes from the LB fluid. +Multiple species can be added to the EK system. 
- ek.add_species(species) +To add species to the EK system:: -One can also add the species during the initialization step of the -:class:`~espressomd.electrokinetics.Electrokinetics` class by defining -the list variable ``species``:: + system.ekcontainer.add(ek_species) - ek = espressomd.electrokinetics.Electrokinetics(species=[species], ...) +To remove species from the EK system:: -The variables ``density``, ``D``, and -``valency`` must be set to properly initialize the diffusive species; the -``ext_force_density`` is optional. + system.ekcontainer.remove(ek_species) -.. _EK boundaries: +Individual nodes and slices of the species lattice can be accessed and +modified using the syntax outlined in :ref:`Reading and setting properties +of single lattice nodes`. -EK boundaries -~~~~~~~~~~~~~ +As mentioned before, the LB density is completely decoupled from the +electrokinetic densities. This has the advantage that greater freedom can +be achieved in matching the internal parameters to an experimental system. +Moreover, it is possible to choose parameters for which the LB is more stable. -:class:`~espressomd.ekboundaries.EKBoundary` is used to set up -internal (or external) boundaries for the electrokinetics algorithm in much -the same way as the :class:`~espressomd.lbboundaries.LBBoundary` class is -used for the LB fluid:: +Performance considerations +^^^^^^^^^^^^^^^^^^^^^^^^^^ - ek_boundary = espressomd.ekboundaries.EKBoundary(charge_density=1.0, shape=my_shape) - system.ekboundaries.add(ek_boundary) +The CPU implementation of the EK has an extra flag ``single_precision`` to +use single-precision floating point values. These are approximately 10% +faster than double-precision, at the cost of a small loss in precision. -.. note:: Feature ``EK_BOUNDARIES`` required +.. _Checkpointing EK: -The major difference with the LB class is the option ``charge_density``, -with which a boundary can be endowed with a volume charge density. 
-To create a surface charge density, a combination of two -oppositely charged boundaries, one inside the other, can be used. However, -care should be taken to maintain the surface charge density when the value of ``agrid`` -is changed. Examples for possible shapes are wall, sphere, ellipsoid, cylinder, -rhomboid and hollow conical frustum. We refer to the documentation of the -:class:`espressomd.shapes` module for more possible shapes and information on -the options associated to these shapes. In order to properly set up the -boundaries, the ``charge_density`` and ``shape`` must be specified. +Checkpointing +------------- -.. _Output: +:: -Output -~~~~~~ + ek.save_checkpoint(path, binary) + ek.load_checkpoint(path, binary) + +The first command saves all of the EK nodes' properties to an ASCII +(``binary=False``) or binary (``binary=True``) format respectively. +The second command loads the EK nodes' properties. +In both cases ``path`` specifies the location of the +checkpoint file. This is useful for restarting a simulation either on the same +machine or a different machine. Some care should be taken when using the binary +format as the format of doubles can depend on both the computer being used as +well as the compiler. + +.. _EK VTK output: + +VTK output +---------- + +The waLBerla library implements a globally-accessible VTK registry. 
+A VTK stream can be attached to an EK actor to periodically write +one or multiple fluid field data into a single file using +:class:`~espressomd.electrokinetics.VTKOutput`:: + + vtk_obs = ["density"] + # create a VTK callback that automatically writes every 10 EK steps + ek_vtk = espressomd.electrokinetics.VTKOutput( + identifier="ek_vtk_automatic", observables=vtk_obs, delta_N=10) + ek.add_vtk_writer(vtk=ek_vtk) + system.integrator.run(100) + # can be deactivated + ek_vtk.disable() + system.integrator.run(10) + ek_vtk.enable() + # create a VTK callback that writes only when explicitly called + ek_vtk_on_demand = espressomd.electrokinetics.VTKOutput( + identifier="ek_vtk_now", observables=vtk_obs) + ek.add_vtk_writer(vtk=ek_vtk_on_demand) + ek_vtk_on_demand.write() + +Currently only supports the species density. +By default, the properties of the current state +of the species are written to disk on demand. To add a stream that writes +to disk continuously, use the optional argument ``delta_N`` to indicate +the level of subsampling. Such a stream can be deactivated. + +The VTK format is readable by visualization software such as ParaView [5]_ +or Mayavi2 [6]_, as well as in |es| (see :ref:`Reading VTK files`). +If you plan to use ParaView for visualization, note that also the particle +positions can be exported using the VTK format +(see :meth:`~espressomd.particle_data.ParticleList.writevtk`). + +Important: these VTK files are written in multi-piece format, i.e. each MPI +rank writes its local domain to a new piece in the VTK uniform grid to avoid +a MPI reduction. ParaView can handle the topology reconstruction natively. +However, when reading the multi-piece file with the Python ``vtk`` package, +the topology must be manually reconstructed. In particular, calling the XML +reader ``GetOutput()`` method directly after the update step will erase all +topology information. 
While this is not an issue for VTK files obtained from +simulations that ran with 1 MPI rank, for parallel simulations this will lead +to 3D grids with incorrectly ordered data. Automatic topology reconstruction +is available through :class:`~espressomd.io.vtk.VTKReader`:: + + import pathlib + import tempfile + import numpy as np + import espressomd + import espressomd.electrokinetics + import espressomd.io.vtk -.. _Fields: + system = espressomd.System(box_l=[12., 14., 10.]) + system.cell_system.skin = 0.4 + system.time_step = 0.1 -Fields -"""""" + lattice = espressomd.electrokinetics.LatticeWalberla(agrid=1.) + species = espressomd.electrokinetics.EKSpecies( + lattice=lattice, density=1., kT=1., diffusion=0.1, valency=0., + advection=False, friction_coupling=False, tau=system.time_step) + system.ekcontainer.tau = species.tau + system.ekcontainer.add(species) + system.integrator.run(10) -:: + vtk_reader = espressomd.io.vtk.VTKReader() + label_density = "density" - ek.write_vtk_boundary(path) - ek.write_vtk_density(path) - ek.write_vtk_velocity(path) - ek.write_vtk_potential(path) + with tempfile.TemporaryDirectory() as tmp_directory: + path_vtk_root = pathlib.Path(tmp_directory) + label_vtk = "ek_vtk" + path_vtk = path_vtk_root / label_vtk / "simulation_step_0.vtu" -A property of the fluid field can be exported into a file in one go. -Currently supported fields are: density, velocity, potential and boundary, -which give the LB fluid density, the LB fluid velocity, -the electrostatic potential, and the location and type of the -boundaries, respectively. The boundaries can only be printed when the -``EK_BOUNDARIES`` is compiled in. The output is a vtk-file, which is readable by -visualization software such as ParaView [5]_ and Mayavi2 [6]_. 
+ # write VTK file + ek_vtk = espressomd.electrokinetics.VTKOutput( + identifier=label_vtk, delta_N=0, + observables=["density"], + base_folder=str(path_vtk_root)) + species.add_vtk_writer(vtk=ek_vtk) + ek_vtk.write() -:: + # read VTK file + vtk_grids = vtk_reader.parse(path_vtk) + vtk_density = vtk_grids[label_density] - species.write_vtk_flux(path) - species.write_vtk_density(path) + # check VTK values match node values + ek_density = np.copy(species[:, :, :].density) + np.testing.assert_allclose(vtk_density, ek_density, rtol=1e-10, atol=0.) -These commands are similar to the above. They enable the -export of diffusive species properties, namely: ``density`` and ``flux``, which specify the -number density and flux of species ``species``, respectively. +.. _Setting up EK boundary conditions: -.. _Local quantities: +Setting up boundary conditions +------------------------------ -Local quantities -"""""""""""""""" +It is possible to impose a fixed density and a fixed flux on EK species. -Local quantities like velocity or fluid density for single nodes can be accessed in the same way -as for an LB fluid, see :ref:`Lattice-Boltzmann`. The only EK-specific quantity is the potential. +Under the hood, a boundary field is added to the blockforest, which contains +pre-calculated information for the streaming operations. -:: +.. 
_Per-node EK boundary conditions: - ek[0, 0, 0].potential - ek[0, 0, 0].velocity - ek[0, 0, 0].boundary +Per-node boundary conditions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The local ``density`` and ``flux`` of a species can be obtained in the same fashion: +One can set (or update) the boundary conditions of individual nodes:: -:: + import espressomd + import espressomd.electrokinetics + system = espressomd.System(box_l=[10.0, 10.0, 10.0]) + system.cell_system.skin = 0.1 + system.time_step = 0.01 + lattice = espressomd.electrokinetics.LatticeWalberla(agrid=0.5, n_ghost_layers=1) + ek_species = espressomd.electrokinetics.EKSpecies( + kT=1.5, lattice=lattice, density=0.85, valency=0., diffusion=0.1, + advection=False, friction_coupling=False, tau=system.time_step) + system.ekcontainer.tau = ek_species.tau + system.ekcontainer.add(ek_species) + # set node fixed density boundary conditions + ek_species[0, 0, 0].density_boundary = espressomd.electrokinetics.DensityBoundary(1.) + # update node fixed density boundary conditions + ek_species[0, 0, 0].density_boundary = espressomd.electrokinetics.DensityBoundary(2.) + # remove node boundary conditions + ek_species[0, 0, 0].density_boundary = None + +.. 
_Shape-based EK boundary conditions: + +Shape-based boundary conditions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Adding a shape-based boundary is straightforward:: + + import espressomd + import espressomd.electrokinetics + import espressomd.shapes + system = espressomd.System(box_l=[10.0, 10.0, 10.0]) + system.cell_system.skin = 0.1 + system.time_step = 0.01 + lattice = espressomd.electrokinetics.LatticeWalberla(agrid=0.5, n_ghost_layers=1) + ek_species = espressomd.electrokinetics.EKSpecies( + kT=1.5, lattice=lattice, density=0.85, valency=0.0, diffusion=0.1, + advection=False, friction_coupling=False, tau=system.time_step) + system.ekcontainer.tau = ek_species.tau + system.ekcontainer.add(ek_species) + # set fixed density boundary conditions + wall = espressomd.shapes.Wall(normal=[1., 0., 0.], dist=2.5) + ek_species.add_boundary_from_shape( + shape=wall, value=1., boundary_type=espressomd.electrokinetics.DensityBoundary) + # clear fixed density boundary conditions + ek_species.clear_density_boundaries() + +For a position-dependent flux, the argument to ``value`` must be a 4D grid +(the first three dimensions must match the EK grid shape, the fourth +dimension has size 3 for the flux). + +For a complete description of all available shapes, refer to +:mod:`espressomd.shapes`. + +.. _Prototyping new EK methods: + +Prototyping new EK methods +-------------------------- + +Start by installing the code generator dependencies: + +.. code-block:: bash + + python3 -m pip install --user -c requirements.txt numpy sympy lbmpy pystencils islpy + +Next, edit the code generator script to configure new kernels, then execute it: + +.. code-block:: bash + + python3 maintainer/walberla_kernels/generate_lb_kernels.py + +The script takes optional arguments to control the CPU or GPU architecture, +as well as the floating-point precision. 
The generated source code files need +to be written to :file:`src/walberla_bridge/src/electrokinetics/generated_kernels/` +and :file:`src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/`. +These steps can be automated with the convenience shell functions documented in +:file:`maintainer/walberla_kernels/Readme.md`. +Edit the :file:`CMakeLists.txt` file in the destination folders to include the +new kernels in the build system. +Then, adapt :file:`src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp` +to use the new EK kernels. - species[0, 0, 0].density - species[0, 0, 0].flux .. [5] https://www.paraview.org/ diff --git a/doc/sphinx/installation.rst b/doc/sphinx/installation.rst index 3c81315ae4f..1495331b0a2 100644 --- a/doc/sphinx/installation.rst +++ b/doc/sphinx/installation.rst @@ -498,19 +498,9 @@ Fluid dynamics and fluid structure interaction .. seealso:: :ref:`DPD interaction` -- ``LB_BOUNDARIES`` Enables the construction of LB boundaries from shape-based constraints on the CPU. - -- ``LB_BOUNDARIES_GPU`` Enables the construction of LB boundaries from shape-based constraints on the GPU. - - ``LB_ELECTROHYDRODYNAMICS`` Enables the implicit calculation of electro-hydrodynamics for charged particles and salt ions in an electric field. -- ``ELECTROKINETICS`` Enables the description of chemical species advected by a LB fluid on the GPU. - -- ``EK_BOUNDARIES`` Enables the construction of electrokinetic boundaries from shape-based constraints on the GPU. - -- ``EK_DEBUG`` Enables additional checks in electrokinetic simulations. - .. _Interaction features: @@ -769,6 +759,9 @@ The following options control features from external libraries: * ``ESPRESSO_BUILD_WITH_SCAFACOS``: Build with ScaFaCoS support. * ``ESPRESSO_BUILD_WITH_GSL``: Build with GSL support. * ``ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS`` Build with Stokesian Dynamics support. +* ``ESPRESSO_BUILD_WITH_WALBERLA``: Build with waLBerla support. 
+* ``ESPRESSO_BUILD_WITH_WALBERLA_FFT``: Build waLBerla with FFT and PFFT support, used in FFT-based electrokinetics. +* ``ESPRESSO_BUILD_WITH_WALBERLA_AVX``: Build waLBerla with AVX kernels instead of regular kernels. * ``ESPRESSO_BUILD_WITH_PYTHON``: Build with the Python interface. The following options control code instrumentation: @@ -863,7 +856,12 @@ Configuring without a network connection Several :ref:`external features ` in |es| rely on external libraries that are downloaded automatically by CMake. When a network connection cannot be established due to firewall restrictions, -the CMake logic needs editing: +the CMake logic needs editing. + +.. _Git submodules without a network connection: + +Git submodules without a network connection +""""""""""""""""""""""""""""""""""""""""""" * ``ESPRESSO_BUILD_WITH_HDF5``: when cloning |es|, the :file:`libs/h5xx` folder will be a git submodule containing a :file:`.git` subfolder. To prevent CMake @@ -876,13 +874,26 @@ the CMake logic needs editing: When installing a release version of |es|, no network communication is needed for HDF5. -* ``ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS``: this library is installed using - `FetchContent `__. - The repository URL can be found in the ``GIT_REPOSITORY`` field of the - corresponding ``FetchContent_Declare()`` command. The ``GIT_TAG`` field - provides the commit. Clone this repository locally next to the |es| - folder and edit the |es| build system such that ``GIT_REPOSITORY`` points - to the absolute path of the Stokesian Dynamics clone, for example with: +.. _CMake subprojects without a network connection: + +CMake subprojects without a network connection +"""""""""""""""""""""""""""""""""""""""""""""" + +Several libraries are downloaded and included into the CMake project using +`FetchContent `__. +The repository URLs can be found in the ``GIT_REPOSITORY`` field of the +corresponding ``FetchContent_Declare()`` commands. The ``GIT_TAG`` field +provides the commit. 
Clone these repositories locally and edit the |es| +build system such that ``GIT_REPOSITORY`` points to the absolute path of +the clone. You can automate this task by adapting the following commands: + +* ``ESPRESSO_BUILD_WITH_WALBERLA`` + + .. code-block:: bash + + sed -ri 's|GIT_REPOSITORY +.+/walberla.git|GIT_REPOSITORY /work/username/walberla|' CMakeLists.txt + +* ``ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS`` .. code-block:: bash diff --git a/doc/sphinx/integration.rst b/doc/sphinx/integration.rst index e4547612fad..b7f0bd34eaf 100644 --- a/doc/sphinx/integration.rst +++ b/doc/sphinx/integration.rst @@ -649,7 +649,7 @@ The backcoupling of friction forces and noise to the fluid is also done by distr Details for both the interpolation and the force distribution can be found in :cite:`ahlrichs99a` and :cite:`dunweg09a`. The LB fluid can be used to thermalize particles, while also including their hydrodynamic interactions. -The LB thermostat expects an instance of either :class:`espressomd.lb.LBFluid` or :class:`espressomd.lb.LBFluidGPU`. +The LB thermostat expects an instance of either :class:`espressomd.lb.LBFluidWalberla` or :class:`espressomd.lb.LBFluidWalberlaGPU`. Temperature is set via the ``kT`` argument of the LB fluid. The magnitude of the frictional coupling can be adjusted by the @@ -658,7 +658,7 @@ parameter ``gamma``. To enable the LB thermostat, use:: import espressomd import espressomd.lb system = espressomd.System(box_l=[1, 1, 1]) - lbf = espressomd.lb.LBFluid(agrid=1, dens=1, visc=1, tau=0.01) + lbf = espressomd.lb.LBFluidWalberla(agrid=1, density=1, kinematic_viscosity=1, tau=0.01) system.actors.add(lbf) system.thermostat.set_lb(LB_fluid=lbf, seed=123, gamma=1.5) diff --git a/doc/sphinx/io.rst b/doc/sphinx/io.rst index 66f07f5b6ab..5c6ee7eafa3 100644 --- a/doc/sphinx/io.rst +++ b/doc/sphinx/io.rst @@ -111,19 +111,22 @@ Be aware of the following limitations: for a specific combination of features, please share your findings with the |es| community. 
-* Checkpointing only supports recursion on the head node. It is therefore - impossible to checkpoint a :class:`espressomd.system.System` instance that - contains LB boundaries, constraint unions or auto-update accumulators when the - simulation is running with 2 or more MPI nodes. - -* The active actors, i.e., the content of ``system.actors``, are checkpointed. - For lattice-Boltzmann fluids, this only includes the parameters such as the - lattice constant (``agrid``). The actual flow field has to be saved - separately with the lattice-Boltzmann specific methods - :meth:`espressomd.lb.HydrodynamicInteraction.save_checkpoint` - and loaded via :meth:`espressomd.lb.HydrodynamicInteraction.load_checkpoint` +* The active actors, i.e., the content of ``system.actors`` resp. + ``system.ekcontainer``, are checkpointed. For lattice-based methods like + lattice-Boltzmann fluids and advection-diffusion-reaction models, this only + includes the parameters such as the lattice constant (``agrid``) and initial + densities. + The actual fields have to be saved separately with the lattice-specific + methods :meth:`espressomd.lb.LBFluidWalberla.save_checkpoint + ` resp. + :meth:`espressomd.electrokinetics.EKSpecies.save_checkpoint + ` + and loaded via :meth:`espressomd.lb.LBFluidWalberla.load_checkpoint + ` resp. + :meth:`espressomd.electrokinetics.EKSpecies.load_checkpoint + ` after restoring the checkpoint. See :ref:`LB checkpointing ` - for more details. + resp. :ref:`EK checkpointing ` for more details. * References between Python objects are not maintained during checkpointing. For example, if an instance of a shape and an instance of a constraint @@ -506,3 +509,26 @@ requires increasing and continuous indexing. The |es| ``id`` can be used as *key vtf_index[3] Note that the |es| particles are ordered in increasing order, thus ``id=3`` corresponds to the zeroth VTF index. + +.. 
_Reading VTK files: + +Reading VTK files +----------------- + +The waLBerla library writes VTK multi-piece uniform grids in XML format. +Each piece contains information about its spatial extent, from which it is +possible to deduce the grid dimensions. Each piece may contain one or more +arrays, which are uniquely identified by name. While the Python package ``vtk`` +provides tools to read VTK files as numpy arrays, it doesn't automatically +reconstruct the 3D grids using the topology information of each piece; this +functionality is provided by the wrapper :class:`~espressomd.io.vtk.VTKReader`: + +.. code-block:: python + + import espressomd.io.vtk + vtk_reader = espressomd.io.vtk.VTKReader() + vtk_grids = vtk_reader.parse("simulation_step_0.vtu") + vtk_density = vtk_grids["density"] + print(vtk_density.shape) + +For a self-contained example, please refer to :ref:`LB VTK output`. diff --git a/doc/sphinx/lb.rst b/doc/sphinx/lb.rst index 72e78d47975..68a2dc0e8a6 100644 --- a/doc/sphinx/lb.rst +++ b/doc/sphinx/lb.rst @@ -18,11 +18,16 @@ Here we restrict the documentation to the interface. For a more detailed description of the method, please refer to the literature. .. note:: + Please cite :cite:t:`godenschwager13a` and :cite:t:`bauer21a` (BibTeX keys + ``godenschwager13a`` and ``bauer21a`` in :file:`doc/bibliography.bib`) if + you use the LB fluid. When generating your own kernels with pystencils and + lbmpy, please also cite :cite:t:`bauer19a` and :cite:t:`bauer21b` (BibTeX + keys ``bauer19a`` and ``bauer21b`` in :file:`doc/bibliography.bib`). - Please cite :cite:`arnold13a` (BibTeX key ``arnold13a`` in - :file:`doc/bibliography.bib`) if you use the LB fluid and :cite:`rohm12a` - (BibTeX key ``rohm12a`` in :file:`doc/bibliography.bib`) if you use - the GPU implementation. +.. note:: + + Requires external feature ``WALBERLA``, enabled with the CMake option + ``-D ESPRESSO_BUILD_WITH_WALBERLA=ON``. .. 
_Setting up a LB fluid: @@ -36,18 +41,18 @@ The following minimal example illustrates how to use the LBM in |es|:: system = espressomd.System(box_l=[10, 20, 30]) system.time_step = 0.01 system.cell_system.skin = 0.4 - lb = espressomd.lb.LBFluid(agrid=1.0, dens=1.0, visc=1.0, tau=0.01) + lb = espressomd.lb.LBFluidWalberla(agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=0.01) system.actors.add(lb) system.integrator.run(100) To use the GPU-accelerated variant, replace line 6 in the example above by:: - lb = espressomd.lb.LBFluidGPU(agrid=1.0, dens=1.0, visc=1.0, tau=0.01) + lb = espressomd.lb.LBFluidWalberlaGPU(agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=0.01) .. note:: Feature ``CUDA`` required for the GPU-accelerated variant To use the (much faster) GPU implementation of the LBM, use -:class:`~espressomd.lb.LBFluidGPU` in place of :class:`~espressomd.lb.LBFluid`. +:class:`~espressomd.lb.LBFluidWalberlaGPU` in place of :class:`~espressomd.lb.LBFluidWalberla`. Please note that the GPU implementation uses single precision floating point operations. This decreases the accuracy of calculations compared to the CPU implementation. In particular, due to rounding errors, the fluid density decreases over time, @@ -62,12 +67,12 @@ lattice constant of the fluid, so the size of the box in every direction must be a multiple of ``agrid``. In the following, we discuss the parameters that can be supplied to the LBM in |es|. -The detailed interface definition is available at :class:`~espressomd.lb.LBFluid`. +The detailed interface definition is available at :class:`~espressomd.lb.LBFluidWalberla`. The LB scheme and the MD scheme are not synchronized: In one LB time step typically several MD steps are performed. This allows to speed up the simulations and is adjusted with the parameter ``tau``, the LB time step. 
-The parameters ``dens`` and ``visc`` set up the density and (kinematic) viscosity of the +The parameters ``density`` and ``kinematic_viscosity`` set up the density and kinematic viscosity of the LB fluid in (usual) MD units. Internally the LB implementation works with a different set of units: all lengths are expressed in ``agrid``, all times in ``tau`` and so on. @@ -85,26 +90,24 @@ Thermalization of the fluid (and particle coupling later on) can be activated by providing a non-zero value for the parameter ``kT``. Then, a seed has to be provided for the fluid thermalization:: - lbfluid = espressomd.lb.LBFluid(kT=1.0, seed=134, ...) + lb = espressomd.lb.LBFluidWalberla(kT=1.0, seed=134, ...) The parameter ``ext_force_density`` takes a three dimensional vector as an array_like of :obj:`float`, representing a homogeneous external body force density in MD -units to be applied to the fluid. The parameter ``bulk_visc`` allows one to -tune the bulk viscosity of the fluid and is given in MD units. In the limit of -low Mach number, the flow does not compress the fluid and the resulting flow -field is therefore independent of the bulk viscosity. It is however known that -the value of the viscosity does affect the quality of the implemented -link-bounce-back method. ``gamma_even`` and ``gamma_odd`` are the relaxation -parameters for the kinetic modes. These fluid parameters do not correspond to -any macroscopic fluid properties, but do influence numerical properties of the -algorithm, such as the magnitude of the error at boundaries. Unless you are an -expert, leave their defaults unchanged. If you do change them, note that they -are to be given in LB units. +units to be applied to the fluid. Before running a simulation at least the following parameters must be -set up: ``agrid``, ``tau``, ``visc``, ``dens``. For the other parameters, -the following are taken: ``bulk_visc=0``, ``gamma_odd=0``, ``gamma_even=0``, -``ext_force_density=[0, 0, 0]``. 
+set up: ``agrid``, ``tau``, ``kinematic_viscosity``, ``density``. + +Performance considerations +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The CPU implementation of the LB has an extra flag ``single_precision`` to +use single-precision floating point values. These are approximately 10% +faster than double-precision, at the cost of a small loss in precision. + +To enable vectorization, run ``cmake . -D ESPRESSO_BUILD_WITH_WALBERLA_AVX=ON``. +An AVX2-capable microprocessor is required. .. _Checkpointing LB: @@ -148,18 +151,8 @@ To get interpolated velocity values between lattice nodes, the function:: lb.get_interpolated_velocity(pos=[1.1, 1.2, 1.3]) with a single position ``pos`` as an argument can be used. -For the GPU fluid :class:`espressomd.lb.LBFluidGPU`, a method -:py:meth:`~espressomd.lb.LBFluidGPU.get_interpolated_fluid_velocity_at_positions()` -is also available, which expects a numpy array of positions as an argument. - -By default, the interpolation is done linearly between the nearest 8 LB nodes, -but for the GPU implementation also a quadratic scheme involving 27 nodes is implemented -(see eqs. 297 and 301 in :cite:`dunweg09a`). -You can choose by calling -one of:: - lb.set_interpolation_order('linear') - lb.set_interpolation_order('quadratic') +The interpolation is done linearly between the nearest 8 LB nodes. A note on boundaries: both interpolation schemes don't take into account the physical location of the boundaries @@ -184,11 +177,31 @@ the :ref:`LB thermostat` (see more detailed description there). A short example system.thermostat.set_lb(LB_fluid=lbf, seed=123, gamma=1.5) -where ``lbf`` is an instance of either :class:`~espressomd.lb.LBFluid` or -:class:`~espressomd.lb.LBFluidGPU`, ``gamma`` the friction coefficient and +where ``lbf`` is an instance of either :class:`~espressomd.lb.LBFluidWalberla` or +:class:`~espressomd.lb.LBFluidWalberlaGPU`, ``gamma`` the friction coefficient and ``seed`` the seed for the random number generator involved in the thermalization. 
+.. _LB and LEbc: + +LB and LEbc +^^^^^^^^^^^ + +:ref:`Lees-Edwards boundary conditions` (LEbc) are supported by both +LB implementations, which follow the derivation in :cite:`wagner02a`. +Note that there is no extra Python interface for the use of LEbc +with the LB algorithm: all the necessary information is internally +derived from the currently active MD LEbc protocol in +``system.lees_edwards.protocol``. +Therefore, the MD LEbc must be set before the LB actor is instantiated. +Use the :class:`~espressomd.lees_edwards.Off` protocol if the system should have +no shearing initially; this action will initialize the shear axes, and +when the LB actor is instantiated, the Lees-Edwards collision kernels +will be used instead of the default ones. + +.. note:: + + At the moment, LB only supports the case ``shear_plane_normal="y"``. .. _Reading and setting properties of single lattice nodes: @@ -201,14 +214,13 @@ the selected LB grid node and allows one to access all of its properties:: lb[x, y, z].density # fluid density (one scalar for LB and CUDA) lb[x, y, z].velocity # fluid velocity (a numpy array of three floats) lb[x, y, z].pressure_tensor # fluid pressure tensor (a symmetric 3x3 numpy array of floats) - lb[x, y, z].pressure_tensor_neq # nonequilibrium part of the pressure tensor (as above) - lb[x, y, z].boundary # flag indicating whether the node is fluid or boundary (fluid: boundary=0, boundary: boundary != 0) + lb[x, y, z].is_boundary # flag indicating whether the node is fluid or boundary (fluid: is_boundary=False, boundary: is_boundary=True) lb[x, y, z].population # 19 LB populations (a numpy array of 19 floats, check order from the source code) All of these properties can be read and used in further calculations. Only the property ``population`` can be modified. The indices ``x, y, z`` are integers and enumerate the LB nodes in the three Cartesian directions, -starting at 0. To modify ``boundary``, refer to :ref:`Setting up boundary conditions`. +starting at 0. 
To modify ``is_boundary``, refer to :ref:`Setting up LB boundary conditions`. Example:: @@ -217,6 +229,7 @@ Example:: The first line prints the fluid velocity at node (0 0 0) to the screen. The second line sets this fluid node's density to the value ``1.2``. +Use negative indices to get nodes starting from the end of the lattice. The nodes can be read and modified using slices. Example:: @@ -230,43 +243,103 @@ a value that matches the length of the slice (which sets each node individually), or a single value that will be copied to every node (e.g. a scalar for density, or an array of length 3 for the velocity). -.. _Output for visualization: - -Output for visualization ------------------------- - -|es| implements a number of commands to output fluid field data of the whole fluid into a file at once. :: - - lb.write_vtk_velocity(path) - lb.write_vtk_boundary(path) - lb.write_velocity(path) - lb.write_boundary(path) - -Currently supported fluid properties are the velocity, and boundary flag in ASCII VTK as well as Gnuplot compatible ASCII output. +.. _LB VTK output: + +VTK output +---------- + +The waLBerla library implements a globally-accessible VTK registry. 
+A VTK stream can be attached to a LB actor to periodically write +one or multiple fluid field data into a single file using +:class:`~espressomd.lb.VTKOutput`:: + + vtk_obs = ["density", "velocity_vector"] + # create a VTK callback that automatically writes every 10 LB steps + lb_vtk = espressomd.lb.VTKOutput( + identifier="lb_vtk_automatic", observables=vtk_obs, delta_N=10) + lb.add_vtk_writer(vtk=lb_vtk) + system.integrator.run(100) + # can be deactivated + lb_vtk.disable() + system.integrator.run(10) + lb_vtk.enable() + # create a VTK callback that writes only when explicitly called + lb_vtk_on_demand = espressomd.lb.VTKOutput( + identifier="lb_vtk_now", observables=vtk_obs) + lb.add_vtk_writer(vtk=lb_vtk_on_demand) + lb_vtk_on_demand.write() + +Currently supported fluid properties are the density, velocity vector +and pressure tensor. By default, the properties of the current state +of the fluid are written to disk on demand. To add a stream that writes +to disk continuously, use the optional argument ``delta_N`` to indicate +the level of subsampling. Such a stream can be deactivated. The VTK format is readable by visualization software such as ParaView [1]_ -or Mayavi2 [2]_. If you plan to use ParaView for visualization, note that also the particle -positions can be exported using the VTK format (see :meth:`~espressomd.particle_data.ParticleList.writevtk`). - -The variant - -:: - - lb.write_vtk_velocity(path, bb1, bb2) - -allows you to only output part of the flow field by specifying an axis aligned -bounding box through the coordinates ``bb1`` and ``bb1`` (lists of three ints) of two of its corners. This -bounding box can be used to output a slice of the flow field. As an -example, executing - -:: - - lb.write_vtk_velocity(path, [0, 0, 5], [10, 10, 5]) - -will output the cross-section of the velocity field in a plane -perpendicular to the :math:`z`-axis at :math:`z = 5` (assuming the box -size is 10 in the :math:`x`- and :math:`y`-direction). 
+or Mayavi2 [2]_, as well as in |es| (see :ref:`Reading VTK files`). +If you plan to use ParaView for visualization, note that also the particle +positions can be exported using the VTK format +(see :meth:`~espressomd.particle_data.ParticleList.writevtk`). + +Important: these VTK files are written in multi-piece format, i.e. each MPI +rank writes its local domain to a new piece in the VTK uniform grid to avoid +a MPI reduction. ParaView can handle the topology reconstruction natively. +However, when reading the multi-piece file with the Python ``vtk`` package, +the topology must be manually reconstructed. In particular, calling the XML +reader ``GetOutput()`` method directly after the update step will erase all +topology information. While this is not an issue for VTK files obtained from +simulations that ran with 1 MPI rank, for parallel simulations this will lead +to 3D grids with incorrectly ordered data. Automatic topology reconstruction +is available through :class:`~espressomd.io.vtk.VTKReader`:: + + import pathlib + import tempfile + import numpy as np + import espressomd + import espressomd.lb + import espressomd.io.vtk + system = espressomd.System(box_l=[12., 14., 10.]) + system.cell_system.skin = 0.4 + system.time_step = 0.1 + + lbf = espressomd.lb.LBFluidWalberla( + agrid=1., tau=0.1, density=1., kinematic_viscosity=1.) 
+ system.actors.add(lbf) + system.integrator.run(10) + + vtk_reader = espressomd.io.vtk.VTKReader() + label_density = "density" + label_velocity = "velocity_vector" + label_pressure = "pressure_tensor" + + with tempfile.TemporaryDirectory() as tmp_directory: + path_vtk_root = pathlib.Path(tmp_directory) + label_vtk = "lb_vtk" + path_vtk = path_vtk_root / label_vtk / "simulation_step_0.vtu" + + # write VTK file + lb_vtk = espressomd.lb.VTKOutput( + identifier=label_vtk, delta_N=0, + observables=["density", "velocity_vector", "pressure_tensor"], + base_folder=str(path_vtk_root)) + lbf.add_vtk_writer(vtk=lb_vtk) + lb_vtk.write() + + # read VTK file + vtk_grids = vtk_reader.parse(path_vtk) + vtk_density = vtk_grids[label_density] + vtk_velocity = vtk_grids[label_velocity] + vtk_pressure = vtk_grids[label_pressure] + vtk_pressure = vtk_pressure.reshape(vtk_pressure.shape[:-1] + (3, 3)) + + # check VTK values match node values + lb_density = np.copy(lbf[:, :, :].density) + lb_velocity = np.copy(lbf[:, :, :].velocity) + lb_pressure = np.copy(lbf[:, :, :].pressure_tensor) + np.testing.assert_allclose(vtk_density, lb_density, rtol=1e-10, atol=0.) + np.testing.assert_allclose(vtk_velocity, lb_velocity, rtol=1e-7, atol=0.) + np.testing.assert_allclose(vtk_pressure, lb_pressure, rtol=1e-7, atol=0.) .. _Choosing between the GPU and CPU implementations: @@ -276,8 +349,8 @@ Choosing between the GPU and CPU implementations |es| contains an implementation of the LBM for NVIDIA GPUs using the CUDA framework. On CUDA-supporting machines this can be activated by compiling with the feature ``CUDA``. Within the -Python script, the :class:`~espressomd.lb.LBFluid` object can be substituted -with the :class:`~espressomd.lb.LBFluidGPU` object to switch from CPU based +Python script, the :class:`~espressomd.lb.LBFluidWalberla` object can be substituted +with the :class:`~espressomd.lb.LBFluidWalberlaGPU` object to switch from CPU based to GPU based execution. 
For further information on CUDA support see section :ref:`CUDA acceleration`. @@ -289,15 +362,10 @@ of the LBM in analogy to the example for the CPU given in section system = espressomd.System(box_l=[10, 20, 30]) system.time_step = 0.01 system.cell_system.skin = 0.4 - lb = espressomd.lb.LBFluidGPU(agrid=1.0, dens=1.0, visc=1.0, tau=0.01) + lb = espressomd.lb.LBFluidWalberlaGPU(agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=0.01) system.actors.add(lb) system.integrator.run(100) -For boundary conditions analogous to the CPU -implementation, the feature ``LB_BOUNDARIES_GPU`` has to be activated. -:ref:`Lees-Edwards boundary conditions` are not supported by either -LB implementation. - .. _Electrohydrodynamics: Electrohydrodynamics @@ -316,130 +384,103 @@ particles that should be subject to the field. This effectively acts as a velocity offset between the particle and the LB fluid. For more information on this method and how it works, read the -publication :cite:`hickey10a`. - - -.. _Using shapes as lattice-Boltzmann boundary: - -Using shapes as lattice-Boltzmann boundary ------------------------------------------- +publication :cite:t:`hickey10a`. -.. note:: - Feature ``LB_BOUNDARIES`` required - -Lattice-Boltzmann boundaries are implemented in the module -:mod:`espressomd.lbboundaries`. You might want to take a look -at the classes :class:`~espressomd.lbboundaries.LBBoundary` -and :class:`~espressomd.lbboundaries.LBBoundaries` for more information. - -Adding a shape-based boundary is straightforward:: +.. _Setting up LB boundary conditions: - lbb = espressomd.lbboundaries.LBBoundary(shape=my_shape, velocity=[0, 0, 0]) - system.lbboundaries.add(lbb) - -or:: +Setting up boundary conditions +------------------------------ - lbb = espressomd.lbboundaries.LBBoundary() - lbb.shape = my_shape - lbb.velocity = [0, 0, 0] - system.lbboundaries.add(lbb) +Currently, only the so-called "link-bounce-back" algorithm for boundary +nodes is available. 
This creates a boundary that is located +approximately midway between lattice nodes. With no-slip boundary conditions, +populations are reflected back. With slip velocities, the reflection is +followed by a velocity interpolation. This allows to create shear flow and +boundaries "moving" relative to each other. -.. _Minimal usage example: +Under the hood, a boundary field is added to the blockforest, which contains +pre-calculated information for the reflection and interpolation operations. -Minimal usage example -~~~~~~~~~~~~~~~~~~~~~ +.. _Per-node LB boundary conditions: -.. note:: Feature ``LB_BOUNDARIES`` or ``LB_BOUNDARIES_GPU`` required +Per-node boundary conditions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In order to add a wall as boundary for a lattice-Boltzmann fluid -you could do the following:: +One can set (or update) the slip velocity of individual nodes:: - wall = espressomd.shapes.Wall(dist=5, normal=[1, 0, 0]) - lbb = espressomd.lbboundaries.LBBoundary(shape=wall, velocity=[0, 0, 0]) - system.lbboundaries.add(lbb) + import espressomd.lb + system = espressomd.System(box_l=[10.0, 10.0, 10.0]) + system.cell_system.skin = 0.1 + system.time_step = 0.01 + lbf = espressomd.lb.LBFluidWalberla(agrid=0.5, density=1.0, kinematic_viscosity=1.0, tau=0.01) + system.actors.add(lbf) + # make one node a boundary node with a slip velocity + lbf[0, 0, 0].boundary = espressomd.lb.VelocityBounceBack([0, 0, 1]) + # update node for no-slip boundary conditions + lbf[0, 0, 0].boundary = espressomd.lb.VelocityBounceBack([0, 0, 0]) + # remove boundary conditions + lbf[0, 0, 0].boundary = None -.. _Setting up boundary conditions: +.. 
_Shape-based LB boundary conditions: -Setting up boundary conditions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Shape-based boundary conditions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The following example sets up a system consisting of a spherical boundary -in the center of the simulation box acting as a no-slip boundary for the -LB fluid that is driven by 4 walls with a slip velocity:: +Adding a shape-based boundary is straightforward:: - import espressomd import espressomd.lb - import espressomd.lbboundaries import espressomd.shapes - - system = espressomd.System(box_l=[64, 64, 64]) + system = espressomd.System(box_l=[10.0, 10.0, 10.0]) + system.cell_system.skin = 0.1 system.time_step = 0.01 - system.cell_system.skin = 0.4 - - lb = espressomd.lb.LBFluid(agrid=1.0, dens=1.0, visc=1.0, tau=0.01) - system.actors.add(lb) - - v = [0, 0, 0.01] # the boundary slip - walls = [None] * 4 - - wall_shape = espressomd.shapes.Wall(normal=[1, 0, 0], dist=1) - walls[0] = espressomd.lbboundaries.LBBoundary(shape=wall_shape, velocity=v) - - wall_shape = espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-63) - walls[1] = espressomd.lbboundaries.LBBoundary(shape=wall_shape, velocity=v) + lbf = espressomd.lb.LBFluidWalberla(agrid=0.5, density=1.0, kinematic_viscosity=1.0, tau=0.01) + system.actors.add(lbf) + # set up shear flow between two sliding walls + wall1 = espressomd.shapes.Wall(normal=[+1., 0., 0.], dist=2.5) + lbf.add_boundary_from_shape(shape=wall1, velocity=[0., +0.05, 0.]) + wall2 = espressomd.shapes.Wall(normal=[-1., 0., 0.], dist=-(system.box_l[0] - 2.5)) + lbf.add_boundary_from_shape(shape=wall2, velocity=[0., -0.05, 0.]) + +The ``velocity`` argument is optional; when omitted, no-slip boundary +conditions are used. For a position-dependent slip velocity, the argument +to ``velocity`` must be a 4D grid (the first three dimensions must match +the LB grid shape, the fourth dimension has size 3 for the velocity).
- wall_shape = espressomd.shapes.Wall(normal=[0, 1, 0], dist=1) - walls[2] = espressomd.lbboundaries.LBBoundary(shape=wall_shape, velocity=v) +The LB boundaries use the same :mod:`~espressomd.shapes` objects to specify +their geometry as :mod:`~espressomd.constraints` do for particles. +This allows the user to quickly set up a system with boundary conditions +that simultaneously act on the fluid and particles. For a complete +description of all available shapes, refer to :mod:`espressomd.shapes`. - wall_shape = espressomd.shapes.Wall(normal=[0, -1, 0], dist=-63) - walls[3] = espressomd.lbboundaries.LBBoundary(shape=wall_shape, velocity=v) +.. _Prototyping new LB methods: - for wall in walls: - system.lbboundaries.add(wall) +Prototyping new LB methods +-------------------------- - sphere_shape = espressomd.shapes.Sphere(radius=5.5, center=[33, 33, 33], direction=1) - sphere = espressomd.lbboundaries.LBBoundary(shape=sphere_shape) - system.lbboundaries.add(sphere) +Start by installing the code generator dependencies: - system.integrator.run(4000) +.. code-block:: bash - print(sphere.get_force()) + python3 -m pip install --user -c requirements.txt numpy sympy lbmpy pystencils islpy -After integrating the system for a sufficient time to reach the steady state, -the hydrodynamic drag force exerted on the sphere is evaluated. +Next, edit the code generator script to configure new kernels, then execute it: -The LB boundaries use the same :mod:`~espressomd.shapes` objects to specify -their geometry as :mod:`~espressomd.constraints` do for particles. -This allows the user to quickly set up a system with boundary conditions -that simultaneously act on the fluid and particles. For a complete -description of all available shapes, refer to :mod:`espressomd.shapes`. +.. code-block:: bash -Intersecting boundaries are in principle possible but must be treated -with care. In the current implementation, all nodes that are -within at least one boundary are treated as boundary nodes. 
+ python3 maintainer/walberla_kernels/generate_lb_kernels.py -Currently, only the so-called "link-bounce-back" algorithm for wall -nodes is available. This creates a boundary that is located -approximately midway between the lattice nodes, so in the above example ``wall[0]`` -corresponds to a boundary at :math:`x=1.5`. Note that the -location of the boundary is unfortunately not entirely independent of -the viscosity. This can be seen when using the sample script with a high -viscosity. - -The bounce back boundary conditions permit it to set the velocity at the boundary -to a non-zero value via the ``v`` property of an ``LBBoundary`` object. -This allows to create shear flow and boundaries -moving relative to each other. The velocity boundary conditions are -implemented according to :cite:`succi01a` eq. 12.58. Using -this implementation as a blueprint for the boundary treatment, an -implementation of the Ladd-Coupling should be relatively -straightforward. The ``LBBoundary`` object furthermore possesses -a property ``force``, which keeps track of the hydrodynamic drag -force exerted onto the boundary by the moving fluid. +The script takes optional arguments to control the CPU or GPU architecture, +as well as the floating-point precision. The generated source code files need +to be written to :file:`src/walberla_bridge/src/lattice_boltzmann/generated_kernels/`. +These steps can be automated with the convenience shell functions documented in +:file:`maintainer/walberla_kernels/Readme.md`. +Edit the :file:`CMakeLists.txt` file in the destination folder to include the +new kernels in the build system. +Then, adapt :file:`src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp` +to use the new LB kernels. .. [1] https://www.paraview.org/ - .. 
[2] http://code.enthought.com/projects/mayavi/ diff --git a/doc/sphinx/particles.rst b/doc/sphinx/particles.rst index 7bbb7b46052..b1da59f0d94 100644 --- a/doc/sphinx/particles.rst +++ b/doc/sphinx/particles.rst @@ -386,7 +386,6 @@ For correct results, the LB thermostat has to be deactivated for virtual sites:: system.thermostat.set_lb(kT=0, act_on_virtual=False) Please note that the velocity attribute of the virtual particles does not carry valid information for this virtual sites scheme. -With the LB GPU implementation, inertialess tracers only work on 1 MPI rank. .. _Interacting with groups of particles: diff --git a/doc/tutorials/active_matter/active_matter.ipynb b/doc/tutorials/active_matter/active_matter.ipynb index 290cd845e85..2055485ab38 100644 --- a/doc/tutorials/active_matter/active_matter.ipynb +++ b/doc/tutorials/active_matter/active_matter.ipynb @@ -124,7 +124,7 @@ "import espressomd.accumulators\n", "\n", "espressomd.assert_features(\n", - " [\"ENGINE\", \"ROTATION\", \"MASS\", \"ROTATIONAL_INERTIA\", \"CUDA\"])" + " [\"ENGINE\", \"ROTATION\", \"MASS\", \"ROTATIONAL_INERTIA\", \"WALBERLA\"])" ] }, { @@ -891,8 +891,10 @@ }, "source": [ "```python\n", - "lbf = espressomd.lb.LBFluidGPU(agrid=HYDRO_PARAMS['agrid'], dens=HYDRO_PARAMS['dens'],\n", - " visc=HYDRO_PARAMS['visc'], tau=HYDRO_PARAMS['time_step'])\n", + "lbf = espressomd.lb.LBFluidWalberla(agrid=HYDRO_PARAMS['agrid'],\n", + " density=HYDRO_PARAMS['dens'],\n", + " kinematic_viscosity=HYDRO_PARAMS['visc'],\n", + " tau=HYDRO_PARAMS['time_step'])\n", "system.actors.add(lbf)\n", "system.thermostat.set_lb(LB_fluid=lbf, gamma=HYDRO_PARAMS['gamma'], seed=42)\n", "```" @@ -997,8 +999,19 @@ "metadata": {}, "outputs": [], "source": [ - "lbf.write_vtk_velocity('./fluid.vtk')\n", - "system.part.writevtk('./particle.vtk')" + "import os\n", + "vtk_base_dir = os.path.join('vtk_out', 'RESULTS_FLOW_FIELD')\n", + "vtk_identifier = f'T_{HYDRO_PARAMS[\"mode\"]}_P_{pos[2]}'\n", + "vtk_outdir = 
os.path.join(vtk_base_dir, vtk_identifier)\n", + "lb_vtk = espressomd.lb.VTKOutput(identifier=vtk_identifier,\n", + " observables=[\"velocity_vector\"],\n", + " base_folder=vtk_base_dir,\n", + " prefix=\"lb_velocity\")\n", + "lbf.add_vtk_writer(vtk=lb_vtk)\n", + "for i in range(HYDRO_N_STEPS // 100):\n", + " system.integrator.run(100)\n", + " system.part.writevtk(os.path.join(vtk_outdir, f'position_{i}.vtk'), types=[0])\n", + " lb_vtk.write()" ] }, { diff --git a/doc/tutorials/electrokinetics/CMakeLists.txt b/doc/tutorials/electrokinetics/CMakeLists.txt index 32a271d67a4..a3c419cb6fc 100644 --- a/doc/tutorials/electrokinetics/CMakeLists.txt +++ b/doc/tutorials/electrokinetics/CMakeLists.txt @@ -20,5 +20,4 @@ configure_tutorial_target(TARGET tutorial_ek DEPENDS electrokinetics.ipynb figures/schlitzpore_3d.png scripts/eof_analytical.py) -nb_export(TARGET tutorial_ek SUFFIX "" FILE "electrokinetics.ipynb" HTML_RUN - VAR_SUBST "integration_length=600;dt=0.5") +nb_export(TARGET tutorial_ek SUFFIX "" FILE "electrokinetics.ipynb" HTML_RUN) diff --git a/doc/tutorials/electrokinetics/electrokinetics.ipynb b/doc/tutorials/electrokinetics/electrokinetics.ipynb index dc709d5c7d4..a20ec75b40e 100644 --- a/doc/tutorials/electrokinetics/electrokinetics.ipynb +++ b/doc/tutorials/electrokinetics/electrokinetics.ipynb @@ -17,9 +17,8 @@ " 1. [The Electrokinetic Equations](#The-Electrokinetic-Equations)\n", " 2. [EOF in the Slit Pore Geometry](#EOF-in-the-Slit-Pore-Geometry)\n", "3. [Simulation using ESPResSo](#Simulation-using-ESPResSo)\n", - " 1. [Setting up ESPResSo](#Setting-up-ESPResSo)\n", - " 2. [Mapping SI and Simulation Units](#Mapping-SI-and-Simulation-Units)\n", - " 3. [Setting up the slit pore system](#Setting-up-the-slit-pore-system)\n", + " 1. [Mapping SI and Simulation Units](#Mapping-SI-and-Simulation-Units)\n", + " 2. [Setting up the slit pore system](#Setting-up-the-slit-pore-system)\n", "4. 
[References](#References)\n", " " ] @@ -31,18 +30,15 @@ "## Introduction\n", "\n", "In recent years the lattice-Boltzmann method (LBM) has proven itself to be a viable way to introduce hydrodynamic interactions into coarse-grained MD simulations with moderate computational cost.\n", - "The success of the GPU LBM implementation in ESPResSo and similar developments in other software packages created demand for further developments in this area.\n", - "ESPResSo features two such algorithms, namely ELECTROHYDRODYNAMICS, and ELECTROKINETICS (EK).\n", - "Both of these make use of the LBM and extend it to coarse-grain not only the solvent molecules but also ionic solutes.\n", - "ELECTROHYDRODYNAMICS does so using a slip layer coupling for charged particles valid in the thin Debye layer (large salt concentration) limit [1], while EK explicitly treats the ionic solutes in a continuum fashion and is valid for a wide range of salt concentrations [2-4].\n", + "ESPResSo features such an algorithm, which can make use of the LBM and extend it to coarse-grain not only the solvent molecules but also ionic solutes. 
It is called EK and explicitly treats the ionic solutes in a continuum fashion and is valid for a wide range of salt concentrations [1-3].\n", "\n", "### Tutorial Outline\n", "\n", "To make our first steps using ELECTROKINETICS we will work on one of the few systems for which analytic solutions for the electrokinetic equations exist: the slip pore geometry with a counterion-only electrolyte.\n", "The same slit pore system is also treated in the LBM tutorial, but there, the ionic species were modeled as explicit particles.\n", - "For this system, the two approaches lead to exactly the same results [5].\n", + "For this system, the two approaches lead to exactly the same results [4].\n", "Differences became significant for multivalent ions, very high salt concentrations, and very high surface charge, since then the mean-field approach the EK employs, is basically solving the Poisson-Nernst-Planck formalism plus the Navier-Stokes equation on a lattice.\n", - "This leads to significantly different results from explicit ion approaches [6-8].\n", + "This leads to significantly different results from explicit ion approaches [5-7].\n", "This tutorial is mainly divided into two sections.\n", "* **Theoretical Background** introduces the electrokinetic equations and the analytical solution for the slit pore system.\n", "* **Simulation using ESPResSo** deals exclusively with the simulation. 
\n", @@ -64,7 +60,7 @@ "### The Electrokinetic Equations\n", "\n", "In the following, we will derive the equations modeling the time evolution of the concentrations of dissolved species as well as the solvent in the standard electrokinetic model.\n", - "We do so, neglecting the salt ions' contribution to the overall mass density, which allows us to treat the dynamics of the ions and the fluid separately [8].\n", + "We do so, neglecting the salt ions' contribution to the overall mass density, which allows us to treat the dynamics of the ions and the fluid separately [7].\n", "The solvent fluid will be modeled using the Navier-Stokes equations while we use a set of diffusion-migration-advection equations for the ionic species.\n" ] }, @@ -105,7 +101,7 @@ "This free-energy density consists of only an ideal-gas and an electrostatic contribution.\n", "The same assumptions form the basis of Poisson-Boltzmann (PB) theory.\n", "Hence, the limitations of this model are the same as those of PB.\n", - "That means this model applies to monovalent ions at low to intermediate densities and surface charges [6,7,11,12].\n", + "That means this model applies to monovalent ions at low to intermediate densities and surface charges [5,6,10,11].\n", "\n", "The species' chemical potentials $\\mu_{k}$ implied by the free-energy density read\n", "\n", @@ -120,7 +116,7 @@ "&= -D_{k} \\nabla c_{k} - \\xi_{k} z_{k} e c_{k} \\nabla \\Phi .\n", "\\end{aligned}\n", "\n", - "Here, $\\xi_{k}$ and $D_{k}$ denote the mobility and the diffusion coefficient of species $k$, which are related by the Einstein-Smoluchowski relation $D_{k} / \\xi_{k} = k_{\\mathrm{B}}T$ [12,13].\n", + "Here, $\\xi_{k}$ and $D_{k}$ denote the mobility and the diffusion coefficient of species $k$, which are related by the Einstein-Smoluchowski relation $D_{k} / \\xi_{k} = k_{\\mathrm{B}}T$ [11,12].\n", "\n", "Finally, the total number density flux combining effects of diffusion and advection reads\n", "\n", @@ -219,7 +215,7 
@@ "\\begin{equation}\n", "\\Phi(x) = -\\frac{k_\\mathrm{B}T}{ze} \\cdot \\log \\left[ \\frac{C^2}{8 \\pi \\, k_\\mathrm{B}T \\, l_\\mathrm{B}} \\cdot \\cos^{-2}\\left( \\frac{zeC}{2 k_\\mathrm{B}T} \\cdot x\\right) \\right], \\quad \\left| \\frac{zeC}{2 k_\\mathrm{B}T} \\cdot x \\right| < \\frac \\pi 2\\; .\n", "\\end{equation}\n", - "Refer to [5] for details on this calculation.\n", + "Refer to [4] for details on this calculation.\n", "Knowing that the counterion density $c$ resembles a Boltzmann distribution in the potential $ze \\Phi$ leads to the expression\n", "\\begin{equation}\n", "c(x) = \\frac{C^2}{8 \\pi \\, k_\\mathrm{B}T \\, l_\\mathrm{B}} \\cdot \\cos^{-2} \\left( \\frac{zeC}{2 k_\\mathrm{B}T} \\cdot x \\right) \\; .\n", @@ -251,15 +247,6 @@ "## Simulation using ESPResSo" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Setting up ESPResSo\n", - "\n", - "To use the electrokinetics solver in ESPResSo enable the features ELECTROKINETICS and EK_BOUNDARIES during the build process." 
- ] - }, { "cell_type": "markdown", "metadata": {}, @@ -368,10 +355,9 @@ "# Initializing espresso modules and the numpy package\n", "import espressomd\n", "import espressomd.electrokinetics\n", - "import espressomd.ekboundaries\n", "import espressomd.shapes\n", "\n", - "espressomd.assert_features(['CUDA', 'ELECTROKINETICS'])\n", + "espressomd.assert_features([\"WALBERLA\", \"WALBERLA_FFT\"])\n", "\n", "import tqdm\n", "import numpy as np\n", @@ -379,10 +365,6 @@ "import matplotlib.pyplot as plt\n", "plt.rcParams.update({'font.size': 16})\n", "\n", - "# Set the slit pore geometry where the width is the non-periodic part of the geometry\n", - "# the padding is used to ensure that there is no field outside the slit since the\n", - "# electrostatics is used with a 3D periodic FFT solver.\n", - "\n", "box_y = 6\n", "box_z = 6\n", "width = 50\n", @@ -411,15 +393,18 @@ "# Set the electrokinetic parameters\n", "\n", "agrid = 1.0\n", - "dt = 0.2\n", - "kT = 1.0\n", + "dt = 0.5\n", + "kT = 4.0\n", "bjerrum_length = 0.7095\n", + "permittivity = 1. 
/ (4 * np.pi * bjerrum_length)\n", "D = 0.006075\n", "valency = 1.0\n", "viscosity_dynamic = 79.53\n", "density_water = 26.15\n", "sigma = -0.05\n", - "ext_force_density = [0.0, 0.1, 0.0]" + "ext_force_density = [0.0, 0.1, 0.0]\n", + "\n", + "single_precision = False" ] }, { @@ -440,17 +425,23 @@ "system.time_step = dt\n", "system.cell_system.skin = 0.2\n", "system.thermostat.turn_off()\n", - "integration_length = int(2e4)" + "integration_length = 600" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We can now set up the electrokinetics algorithm.\n", - "All functionality pertaining to this algorithm is available through the electrokinetics submodule of espressomd.\n", - "Please note that the fluid viscosity is specified as a kinematic viscosity, which is the dynamic viscosity divided by the fluid density.\n", - "The kinematic viscosity is also required if you initialize the pure lattice-Boltzmann method.\n" + "We can now set up the electrokinetics algorithm." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lattice = espressomd.lb.LatticeWalberla(agrid=agrid, n_ghost_layers=1)" ] }, { @@ -459,26 +450,21 @@ "metadata": {}, "outputs": [], "source": [ - "# Set up the (LB) electrokinetics fluid\n", "viscosity_kinematic = viscosity_dynamic / density_water\n", - "ek = espressomd.electrokinetics.Electrokinetics(agrid=agrid,\n", - " lb_density=density_water,\n", - " viscosity=viscosity_kinematic,\n", - " friction=1.0,\n", - " T=kT,\n", - " prefactor=bjerrum_length)" + "lbf = espressomd.lb.LBFluidWalberla(lattice=lattice, density=density_water, kinematic_viscosity=viscosity_kinematic, tau=dt, single_precision=single_precision)\n", + "system.actors.add(lbf)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "The value of the friction parameter in the previous setup command is irrelevant, since we don't include any 
explicit particles in our simulation, but it's needed to pass the sanity check of the LB.\n", + "eksolver = espressomd.electrokinetics.EKFFT(lattice=lattice, permittivity=permittivity, single_precision=single_precision)\n", "\n", - "Next, we set up the individual ionic species.\n", - "In this case, we only set up one species of positively charged counterions.\n", - "The charge density is chosen in such a way, that it will cancel out the charges of the walls which are being inserted in the step afterwards.\n", - "After setting up the species, we have to add it to the electrokinetics instance. " + "system.ekcontainer.solver = eksolver\n", + "system.ekcontainer.tau = dt" ] }, { @@ -487,25 +473,19 @@ "metadata": {}, "outputs": [], "source": [ - "# Set up the charged and neutral species\n", "density_counterions = -2.0 * sigma / width\n", - "counterions = espressomd.electrokinetics.Species(density=density_counterions,\n", - " D=D,\n", - " valency=valency,\n", - " ext_force_density=ext_force_density)\n", - "\n", - "ek.add_species(counterions)" + "ekspecies = espressomd.electrokinetics.EKSpecies(lattice=lattice, density=0.0, kT=kT, diffusion=D, valency=valency, advection=True, friction_coupling=True, ext_efield=ext_force_density, single_precision=single_precision, tau=dt)\n", + "system.ekcontainer.add(ekspecies)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "The EKBoundary command takes the keyword charge_density and the numerical charge density in simulation units as arguments.\n", - "The shape keyword takes an instance of a shape, which is provided by the shapes submodule and is the same as for the LBBoundary command.\n", - "Here we initialize two charged Wall boundaries.\n", - "To initialize the boundaries, we have to add them to the ekboundaries instance of the system class.\n", - "Finally, we initialize the electrokinetics algorithm with our setup by adding the electrokinetics 
instance as an actor to the system." + "ekwallcharge = espressomd.electrokinetics.EKSpecies(lattice=lattice, density=0.0, kT=kT, diffusion=0., valency=-valency, advection=False, friction_coupling=False, ext_efield=[0, 0, 0], single_precision=single_precision, tau=dt)\n", + "system.ekcontainer.add(ekwallcharge)" ] }, { @@ -514,28 +494,42 @@ "metadata": {}, "outputs": [], "source": [ - "# Set up the walls confining the fluid\n", - "ek_wall_left = espressomd.ekboundaries.EKBoundary(charge_density=sigma / agrid,\n", - " shape=espressomd.shapes.Wall(normal=[1, 0, 0], dist=padding))\n", - "ek_wall_right = espressomd.ekboundaries.EKBoundary(charge_density=sigma / agrid,\n", - " shape=espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-(padding + width)))\n", - "\n", - "system.ekboundaries.add(ek_wall_left)\n", - "system.ekboundaries.add(ek_wall_right)\n", - "\n", - "system.actors.add(ek)" + "wall_left = espressomd.shapes.Wall(normal=[1, 0, 0], dist=padding)\n", + "wall_right = espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-(padding + width))" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], + "source": [ + "ekspecies[padding:-padding, :, :].density = density_counterions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "After setting up the system, we integrate a sufficient number of time steps to relax the system into the stationary state and output the counterion density profile, the velocity profile, and the shear stress.\n", - "Since this system has translational symmetry in the x- and y-direction, we iterate over a line in the z direction and use the species[node].quantity command, to output local quantities.\n", - "You can instead also use the electrokinetics.write_vtk_quantity command to output the whole field at once in a ParaView-compatible format.\n", + "ekspecies[:padding, :, :].density = 0.0\n", + "ekspecies[-padding:, :, :].density = 
0.0\n", "\n", - "Density and velocity are not the only fields available for output.\n", - "Please refer to the User's Guide for all available options." + "ekwallcharge[:padding, :, :].density = -sigma / valency / padding\n", + "ekwallcharge[-padding:, :, :].density = -sigma / valency / padding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for shape_obj in (wall_left, wall_right):\n", + " ekspecies.add_boundary_from_shape(shape=shape_obj, value=[0., 0., 0.], boundary_type=espressomd.electrokinetics.FluxBoundary)\n", + " ekspecies.add_boundary_from_shape(shape=shape_obj, value=0.0, boundary_type=espressomd.electrokinetics.DensityBoundary)\n", + " lbf.add_boundary_from_shape(shape=shape_obj, velocity=[0., 0., 0.])" ] }, { @@ -562,13 +556,13 @@ " node_idxs = (i, int(box_y / (2 * agrid)), int(box_z / (2 * agrid)))\n", "\n", " # density\n", - " density_list.append(counterions[node_idxs].density)\n", + " density_list.append(ekspecies[node_idxs].density)\n", "\n", " # velocity\n", - " velocity_list.append(ek[node_idxs].velocity[1])\n", + " velocity_list.append(lbf[node_idxs].velocity[1])\n", "\n", " # xz component pressure tensor\n", - " pressure_xy_list.append(ek[node_idxs].pressure_tensor[0, 1])\n", + " pressure_xy_list.append(lbf[node_idxs].pressure_tensor[0, 1])\n", "\n", "np.savetxt(\"eof_simulation.dat\",\n", " np.column_stack((position_list,\n", @@ -578,18 +572,12 @@ " header=\"#position calculated_density calculated_velocity calculated_pressure_xy\")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will now plot the counterion density, fluid velocity, and fluid shear stress\n", - "profiles along the direction perpendicular to the slit pore walls." 
- ] - }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "from scripts import eof_analytical # executes automatically upon import\n", @@ -633,25 +621,31 @@ "source": [ "## References\n", "\n", - "[1] O. A. Hickey, C. Holm, J. L. Harden and G. W. Slater *Implicit Method for Simulating Electrohydrodynamics of Polyelectrolytes* Physical Review Letters, 2010 \n", - "[2] F. Capuani, I. Pagonabarraga and D. Frenkel *Discrete solution of the electrokinetic equations* The Journal of Chemical Physics, 2004 \n", - "[3] G. Rempfer *A Lattice based Model for Electrokinetics* Master's thesis, University of Stuttgart, 2013 \n", - "[4] G. Rempfer, G. B. Davies, C. Holm and J. de Graaf *Reducing spurious flow in simulations of electrokinetic phenomena* The Journal of Chemical Physics, 2016 \n", - "[5] G. Rempfer *Lattice-Boltzmann simulations in complex geometries* Bachelor's thesis, University of Stuttgart, Institute for Computational Physics, 2010 \n", - "[6] M. Deserno and C. Holm and S. May, *Fraction of Condensed Counterions around a Charged Rod: Comparison of Poisson-Boltzmann Theory and Computer Simulations* Macromolecules, 2000 \n", - "[7] C. Holm, P. Kékicheff and R. Podgornik *Electrostatic Effects in Soft Matter and Biophysics* Kluwer Academic Publishers, 2001 \n", - "[8] M. Deserno and C. Holm *Cell-model and Poisson-Boltzmann-theory: A brief introduction* Electrostatic Effects in Soft Matter and Biophysics, Kluwer Academic Publishers, 2001 \n", - "[9] J de Graaf., G. Rempfer and C. Holm *Diffusiophoretic Self-Propulsion for Partially Catalytic Spherical Colloids* IEEE T. Nanobiosci., 2014 \n", - "[10] M. Deserno *Counterion condensation for rigid linear polyelectrolytes* Universität Mainz, 2000 \n", - "[11] J. de Graaf, N Boon, M Dijkstra and R. van Roij *Electrostatic interactions between Janus particles* The Journal of Chemical Physics, 2012 \n", - "[12] A. 
Einstein *Über die von der molekularkinetischen Theorie der Wärme geforderte Bewegung von in ruhenden Flüssigkeiten suspendierten Teilchen* Annalen der Physik, 1905 \n", - "[13] M. von Smoluchowski *Zur kinetischen Theorie der Brownschen Molekularbewegung und der Suspensionen* Annalen der Physik, 1906 \n" + "[1] F. Capuani, I. Pagonabarraga and D. Frenkel *Discrete solution of the electrokinetic equations* The Journal of Chemical Physics, 2004 \n", + "[2] G. Rempfer *A Lattice based Model for Electrokinetics* Master's thesis, University of Stuttgart, 2013 \n", + "[3] G. Rempfer, G. B. Davies, C. Holm and J. de Graaf *Reducing spurious flow in simulations of electrokinetic phenomena* The Journal of Chemical Physics, 2016 \n", + "[4] G. Rempfer *Lattice-Boltzmann simulations in complex geometries* Bachelor's thesis, University of Stuttgart, Institute for Computational Physics, 2010 \n", + "[5] M. Deserno and C. Holm and S. May, *Fraction of Condensed Counterions around a Charged Rod: Comparison of Poisson-Boltzmann Theory and Computer Simulations* Macromolecules, 2000 \n", + "[6] C. Holm, P. Kékicheff and R. Podgornik *Electrostatic Effects in Soft Matter and Biophysics* Kluwer Academic Publishers, 2001 \n", + "[7] M. Deserno and C. Holm *Cell-model and Poisson-Boltzmann-theory: A brief introduction* Electrostatic Effects in Soft Matter and Biophysics, Kluwer Academic Publishers, 2001 \n", + "[8] J de Graaf., G. Rempfer and C. Holm *Diffusiophoretic Self-Propulsion for Partially Catalytic Spherical Colloids* IEEE T. Nanobiosci., 2014 \n", + "[9] M. Deserno *Counterion condensation for rigid linear polyelectrolytes* Universität Mainz, 2000 \n", + "[10] J. de Graaf, N Boon, M Dijkstra and R. van Roij *Electrostatic interactions between Janus particles* The Journal of Chemical Physics, 2012 \n", + "[11] A. 
Einstein *Über die von der molekularkinetischen Theorie der Wärme geforderte Bewegung von in ruhenden Flüssigkeiten suspendierten Teilchen* Annalen der Physik, 1905 \n", + "[12] M. von Smoluchowski *Zur kinetischen Theorie der Brownschen Molekularbewegung und der Suspensionen* Annalen der Physik, 1906 \n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -665,7 +659,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_poiseuille_flow.ipynb b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_poiseuille_flow.ipynb index b984fe56e46..b0d51f19add 100644 --- a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_poiseuille_flow.ipynb +++ b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_poiseuille_flow.ipynb @@ -58,12 +58,11 @@ "\n", "import espressomd\n", "import espressomd.lb\n", - "import espressomd.lbboundaries\n", "import espressomd.shapes\n", "\n", "logging.basicConfig(level=logging.INFO, stream=sys.stdout)\n", "\n", - "espressomd.assert_features(['LB_BOUNDARIES_GPU'])\n", + "espressomd.assert_features(['WALBERLA'])\n", "\n", "# System constants\n", "BOX_L = 16.0\n", @@ -120,8 +119,10 @@ "source": [ "```python\n", "logging.info(\"Setup LB fluid.\")\n", - "lbf = espressomd.lb.LBFluidGPU(agrid=AGRID, dens=DENSITY, visc=VISCOSITY, tau=TIME_STEP,\n", - " ext_force_density=FORCE_DENSITY)\n", + "lbf = espressomd.lb.LBFluidWalberla(agrid=AGRID, density=DENSITY,\n", + " kinematic_viscosity=VISCOSITY,\n", + " tau=TIME_STEP,\n", + " ext_force_density=FORCE_DENSITY)\n", "system.actors.add(lbf)\n", "```" ] @@ -140,9 +141,7 @@ "solution2_first": true }, "source": [ - "Create a LB boundary and append it to the 
list of system LB boundaries.\n", - "\n", - "You can refer to section [using shapes as lattice-Boltzmann boundary](https://espressomd.github.io/doc/lb.html#using-shapes-as-lattice-boltzmann-boundary) in the user guide." + "Use the convenience function ``add_boundary_from_shape`` of the LB actor to mark nodes within a shape as boundaries.\n" ] }, { @@ -156,11 +155,8 @@ "top_wall = espressomd.shapes.Wall(normal=[1, 0, 0], dist=WALL_OFFSET)\n", "bottom_wall = espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-(BOX_L - WALL_OFFSET))\n", "\n", - "top_boundary = espressomd.lbboundaries.LBBoundary(shape=top_wall)\n", - "bottom_boundary = espressomd.lbboundaries.LBBoundary(shape=bottom_wall)\n", - "\n", - "system.lbboundaries.add(top_boundary)\n", - "system.lbboundaries.add(bottom_boundary)\n", + "lbf.add_boundary_from_shape(top_wall)\n", + "lbf.add_boundary_from_shape(bottom_wall)\n", "```" ] }, diff --git a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_sedimentation.ipynb b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_sedimentation.ipynb index 9b8d73f9a05..a8100e9cbdb 100644 --- a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_sedimentation.ipynb +++ b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_sedimentation.ipynb @@ -72,12 +72,11 @@ "source": [ "import espressomd\n", "import espressomd.lb\n", - "import espressomd.lbboundaries\n", "import espressomd.shapes\n", "import espressomd.observables\n", "import espressomd.accumulators\n", "\n", - "espressomd.assert_features([\"LENNARD_JONES\", \"LB_BOUNDARIES\"])\n", + "espressomd.assert_features([\"LENNARD_JONES\", \"WALBERLA\"])\n", "\n", "# imports for data handling, plotting, and progress bar\n", "import numpy as np\n", @@ -133,7 +132,7 @@ "n_rows = 10\n", "\n", "# system size in units of lattice spacing\n", - "n_height = 40\n", + "n_height = 50\n", "n_width = 20\n", "n_depth = 2\n", "\n", @@ -343,7 +342,10 @@ }, "source": [ "```python\n", - "lbf = espressomd.lb.LBFluid(agrid=spacing, dens=1., visc=1., 
tau=system.time_step, kT=0.)\n", + "lbf = espressomd.lb.LBFluidWalberla(agrid=spacing,\n", + " density=1.,\n", + " kinematic_viscosity=1.,\n", + " tau=system.time_step, kT=0.)\n", "system.actors.add(lbf)\n", "system.thermostat.set_lb(LB_fluid=lbf, gamma=15., seed=0)\n", "```" @@ -367,7 +369,7 @@ "\n", "**Exercise:**\n", "* convert the wall shapes to LB boundaries and add them to the system list of LB boundaries\n", - " ([user guide](https://espressomd.github.io/doc/lb.html#using-shapes-as-lattice-boltzmann-boundary))" + " ([user guide](https://espressomd.github.io/doc/lb.html#setting-up-boundary-conditions))" ] }, { @@ -379,9 +381,7 @@ "```python\n", "# add LB boundaries\n", "for wall_shape in [wall_shape_b, wall_shape_t]:\n", - " no_slip_wall = espressomd.lbboundaries.LBBoundary(\n", - " shape=wall_shape, velocity=[0, 0, 0])\n", - " system.lbboundaries.add(no_slip_wall)\n", + " lbf.add_boundary_from_shape(wall_shape)\n", "```" ] }, diff --git a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_theory.ipynb b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_theory.ipynb index ba296175ff5..c4ffcef9d42 100644 --- a/doc/tutorials/lattice_boltzmann/lattice_boltzmann_theory.ipynb +++ b/doc/tutorials/lattice_boltzmann/lattice_boltzmann_theory.ipynb @@ -43,8 +43,6 @@ "\n", "For the tutorial you will have to compile in the following features:\n", "```c++\n", - "#define LB_BOUNDARIES\n", - "#define LB_BOUNDARIES_GPU\n", "#define LENNARD_JONES\n", "```\n", "Please uncomment the features in the myconfig.hpp and compile **ESPResSo** using this myconfig.hpp. This is not necessary if you do not use a custom myconfig.hpp, since the features are activated by default. For more information on configuring **ESPResSo** and how to activate CUDA (for GPU computation), refer to the [documentation](https://espressomd.github.io/doc/installation.html). " @@ -212,11 +210,9 @@ "## 3 The LB interface in ESPResSo\n", "\n", "**ESPResSo** features two virtually independent implementations of LB. 
One implementation uses CPUs and one uses a GPU to perform the computational work. For this, we provide two actor classes\n", - "[LBFluid](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluid) and\n", - "[LBFluidGPU](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluidGPU) in the module\n", - "[espressomd.lb](https://espressomd.github.io/doc/espressomd.html#module-espressomd.lb), as well as the optional\n", - "[LBBoundary](https://espressomd.github.io/doc/espressomd.html#espressomd.lbboundaries.LBBoundary) class found in\n", - "[espressomd.lbboundaries](https://espressomd.github.io/doc/espressomd.html#module-espressomd.lbboundaries).\n", + "[LBFluidWalberla](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluidWalberla) and\n", + "[LBFluidWalberlaGPU](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluidWalberlaGPU) in the module\n", + "[espressomd.lb](https://espressomd.github.io/doc/espressomd.html#module-espressomd.lb).\n", "\n", "The LB lattice is a cubic lattice, with a lattice constant agrid that\n", "is the same in all spatial directions. The chosen box length must be an integer multiple\n", @@ -251,9 +247,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### The LBFluid class\n", + "### The LBFluidWalberla class\n", "\n", - "The LBFluid class provides an interface to the LB-Method in the **ESPResSo** core. When initializing an object, one can pass the aforementioned parameters as keyword arguments. Parameters are given in MD units. The available keyword arguments are:\n", + "The LBFluidWalberla class provides an interface to the LB-Method in the **ESPResSo** core. When initializing an object, one can pass the aforementioned parameters as keyword arguments. Parameters are given in MD units. The available keyword arguments are:\n", "\n", "+ dens: The density of the fluid.\n", "+ agrid: The lattice constant of the fluid. 
It is used to determine the number of LB nodes per direction from box_l. *They have to be compatible.*\n", @@ -263,7 +259,7 @@ "+ seed: The random number generator seed, only relevant for thermalized fluids (i.e. kT \\> 0).\n", "+ ext_force_density: An external force density applied to every node. This is given as a list, tuple or array with three components.\n", "\n", - "Using these arguments, one can initialize an LBFluid object. This object then needs to be added to the system's actor list. The code below provides a minimal example.\n", + "Using these arguments, one can initialize an LBFluidWalberla object. This object then needs to be added to the system's actor list. The code below provides a minimal example.\n", "\n", "```python\n", "import espressomd\n", @@ -274,8 +270,8 @@ "system.time_step = 0.01\n", "system.cell_system.skin = 0.4\n", "\n", - "# Initialize an LBFluid with the minimum set of valid parameters.\n", - "lbf = lb.LBFluidGPU(agrid=1, dens=10, visc=.1, tau=0.01)\n", + "# Initialize an LBFluidWalberla with the minimum set of valid parameters.\n", + "lbf = espressomd.lb.LBFluidWalberla(agrid=1, density=10, kinematic_viscosity=.1, tau=0.01)\n", "# Activate the LB by adding it to the System's actor list.\n", "system.actors.add(lbf)\n", "```" @@ -287,16 +283,15 @@ "source": [ "### Sampling data from a node\n", "\n", - "The LBFluid class also provides a set of methods which can be used to sample data from\n", + "The LBFluidWalberla class also provides a set of methods which can be used to sample data from\n", "the fluid nodes. For example lbf[X ,Y ,Z].quantity returns the quantity of the node\n", "with $(X, Y, Z)$ coordinates. 
Note that the indexing in every direction starts with 0.\n", "The possible properties are:\n", "\n", "+ velocity: the fluid velocity (list of three floats)\n", "+ pressure_tensor: the pressure tensor (3x3 matrix)\n", - "+ pressure_tensor_neq: the nonequilibrium part of the pressure tensor (3x3 matrix).\n", "+ population: the 19 populations of the D3Q19 lattice.\n", - "+ boundary: the boundary flag.\n", + "+ is_boundary: the boundary flag.\n", "+ density: the local density.\n", "\n", "Slicing is supported, e.g. to obtain all velocity vectors in the LB fluid as a Numpy array, use lbf[:,:,:].velocity." @@ -306,26 +301,29 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### The LBBoundary class\n", + "### Setting up boundaries\n", "\n", - "The [LBBoundary](https://espressomd.github.io/doc/espressomd.html#espressomd.lbboundaries.LBBoundary) class represents a boundary on the\n", - "[LBFluid](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluid) lattice.\n", - "It depends on the classes of the module espressomd.shapes as it derives its geometry from them. For the initialization, the arguments shape and velocity are supported. The shape argument takes an object from the shapes module and the velocity argument expects a list, tuple or array containing 3 floats. Setting the velocity will result in a slip boundary condition.\n", + "Boundary conditions for the fluid are set on the\n", + "[LBFluidWalberla](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.LBFluidWalberla) lattice by marking the nodes at which the boundary condition should hold as boundary nodes.\n", + "There are several ways to access individual nodes, please refer to the documentation for a complete list. Once they are gathered, a boundary condition e.g. 
of the type [VelocityBounceBack](https://espressomd.github.io/doc/espressomd.html#espressomd.lb.VelocityBounceBack) can be assigned to them, as shown in the following example: \n", + "```python\n", + "node = lbf[0,0,0]\n", + "node.boundary = VelocityBounceBack(velocity=[0,0,0])\n", + "```\n", + "In order to mark several nodes as boundaries at once, there are some convenience functions that make it possible, for example, to mark all nodes within an espressomd.shapes shape as a boundary.\n", "\n", - "Note that the boundaries are not constructed through the periodic boundary. If, for example, one would set a sphere with its center in one of the corner of the boxes, a sphere fragment will be generated. To avoid this, make sure the sphere, or any other boundary, fits inside the central box.\n", + "Note that nodes are not marked as boundaries through the periodic boundary if the shape exceeds the edges of the box. If, for example, one would set a sphere with its center in one of the corners of the box, only nodes within the sphere fragment will be boundary nodes. To avoid this, make sure the sphere, or any other shape, fits inside the central box.\n", "\n", - "Boundaries are instantiated by passing a shape object to the LBBoundary class. Here is one way to construct a wall and add it to an existing `system` instance:\n", + "Here is an example of how to use shapes to mark nodes as boundaries:\n", "\n", "```python\n", - "import espressomd.lbboundaries\n", "import espressomd.shapes\n", "\n", - "wall = espressomd.lbboundaries.LBBoundary(shape=espressomd.shapes.Wall(normal=[1, 0, 0], dist=1),\n", - " velocity=[0, 0, 0.01])\n", - "system.lbboundaries.add(wall)\n", + "wall_shape = espressomd.shapes.Wall(normal=[1, 0, 0], dist=1)\n", + "lbf.add_boundary_from_shape(wall_shape, velocity=[0, 0, 0.01])\n", "```\n", "\n", - "This will create a wall with a surface normal of $(1, 0, 0)$ at a distance of 1 from the origin of the coordinate system in direction of the normal vector. 
The wall exhibits a slip boundary condition with a velocity of $(0, 0, 0.01)$. For a no-slip boundary condition, leave out the velocity argument or set it to zero. Please refer to the user guide for a complete list of constraints.\n", + "This will create a wall shape with a surface normal of $(1, 0, 0)$ at a distance of 1 from the origin of the coordinate system in direction of the normal vector and mark all LB nodes within as boundaries. Additionally, a boundary condition with a velocity of $(0, 0, 0.01)$ is set using the optional `velocity` argument. For a no-slip boundary condition, leave out the velocity argument, as this will set it to zero by default.\n", "\n", "In **ESPResSo** the so-called *link bounce back* method is implemented, where the effective hydrodynamic boundary is located midway between boundary and fluid node." ] diff --git a/doc/tutorials/polymers/polymers.ipynb b/doc/tutorials/polymers/polymers.ipynb index 7e6a4b51534..40d4d3bbaa2 100644 --- a/doc/tutorials/polymers/polymers.ipynb +++ b/doc/tutorials/polymers/polymers.ipynb @@ -297,8 +297,9 @@ " '''\n", " Lattice-based solvation model based on the LBM (Zimm model).\n", " '''\n", - " lbf = espressomd.lb.LBFluidGPU(kT=kT, seed=42, agrid=1, dens=1,\n", - " visc=5, tau=system.time_step)\n", + " lbf = espressomd.lb.LBFluidWalberla(kT=kT, seed=42, agrid=1, density=1,\n", + " kinematic_viscosity=5, tau=system.time_step,\n", + " single_precision=True)\n", " system.actors.add(lbf)\n", " system.thermostat.set_lb(LB_fluid=lbf, gamma=gamma, seed=42)" ] @@ -343,7 +344,7 @@ "POLYMER_MODEL = 'Rouse'\n", "assert POLYMER_MODEL in ('Rouse', 'Zimm')\n", "if POLYMER_MODEL == 'Zimm':\n", - " espressomd.assert_features(['CUDA'])\n", + " espressomd.assert_features(['WALBERLA'])\n", " import espressomd.lb\n", "\n", "# System setup\n", @@ -439,8 +440,8 @@ "\n", " # reset system\n", " system.part.clear()\n", - " system.thermostat.turn_off()\n", " system.actors.clear()\n", + " system.thermostat.turn_off()\n", " 
system.auto_update_accumulators.clear()\n", "\n", "rh_results = np.array(rh_results)\n", diff --git a/doc/tutorials/raspberry_electrophoresis/raspberry_electrophoresis.ipynb b/doc/tutorials/raspberry_electrophoresis/raspberry_electrophoresis.ipynb index 6ae77d8cf57..78535da6197 100644 --- a/doc/tutorials/raspberry_electrophoresis/raspberry_electrophoresis.ipynb +++ b/doc/tutorials/raspberry_electrophoresis/raspberry_electrophoresis.ipynb @@ -74,7 +74,7 @@ "logging.basicConfig(level=logging.INFO, stream=sys.stdout)\n", "\n", "espressomd.assert_features([\"ELECTROSTATICS\", \"ROTATION\", \"ROTATIONAL_INERTIA\", \"EXTERNAL_FORCES\",\n", - " \"MASS\", \"VIRTUAL_SITES_RELATIVE\", \"CUDA\", \"LENNARD_JONES\"])\n", + " \"MASS\", \"VIRTUAL_SITES_RELATIVE\", \"LENNARD_JONES\"])\n", "\n", "import numpy as np\n", "%matplotlib inline\n", @@ -643,7 +643,9 @@ "metadata": {}, "outputs": [], "source": [ - "lb = espressomd.lb.LBFluidGPU(kT=temperature, seed=42, dens=1., visc=3., agrid=1., tau=system.time_step)\n" + "lb = espressomd.lb.LBFluidWalberla(kT=temperature, seed=42,\n", + " density=1., kinematic_viscosity=3.,\n", + " agrid=1., tau=system.time_step)" ] }, { diff --git a/maintainer/CI/build_cmake.sh b/maintainer/CI/build_cmake.sh index 43652147be1..0c9bbc07fc5 100755 --- a/maintainer/CI/build_cmake.sh +++ b/maintainer/CI/build_cmake.sh @@ -126,9 +126,12 @@ set_default_value with_hdf5 true set_default_value with_fftw true set_default_value with_gsl true set_default_value with_scafacos false +set_default_value with_walberla false +set_default_value with_walberla_avx false set_default_value with_stokesian_dynamics false set_default_value test_timeout 300 set_default_value hide_gpu false +set_default_value mpiexec_preflags "" if [ "${make_check_unit_tests}" = true ] || [ "${make_check_python}" = true ] || [ "${make_check_tutorials}" = true ] || [ "${make_check_samples}" = true ] || [ "${make_check_benchmarks}" = true ]; then run_checks=true @@ -186,6 +189,16 @@ else 
cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=OFF" fi +if [ "${with_walberla}" = true ]; then + cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA=ON -D ESPRESSO_BUILD_WITH_WALBERLA_FFT=ON" + if [ "${with_walberla_avx}" = true ]; then + cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA_AVX=ON" + fi + # disable default OpenMPI CPU binding mechanism to avoid stale references to + # waLBerla objects when multiple LB python tests run in parallel on NUMA archs + mpiexec_preflags="${mpiexec_preflags:+$mpiexec_preflags;}--bind-to;none" +fi + if [ "${with_coverage}" = true ]; then cmake_params="-D ESPRESSO_BUILD_WITH_COVERAGE=ON ${cmake_params}" fi @@ -248,7 +261,7 @@ cd "${builddir}" if [ -f "/etc/os-release" ]; then grep -q suse /etc/os-release && . /etc/profile.d/modules.sh && module load gnu-openmpi grep -q 'rhel\|fedora' /etc/os-release && for f in /etc/profile.d/*module*.sh; do . "${f}"; done && module load mpi - grep -q "Ubuntu 22.04" /etc/os-release && export MPIEXEC_PREFLAGS="--mca;btl_vader_single_copy_mechanism;none" + grep -q "Ubuntu 22.04" /etc/os-release && export MPIEXEC_PREFLAGS="--mca;btl_vader_single_copy_mechanism;none${mpiexec_preflags:+;$mpiexec_preflags}" fi # CONFIGURE @@ -288,8 +301,9 @@ end "BUILD" # library. See details in https://github.com/espressomd/espresso/issues/2249 # Can't do this check on CUDA though because nvcc creates a host function # that just calls exit() for each device function, and can't do this with -# coverage because gcov 9.0 adds code that calls exit(). -if [[ "${with_coverage}" == false && ( "${with_cuda}" == false || "${with_cuda_compiler}" != "nvcc" ) ]]; then +# coverage because gcov 9.0 adds code that calls exit(), and can't do this +# with walberla because the library calls exit() in assertions. +if [[ "${with_coverage}" == false && ( "${with_cuda}" == false || "${with_cuda_compiler}" != "nvcc" ) && "${with_walberla}" != "true" ]]; then if nm -o -C $(find . 
-name '*.so') | grep '[^a-z]exit@@GLIBC'; then echo "Found calls to exit() function in shared libraries." exit 1 @@ -384,6 +398,9 @@ if [ "${with_coverage}" = true ] || [ "${with_coverage_python}" = true ]; then lcov --gcov-tool "${GCOV:-gcov}" -q --directory . --ignore-errors graph --capture --output-file coverage.info # capture coverage info lcov --gcov-tool "${GCOV:-gcov}" -q --remove coverage.info '/usr/*' --output-file coverage.info # filter out system lcov --gcov-tool "${GCOV:-gcov}" -q --remove coverage.info '*/doc/*' --output-file coverage.info # filter out docs + if [ -d _deps/ ]; then + lcov --gcov-tool "${GCOV:-gcov}" -q --remove coverage.info $(realpath _deps/)'/*' --output-file coverage.info # filter out fetched dependencies + fi fi if [ "${with_coverage_python}" = true ]; then echo "Running python3-coverage..." diff --git a/maintainer/benchmarks/CMakeLists.txt b/maintainer/benchmarks/CMakeLists.txt index 5d1941db143..887cae6ab81 100644 --- a/maintainer/benchmarks/CMakeLists.txt +++ b/maintainer/benchmarks/CMakeLists.txt @@ -130,6 +130,16 @@ python_benchmark( python_benchmark(FILE ferrofluid.py ARGUMENTS "--particles_per_core=400") python_benchmark(FILE mc_acid_base_reservoir.py ARGUMENTS "--particles_per_core=500" RUN_WITH_MPI FALSE) +python_benchmark(FILE lb.py ARGUMENTS "--box_l=32;--single_precision") +python_benchmark(FILE lb.py ARGUMENTS "--box_l=32") +python_benchmark(FILE lb.py ARGUMENTS "--box_l=64;--single_precision") +python_benchmark(FILE lb.py ARGUMENTS "--box_l=64") +python_benchmark(FILE lb.py ARGUMENTS "--box_l=128;--single_precision") +python_benchmark(FILE lb.py ARGUMENTS "--box_l=128") +if(NOT ESPRESSO_BUILD_WITH_WALBERLA_USE_AVX) + python_benchmark(FILE lb.py ARGUMENTS "--box_l=196;--single_precision") + python_benchmark(FILE lb.py ARGUMENTS "--box_l=196") +endif() add_custom_target( benchmarks_data diff --git a/maintainer/benchmarks/lb.py b/maintainer/benchmarks/lb.py index 6aede3a3239..44252d6ab62 100644 --- a/maintainer/benchmarks/lb.py +++ 
b/maintainer/benchmarks/lb.py @@ -31,6 +31,9 @@ parser.add_argument("--particles_per_core", metavar="N", action="store", type=int, default=125, required=False, help="Number of particles per core") +parser.add_argument("--box_l", action="store", + type=int, default=32, required=False, + help="Box length, i.e. number of LB nodes per direction (agrid is 1)") parser.add_argument("--lb_sites_per_particle", metavar="N_LB", action="store", type=float, default=28, required=False, help="Number of LB sites per particle") @@ -38,6 +41,8 @@ type=float, default=0.03, required=False, help="Fraction of the simulation box volume occupied by " "particles (range: [0.01-0.74], default: 0.50)") +parser.add_argument("--single_precision", action="store_true", required=False, + help="Using single-precision floating point accuracy") parser.add_argument("--output", metavar="FILEPATH", action="store", type=str, required=False, default="benchmarks.csv", help="Output file (default: benchmarks.csv)") @@ -50,7 +55,7 @@ assert args.volume_fraction < np.pi / (3 * np.sqrt(2)), \ "volume_fraction exceeds the physical limit of sphere packing (~0.74)" -required_features = ["LENNARD_JONES"] +required_features = ["LENNARD_JONES", "WALBERLA"] espressomd.assert_features(required_features) # System @@ -68,13 +73,10 @@ ############################################################# n_proc = system.cell_system.get_state()['n_nodes'] -n_part = n_proc * args.particles_per_core -# volume of N spheres with radius r: N * (4/3*pi*r^3) -box_l = (n_part * 4. / 3. * np.pi * (lj_sig / 2.)**3 - / args.volume_fraction)**(1. / 3.) -lb_grid = int(2 * round(np.cbrt(n_part * args.lb_sites_per_particle) / 2.)) -agrid = box_l / lb_grid -measurement_steps = int(max(120**3 / lb_grid**3, 50)) +box_l = args.box_l +lb_grid = box_l +agrid = 1. 
+measurement_steps = 80 # System ############################################################# @@ -86,51 +88,13 @@ system.cell_system.skin = 0.5 system.thermostat.turn_off() -# Interaction setup -############################################################# -system.non_bonded_inter[0, 0].lennard_jones.set_params( - epsilon=lj_eps, sigma=lj_sig, cutoff=lj_cut, shift="auto") - -# Particle setup -############################################################# - -system.part.add(pos=np.random.random((n_part, 3)) * system.box_l) - -# Warmup Integration -############################################################# - -# warmup -benchmarks.minimize(system, n_part / 10.) - -system.integrator.set_vv() -system.thermostat.set_langevin(kT=1.0, gamma=1.0, seed=42) - -# tuning and equilibration -min_skin = 0.2 -max_skin = 1.0 -print("Tune skin: {:.3f}".format(system.cell_system.tune_skin( - min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=100))) -print("Equilibration") -system.integrator.run(500) -print("Tune skin: {:.3f}".format(system.cell_system.tune_skin( - min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=100))) -print("Equilibration") -system.integrator.run(500) - - -system.thermostat.turn_off() print(f"LB shape: [{lb_grid}, {lb_grid}, {lb_grid}]") print(f"LB agrid: {agrid:.3f}") -if hasattr(espressomd.lb, "LBFluid"): - LBClass = espressomd.lb.LBFluid -elif hasattr(espressomd.lb, "LBFluidWalberla"): - LBClass = espressomd.lb.LBFluidWalberla -else: - raise Exception("LB not built in") - -lbf = LBClass(agrid=agrid, dens=1, visc=1, tau=system.time_step, kT=1, seed=1) + +lbf = espressomd.lb.LBFluidWalberla(agrid=agrid, tau=system.time_step, + density=1., kinematic_viscosity=1., + single_precision=args.single_precision) system.actors.add(lbf) -system.thermostat.set_lb(gamma=10, LB_fluid=lbf, seed=2) # time integration loop @@ -138,7 +102,7 @@ # average time avg, ci = benchmarks.get_average_time(timings) -print(f"average: {avg:.3e} +/- {ci:.3e} (95% C.I.)") 
+print(f"average: {1000 * avg:.1f} +/- {1000 * ci:.1f} ms (95% C.I.)") # write report benchmarks.write_report(args.output, n_proc, timings, measurement_steps) diff --git a/maintainer/benchmarks/runner.sh b/maintainer/benchmarks/runner.sh index 1c1f694cf37..4258a12fcf3 100644 --- a/maintainer/benchmarks/runner.sh +++ b/maintainer/benchmarks/runner.sh @@ -50,7 +50,7 @@ for config in ${configs}; do echo "### ${config}" | tee -a benchmarks.log cp ${config} myconfig.hpp rm -rf src/ maintainer/ - cmake -D ESPRESSO_BUILD_BENCHMARKS=ON -D ESPRESSO_TEST_TIMEOUT=1200 -D ESPRESSO_BUILD_WITH_CUDA=OFF -D ESPRESSO_BUILD_WITH_CCACHE=OFF .. + cmake -D ESPRESSO_BUILD_BENCHMARKS=ON -D ESPRESSO_TEST_TIMEOUT=1200 -D ESPRESSO_BUILD_WITH_CUDA=OFF -D ESPRESSO_BUILD_WITH_WALBERLA=ON -D ESPRESSO_BUILD_WITH_CCACHE=OFF .. make -j$(nproc) rm -f benchmarks.csv.part touch benchmarks.csv.part diff --git a/maintainer/benchmarks/suite.sh b/maintainer/benchmarks/suite.sh index 5371cce82c3..df04e82f6f3 100644 --- a/maintainer/benchmarks/suite.sh +++ b/maintainer/benchmarks/suite.sh @@ -72,6 +72,7 @@ EOF for commit in ${commits}; do echo "### commit ${commit}" >> benchmarks.log git checkout ${commit} -- ${directories} + rm -rf _deps # commits might rely on a different version of dependencies bash ../maintainer/benchmarks/runner.sh sed -ri "s/^/\"${commit}\",/" benchmarks.csv tail -n +2 benchmarks.csv >> benchmarks_suite.csv diff --git a/maintainer/configs/maxset.hpp b/maintainer/configs/maxset.hpp index 351edd0dcb0..29c531428f9 100644 --- a/maintainer/configs/maxset.hpp +++ b/maintainer/configs/maxset.hpp @@ -37,15 +37,7 @@ #endif #define ENGINE - #define LB_ELECTROHYDRODYNAMICS -#define LB_BOUNDARIES -#ifdef CUDA -#define LB_BOUNDARIES_GPU -#define ELECTROKINETICS -#define EK_BOUNDARIES -#define EK_DEBUG -#endif #define EXCLUSIONS diff --git a/maintainer/configs/no_rotation.hpp b/maintainer/configs/no_rotation.hpp index 31461bbbac1..d1a6f418c35 100644 --- a/maintainer/configs/no_rotation.hpp +++ 
b/maintainer/configs/no_rotation.hpp @@ -41,18 +41,6 @@ #define MMM1D_GPU #endif -// Hydrodynamics -#define LB_BOUNDARIES -#ifdef CUDA -#define LB_BOUNDARIES_GPU -#endif - -// Electrokinetics -#ifdef CUDA -#define ELECTROKINETICS -#define EK_BOUNDARIES -#endif - // Force/energy calculation #define EXCLUSIONS diff --git a/maintainer/walberla_kernels/Readme.md b/maintainer/walberla_kernels/Readme.md new file mode 100644 index 00000000000..37ecd2a7cea --- /dev/null +++ b/maintainer/walberla_kernels/Readme.md @@ -0,0 +1,59 @@ +# Automated kernel generation with waLBerla + +The scripts in this directory generate the kernels for lattice-based algorithms. + +The following dependencies need to be in the Python path: + +* pystencils (https://i10git.cs.fau.de/pycodegen/pystencils) +* lbmpy (https://i10git.cs.fau.de/pycodegen/lbmpy/) +* waLBerla's Python components. Here the same version should be used as + the one used to build ESPResSo. One way is to use the copy fetched in + ESPResSo's `build/_deps/walberla-src/python/` directory. 
+ +The Python dependencies can be pip installed locally with the following command: + +```sh +python3 -m pip install --user -c requirements.txt numpy sympy lbmpy pystencils islpy +``` + +The kernels can be regenerated with this shell script: + +```sh +# adapt these paths to the build environment +export VERSION=1.2 +export DEPS="${HOME}/walberla_deps" +export PYTHONPATH="${DEPS}/${VERSION}/lbmpy:${DEPS}/${VERSION}/pystencils:${DEPS}/devel/walberla/python/" + +# convenience functions +function generate_lb_kernels { + python3 $(git rev-parse --show-toplevel)/maintainer/walberla_kernels/generate_lb_kernels.py $@ +} +function generate_ek_kernels { + python3 $(git rev-parse --show-toplevel)/maintainer/walberla_kernels/generate_ek_kernels.py $@ +} +function format_lb_kernels { + $(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.h + $(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.cpp -style "{Language: Cpp, ColumnLimit: 0}" +} +function format_ek_kernels { + $(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.h + $(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.cpp -style "{Language: Cpp, ColumnLimit: 0}" +} + +# LB kernels +cd $(git rev-parse --show-toplevel)/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/ +generate_lb_kernels +generate_lb_kernels --single-precision +format_lb_kernels + +# EK kernels +cd $(git rev-parse --show-toplevel)/src/walberla_bridge/src/electrokinetics/generated_kernels/ +generate_ek_kernels +generate_ek_kernels --single-precision +format_ek_kernels +mv ReactionKernel*.{cpp,h} $(git rev-parse --show-toplevel)/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/ +``` + +WARNING: The code generation sorts the arguments alphabetically by symbol name. +If you rename something, you may have to adapt the order of arguments in the +calling code! 
+diff --git a/maintainer/walberla_kernels/code_generation_context.py b/maintainer/walberla_kernels/code_generation_context.py new file mode 100644 index 00000000000..583a3fca578 --- /dev/null +++ b/maintainer/walberla_kernels/code_generation_context.py @@ -0,0 +1,146 @@ +# +# Copyright (C) 2021-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +import re +import jinja2 +import hashlib +import lbmpy +import lbmpy_walberla +import pystencils +import pystencils_walberla + + +def adapt_pystencils(): + """ + Adapt pystencils to the SFINAE method (add the block offset lambda + callback and the time_step increment). 
+ """ + old_add_pystencils_filters_to_jinja_env = pystencils_walberla.codegen.add_pystencils_filters_to_jinja_env + + def new_add_pystencils_filters_to_jinja_env(jinja_env): + # save original pystencils to adapt + old_add_pystencils_filters_to_jinja_env(jinja_env) + old_generate_members = jinja_env.filters["generate_members"] + old_generate_refs_for_kernel_parameters = jinja_env.filters[ + "generate_refs_for_kernel_parameters"] + + @jinja2.pass_context + def new_generate_members(*args, **kwargs): + output = old_generate_members(*args, **kwargs) + token = " block_offset_0_;" + if token in output: + i = output.index(token) + vartype = output[:i].split("\n")[-1].strip() + output += f"\nstd::function<void(IBlock *, {vartype}&, {vartype}&, {vartype}&)> block_offset_generator = [](IBlock * const, {vartype}&, {vartype}&, {vartype}&) {{ }};" + return output + + def new_generate_refs_for_kernel_parameters(*args, **kwargs): + output = old_generate_refs_for_kernel_parameters(*args, **kwargs) + if "block_offset_0" in output: + old_token = "auto & block_offset_" + new_token = "auto block_offset_" + assert output.count(old_token) == 3, \ + f"could not find '{old_token}' in '''\n{output}\n'''" + output = output.replace(old_token, new_token) + output += "\nblock_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);" + return output + + # replace pystencils + jinja_env.filters["generate_members"] = new_generate_members + jinja_env.filters["generate_refs_for_kernel_parameters"] = new_generate_refs_for_kernel_parameters + + pystencils_walberla.codegen.add_pystencils_filters_to_jinja_env = new_add_pystencils_filters_to_jinja_env + + +def earmark_generated_kernels(): + """ + Add an earmark at the beginning of generated kernels to document the + pystencils/lbmpy toolchain that was used to create them. 
+ """ + walberla_root = lbmpy_walberla.__file__.split("/python/lbmpy_walberla/")[0] + with open(os.path.join(walberla_root, ".git/HEAD")) as f: + walberla_commit = f.read() + if walberla_commit.startswith("ref: refs/heads/master"): + ref = walberla_commit.split()[1] + with open(os.path.join(walberla_root, f".git/{ref}")) as f: + walberla_commit = f.read() + token = "// kernel generated with" + earmark = ( + f"{token} pystencils v{pystencils.__version__}, lbmpy v{lbmpy.__version__}, " + f"lbmpy_walberla/pystencils_walberla from waLBerla commit {walberla_commit}" + ) + for filename in os.listdir("."): + if not filename.endswith( + ".tmpl.h") and filename.endswith((".h", ".cpp", ".cu")): + with open(filename, "r+") as f: + content = f.read() + if token not in content: + pos = 0 + if content.startswith("/*"): + pos = content.find("*/") + pos = content.find("\n", pos) + 1 + elif content.startswith("//====="): + pos = content.find("//=====", 5) + pos = content.find("\n", pos) + 1 + f.seek(pos) + f.write(f"\n{earmark}\n{content[pos:]}") + + +def guard_generated_kernels_clang_format(): + """ + Some namespaces are too long and will break ``clang-format`` versions + 9 and 10. Replace them with a unique string of reasonable size. + """ + for filename in os.listdir("."): + if filename.endswith(".cpp"): + with open(filename, "r") as f: + content = f.read() + all_ns = re.findall(r"^namespace (internal_[a-zA-Z0-9_]{54,}) \{$", + content, flags=re.MULTILINE) + if not all_ns: + continue + for ns in all_ns: + content = re.sub(rf"(?<=[^a-zA-Z0-9_]){ns}(?=[^a-zA-Z0-9_])", + f"internal_{hashlib.md5(ns.encode('utf-8')).hexdigest()}", + content) + with open(filename, "w") as f: + f.write(content) + + +class CodeGeneration(pystencils_walberla.CodeGeneration): + """ + This is a patched version of ``CodeGeneration`` that elides parameters + passed to the command line when running the argument parser, and then + restores them. 
It also patches the Jinja templates and earmarks the + generated kernels. + """ + + def __init__(self): + import sys + old_sys_argv = sys.argv + sys.argv = sys.argv[:1] + super().__init__() + sys.argv = old_sys_argv + adapt_pystencils() + + def __exit__(self, *args, **kwargs): + super().__exit__(*args, **kwargs) + earmark_generated_kernels() + guard_generated_kernels_clang_format() diff --git a/maintainer/walberla_kernels/custom_additional_extensions.py b/maintainer/walberla_kernels/custom_additional_extensions.py new file mode 100644 index 00000000000..3ff0b83cdd2 --- /dev/null +++ b/maintainer/walberla_kernels/custom_additional_extensions.py @@ -0,0 +1,349 @@ +# +# Copyright (C) 2022-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +import pathlib + +import jinja2 +import numpy as np +import pystencils as ps +import pystencils_walberla +import sympy as sp + + +class Dirichlet_Custom(ps.boundaries.Dirichlet): + inner_or_boundary = False + single_link = False # this is the hacky solution + + def __init__(self, value, name=None, data_type="double"): + super().__init__(value=value, name=name) + self.data_type = data_type + + @property + def additional_data(self): + if callable(self._value): + return [('value', ps.typing.BasicType(self.data_type))] + else: + return [] + + +class Flux(ps.boundaries.boundaryconditions.Boundary): + inner_or_boundary = True # call the boundary condition with the fluid cell + single_link = False # needs to be called for all directional fluxes + + def __init__(self, stencil, value=None, dim=None, data_type='double'): + self.stencil = stencil + self.value = value + if callable(self.value) and not dim: + raise ValueError( + "When using a flux callback the dimension has to be specified") + elif not callable(self.value): + dim = len(value) + self.dim = dim + self.data_type = data_type + + @property + def value_is_callable(self): + return callable(self.value) + + @property + def additional_data(self): + if self.value_is_callable: + return [(f'flux_{i}', ps.typing.BasicType( + self.data_type)) for i in range(self.dim)] + else: + return [] + + @property + def additional_data_init_callback(self): + if self.value_is_callable: + return self.value + + def __call__(self, field, direction_symbol, index_field, **kwargs): + assert ps.FieldType.is_staggered(field) + + value = [index_field(f'flux_{i}') for i in range( + self.dim)] if self.value_is_callable else self.value + value = sp.Matrix(value) + + assert all([s == 0 for s in self.stencil[0]]) + accesses = [field.staggered_access( + ps.stencil.offset_to_direction_string(d)) for d in self.stencil[1:]] + + conds = [ + sp.Equality( + direction_symbol, + ps.typing.CastFunc( + d + 1, + np.int32)) for d in range( + len(accesses))] + 
+ # use conditional + conditional = None + for access, condition, direction in zip( + accesses, conds, self.stencil[1:]): + d = sp.Matrix(direction) + + local_value = value + + # make sure the vector-access is non-negative + if isinstance(access, sp.Mul): + access *= -1 + local_value *= -1 + + assignment = [ + ps.Assignment( + access, + local_value.dot(d) / + self.stencil.D ** 2)] + + # build stacked if-conditions for directions + conditional = ps.astnodes.Conditional( + condition, ps.astnodes.Block(assignment), conditional) + + return [conditional] + + def __hash__(self): + return hash((Flux, self.stencil, self.value)) + + def __eq__(self, other): + return isinstance( + other, Flux) and other.stencil == self.stencil and self.value == other.value + + +class DirichletAdditionalDataHandler( + pystencils_walberla.additional_data_handler.AdditionalDataHandler): + def __init__(self, stencil, boundary_object): + assert isinstance(boundary_object, ps.boundaries.Dirichlet) + self._boundary_object = boundary_object + assert boundary_object.data_type in ("float32", "float64", "double") + self.data_type = "float" if boundary_object.data_type == "float32" else "double" + super().__init__(stencil=stencil) + + @property + def constructor_arguments(self): + return f", std::function<{self.data_type}(const Cell &, const shared_ptr&, IBlock&)>& " \ + "dirichletCallback " + + @property + def initialiser_list(self): + return "elementInitaliser(dirichletCallback)," + + @property + def additional_arguments_for_fill_function(self): + return "blocks, " + + @property + def additional_parameters_for_fill_function(self): + return " const shared_ptr &blocks, " + + def data_initialisation(self, _): + init_list = [f"{self.data_type} InitialisatonAdditionalData = elementInitaliser(Cell(it.x(), it.y(), it.z()), " + "blocks, *block);", "element.value = InitialisatonAdditionalData;"] + + return "\n".join(init_list) + + @property + def additional_member_variable(self): + return 
f"std::function<{self.data_type}(const Cell &, const shared_ptr&, IBlock&)> " \ + "elementInitaliser; " + + +class FluxAdditionalDataHandler( + pystencils_walberla.additional_data_handler.AdditionalDataHandler): + def __init__(self, stencil, boundary_object): + self._boundary_object = boundary_object + assert boundary_object.data_type in ("float32", "float64", "double") + self.data_type = "float" if boundary_object.data_type == "float32" else "double" + super().__init__(stencil=stencil) + + @property + def constructor_arguments(self): + return f", std::function(const Cell &, const shared_ptr&, IBlock&)>& " \ + "fluxCallback " + + @property + def initialiser_list(self): + return "elementInitaliser(fluxCallback)," + + @property + def additional_arguments_for_fill_function(self): + return "blocks, " + + @property + def additional_parameters_for_fill_function(self): + return " const shared_ptr &blocks, " + + def data_initialisation(self, direction): + dirVec = self.stencil_info[direction][1] + + init_list = [ + f"Vector3<{self.data_type}> InitialisatonAdditionalData = elementInitaliser(Cell(it.x() + {dirVec[0]}, it.y() + {dirVec[1]}, it.z() + {dirVec[2]}), " + "blocks, *block);", "element.flux_0 = InitialisatonAdditionalData[0];", + "element.flux_1 = InitialisatonAdditionalData[1];"] + if self._dim == 3: + init_list.append( + "element.flux_2 = InitialisatonAdditionalData[2];") + + return "\n".join(init_list) + + @property + def additional_member_variable(self): + return f"std::function(const Cell &, const shared_ptr&, IBlock&)> " \ + "elementInitaliser; " + + +# this custom boundary generator is necessary because our boundary +# condition writes to several fields at once which is impossible with the +# shipped one +def generate_boundary( + generation_context, + stencil, + class_name, + dim: int, + assignment, + target=ps.enums.Target.CPU, + data_type=None, + cpu_openmp=None, + namespace="pystencils", + interface_mappings=(), + generate_functor=True, + 
**create_kernel_params, +): + struct_name = "IndexInfo" + + config = pystencils_walberla.codegen.config_from_context( + generation_context, + target=target, + data_type=data_type, + cpu_openmp=cpu_openmp, + **create_kernel_params, + ) + create_kernel_params = config.__dict__ + del create_kernel_params["target"] + del create_kernel_params["index_fields"] + + coordinate_names = ("x", "y", "z")[:dim] + + index_struct_dtype = np.dtype( + [(name, np.int32) for name in coordinate_names], align=True + ) + + index_field = ps.Field( + "indexVector", + ps.FieldType.INDEXED, + index_struct_dtype, + layout=[0], + shape=( + ps.typing.TypedSymbol( + "indexVectorSize", ps.typing.BasicType(np.int32) + ), + 1, + ), + strides=(1, 1), + ) + + kernel_config = ps.CreateKernelConfig( + index_fields=[index_field], target=target, **create_kernel_params + ) + + kernel = ps.kernelcreation.create_kernel(assignment, config=kernel_config) + + if isinstance(kernel, ps.astnodes.KernelFunction): + kernel.function_name = f"boundary_{class_name}" + selection_tree = pystencils_walberla.kernel_selection.KernelCallNode( + kernel) + elif isinstance(kernel, pystencils_walberla.kernel_selection.AbstractKernelSelectionNode): + selection_tree = kernel + else: + raise ValueError( + f"kernel_creation_function returned wrong type: {kernel.__class__}" + ) + + kernel_family = pystencils_walberla.kernel_selection.KernelFamily( + selection_tree, class_name) + interface_spec = pystencils_walberla.kernel_selection.HighLevelInterfaceSpec( + kernel_family.kernel_selection_parameters, interface_mappings + ) + + additional_data_handler = pystencils_walberla.additional_data_handler.AdditionalDataHandler( + stencil=stencil) + + context = { + "kernel": kernel_family, + "class_name": class_name, + "interface_spec": interface_spec, + "generate_functor": generate_functor, + "StructName": struct_name, + "StructDeclaration": pystencils_walberla.boundary.struct_from_numpy_dtype(struct_name, index_struct_dtype), + "dim": dim, + 
"target": target.name.lower(), + "namespace": namespace, + "inner_or_boundary": False, + "single_link": False, + "additional_data_handler": additional_data_handler, + } + + env = jinja2.Environment( + loader=jinja2.PackageLoader("pystencils_walberla"), undefined=jinja2.StrictUndefined + ) + pystencils_walberla.jinja_filters.add_pystencils_filters_to_jinja_env(env) + custom_env = jinja2.Environment( + loader=jinja2.FileSystemLoader(pathlib.Path(__file__).parent), undefined=jinja2.StrictUndefined + ) + pystencils_walberla.jinja_filters.add_pystencils_filters_to_jinja_env( + custom_env) + + header = custom_env.get_template( + "templates/Boundary.tmpl.h").render(**context) + source = env.get_template("Boundary.tmpl.cpp").render(**context) + + source_extension = "cpp" if target == ps.enums.Target.CPU else "cu" + generation_context.write_file(f"{class_name}.h", header) + generation_context.write_file(f"{class_name}.{source_extension}", source) + + +def generate_kernel_selector( + generation_context, + class_name, + namespace="pystencils", + max_num_reactants=None, + precision_suffix=None, +): + """ + Generate helper functions to select a kernel with the appropriate + floating-point precision and number of ek species for the currently + active ek reaction and ek lattice. 
+ """ + + context = { + "namespace": namespace, + "class_name": class_name, + "precision_suffix": precision_suffix, + "max_num_reactants": max_num_reactants, + } + + custom_env = jinja2.Environment( + loader=jinja2.FileSystemLoader(pathlib.Path(__file__).parent), + undefined=jinja2.StrictUndefined + ) + + header = custom_env.get_template( + "templates/ReactionKernelSelector.tmpl.h").render(**context) + + generation_context.write_file(f"{class_name}_all.h", header) diff --git a/maintainer/walberla_kernels/ekin.py b/maintainer/walberla_kernels/ekin.py new file mode 100644 index 00000000000..a5f0c90b5d8 --- /dev/null +++ b/maintainer/walberla_kernels/ekin.py @@ -0,0 +1,214 @@ +# +# Copyright (C) 2022-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# + +import pystencils as ps +import sympy as sp +import numpy as np +import typing + +from pystencils.fd.derivation import FiniteDifferenceStaggeredStencilDerivation +from pystencils.fd.finitevolumes import get_access_and_direction +from pystencils.rng import random_symbol +from pystencils.stencil import inverse_direction_string + + +# this is from ps.fd.FVM1stOrder.discrete_flux.discretize +def discretize(term, neighbor): + if isinstance(term, sp.Matrix): + nw = term.applyfunc(lambda t: discretize(t, neighbor)) + return nw + elif isinstance(term, ps.field.Field.Access): + avg = (term.get_shifted(*neighbor) + term) * sp.Rational(1, 2) + return avg + elif isinstance(term, ps.fd.Diff): + access, direction = get_access_and_direction(term) + + fds = FiniteDifferenceStaggeredStencilDerivation( + neighbor, access.field.spatial_dimensions, direction) + return fds.apply(access) + + if term.args: + new_args = [discretize(a, neighbor) for a in term.args] + return term.func(*new_args) + else: + return term + + +class EK: + def __init__(self, dim, density_field, flux_field, diffusion, kT=None, velocity_field=None, + force_field=None, potential_field=None, valency=None, ext_efield=None): + assert not ps.FieldType.is_staggered(density_field) + + if velocity_field is not None: + assert not ps.FieldType.is_staggered(velocity_field) + + if force_field is not None: + assert not ps.FieldType.is_staggered(force_field) + + if potential_field is not None: + assert not ps.FieldType.is_staggered(potential_field) + + assert ps.FieldType.is_staggered(flux_field) + + self.dim = dim + self.density_field = density_field + self.velocity_field = velocity_field + self.flux_field = flux_field + self.diffusion = diffusion + self.kT = kT + self.force_field = force_field + self.potential_field = potential_field + self.valency = valency + self.ext_efield = ext_efield + + full_stencil = ["C"] + self.flux_field.staggered_stencil + list( + map(inverse_direction_string, 
self.flux_field.staggered_stencil)) + self.stencil = tuple(map(lambda d: tuple( + ps.stencil.direction_string_to_offset(d, self.dim)), full_stencil)) + + flux_expression = -self.diffusion * sp.Matrix( + [ps.fd.diff(self.density_field, i) for i in range(self.density_field.spatial_dimensions)]) + + if self.potential_field is not None and self.valency is not None: + if ext_efield is not None: + field = sp.Matrix([ps.fd.diff(self.potential_field, i) - ext_efield[i] + for i in range(self.density_field.spatial_dimensions)]) + else: + field = sp.Matrix([ps.fd.diff(self.potential_field, i) + for i in range(self.density_field.spatial_dimensions)]) + + flux_expression += - self.diffusion / self.kT * \ + self.density_field.center * self.valency * field + + self.disc = ps.fd.FVM1stOrder( + self.density_field, flux=flux_expression, source=0) + + if self.velocity_field is not None: + self.vof = ps.fd.VOF( + self.flux_field, + self.velocity_field, + self.density_field) + + def flux_advection(self): + if self.velocity_field is not None: + return [ps.Assignment(j_adv.lhs, j_adv.lhs + j_adv.rhs) + for j_adv in self.vof] + + def flux(self, include_vof: bool = False, + include_fluctuations: bool = False, + rng_node: typing.Optional[ps.rng.RNGBase] = None): + + _flux_collection = ps.AssignmentCollection( + [self.disc.discrete_flux(self.flux_field)]) + + if include_fluctuations: + if rng_node is None: + raise ValueError( + "rng_node not provided but fluctuations requested") + + block_offsets = tuple( + ps.TypedSymbol( + "block_offset_{}".format(i), + np.uint32) for i in range( + self.dim)) + + rng_symbol_gen = random_symbol(_flux_collection.subexpressions, + dim=self.dim, + rng_node=rng_node, + seed=ps.TypedSymbol( + "seed", np.uint32), + offsets=block_offsets) + + stencil = self.flux_field.staggered_stencil + stencil_offsets = list( + map(lambda d: ps.stencil.direction_string_to_offset(d), stencil)) + + for i, (val, d, rng_symb) in enumerate( + zip(stencil, stencil_offsets, 
rng_symbol_gen)): + assert _flux_collection.main_assignments[i].lhs == self.flux_field.staggered_access( + val) + _flux_collection.main_assignments[i] = ps.Assignment( + self.flux_field.staggered_access(val), + _flux_collection.main_assignments[i].rhs + sp.sqrt( + 2 * self.diffusion * discretize(self.density_field.center, d)) / sp.Matrix( + d).norm() * rng_symb * sp.sqrt( + 3) / 4) + + if include_vof: + assert self.velocity_field is not None, "velocity field is not provided!" + + for i, j_adv in enumerate(self.vof): + assert _flux_collection.main_assignments[i].lhs == j_adv.lhs + _flux_collection.main_assignments[i] = ps.Assignment( + j_adv.lhs, + _flux_collection.main_assignments[i].rhs + j_adv.rhs) + + return _flux_collection + + def continuity(self): + return self.disc.discrete_continuity(self.flux_field) + + def friction_coupling(self): + if self.kT is None or self.force_field is None: + raise RuntimeError("kT or f is not provided!") + + stencil = self.flux_field.staggered_stencil + \ + [ps.stencil.inverse_direction_string( + d) for d in self.flux_field.staggered_stencil] + + return ps.AssignmentCollection([ps.Assignment(self.force_field.center_vector, self.kT / (2 * self.diffusion) * sum([self.flux_field.staggered_access(val) * sp.Matrix( + ps.stencil.direction_string_to_offset(val)) for val in stencil[1:]], self.flux_field.staggered_access(stencil[0]) * sp.Matrix(ps.stencil.direction_string_to_offset(stencil[0]))))]) + + +class Reaction: + def __init__(self, species, orders, stoechom_coefs, rate_coef): + self.species = species + self.orders = orders + self.stoechom_coefs = stoechom_coefs + self.rate_coef = rate_coef + + def generate_reaction(self, num_reactants: int) -> ps.AssignmentCollection: + if num_reactants > len(self.species): + raise ValueError( + "Not enough species defined for number of requested reactants") + + # read density fields into subexpressions + rho_symbols = sp.symbols(f"local_rho_:{num_reactants}") + rate_symbol = 
sp.Symbol("rate_factor") + + subexpressions = [ + ps.Assignment( + rho_symbols[i], + self.species[i].center) for i in range(num_reactants)] + + rate = self.rate_coef + for i in range(num_reactants): + rate *= sp.Pow(rho_symbols[i], self.orders[i]) + + subexpressions.append(ps.Assignment(rate_symbol, rate)) + + main_assignments = [] + for i in range(num_reactants): + main_assignments.append(ps.Assignment(self.species[i].center, + rho_symbols[i] + rate_symbol * self.stoechom_coefs[i])) + + collection = ps.AssignmentCollection(subexpressions=subexpressions, + main_assignments=main_assignments) + + return collection diff --git a/maintainer/walberla_kernels/generate_ek_kernels.py b/maintainer/walberla_kernels/generate_ek_kernels.py new file mode 100644 index 00000000000..fbf9dc747f5 --- /dev/null +++ b/maintainer/walberla_kernels/generate_ek_kernels.py @@ -0,0 +1,225 @@ +# +# Copyright (C) 2022-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# + +import pystencils as ps +import pystencils_walberla +import sympy as sp +import lbmpy +import argparse + +import pystencils_espresso +import code_generation_context + +import ekin +import custom_additional_extensions + + +parser = argparse.ArgumentParser(description='Generate the waLBerla kernels.') +parser.add_argument('--single-precision', action='store_true', required=False, + help='Use single-precision') +args = parser.parse_args() + +double_precision: bool = not args.single_precision + +data_type_cpp = "double" if double_precision else "float" +data_type_np = pystencils_espresso.data_type_np[data_type_cpp] +precision_suffix = pystencils_espresso.precision_suffix[double_precision] +precision_rng = pystencils_espresso.precision_rng[double_precision] + + +def replace_getData_with_uncheckedFastGetData(filename: str) -> None: + with open(filename, "r+") as f: + content = f.read() + f.seek(0) + f.truncate(0) + content = content.replace("block->getData(indexVectorID);", + "block->uncheckedFastGetData(indexVectorID);") + f.write(content) + + +dim: int = 3 +target: ps.enums.Target = ps.enums.Target.CPU +flux_count: int = 3 ** dim // 2 + +diffusion = ps.TypedSymbol("D", data_type_np) +kT = ps.TypedSymbol("kT", data_type_np) +valency = ps.TypedSymbol("z", data_type_np) +ext_efield = [ps.TypedSymbol(f"f_ext_{i}", data_type_np) for i in range(dim)] + +density_field, potential_field, velocity_field, force_field = ps.fields( + f"rho, phi, u(#), f(#): {data_type_np}[#D]".replace("#", str(dim)), layout='zyxf') +flux_field = ps.fields( + f"j({flux_count}): {data_type_np}[{dim}D]", + layout='zyxf', + field_type=ps.FieldType.STAGGERED_FLUX) + +ek = ekin.EK( + dim=dim, + density_field=density_field, + flux_field=flux_field, + diffusion=diffusion, + kT=kT, + velocity_field=velocity_field, + force_field=force_field, + potential_field=None, + valency=None, + ext_efield=None) +ek_electrostatic = ekin.EK( + dim=dim, + density_field=density_field, + flux_field=flux_field, + 
diffusion=diffusion, + kT=kT, + velocity_field=velocity_field, + force_field=force_field, + potential_field=potential_field, + valency=valency, + ext_efield=sp.Matrix(ext_efield)) + +max_num_reactants: int = 5 + +react_rhos, orders, stoechom_coefs = [], [], [] +for i in range(max_num_reactants): + react_rhos.append( + ps.fields(f"rho_{i}: {data_type_np}[#D]".replace("#", str(dim)), + layout="zyxf")) + orders.append(ps.TypedSymbol(f"order_{i}", data_type_np)) + stoechom_coefs.append(ps.TypedSymbol(f"stoech_{i}", data_type_np)) +rate_coef = sp.Symbol("rate_coefficient") + +reaction_obj = ekin.Reaction( + species=react_rhos, + orders=orders, + stoechom_coefs=stoechom_coefs, + rate_coef=rate_coef, +) + +params = { + "target": target, + "cpu_vectorize_info": {"assume_inner_stride_one": False}} + +with code_generation_context.CodeGeneration() as ctx: + ctx.double_accuracy = double_precision + + # codegen configuration + config = pystencils_espresso.generate_config(ctx, params) + + pystencils_walberla.generate_sweep( + ctx, + f"DiffusiveFluxKernel_{precision_suffix}", + ek.flux(include_vof=False, include_fluctuations=False, + rng_node=precision_rng), + staggered=True, + **params) + pystencils_walberla.generate_sweep( + ctx, + f"DiffusiveFluxKernelWithElectrostatic_{precision_suffix}", + ek_electrostatic.flux(include_vof=False, include_fluctuations=False, + rng_node=precision_rng), + staggered=True, + **params) + pystencils_walberla.generate_sweep( + ctx, + f"AdvectiveFluxKernel_{precision_suffix}", + ek.flux_advection(), + staggered=True, + **params) + pystencils_walberla.generate_sweep( + ctx, + f"ContinuityKernel_{precision_suffix}", + ek.continuity(), + **params) + + pystencils_walberla.generate_sweep( + ctx, + f"FrictionCouplingKernel_{precision_suffix}", + ek.friction_coupling(), + **params) + + # generate dynamic fixed flux + stencil = lbmpy.LBStencil(stencil="D3Q27") + dynamic_flux = custom_additional_extensions.Flux( + stencil, lambda *args: None, dim=3, 
data_type=data_type_np) + dynamic_flux_additional_data = custom_additional_extensions.FluxAdditionalDataHandler( + stencil=stencil, boundary_object=dynamic_flux) + + pystencils_walberla.generate_staggered_flux_boundary( + generation_context=ctx, + class_name=f"FixedFlux_{precision_suffix}", + boundary_object=dynamic_flux, + dim=dim, + neighbor_stencil=stencil, + index_shape=flux_field.index_shape, + target=target, + additional_data_handler=dynamic_flux_additional_data) + + # generate dynamic fixed density + dirichlet_stencil = lbmpy.stencils.LBStencil(stencil=((0, 0, 0),)) + dirichlet = custom_additional_extensions.Dirichlet_Custom( + lambda *args: None, data_type=data_type_np) + dirichlet_additional_data = custom_additional_extensions.DirichletAdditionalDataHandler( + dirichlet_stencil, dirichlet) + + pystencils_walberla.boundary.generate_boundary( + generation_context=ctx, + class_name=f"Dirichlet_{precision_suffix}", + boundary_object=dirichlet, + additional_data_handler=dirichlet_additional_data, + field_name="field", + neighbor_stencil=stencil, + index_shape=density_field.index_shape, + target=target) + + pystencils_walberla.generate_pack_info_from_kernel( + ctx, + f"DensityPackInfo_{precision_suffix}", + ek_electrostatic.continuity(), + target=target) + + # ek reactions + for i in range(1, max_num_reactants + 1): + assignments = list(reaction_obj.generate_reaction(num_reactants=i)) + filename_stem: str = f"ReactionKernelBulk_{i}_{precision_suffix}" + pystencils_walberla.generate_sweep( + ctx, + filename_stem, + assignments) + + filename_stem: str = f"ReactionKernelIndexed_{i}_{precision_suffix}" + custom_additional_extensions.generate_boundary( + generation_context=ctx, + stencil=dirichlet_stencil, + class_name=filename_stem, + dim=dim, + target=target, + assignment=assignments) + replace_getData_with_uncheckedFastGetData( + filename=f"{filename_stem}.cpp") + + # ek reactions helper functions + custom_additional_extensions.generate_kernel_selector( + 
generation_context=ctx, + class_name="ReactionKernelBulk", + max_num_reactants=max_num_reactants, + precision_suffix=pystencils_espresso.precision_suffix) + custom_additional_extensions.generate_kernel_selector( + generation_context=ctx, + class_name="ReactionKernelIndexed", + max_num_reactants=max_num_reactants, + precision_suffix=pystencils_espresso.precision_suffix) diff --git a/maintainer/walberla_kernels/generate_lb_kernels.py b/maintainer/walberla_kernels/generate_lb_kernels.py new file mode 100644 index 00000000000..dc3083450b4 --- /dev/null +++ b/maintainer/walberla_kernels/generate_lb_kernels.py @@ -0,0 +1,205 @@ +# +# Copyright (C) 2020-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
#

import argparse
import pkg_resources

import sympy as sp

import pystencils as ps
import pystencils_walberla
import pystencils_espresso

import lbmpy
import lbmpy.creationfunctions
import lbmpy.forcemodels
import lbmpy.stencils
import lbmpy.enums

import lbmpy_walberla
import lbmpy_espresso

import lees_edwards
import relaxation_rates
import walberla_lbm_generation
import code_generation_context

parser = argparse.ArgumentParser(description="Generate the waLBerla kernels.")
parser.add_argument("--single-precision", action="store_true", required=False,
                    help="Use single-precision")
parser.add_argument("--gpu", action="store_true")
args = parser.parse_args()

# the --gpu flag selects the code generation target
if args.gpu:
    target = ps.Target.GPU
else:
    target = ps.Target.CPU

# Make sure we have the correct versions of the required dependencies
for module, requirement in [(ps, "==1.2"), (lbmpy, "==1.2")]:
    assert pkg_resources.packaging.specifiers.SpecifierSet(requirement).contains(module.__version__), \
        f"{module.__name__} version {module.__version__} doesn't match requirement {requirement}"


def paramlist(parameters, keys):
    """
    Yield ``parameters[key]`` for every key of ``keys`` that is present in
    ``parameters``, in the order given by ``keys``; missing keys are skipped.
    """
    for key in keys:
        if key in parameters:
            yield parameters[key]


with code_generation_context.CodeGeneration() as ctx:
    ctx.double_accuracy = not args.single_precision
    if target == ps.Target.GPU:
        ctx.cuda = True

    # vectorization parameters
    # maps a variant name to (kernel parameters, suffix used in class names)
    parameters = {}
    if target == ps.Target.GPU:
        default_key = "GPU"
        parameters["GPU"] = ({"target": target}, "CUDA")
    else:
        default_key = "CPU"
        cpu_vectorize_info = {
            "instruction_set": "avx",
            "assume_inner_stride_one": True,
            "assume_aligned": True,
            "assume_sufficient_line_padding": False}
        parameters["CPU"] = ({"target": target}, "")
        parameters["AVX"] = ({"target": target,
                              "cpu_vectorize_info": cpu_vectorize_info}, "AVX")

    # codegen configuration
    config = pystencils_espresso.generate_config(
        ctx, parameters[default_key][0])

    precision_prefix = pystencils_espresso.precision_prefix[ctx.double_accuracy]
    precision_suffix = pystencils_espresso.precision_suffix[ctx.double_accuracy]
    precision_rng = pystencils_espresso.precision_rng[ctx.double_accuracy]
    kT = sp.symbols("kT")
    stencil = lbmpy.stencils.LBStencil(lbmpy.enums.Stencil.D3Q19)
    fields = pystencils_espresso.generate_fields(config, stencil)
    force_field = fields["force"]

    # LB Method definition
    method = lbmpy.creationfunctions.create_mrt_orthogonal(
        stencil=stencil,
        compressible=True,
        weighted=True,
        relaxation_rates=relaxation_rates.rr_getter,
        force_model=lbmpy.forcemodels.Schiller(force_field.center_vector)
    )

    # generate stream kernels
    for params, target_suffix in paramlist(parameters, ("GPU", "CPU", "AVX")):
        pystencils_espresso.generate_stream_sweep(
            ctx,
            method,
            f"StreamSweep{precision_prefix}{target_suffix}",
            params)

    # generate initial densities
    for params, target_suffix in paramlist(parameters, (default_key,)):
        pystencils_walberla.codegen.generate_sweep(
            ctx,
            f"InitialPDFsSetter{precision_prefix}{target_suffix}",
            pystencils_espresso.generate_setters(ctx, method, params),
            **params)

    # generate unthermalized Lees-Edwards collision rule
    le_config = lbmpy.LBMConfig(stencil=stencil,
                                method=lbmpy.Method.TRT,
                                relaxation_rate=sp.Symbol("omega_shear"),
                                compressible=True,
                                zero_centered=False,
                                force_model=lbmpy.ForceModel.GUO,
                                force=force_field.center_vector,
                                kernel_type="collide_only")
    lbm_opt = lbmpy.LBMOptimisation(symbolic_field=fields["pdfs"])
    le_collision_rule_unthermalized = lbmpy.create_lb_update_rule(
        lbm_config=le_config,
        lbm_optimisation=lbm_opt)
    le_collision_rule_unthermalized = lees_edwards.add_lees_edwards_to_collision(
        config, le_collision_rule_unthermalized,
        fields["pdfs"], stencil, 1)  # shear_dir_normal y
    for params, target_suffix in paramlist(parameters, ("GPU", "CPU", "AVX")):
        pystencils_espresso.generate_collision_sweep(
            ctx,
            le_config,
            le_collision_rule_unthermalized,
            f"CollideSweep{precision_prefix}LeesEdwards{target_suffix}",
            params
        )

    # generate thermalized LB
    collision_rule_thermalized = lbmpy.creationfunctions.create_lb_collision_rule(
        method,
        zero_centered=False,
        fluctuating={
            "temperature": kT,
            "block_offsets": "walberla",
            "rng_node": precision_rng
        },
        optimization={"cse_global": True,
                      "double_precision": ctx.double_accuracy}
    )
    for params, target_suffix in paramlist(parameters, ("GPU", "CPU", "AVX")):
        pystencils_espresso.generate_collision_sweep(
            ctx,
            method,
            collision_rule_thermalized,
            f"CollideSweep{precision_prefix}Thermalized{target_suffix}",
            params
        )

    # generate accessors
    for _, target_suffix in paramlist(parameters, ("GPU", "CPU")):
        filename = f"FieldAccessors{precision_prefix}{target_suffix}"
        # output file name -> template it is rendered from
        if target == ps.Target.GPU:
            templates = {
                f"{filename}.h": "templates/FieldAccessors.tmpl.cuh",
                f"{filename}.cu": "templates/FieldAccessors.tmpl.cu",
            }
        else:
            templates = {
                f"{filename}.h": "templates/FieldAccessors.tmpl.h",
            }
        walberla_lbm_generation.generate_macroscopic_values_accessors(
            ctx, config, method, templates
        )

    # boundary conditions
    ubb_dynamic = lbmpy_espresso.UBB(
        lambda *args: None, dim=3, data_type=config.data_type.default_factory())
    ubb_data_handler = lbmpy_espresso.BounceBackSlipVelocityUBB(
        method.stencil, ubb_dynamic)

    for _, target_suffix in paramlist(parameters, ("GPU", "CPU")):
        lbmpy_walberla.generate_boundary(
            ctx, f"Dynamic_UBB_{precision_suffix}{target_suffix}", ubb_dynamic,
            method, additional_data_handler=ubb_data_handler,
            streaming_pattern="push", target=target)

        # rewrite the generated header in place
        with open(f"Dynamic_UBB_{precision_suffix}{target_suffix}.h", "r+") as f:
            content = f.read()
            f.seek(0)
            f.truncate(0)
            # patch for floating point accuracy
            content = content.replace("real_t",
                                      config.data_type.default_factory().c_name)
            f.write(content)
diff --git
a/maintainer/walberla_kernels/lbmpy_espresso.py b/maintainer/walberla_kernels/lbmpy_espresso.py new file mode 100644 index 00000000000..5055fac308c --- /dev/null +++ b/maintainer/walberla_kernels/lbmpy_espresso.py @@ -0,0 +1,81 @@ +# +# Copyright (C) 2021-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import pystencils as ps + +import lbmpy.advanced_streaming.indexing +import lbmpy.boundaries + +import lbmpy_walberla.additional_data_handler + + +class BounceBackSlipVelocityUBB( + lbmpy_walberla.additional_data_handler.UBBAdditionalDataHandler): + ''' + Dynamic UBB that implements the bounce-back method with slip velocity. + ''' + + def data_initialisation(self, direction): + ''' + Modified ``indexVector`` initialiser. The "classical" dynamic UBB + uses the velocity callback as a velocity flow profile generator. + Here we use that callback as a bounce-back slip velocity generator. + This way, the dynamic UBB can be used to implement a LB boundary. 
+ ''' + code = super().data_initialisation(direction) + dirVec = self.stencil_info[direction][1] + token = ' = elementInitaliser(Cell(it.x(){}, it.y(){}, it.z(){}),' + old_initialiser = token.format('', '', '') + assert old_initialiser in code + new_initialiser = token.format( + '+' + str(dirVec[0]), + '+' + str(dirVec[1]), + '+' + str(dirVec[2])).replace('+-', '-') + return code.replace(old_initialiser, new_initialiser) + + +class UBB(lbmpy.boundaries.UBB): + ''' + Velocity bounce back boundary condition, enforcing specified velocity at + obstacle. This is a patched version of ``lbmpy.boundaries.UBB``, which + currently doesn't support the bounce back scheme we need. + ''' + + def __call__(self, f_out, f_in, dir_symbol, + inv_dir, lb_method, index_field): + ''' + Modify the assignments such that the source and target pdfs are swapped. + ''' + assignments = super().__call__( + f_out, f_in, dir_symbol, inv_dir, lb_method, index_field) + + assert len(assignments) > 0 + + out = [] + if len(assignments) > 1: + out.extend(assignments[:-1]) + + neighbor_offset = lbmpy.advanced_streaming.indexing.NeighbourOffsetArrays.neighbour_offset( + dir_symbol, lb_method.stencil) + + assignment = assignments[-1] + assert assignment.lhs.field == f_in + out.append(ps.Assignment(assignment.lhs.get_shifted(*neighbor_offset), + assignment.rhs - f_out(dir_symbol) + f_in(dir_symbol))) + return out diff --git a/maintainer/walberla_kernels/lees_edwards.py b/maintainer/walberla_kernels/lees_edwards.py new file mode 100644 index 00000000000..041162e7068 --- /dev/null +++ b/maintainer/walberla_kernels/lees_edwards.py @@ -0,0 +1,129 @@ +# +# Copyright (C) 2021-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +from pystencils.astnodes import LoopOverCoordinate +from pystencils.typing.typed_sympy import TypedSymbol +from pystencils.typing import CastFunc +from pystencils import Assignment + +from lbmpy.macroscopic_value_kernels import macroscopic_values_setter + +import sympy as sp + + +def type_all_numbers(expr, dtype): + # originally from file pystencils/data_types.py in pycodegen/lbmpy@942c7d96 + substitutions = {a: CastFunc(a, dtype) for a in expr.atoms(sp.Number)} + return expr.subs(substitutions) + + +def velocity_offset_eqs(config, method, pdfs, shear_dir_normal, stencil): + """Calculates the difference between quilibrium pdf distributions + with (rho, u) and (rho, u+v) and applies them to out-flowing + populations in the boundary layer. Returns an AssignmentCollection + with one Assignment per stencil direction. + """ + dim = len(stencil[0]) + default_dtype = config.data_type.default_factory() + + # Placeholders indicating a population flows up or down. + # Will be replaced later using the component of the stencil direction + # along the shear_dir_normal. + points_up = sp.Symbol('points_up') + points_down = sp.Symbol('points_down') + + # Symbol for the coordinate index within the field, + # used to identify boundary layers + counters = [LoopOverCoordinate.get_loop_counter_symbol( + i) for i in range(dim)] + + grid_size = TypedSymbol("grid_size", dtype=default_dtype) + + # +,-1 for upper/lower boundary layers, 0 otherwise. + # Based on symbolic counters defined above. 
Only becomes + # non-zero if the corresponding points_up/down flags + # are engaged (which is only done for out-flowing populations) + layer_prefactor = sp.Piecewise( + (-1, + sp.And(type_all_numbers(counters[1] <= 0, default_dtype), + points_down)), + (+1, + sp.And(type_all_numbers(counters[1] >= grid_size - 1, default_dtype), + points_up)), + (0, True) + ) + + # Start with an equilibrium distribution for a given density and velocity + delta_pdf_eqs = macroscopic_values_setter( + method, sp.Symbol("dens"), [ + sp.Symbol("v_0"), sp.Symbol("v_1"), sp.Symbol("v_2")], pdfs) + + # Replace the assignments of (rho,u) by (rho, u+v) - (rho,u) + ma = [] + for a, c in zip(delta_pdf_eqs.main_assignments, method.stencil): + # Determine direction of the stencil component in the + # shear_dir_normal + if c[shear_dir_normal] == 1: + up = True + down = False + elif c[shear_dir_normal] == -1: + up = False + down = True + else: + up = False + down = False + + # Replace (rho,u) by (rho,u+v) in boundary layers + rhs = sp.simplify( + a.rhs - + a.rhs.replace( + sp.Symbol("u_0"), + sp.Symbol("u_0") + + layer_prefactor * + sp.Symbol("v_s"))) + + # Only engage if the population is outflowing. 
See layer_prefactor + rhs = rhs.replace(points_up, up) + rhs = rhs.replace(points_down, down) + new_a = Assignment(a.lhs, rhs) + + ma.append(new_a) + print(c, ma[-1]) + # Plug in modified assignments + delta_pdf_eqs.main_assignments = ma + return delta_pdf_eqs.main_assignments + + +def add_lees_edwards_to_collision( + config, collision, pdfs, stencil, shear_dir_normal): + # Get population shift for outflowing populations at the boundaries + offset = velocity_offset_eqs( + config, + collision.method, + pdfs, + shear_dir_normal, + stencil) + + ma = [] + for i, a in enumerate(collision.main_assignments): + # Add Lees-Edwards-shift to collision main assignments + new_a = Assignment(a.lhs, a.rhs + offset[i].rhs) + ma.append(new_a) + collision.main_assignments = ma + return collision diff --git a/maintainer/walberla_kernels/pystencils_espresso.py b/maintainer/walberla_kernels/pystencils_espresso.py new file mode 100644 index 00000000000..1980ba14387 --- /dev/null +++ b/maintainer/walberla_kernels/pystencils_espresso.py @@ -0,0 +1,162 @@ +# +# Copyright (C) 2021-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# + +import sympy as sp +import lbmpy.fieldaccess +import lbmpy.macroscopic_value_kernels +import lbmpy.updatekernels +import pystencils as ps +import pystencils_walberla + + +def skip_philox_unthermalized(code, result_symbols, rng_name): + for r in result_symbols: + statement = f" {r.name};" + assert statement in code, f"no declaration for variable '{r.name}' in '{code}'" + code = code.replace(statement, f" {r.name}{{}};", 1) + statement = f"{rng_name}(" + assert code.count(statement) == 1, f"need 1 '{rng_name}' call in '{code}'" + lines = code.rstrip().split("\n") + assert lines[-1].startswith(rng_name), f"'{rng_name}' not in '{lines[-1]}'" + lines[-1] = f"if (kT > 0.) {{ \n{lines[-1]}\n}}" + return "\n".join(lines) + + +class PhiloxTwoDoubles(ps.rng.PhiloxTwoDoubles): + def get_code(self, *args, **kwargs): + code = super().get_code(*args, **kwargs) + return skip_philox_unthermalized(code, self.result_symbols, self._name) + + +class PhiloxFourFloats(ps.rng.PhiloxFourFloats): + def get_code(self, *args, **kwargs): + code = super().get_code(*args, **kwargs) + return skip_philox_unthermalized(code, self.result_symbols, self._name) + + +precision_prefix = { + True: 'DoublePrecision', + False: 'SinglePrecision'} +precision_suffix = { + True: 'double_precision', + False: 'single_precision'} +precision_rng = { + True: PhiloxTwoDoubles, + False: PhiloxFourFloats} +data_type_np = {'double': 'float64', 'float': 'float32'} + + +def generate_fields(config, stencil): + dtype = data_type_np[config.data_type.default_factory().c_name] + field_layout = 'fzyx' + q = len(stencil) + dim = len(stencil[0]) + + fields = {} + # Symbols for PDF (twice, due to double buffering) + fields['pdfs'] = ps.Field.create_generic( + 'pdfs', + dim, + dtype, + index_dimensions=1, + layout=field_layout, + index_shape=(q,) + ) + fields['pdfs_tmp'] = ps.Field.create_generic( + 'pdfs_tmp', + dim, + dtype, + index_dimensions=1, + layout=field_layout, + index_shape=(q,) + ) + fields['velocity'] = 
ps.Field.create_generic( + 'velocity', + dim, + dtype, + index_dimensions=1, + layout=field_layout, + index_shape=(dim,) + ) + fields['force'] = ps.Field.create_generic( + 'force', + dim, + dtype, + index_dimensions=1, + layout=field_layout, + index_shape=(dim,) + ) + + return fields + + +def generate_config(ctx, params): + return pystencils_walberla.codegen.config_from_context(ctx, **params) + + +def generate_collision_sweep( + ctx, lb_method, collision_rule, class_name, params): + config = generate_config(ctx, params) + + # Symbols for PDF (twice, due to double buffering) + fields = generate_fields(config, lb_method.stencil) + + # Generate collision kernel + collide_update_rule = lbmpy.updatekernels.create_lbm_kernel( + collision_rule, + fields['pdfs'], + fields['pdfs_tmp'], + lbmpy.fieldaccess.CollideOnlyInplaceAccessor()) + collide_ast = ps.create_kernel( + collide_update_rule, config=config, **params) + collide_ast.function_name = 'kernel_collide' + collide_ast.assumed_inner_stride_one = True + pystencils_walberla.codegen.generate_sweep( + ctx, class_name, collide_ast, **params) + + +def generate_stream_sweep(ctx, lb_method, class_name, params): + config = generate_config(ctx, params) + + # Symbols for PDF (twice, due to double buffering) + fields = generate_fields(config, lb_method.stencil) + + # Generate stream kernel + stream_update_rule = lbmpy.updatekernels.create_stream_pull_with_output_kernel( + lb_method, fields['pdfs'], fields['pdfs_tmp'], + output={'velocity': fields['velocity']}) + stream_ast = ps.create_kernel(stream_update_rule, config=config, **params) + stream_ast.function_name = 'kernel_stream' + stream_ast.assumed_inner_stride_one = True + pystencils_walberla.codegen.generate_sweep( + ctx, class_name, stream_ast, + field_swaps=[(fields['pdfs'], fields['pdfs_tmp'])], **params) + + +def generate_setters(ctx, lb_method, params): + config = generate_config(ctx, params) + fields = generate_fields(config, lb_method.stencil) + + initial_rho = 
sp.Symbol('rho_0') + pdfs_setter = lbmpy.macroscopic_value_kernels.macroscopic_values_setter( + lb_method, + initial_rho, + fields['velocity'].center_vector, + fields['pdfs'].center_vector) + return pdfs_setter diff --git a/maintainer/walberla_kernels/relaxation_rates.py b/maintainer/walberla_kernels/relaxation_rates.py new file mode 100644 index 00000000000..14d02fdcb22 --- /dev/null +++ b/maintainer/walberla_kernels/relaxation_rates.py @@ -0,0 +1,54 @@ +# +# Copyright (C) 2021-2023 The ESPResSo project +# Copyright (C) 2019-2021 The waLBerla project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# This describes the mapping between LB moments and the corresponding relaxation rates +# There are 4 relaxation rates for shear, bulk, even and odd modes, +# respectively. + +# Original source: +# https://i10git.cs.fau.de/pycodegen/lbmpy/-/blob/0e7962be84613466e6842f37111c571db8183b3d/lbmpy_tests/test_fluctuating_lb.py#L25-47 + +from lbmpy.moments import is_bulk_moment, is_shear_moment, get_order +import sympy as sp + + +def rr_getter(moment_group): + """Maps a group of moments to a relaxation rate (shear, bulk, even, odd) + in the 4 relaxation time thermalized LB model or 0 for conserved modes. 
+ """ + is_shear = [is_shear_moment(m, 3) for m in moment_group] + is_bulk = [is_bulk_moment(m, 3) for m in moment_group] + order = [get_order(m) for m in moment_group] + assert min(order) == max(order) + order = order[0] + + if order < 2: + return [0] * len(moment_group) + elif any(is_bulk): + assert all(is_bulk) + return [sp.Symbol("omega_bulk")] * len(moment_group) + elif any(is_shear): + assert all(is_shear) + return [sp.Symbol("omega_shear")] * len(moment_group) + elif order % 2 == 0: + assert order > 2 + return [sp.Symbol("omega_even")] * len(moment_group) + else: + return [sp.Symbol("omega_odd")] * len(moment_group) diff --git a/maintainer/walberla_kernels/templates/Boundary.tmpl.h b/maintainer/walberla_kernels/templates/Boundary.tmpl.h new file mode 100644 index 00000000000..6bda8f86e06 --- /dev/null +++ b/maintainer/walberla_kernels/templates/Boundary.tmpl.h @@ -0,0 +1,306 @@ +/* + * Copyright (C) 2022-2023 The ESPResSo project + * Copyright (C) 2020-2023 The waLBerla project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* + * Boundary class. 
+ * Adapted from the waLBerla source file + * https://i10git.cs.fau.de/walberla/walberla/-/blob/fb076cd18daa6e2f24448349d1fffb974c845269/python/pystencils_walberla/templates/Boundary.tmpl.h + */ + +#pragma once + +#include + +{% if target is equalto 'cpu' -%} +#include +{%- elif target is equalto 'gpu' -%} +#include +#include +{%- endif %} +#include +#include +#include +#include +#include + +#include +#include +#include + +{% for header in interface_spec.headers %} +#include {{header}} +{% endfor %} + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace {{namespace}} { + + +class {{class_name}} +{ +public: + {{StructDeclaration|indent(4)}} + + + class IndexVectors + { + public: + using CpuIndexVector = std::vector<{{StructName}}>; + + enum Type { + ALL = 0, + INNER = 1, + OUTER = 2, + NUM_TYPES = 3 + }; + + IndexVectors() = default; + bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; } + + {% if target == 'gpu' -%} + ~IndexVectors() { + for( auto & gpuVec: gpuVectors_) + cudaFree( gpuVec ); + } + {% endif -%} + + CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; } + {{StructName}} * pointerCpu(Type t) { return cpuVectors_[t].data(); } + + {% if target == 'gpu' -%} + {{StructName}} * pointerGpu(Type t) { return gpuVectors_[t]; } + {% endif -%} + + // GPU target only: frees the device buffers, then allocates and uploads one buffer per CPU index vector. + // For other targets the generated body is empty. + void syncGPU() + { + {% if target == 'gpu' -%} + for( auto & gpuVec: gpuVectors_) + cudaFree( gpuVec ); + gpuVectors_.resize( cpuVectors_.size() ); + + WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES); + for(size_t i=0; i < cpuVectors_.size(); ++i ) + { + auto & gpuVec = gpuVectors_[i]; + auto & cpuVec = cpuVectors_[i]; + cudaMalloc( &gpuVec, sizeof({{StructName}}) * cpuVec.size() ); + // data() is well-defined for an empty index vector, unlike &cpuVec[0] + cudaMemcpy( gpuVec, cpuVec.data(), sizeof({{StructName}}) * cpuVec.size(), cudaMemcpyHostToDevice ); + } + {%- endif %} + } + + private: + std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES}; + +
{% if target == 'gpu' -%} + using GpuIndexVector = {{StructName}} *; + std::vector gpuVectors_; + {%- endif %} + }; + + {{class_name}}( const shared_ptr & blocks, + {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}}{{additional_data_handler.constructor_arguments}}) + :{{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }} + { + auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); }; + indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_{{class_name}}"); + }; + + {{class_name}}({{kernel|generate_constructor_parameters(['indexVectorSize'])}}{{additional_data_handler.constructor_arguments}}) + : {{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVectorSize']) }} + {}; + + void run ( + {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} + ); + + {% if generate_functor -%} + void operator() ( + {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} + ) + { + run( {{- ["block", kernel.kernel_selection_parameters, ["stream"] if target == 'gpu' else []] | identifier_list -}} ); + } + {%- endif %} + + void inner ( + {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} + ); + + void outer ( + {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} + ); + + std::function getSweep( {{- [interface_spec.high_level_args, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} ) + { + return [ {{- ["this", 
interface_spec.high_level_args, ["stream"] if target == 'gpu' else []] | identifier_list -}} ] + (IBlock * b) + { this->run( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); }; + } + + std::function getInnerSweep( {{- [interface_spec.high_level_args, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} ) + { + return [ {{- [ ['this'], interface_spec.high_level_args, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ] + (IBlock * b) + { this->inner( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); }; + } + + std::function getOuterSweep( {{- [interface_spec.high_level_args, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} ) + { + return [ {{- [ ['this'], interface_spec.high_level_args, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ] + (IBlock * b) + { this->outer( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); }; + } + + template + void fillFromFlagField( const shared_ptr & blocks, ConstBlockDataID flagFieldID, + FlagUID boundaryFlagUID, FlagUID domainFlagUID) + { + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + fillFromFlagField({{additional_data_handler.additional_arguments_for_fill_function}}&*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID ); + } + + + template + void fillFromFlagField({{additional_data_handler.additional_parameters_for_fill_function}}IBlock * block, ConstBlockDataID flagFieldID, + FlagUID boundaryFlagUID, FlagUID domainFlagUID ) + { + auto * indexVectors = block->getData< IndexVectors > ( indexVectorID ); + auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL); + auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER); + auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER); + + auto 
* flagField = block->getData< FlagField_T > ( flagFieldID ); + {{additional_data_handler.additional_field_data|indent(4)}} + + if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) )) + return; + + auto boundaryFlag = flagField->getFlag(boundaryFlagUID); + auto domainFlag = flagField->getFlag(domainFlagUID); + + auto inner = flagField->xyzSize(); + inner.expand( cell_idx_t(-1) ); + + indexVectorAll.clear(); + indexVectorInner.clear(); + indexVectorOuter.clear(); + + {% if inner_or_boundary -%} + for( auto it = flagField->begin(); it != flagField->end(); ++it ) + { + if( ! isFlagSet(it, domainFlag) ) + continue; + {%- for dirIdx, dirVec, offset in additional_data_handler.stencil_info %} + if ( isFlagSet( it.neighbor({{offset}} {%if dim == 3%}, 0 {%endif %}), boundaryFlag ) ) + { + auto element = {{StructName}}(it.x(), it.y(), {%if dim == 3%} it.z(), {%endif %} {{dirIdx}} ); + {{additional_data_handler.data_initialisation(dirIdx)|indent(16)}} + indexVectorAll.push_back( element ); + if( inner.contains( it.x(), it.y(), it.z() ) ) + indexVectorInner.push_back( element ); + else + indexVectorOuter.push_back( element ); + } + {% endfor %} + } + {%else%} + auto flagWithGLayers = flagField->xyzSizeWithGhostLayer(); + {% if single_link %} + {{dtype}} dot = 0.0; {{dtype}} maxn = 0.0; + cell_idx_t calculated_idx = 0; + cell_idx_t dx = 0; cell_idx_t dy = 0; {%if dim == 3%} cell_idx_t dz = 0; {% endif %} + cell_idx_t sum_x = 0; cell_idx_t sum_y = 0; {%if dim == 3%} cell_idx_t sum_z = 0; {%endif %} + {% endif -%} + for( auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end(); ++it ) + { + {% if single_link -%} + sum_x = 0; sum_y = 0; {%if dim == 3%} sum_z = 0; {%endif %} + {% endif %} + if( ! 
isFlagSet(it, boundaryFlag) ) + continue; + {%- for dirIdx, dirVec, offset in additional_data_handler.stencil_info %} + if ( flagWithGLayers.contains(it.x() + cell_idx_c({{dirVec[0]}}), it.y() + cell_idx_c({{dirVec[1]}}), it.z() + cell_idx_c({{dirVec[2]}})) && isFlagSet( it.neighbor({{offset}} {%if dim == 3%}, 0 {%endif %}), domainFlag ) ) + { + {% if single_link -%} + sum_x += cell_idx_c({{dirVec[0]}}); sum_y += cell_idx_c({{dirVec[1]}}); {%if dim == 3%} sum_z += cell_idx_c({{dirVec[2]}}); {%endif %} + {% else %} + auto element = {{StructName}}(it.x(), it.y(), {%if dim == 3%} it.z(), {%endif %} {{dirIdx}} ); + {{additional_data_handler.data_initialisation(dirIdx)|indent(16)}} + indexVectorAll.push_back( element ); + if( inner.contains( it.x(), it.y(), it.z() ) ) + indexVectorInner.push_back( element ); + else + indexVectorOuter.push_back( element ); + {% endif %} + } + {% endfor %} + + {% if single_link %} + dot = 0.0; maxn = 0.0; calculated_idx = 0; + if(sum_x != 0 or sum_y !=0 {%if dim == 3%} or sum_z !=0 {%endif %}) + { + {%- for dirIdx, dirVec, offset in additional_data_handler.stencil_info %} + dx = {{dirVec[0]}}; dy = {{dirVec[1]}}; {%if dim == 3%} dz = {{dirVec[2]}}; {% endif %} + dot = numeric_cast< {{dtype}} >( dx*sum_x + dy*sum_y {%if dim == 3%} + dz*sum_z {% endif %}); + if (dot > maxn) + { + maxn = dot; + calculated_idx = {{dirIdx}}; + } + {% endfor %} + auto element = {{StructName}}(it.x(), it.y(), {%if dim == 3%} it.z(), {%endif %} calculated_idx ); + {{additional_data_handler.data_initialisation(dirIdx)|indent(16)}} + indexVectorAll.push_back( element ); + if( inner.contains( it.x(), it.y(), it.z() ) ) + indexVectorInner.push_back( element ); + else + indexVectorOuter.push_back( element ); + } + {% endif -%} + + } + {% endif %} + + indexVectors->syncGPU(); + } + +private: + void run_impl( + {{- ["IBlock * block", "IndexVectors::Type type", + kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] + | 
type_identifier_list -}} + ); + + BlockDataID indexVectorID; + {{additional_data_handler.additional_member_variable|indent(4)}} +public: + {{kernel|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}} +}; + +} // namespace {{namespace}} +} // namespace walberla diff --git a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h new file mode 100644 index 00000000000..37e1edcf9cd --- /dev/null +++ b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h @@ -0,0 +1,437 @@ +/* + * Copyright (C) 2021-2023 The ESPResSo project + * Copyright (C) 2020 The waLBerla project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* + * Lattice field accessors. 
+ * Adapted from the waLBerla source file + * https://i10git.cs.fau.de/walberla/walberla/-/blob/a16141524c58ab88386e2a0f8fdd7c63c5edd704/python/lbmpy_walberla/templates/LatticeModel.tmpl.h + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +#ifdef WALBERLA_CXX_COMPILER_IS_CLANG +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" +#endif + +namespace walberla { +namespace {{namespace}} { +namespace accessor { + +namespace Population +{ + inline std::array<{{dtype}}, {{Q}}u> + get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, + Cell const & cell ) + { + {{dtype}} const & xyz0 = pdf_field->get(cell, uint_t{ 0u }); + std::array<{{dtype}}, {{Q}}u> pop; + {% for i in range(Q) -%} + pop[{{i}}u] = pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + return pop; + } + + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, + std::array<{{dtype}}, {{Q}}u> const & pop, + Cell const & cell ) + { + {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u }); + {% for i in range(Q) -%} + pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = pop[{{i}}u]; + {% endfor -%} + } + + inline void + broadcast( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, + std::array<{{dtype}}, {{Q}}u> const & pop) + { + WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(pdf_field, { + {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = pop[{{i}}u]; + {% endfor -%} + }); + } + + inline std::vector< {{dtype}} > + get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, + CellInterval const & ci ) + { + 
std::vector< {{dtype}} > out; + out.reserve(ci.numCells() * uint_t({{Q}}u)); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + {{dtype}} const & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + out.emplace_back(pdf_field->getF( &xyz0, uint_t{ {{i}}u })); + {% endfor -%} + } + } + } + return out; + } + + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ) + { + assert(uint_c(values.size()) == ci.numCells() * uint_t({{Q}}u)); + auto values_ptr = values.data(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = values_ptr[{{i}}u]; + {% endfor -%} + values_ptr += {{Q}}u; + } + } + } + } +} // namespace Population + +namespace Vector +{ + inline Vector{{D}}< {{dtype}} > + get( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * vec_field, + Cell const & cell ) + { + const {{dtype}} & xyz0 = vec_field->get(cell, uint_t{ 0u }); + Vector{{D}}< {{dtype}} > vec; + {% for i in range(D) -%} + vec[{{i}}] = vec_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + return vec; + } + + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field, + Vector{{D}}< {{dtype}} > const & vec, + Cell const & cell ) + { + {{dtype}} & xyz0 = vec_field->get(cell, uint_t{ 0u }); + {% for i in range(D) -%} + vec_field->getF( &xyz0, uint_t{ {{i}}u }) = vec[{{i}}u]; + {% endfor -%} + } + + inline void + add( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field, + Vector{{D}}< {{dtype}} > const & vec, + Cell const & cell ) + { + {{dtype}} & xyz0 = vec_field->get(cell, uint_t{ 0u }); + {% for i in range(D) -%} + 
vec_field->getF( &xyz0, uint_t{ {{i}}u }) += vec[{{i}}u]; + {% endfor -%} + } + + inline void + broadcast( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field, + Vector{{D}}< {{dtype}} > const & vec) + { + WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(vec_field, { + {{dtype}} & xyz0 = vec_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(D) -%} + vec_field->getF( &xyz0, uint_t{ {{i}}u }) = vec[{{i}}u]; + {% endfor -%} + }); + } + + inline void + add_to_all( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field, + Vector{{D}}< {{dtype}} > const & vec) + { + WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(vec_field, { + {{dtype}} & xyz0 = vec_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(D) -%} + vec_field->getF( &xyz0, uint_t{ {{i}}u }) += vec[{{i}}u]; + {% endfor -%} + }); + } + + inline std::vector< {{dtype}} > + get( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * vec_field, + CellInterval const & ci ) + { + std::vector< {{dtype}} > out; + out.reserve(ci.numCells() * uint_t({{D}}u)); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + const {{dtype}} & xyz0 = vec_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(D) -%} + out.emplace_back(vec_field->getF( &xyz0, uint_t{ {{i}}u })); + {% endfor -%} + } + } + } + return out; + } + + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ) + { + assert(uint_c(values.size()) == ci.numCells() * uint_t({{D}}u)); + auto values_ptr = values.data(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + {{dtype}} & xyz0 = vec_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(D) -%} + vec_field->getF( &xyz0, uint_t{ {{i}}u }) = values_ptr[{{i}}u]; + {% endfor -%} + values_ptr += {{D}}u; + } + 
} + } + } +} // namespace Vector + +namespace EquilibriumDistribution +{ + inline {{dtype}} + get( stencil::Direction const direction, + Vector{{D}}< {{dtype}} > const & u = Vector{{D}}< {{dtype}} >( {{dtype}}(0.0) ), + {{dtype}} rho = {{dtype}}(1.0) ) + { + {% if not compressible %} + rho -= {{dtype}}(1.0); + {% endif %} + {{equilibrium_from_direction}} + } +} // namespace EquilibriumDistribution + +namespace Equilibrium +{ + // Writes one population per `equilibrium` template term into `cell` of `pdf_field`, + // evaluated for velocity `u` and density `rho`. + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, + Vector{{D}}< {{dtype}} > const & u, + {{dtype}} rho, + Cell const & cell ) + { + // rho is a by-value, non-const parameter: it is shifted in place when the model is not compressible + {%if not compressible %} + rho -= {{dtype}}(1.0); + {%endif %} + + {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u }); + {% for eqTerm in equilibrium -%} + pdf_field->getF( &xyz0, uint_t{ {{ loop.index0 }}u }) = {{eqTerm}}; + {% endfor -%} + } +} // namespace Equilibrium + +namespace Density +{ + inline {{dtype}} + get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, + Cell const & cell ) + { + const {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + {{density_getters | indent(8)}} + return rho; + } + + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, + {{dtype}} const rho_in, + Cell const & cell ) + { + const {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + + {{unshifted_momentum_density_getter | indent(8)}} + + // calculate current velocity (before density change) + const {{dtype}} conversion = {{dtype}}(1) / rho; + Vector{{D}}< {{dtype}} > velocity; + {% for i in range(D) -%} + velocity[{{i}}u] = momdensity_{{i}} * conversion; + {% endfor %} + + Equilibrium::set(pdf_field, velocity, rho_in {%if not compressible %} + {{dtype}}(1) {%endif%}, cell); + } + + inline std::vector< {{dtype}} > +
get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, + CellInterval const & ci ) + { + std::vector< {{dtype}} > out; + out.reserve(ci.numCells()); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + const {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + {{density_getters | indent(12)}} + out.emplace_back(rho); + } + } + } + return out; + } + + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ) + { + assert(uint_c(values.size()) == ci.numCells()); + auto values_it = values.begin(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + const {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + + {{unshifted_momentum_density_getter | indent(12)}} + + // calculate current velocity (before density change) + const {{dtype}} conversion = {{dtype}}(1) / rho; + Vector{{D}}< {{dtype}} > velocity; + {% for i in range(D) -%} + velocity[{{i}}u] = momdensity_{{i}} * conversion; + {% endfor %} + + Equilibrium::set(pdf_field, velocity, *values_it {%if not compressible %} + {{dtype}}(1) {%endif%}, Cell{x, y, z}); + ++values_it; + } + } + } + } +} // namespace Density + +namespace Velocity +{ + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field, + Vector{{D}}< {{dtype}} > const & u, + Cell const & cell ) + { + const {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = 
pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + {{density_getters | indent(8)}} + + {% for c in "xyz" -%} + const auto {{c}} = cell.{{c}}(); + {% endfor -%} + {{density_velocity_setter_macroscopic_values | substitute_force_getter_cpp | indent(8)}} + + Equilibrium::set(pdf_field, Vector{{D}}<{{dtype}}>({% for i in range(D) %}u_{{i}}{% if not loop.last %}, {% endif %}{% endfor %}), rho {%if not compressible %} + {{dtype}}(1) {%endif%}, cell); + } +} // namespace Velocity + +namespace MomentumDensity +{ + inline Vector{{D}}< {{dtype}} > + reduce( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field ) + { + Vector{{D}}< {{dtype}} > momentumDensity({{dtype}} {0}); + WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, { + const {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + + {{momentum_density_getter | substitute_force_getter_cpp | indent(8) }} + + {% for i in range(D) -%} + momentumDensity[{{i}}u] += md_{{i}}; + {% endfor %} + }); + return momentumDensity; + } +} // namespace MomentumDensity + +namespace PressureTensor +{ + inline Matrix{{D}}< {{dtype}} > + get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, + Cell const & cell ) + { + const {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + + {{second_momentum_getter | indent(8) }} + + Matrix{{D}}< {{dtype}} > pressureTensor; + {% for i in range(D) -%} + {% for j in range(D) -%} + pressureTensor[{{i*D+j}}u] = p_{{i*D+j}}; + {% endfor %} + {% endfor %} + return pressureTensor; + } +} // namespace PressureTensor + +} // namespace accessor +} // namespace {{namespace}} +} // namespace walberla + +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic pop +#endif + +#ifdef 
WALBERLA_CXX_COMPILER_IS_CLANG +#pragma clang diagnostic pop +#endif diff --git a/maintainer/walberla_kernels/templates/ReactionKernelSelector.tmpl.h b/maintainer/walberla_kernels/templates/ReactionKernelSelector.tmpl.h new file mode 100644 index 00000000000..7ec4666d7b9 --- /dev/null +++ b/maintainer/walberla_kernels/templates/ReactionKernelSelector.tmpl.h @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2022-2023 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#pragma once + +{% for i in range(1, max_num_reactants + 1) %} +#include "{{class_name}}_{{i}}_{{precision_suffix[True]}}.h" +#include "{{class_name}}_{{i}}_{{precision_suffix[False]}}.h" +{% endfor %} + +#include + +#include +#include +#include +#include +#include + +namespace walberla { +namespace detail { +namespace {{class_name}}Selector { + +template struct KernelTrait { + using {{class_name}} = + {{namespace}}::{{class_name}}_1_{{precision_suffix[True]}}; +}; +{% for i in range(2, max_num_reactants + 1) %} +template <> struct KernelTrait { + using {{class_name}} = + {{namespace}}::{{class_name}}_{{i}}_{{precision_suffix[True]}}; +}; +{% endfor %} +{% for i in range(1, max_num_reactants + 1) %} +template <> struct KernelTrait { + using {{class_name}} = + {{namespace}}::{{class_name}}_{{i}}_{{precision_suffix[False]}}; +}; +{% endfor %} + +template +auto get_kernel_impl(const std::vector> &reactants, + const double coefficient, + {% if class_name == 'ReactionKernelIndexed' -%} + const BlockDataID &indexFieldID, + {% endif -%} + std::index_sequence int_seq) { + auto kernel = std::make_shared< + typename KernelTrait::{{class_name}}>( + {% if class_name == 'ReactionKernelIndexed' -%} + indexFieldID, + {% endif -%} + walberla::BlockDataID( + reactants[ints]->get_species()->get_density_id())..., + numeric_cast(reactants[ints]->get_order())..., + numeric_cast(coefficient), + numeric_cast(reactants[ints]->get_stoech_coeff())...); + + std::function sweep = [kernel](IBlock * b) { kernel->run(b); }; + return sweep; +} + +template +auto get_kernel_impl(const std::vector> &reactants, + Args... args) { + switch (reactants.size()) { +{% for i in range(1, max_num_reactants + 1) %} + case {{i}}: + return get_kernel_impl(reactants, args..., + std::make_index_sequence<{{i}}>{}); +{% endfor %} + default: + throw std::runtime_error("reactions of this size are not implemented!"); + } +} + +template +auto get_kernel(const std::vector> &reactants, + Args... 
args) { + + const auto is_double_precision = + reactants[0]->get_species()->is_double_precision(); + + if (is_double_precision) { + return get_kernel_impl(reactants, args...); + } + + return get_kernel_impl(reactants, args...); +} + +} // namespace {{class_name}}Selector +} // namespace detail +} // namespace walberla diff --git a/maintainer/walberla_kernels/walberla_lbm_generation.py b/maintainer/walberla_kernels/walberla_lbm_generation.py new file mode 100644 index 00000000000..72f5ffdfec4 --- /dev/null +++ b/maintainer/walberla_kernels/walberla_lbm_generation.py @@ -0,0 +1,212 @@ +# +# Copyright (C) 2021-2023 The ESPResSo project +# Copyright (C) 2020-2022 The waLBerla project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+#
+
+import os
+import sympy as sp
+import pystencils as ps
+import lbmpy_walberla
+from pystencils.typing.typed_sympy import TypedSymbol
+from pystencils.typing import BasicType, CastFunc
+
+# File derived from lbmpy_walberla.walberla_lbm_generation in the
+# walberla project, commit 3455bf3eebc64efa9beaecd74ebde3459b98991d
+
+
+def __type_equilibrium_assignments(assignments, config, subs_dict):
+    """
+    Apply ``subs_dict`` to ``assignments``, evaluate the terms of the main
+    assignments and attach data types according to ``config``.
+    """
+    # Function derived from lbmpy_walberla.walberla_lbm_generation.__type_equilibrium_assignments()
+    # in the walberla project, commit 9dcd0dd90f50f7b64b0a38bb06327854463fdafd
+    from pystencils.node_collection import NodeCollection
+    from pystencils.typing.transformations import add_types
+    result = assignments.new_with_substitutions(subs_dict)
+    result = NodeCollection(result.main_assignments)
+    result.evaluate_terms()
+    result = add_types(result.all_assignments, config)
+    return result
+
+
+def type_expr(eq, dtype):
+    """
+    Return ``eq`` with the listed unit fractions wrapped in a cast to
+    ``dtype`` and with every plain symbol replaced by a ``TypedSymbol``
+    of the same name and type ``dtype``.
+    """
+    # manually cast floats to dtype since this is not done automatically
+    repl = ((rational := sp.Rational(1, i), CastFunc(rational, dtype))
+            for i in (2, 3, 4, 6, 8, 9, 12, 24, 18, 36, 72))
+    eq = eq.subs(repl)
+    return eq.subs({s: TypedSymbol(s.name, dtype)
+                    for s in eq.atoms(sp.Symbol)})
+
+
+def pow_to_mul(eq):
+    """
+    Rewrite positive integer powers of symbols in ``eq`` as unevaluated
+    products (e.g. ``x**3`` becomes ``x*x*x``). Each folding is printed.
+    """
+    keep_processing = True
+    while keep_processing:
+        for expr in sp.preorder_traversal(eq):
+            if expr.is_Pow:
+                base, exponent = expr.args
+                if base.is_Symbol and exponent.is_Integer:
+                    power = exponent.p
+                    if power >= 1:
+                        chained_product = power * [base]
+                        expr_mul = sp.Mul(*chained_product, evaluate=False)
+                        print(f"folding '{expr}' to '{expr_mul}'")
+                        eq = eq.subs(expr, sp.UnevaluatedExpr(expr_mul))
+                        # the traversal is stale after a substitution: restart it
+                        break
+        else:
+            # full traversal without any substitution: nothing left to fold
+            keep_processing = False
+    return eq
+
+
+def make_velocity_getters(cqc, rho_sym, vel_arr_symbols):
+    """
+    Return the equations that compute the equilibrium input from the given
+    density and velocity symbols, with the two leading assignments removed
+    (the trivial ``rho = rho`` and the ``delta_rho`` one, both asserted).
+    """
+    velocity_getter = cqc.equilibrium_input_equations_from_init_values(
+        rho_sym, vel_arr_symbols)
+    eq = velocity_getter.main_assignments.pop(0)
+    assert eq.lhs == rho_sym and eq.rhs == rho_sym
+    eq = velocity_getter.main_assignments.pop(0)
+    assert eq.lhs.name == f"delta_{rho_sym.name}"
+    return velocity_getter
+
+
+def equations_to_code(equations, variable_prefix="",
+                      variables_without_prefix=None, dtype=None, backend=None):
+    """
+    Convert assignments to source code, one statement per line.
+
+    Parameters
+    ----------
+    equations :
+        An assignment collection or an iterable of assignments.
+    variable_prefix : :obj:`str`
+        Prefix forwarded to ``field_and_symbol_substitute()``.
+    variables_without_prefix :
+        Variables exempt from the prefix; left-hand side names of
+        ``equations`` are always exempt.
+    dtype :
+        Data type of all symbols, defaults to ``float64``.
+    backend :
+        Callable that prints one assignment node as a string (required).
+
+    Returns
+    -------
+    :obj:`str`
+        The printed assignments joined by newlines.
+    """
+    if backend is None:
+        # fail early with a readable message instead of "'NoneType' object
+        # is not callable" from inside the loop
+        raise TypeError("equations_to_code() requires a 'backend' callable")
+
+    if dtype is None:
+        dtype = BasicType("float64")
+
+    if variables_without_prefix is None:
+        variables_without_prefix = []
+    if isinstance(equations, ps.AssignmentCollection):
+        equations = equations.all_assignments
+
+    variables_without_prefix = list(variables_without_prefix)
+
+    result = []
+    left_hand_side_names = [eq.lhs.name for eq in equations]
+    for eq in equations:
+        lhs, rhs = eq.lhs, eq.rhs
+        rhs = lbmpy_walberla.walberla_lbm_generation.field_and_symbol_substitute(
+            rhs, variable_prefix, variables_without_prefix + left_hand_side_names)
+        lhs = type_expr(lhs, dtype=dtype)
+        rhs = type_expr(rhs, dtype=dtype)
+        rhs = pow_to_mul(rhs)
+        assignment = ps.astnodes.SympyAssignment(lhs, rhs)
+        result.append(backend(assignment))
+    return "\n".join(result)
+
+
+def substitute_force_getter_cpp(code):
+    """
+    Jinja filter: rename the ``force->`` field accessor in generated C++
+    code to ``force_field->``. The accessor must occur in ``code``.
+    """
+    field_getter = "force->"
+    assert field_getter in code, f"pattern '{field_getter}' not found in '''\n{code}\n'''"
+    return code.replace(field_getter, "force_field->")
+
+
+def add_espresso_filters_to_jinja_env(jinja_env):
+    """Register the ESPResSo-specific filters in ``jinja_env``."""
+    jinja_env.filters["substitute_force_getter_cpp"] = substitute_force_getter_cpp
+
+
+def generate_macroscopic_values_accessors(ctx, config, lb_method, templates):
+
+    # Function derived from lbmpy_walberla.walberla_lbm_generation.__lattice_model()
+    # in the walberla project, commit 3455bf3eebc64efa9beaecd74ebde3459b98991d
+    # with backports from commit de6b00071233a9a1f45d7a6773988363e058f1a0
+
+    from jinja2 import Environment, FileSystemLoader, StrictUndefined
+    from sympy.tensor import IndexedBase
+    from pystencils.backends.cbackend import CustomSympyPrinter
+    from pystencils.backends.cbackend import CBackend
+    from pystencils.backends.cuda_backend import CudaBackend
+    from pystencils_walberla.jinja_filters
import add_pystencils_filters_to_jinja_env + from lbmpy_walberla.walberla_lbm_generation import stencil_switch_statement + + cpp_printer = CustomSympyPrinter() + stencil_name = lb_method.stencil.name + if not stencil_name: + raise ValueError( + "lb_method uses a stencil that is not supported in waLBerla") + + default_dtype = config.data_type.default_factory() + if config.target == ps.Target.GPU: + backend = CudaBackend() + else: + backend = CBackend() + kwargs = { + "backend": backend, + "variable_prefix": "", + "dtype": default_dtype} + + cqc = lb_method.conserved_quantity_computation + vel_symbols = cqc.velocity_symbols + rho_sym = sp.Symbol("rho") + pdfs_sym = sp.symbols(f"f_:{lb_method.stencil.Q}") + vel_arr_symbols = [ + IndexedBase(TypedSymbol("u", dtype=default_dtype), shape=(1,))[i] + for i in range(len(vel_symbols))] + momentum_density_symbols = sp.symbols(f"md_:{len(vel_symbols)}") + second_momentum_symbols = sp.symbols(f"p_:{len(vel_symbols)**2}") + + equilibrium_subs_dict = dict(zip(vel_symbols, vel_arr_symbols)) + equilibrium = lb_method.get_equilibrium() + lhs_list = [a.lhs for a in equilibrium.main_assignments] + equilibrium_matrix = sp.Matrix( + [e.rhs for e in equilibrium.main_assignments]) + equilibrium = ps.AssignmentCollection([ps.Assignment(lhs, rhs) + for lhs, rhs in zip(lhs_list, equilibrium_matrix)]) + equilibrium = __type_equilibrium_assignments( + equilibrium, config, equilibrium_subs_dict) + + velocity_getters = make_velocity_getters(cqc, rho_sym, vel_arr_symbols) + density_velocity_setter_macroscopic_values = equations_to_code( + velocity_getters, variables_without_prefix=["rho", "u"], **kwargs) + momentum_density_getter = cqc.output_equations_from_pdfs( + pdfs_sym, {"density": rho_sym, "momentum_density": momentum_density_symbols}) + unshifted_momentum_density_getter = cqc.output_equations_from_pdfs( + pdfs_sym, {"density": rho_sym, "momentum_density": momentum_density_symbols}) + for i, eq in reversed( + 
list(enumerate(unshifted_momentum_density_getter.main_assignments))): + if eq.lhs.name.startswith("md_"): + del unshifted_momentum_density_getter.main_assignments[i] + second_momentum_getter = cqc.output_equations_from_pdfs( + pdfs_sym, {"moment2": second_momentum_symbols}) + + jinja_context = { + "stencil_name": stencil_name, + "D": lb_method.stencil.D, + "Q": lb_method.stencil.Q, + "compressible": cqc.compressible, + "zero_centered": cqc.zero_centered_pdfs, + "dtype": default_dtype, + + "equilibrium_from_direction": stencil_switch_statement(lb_method.stencil, equilibrium), + "equilibrium": [cpp_printer.doprint(e.rhs) for e in equilibrium], + + "density_getters": equations_to_code( + cqc.output_equations_from_pdfs(pdfs_sym, {"density": rho_sym}), + variables_without_prefix=[e.name for e in pdfs_sym], **kwargs), + "momentum_density_getter": equations_to_code( + momentum_density_getter, variables_without_prefix=pdfs_sym, **kwargs), + "second_momentum_getter": equations_to_code( + second_momentum_getter, variables_without_prefix=pdfs_sym, **kwargs), + "density_velocity_setter_macroscopic_values": density_velocity_setter_macroscopic_values, + "unshifted_momentum_density_getter": equations_to_code(unshifted_momentum_density_getter, variables_without_prefix=pdfs_sym, **kwargs), + + "namespace": "lbm", + } + + env = Environment(loader=FileSystemLoader(os.path.dirname(__file__)), + undefined=StrictUndefined) + add_pystencils_filters_to_jinja_env(env) + add_espresso_filters_to_jinja_env(env) + + for filename, template in templates.items(): + source = env.get_template(template).render(**jinja_context) + ctx.write_file(filename, source) diff --git a/requirements.txt b/requirements.txt index 8493b9872ab..111ff05ecd0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ cython>=0.29.21,<3.0 setuptools>=59.6.0 # required scientific packages -numpy>=1.21.5 +numpy>=1.23 h5py>=3.6.0 # optional scientific packages scipy>=1.8.0 @@ -12,6 +12,11 @@ matplotlib>=3.5.1 
vtk>=9.1.0 PyOpenGL>=3.1.5 pygame>=2.1.2 +# waLBerla dependencies +pystencils==1.2 +lbmpy==1.2 +sympy==1.9 +islpy==2022.2.1 # CI-related requests>=2.25.1 lxml>=4.8.0 diff --git a/samples/ekboundaries.py b/samples/ekboundaries.py deleted file mode 100644 index e8b825070cb..00000000000 --- a/samples/ekboundaries.py +++ /dev/null @@ -1,77 +0,0 @@ -# -# Copyright (C) 2010-2022 The ESPResSo project -# -# This file is part of ESPResSo. -# -# ESPResSo is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# ESPResSo is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# - -""" -Set up an electrokinetics (LB) fluid confined between charged walls. 
-""" - -import os - -import espressomd -import espressomd.shapes -import espressomd.electrokinetics -import espressomd.ekboundaries - -required_features = ["ELECTROKINETICS", "EK_BOUNDARIES", "EXTERNAL_FORCES"] -espressomd.assert_features(required_features) - -system = espressomd.System(box_l=[10, 10, 10]) -system.cell_system.skin = 0.4 -system.time_step = 0.1 - -ek = espressomd.electrokinetics.Electrokinetics( - lb_density=1, friction=1, agrid=1, viscosity=1, T=1, prefactor=1) - -pos = espressomd.electrokinetics.Species( - density=0.05, D=0.1, valency=1, ext_force_density=[0, 0, 1.]) -neg = espressomd.electrokinetics.Species( - density=0.05, D=0.1, valency=-1, ext_force_density=[0, 0, -1.]) -ek.add_species(pos) -ek.add_species(neg) -system.actors.add(ek) - -print(ek.get_params()) -print(pos.get_params()) -print(neg.get_params()) -print(pos[5, 5, 5].density) - - -ek_wall_left = espressomd.ekboundaries.EKBoundary( - shape=espressomd.shapes.Wall(dist=1, normal=[1, 0, 0]), charge_density=-0.01) -ek_wall_right = espressomd.ekboundaries.EKBoundary( - shape=espressomd.shapes.Wall(dist=-9, normal=[-1, 0, 0]), charge_density=0.01) -system.ekboundaries.add(ek_wall_left) -system.ekboundaries.add(ek_wall_right) - - -if not os.path.isdir("ek"): - os.makedirs("ek") - - -n_int_cycles = 1000 -for i in range(n_int_cycles): - system.integrator.run(100) - print("\rIntegrating: %03i" % i, end='', flush=True) - - pos.write_vtk_density("ek/pos_dens_%i.vtk" % i) - neg.write_vtk_density("ek/neg_dens_%i.vtk" % i) - pos.write_vtk_flux("ek/pos_flux_%i.vtk" % i) - neg.write_vtk_flux("ek/neg_flux_%i.vtk" % i) - ek.write_vtk_velocity("ek/ekv_%i.vtk" % i) - ek.write_vtk_boundary("ek/ekb_%i.vtk" % i) diff --git a/samples/immersed_boundary/sampleImmersedBoundary.py b/samples/immersed_boundary/sampleImmersedBoundary.py index 5e5ef28cbd1..11b63a45753 100644 --- a/samples/immersed_boundary/sampleImmersedBoundary.py +++ b/samples/immersed_boundary/sampleImmersedBoundary.py @@ -26,10 +26,9 @@ import 
espressomd import espressomd.lb import espressomd.shapes -import espressomd.lbboundaries import espressomd.virtual_sites -required_features = ["LB_BOUNDARIES", "VIRTUAL_SITES_INERTIALESS_TRACERS"] +required_features = ["VIRTUAL_SITES_INERTIALESS_TRACERS", "WALBERLA"] espressomd.assert_features(required_features) parser = argparse.ArgumentParser() @@ -76,20 +75,20 @@ outputDir = "outputVolParaCUDA" # Add LB Fluid -lbf = espressomd.lb.LBFluid(agrid=1, dens=1, visc=1, tau=system.time_step, - ext_force_density=[force, 0, 0]) +lbf = espressomd.lb.LBFluidWalberla( + agrid=1, density=1, kinematic_viscosity=1, tau=system.time_step, + ext_force_density=[force, 0, 0]) system.actors.add(lbf) system.thermostat.set_lb(LB_fluid=lbf, gamma=1.0, act_on_virtual=False) # Setup boundaries -walls = [espressomd.lbboundaries.LBBoundary() for k in range(2)] -walls[0].set_params(shape=espressomd.shapes.Wall(normal=[0, 0, 1], dist=0.5)) -walls[1].set_params(shape=espressomd.shapes.Wall( - normal=[0, 0, -1], dist=-boxZ + 0.5)) +wall_shapes = [None] * 2 +wall_shapes[0] = espressomd.shapes.Wall(normal=[0, 0, 1], dist=0.5) +wall_shapes[1] = espressomd.shapes.Wall(normal=[0, 0, -1], dist=-boxZ + 0.5) -for wall in walls: - system.lbboundaries.add(wall) +for wall_shape in wall_shapes: + lbf.add_boundary_from_shape(wall_shape) # make directory os.makedirs(outputDir) diff --git a/samples/lb_circular_couette.py b/samples/lb_circular_couette.py new file mode 100644 index 00000000000..9b30fbac048 --- /dev/null +++ b/samples/lb_circular_couette.py @@ -0,0 +1,201 @@ +# +# Copyright (C) 2021-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +""" +Simulate a rotating cylinder in a fluid via slip velocity boundary conditions. +""" + +import espressomd.lb +import espressomd.shapes +import espressomd.constraints +import espressomd.observables +import espressomd.math +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.colors +import matplotlib.ticker +import itertools +import argparse + +espressomd.assert_features(["WALBERLA"]) + +parser = argparse.ArgumentParser(epilog=__doc__) +parser.add_argument("--visualizer", action="store_true", dest="visualizer", + help="Run the visualizer") +args = parser.parse_args() + +# set up LB system +agrid = 0.5 +grid_size = np.array([31, 31, 4]) +system = espressomd.System(box_l=grid_size * agrid) +system.time_step = 0.1 +if args.visualizer: + system.time_step = 0.001 +system.cell_system.skin = 0.1 +system.periodicity = [False, False, True] +lb_fluid = espressomd.lb.LBFluidWalberla( + agrid=agrid, density=0.5, kinematic_viscosity=3.2, tau=system.time_step) +system.actors.add(lb_fluid) + +# set up cylinders +cyl_center = agrid * (grid_size // 2 + 0.5) * [1, 1, 0] +cylinder_in = espressomd.shapes.Cylinder( + center=cyl_center, axis=[0, 0, 1], length=3 * system.box_l[2], + radius=8.1 * agrid, direction=1) +cylinder_out = espressomd.shapes.Cylinder( + center=cyl_center, axis=[0, 0, 1], length=3 * system.box_l[2], + radius=14.5 * agrid, direction=-1) +lb_fluid.add_boundary_from_shape(cylinder_in) +lb_fluid.add_boundary_from_shape(cylinder_out) + +# the system needs to be fully symmetric +mask = np.copy(lb_fluid[:, :, :].is_boundary.astype(int)) +np.testing.assert_array_equal(mask, np.flip(mask, 
axis=0)) +np.testing.assert_array_equal(mask, np.flip(mask, axis=1)) +np.testing.assert_array_equal(mask, np.flip(mask, axis=2)) + +# the system needs to be closed in the x and y directions +np.testing.assert_array_equal(mask[0, :, :], 1) +np.testing.assert_array_equal(mask[-1, :, :], 1) +np.testing.assert_array_equal(mask[:, 0, :], 1) +np.testing.assert_array_equal(mask[:, -1, :], 1) + +# add tangential slip velocity to the inner cylinder +velocity_magnitude = 0.01 +surface_nodes = espressomd.lb.edge_detection( + lb_fluid.get_shape_bitmask(cylinder_in), system.periodicity) +tangents = espressomd.lb.calc_cylinder_tangential_vectors( + cylinder_in.center, lb_fluid.agrid, 0.5, surface_nodes) +for node, tangent in zip(surface_nodes, tangents): + lb_fluid[node].boundary = espressomd.lb.VelocityBounceBack( + velocity_magnitude * tangent) + +if args.visualizer: + import espressomd.visualization + visualizer = espressomd.visualization.openGLLive( + system, + LB_draw_velocity_plane=True, + LB_plane_dist=0, + LB_plane_axis=2, + LB_vel_scale=80, + LB_vel_radius_scale=0.05, + LB_plane_ngrid=15, + quality_constraints=128, + camera_position=[8, 8, 30], + background_color=[1, 1, 1], + velocity_arrows_type_colors=[[0, 1, 0]] + ) + system.constraints.add(shape=cylinder_in) + system.constraints.add(shape=cylinder_out) + system.integrator.run(100) + visualizer.run(1) + +# add observable for the fluid velocity in cylindrical coordinates +cyl_transform_params = espressomd.math.CylindricalTransformationParameters( + center=cyl_center, axis=[0, 0, 1], orientation=[1, 0, 0]) +observable = espressomd.observables.CylindricalLBVelocityProfile( + transform_params=cyl_transform_params, + n_r_bins=grid_size[0] // 2, + n_phi_bins=1, + n_z_bins=1, + min_r=0.0, + max_r=system.box_l[0] / 2, + min_phi=0., + max_phi=2 * np.pi, + min_z=-system.box_l[2] / 2, + max_z=+system.box_l[2] / 2, + axis=[0.0, 0.0, 1.0], + sampling_density=1 +) +obs_data_baseline = observable.calculate() + +# equilibrate the 
fluid +system.integrator.run(100) +obs_data = observable.calculate() + +# fetch fluid and slip velocities +boundary_mask = np.squeeze(lb_fluid[:, :, 0].is_boundary.astype(bool)) +quivers_boundary = [] +quivers_fluid = [] +for i, j in itertools.product(range(boundary_mask.shape[0]), + range(boundary_mask.shape[1])): + v_fluid = lb_fluid[i, j, 0].velocity + if boundary_mask[i, j]: + quivers_boundary.append([i, j, v_fluid[0], v_fluid[1]]) + else: + quivers_fluid.append([i, j, v_fluid[0], v_fluid[1]]) + +# prepare canvas +plt.rcParams.update({'font.size': 16}) +fig1 = plt.figure() +fig2 = plt.figure() +ax1 = fig1.add_subplot(111) +ax2 = fig2.add_subplot(111) + +# plot velocity as a function of distance +profile_r = observable.bin_centers().reshape([-1, 3])[:, 0] +profile_v = (obs_data - obs_data_baseline).reshape([-1, 3]) +ax1.plot(profile_r, profile_v[:, 1]) +y_formatter = matplotlib.ticker.ScalarFormatter() +y_formatter.set_powerlimits((-1e-2, 1e-2)) +ax1.yaxis.set_major_formatter(y_formatter) +ax1.set(xlabel='Distance from cylinder center', ylabel='Fluid velocity') + +# plot boundary geometry +cmap = matplotlib.colors.ListedColormap(['white', 'silver', 'silver']) +cmap_bounds = [0, 1, 2] +cmap_norm = matplotlib.colors.BoundaryNorm(cmap_bounds, cmap.N) +ax2.imshow(boundary_mask.T, origin='lower', interpolation='nearest', cmap=cmap, + norm=cmap_norm) + +# add grid lines based on minor ticks +minor_locator = matplotlib.ticker.FixedLocator(np.arange(0.5, grid_size[0], 1)) +ax2.xaxis.set_minor_locator(minor_locator) +ax2.yaxis.set_minor_locator(minor_locator) +ax2.tick_params(axis='both', which='minor', length=0) +ax2.grid(which='minor', color='w', linestyle='-', linewidth=1.2, zorder=2) + +# remove major ticks +ax2.set_xticks([]) +ax2.set_yticks([]) + +# add cylinder radii +# circle_in = plt.Circle( +# cyl_center[:2] / agrid - agrid, cylinder_in.radius / agrid, +# color='r', fill=False, zorder=3) +# circle_out = plt.Circle( +# cyl_center[:2] / agrid - agrid, 
cylinder_out.radius / agrid, +# color='r', fill=False, zorder=3) +# ax2.add_patch(circle_in) +# ax2.add_patch(circle_out) + +# plot velocity field +quivers_boundary = np.array(quivers_boundary) +quivers_fluid = np.array(quivers_fluid) +ax2.quiver(quivers_boundary[:, 0], quivers_boundary[:, 1], quivers_boundary[:, 2], + quivers_boundary[:, 3], scale=.25, width=0.003, color='black', + zorder=4, label='slip velocity') +ax2.quiver(quivers_fluid[:, 0], quivers_fluid[:, 1], quivers_fluid[:, 2], + quivers_fluid[:, 3], scale=.25, width=0.003, color='royalblue', + zorder=4, label='fluid velocity') +ax2.set(xlabel='x-axis', ylabel='y-axis') +ax2.legend(framealpha=1, loc='upper right') + +plt.tight_layout() +plt.show() diff --git a/samples/lb_four_roller_mill.py b/samples/lb_four_roller_mill.py new file mode 100644 index 00000000000..dc7c33cbc23 --- /dev/null +++ b/samples/lb_four_roller_mill.py @@ -0,0 +1,189 @@ +# +# Copyright (C) 2021-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +""" +Simulate a four-roller mill via slip velocity boundary conditions. 
+""" + +import espressomd.lb +import espressomd.shapes +import espressomd.constraints +import espressomd.observables +import espressomd.math +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.colors +import matplotlib.ticker +import itertools +import argparse +import logging +import tqdm +import sys + +espressomd.assert_features(["WALBERLA"]) +logging.basicConfig(level=logging.INFO, stream=sys.stdout) + +parser = argparse.ArgumentParser(epilog=__doc__) +parser.add_argument("--visualizer", action="store_true", dest="visualizer", + help="Run the visualizer") +args = parser.parse_args() + +# set up LB system +logging.info('Setting up the lattice-Boltzmann fluid') +agrid = 0.5 +grid_size = np.array([64, 64, 2]) +system = espressomd.System(box_l=grid_size * agrid) +system.time_step = 0.1 +if args.visualizer: + system.time_step = 0.001 +system.cell_system.skin = 0.1 +lb_fluid = espressomd.lb.LBFluidWalberla( + agrid=agrid, density=0.5, kinematic_viscosity=3.2, tau=system.time_step) +system.actors.add(lb_fluid) + +# set up rollers by adding tangential slip velocities to cylinders +logging.info('Setting up the rollers') +cyl_center = agrid * (grid_size // 2 + 0.5) * [1, 1, 0] +for i, j in itertools.product(range(2), range(2)): + cyl_offset = np.array([1 + i * 0.99 - 0.51, 1 + j * 0.99 - 0.51, 0]) + cyl = espressomd.shapes.Cylinder( + center=agrid * (grid_size // 2 + 0.5) * cyl_offset, axis=[0, 0, 1], + length=3 * system.box_l[2], radius=14.1 * agrid, direction=1) + if args.visualizer: + system.constraints.add(shape=cyl) + lb_fluid.add_boundary_from_shape(cyl) + surface_nodes = espressomd.lb.edge_detection( + lb_fluid.get_shape_bitmask(cyl), system.periodicity) + tangents = espressomd.lb.calc_cylinder_tangential_vectors( + cyl.center, lb_fluid.agrid, 0.5, surface_nodes) + direction = 1 if (i + j) % 2 == 0 else -1 + for node, tangent in zip(surface_nodes, tangents): + vbb = espressomd.lb.VelocityBounceBack(0.01 * direction * tangent) + 
lb_fluid[node].boundary = vbb + +# the system needs to be fully symmetric +mask = np.copy(lb_fluid[:, :, :].is_boundary.astype(int)) +np.testing.assert_array_equal(mask, np.flip(mask, axis=0)) +np.testing.assert_array_equal(mask, np.flip(mask, axis=1)) +np.testing.assert_array_equal(mask, np.flip(mask, axis=2)) + +if args.visualizer: + import espressomd.visualization + visualizer = espressomd.visualization.openGLLive( + system, + LB_draw_velocity_plane=True, + LB_plane_dist=0, + LB_plane_axis=2, + LB_vel_scale=80, + LB_vel_radius_scale=0.05, + LB_plane_ngrid=24, + LB_arrow_quality=6, + quality_constraints=48, + camera_position=[4, 4, 50], + background_color=[1, 1, 1], + velocity_arrows_type_colors=[[0, 1, 0]] + ) + visualizer.run(1) + +# equilibrate the fluid +logging.info('Integration loop') +for _ in tqdm.tqdm(range(40)): + system.integrator.run(20) + +# fetch fluid and slip velocities +boundary_mask = np.squeeze(lb_fluid[:, :, 0].is_boundary.astype(bool)) +quivers_boundary = [] +quivers_fluid = [] +for i, j in itertools.product(range(boundary_mask.shape[0]), + range(boundary_mask.shape[1])): + v_fluid = lb_fluid[i, j, 0].velocity + if boundary_mask[i, j]: + if np.linalg.norm(v_fluid) > 1e-10: + quivers_boundary.append([i, j, v_fluid[0], v_fluid[1]]) + else: + quivers_fluid.append([i, j, v_fluid[0], v_fluid[1]]) + +# prepare canvas +logging.info('Plotting') +plt.rcParams.update({'font.size': 16}) +fig1 = plt.figure() +fig2 = plt.figure() +fig3 = plt.figure() +ax1 = fig1.add_subplot(111) +ax2 = fig2.add_subplot(111) +ax3 = fig3.add_subplot(111) + +# plot fluid velocity +fluid_vel = np.mean(np.linalg.norm( + lb_fluid[:, :, :].velocity, axis=-1), axis=-1) +mask = np.ones(fluid_vel.shape) * np.nan +mask[np.nonzero(np.squeeze(lb_fluid[:, :, 0].is_boundary))] = 0 +img = ax1.imshow(fluid_vel.T, origin='lower', interpolation='bilinear') +cbar = plt.colorbar(img, ax=ax1) +cbar.set_label('Fluid velocity (MD units)', rotation=90, labelpad=10) +ax1.imshow(mask.T, 
origin='lower', interpolation='nearest') +ax1.set_xticks([]) +ax1.set_yticks([]) +ax1.set(xlabel='x-axis', ylabel='y-axis') + +# plot fluid velocity between the rollers +ax2.plot(agrid * np.arange(fluid_vel.shape[1]), + np.mean(fluid_vel[31:33, :], axis=0), label='$V(x, y=L / 2)$') +ax2.set_xticks(np.arange(0, system.box_l[1] + 1, 4.0)) +ax2.set(xlabel='x-axis (MD units)', ylabel='Fluid velocity (MD units)') +ax2.legend() + +# plot boundary geometry +cmap = matplotlib.colors.ListedColormap(['white', 'silver', 'silver']) +cmap_bounds = [0, 1, 2] +cmap_norm = matplotlib.colors.BoundaryNorm(cmap_bounds, cmap.N) +ax3.imshow(boundary_mask.T, origin='lower', interpolation='nearest', cmap=cmap, + norm=cmap_norm) + +# add grid lines based on minor ticks +minor_locator = matplotlib.ticker.FixedLocator(np.arange(0.5, grid_size[0], 1)) +ax3.xaxis.set_minor_locator(minor_locator) +ax3.yaxis.set_minor_locator(minor_locator) +ax3.tick_params(axis='both', which='minor', length=0) +ax3.grid(which='minor', color='w', linestyle='-', linewidth=1.2, zorder=2) + +# remove major ticks +ax3.set_xticks([]) +ax3.set_yticks([]) + +# add cylinder radii +# for cyl in rollers: +# circle = plt.Circle( +# cyl.center[:2] / agrid - agrid, cyl.radius / agrid, +# color='r', fill=False, zorder=3) +# ax3.add_patch(circle) + +# plot velocity field +quivers_boundary = np.array(quivers_boundary) +quivers_fluid = np.array(quivers_fluid) +ax3.quiver(quivers_boundary[:, 0], quivers_boundary[:, 1], quivers_boundary[:, 2], + quivers_boundary[:, 3], scale=.44, width=0.002, color='black', + zorder=4, label='slip velocity') +ax3.quiver(quivers_fluid[:, 0], quivers_fluid[:, 1], quivers_fluid[:, 2], + quivers_fluid[:, 3], scale=.44, width=0.002, color='royalblue', + zorder=4, label='fluid velocity') +ax3.set(xlabel='x-axis', ylabel='y-axis') +ax3.legend(framealpha=1, loc='upper right') + +plt.show() diff --git a/samples/lb_planar_couette.py b/samples/lb_planar_couette.py new file mode 100644 index 
00000000000..8ffbc8f0680 --- /dev/null +++ b/samples/lb_planar_couette.py @@ -0,0 +1,108 @@ +# +# Copyright (C) 2021-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +""" +Simulate the flow profile of a lattice-Boltzmann fluid between two +shear planes with Lees-Edwards boundary conditions and compare it +to the analytical solution. +""" + +import espressomd +import espressomd.lb +import espressomd.lees_edwards + +import numpy as np +import matplotlib.pyplot as plt + +required_features = ["WALBERLA"] +espressomd.assert_features(required_features) + + +def analytical(x, t, nu, v, h, k_max): + """ + Analytical solution with Fourier series of the Navier-Stokes equation. + + Parameters + ---------- + x : :obj:`float` + Height within the channel + t : :obj:`float` + Time since the start up of the shear flow + nu: :obj:`float` + Kinematic viscosity + v: :obj:`float` + Shearing velocity + h : :obj:`float` + Distance between shear planes + k_max : :obj:`int` + Upper limit of sums for sinus series + """ + u = x / h - 0.5 + for k in np.arange(1, k_max + 1): + wave = 2 * np.pi * k / h + u += np.exp(-nu * wave ** 2 * t) * np.sin(wave * x) / (np.pi * k) + return v * u + + +# LB and LE parameters +nu = 1. / 6. +h = 64.0 +v = 0.02 +k_max = 100 + +system = espressomd.System(box_l=[h, 64, 1]) +system.time_step = 1. 
+system.cell_system.skin = 0.1 +system.cell_system.set_n_square() + +system.lees_edwards.set_boundary_conditions( + shear_direction="x", shear_plane_normal="y", + protocol=espressomd.lees_edwards.LinearShear( + shear_velocity=v, initial_pos_offset=0.0, time_0=0.0)) + +lbf = espressomd.lb.LBFluidWalberla( + agrid=1., density=1., kinematic_viscosity=nu, tau=1.) +system.actors.add(lbf) + +# sampling +time_breakpoints = [50, 200, 500, 2000] +pos_breakpoints = 256 +for steps in time_breakpoints: + steps -= int(system.time) - 1 + system.integrator.run(steps) + time = system.time - 1. + position_ref = np.linspace(0.5, 63.5, pos_breakpoints) + position_lbf = np.linspace(0.5, 63.5, 64) + velocity_ref = analytical(position_ref, time, nu, v, h, k_max) + velocity_lbf = np.copy(lbf[5, :, 0].velocity[:, 0].reshape([-1])) + ax = plt.gca() + color = next(ax._get_lines.prop_cycler)['color'] + plt.plot(velocity_ref, position_ref, '-', color=color, + label=f"Analytical solution at t={time:.0f}") + plt.plot(velocity_lbf, position_lbf, 'o', color=color, + label=f"Simulated profile at t={time:.0f}") + +plt.xlabel('shear velocity') +plt.ylabel('y-position') +# format legend in 2 columns +ax = plt.gca() +handles, labels = ax.get_legend_handles_labels() +handles, labels = zip(*sorted(zip(handles, labels), key=lambda x: x[1][0])) +ax.legend(handles, labels, ncol=2) +plt.show() diff --git a/samples/lb_profile.py b/samples/lb_profile.py index 63ebad94ecc..6043a649fce 100644 --- a/samples/lb_profile.py +++ b/samples/lb_profile.py @@ -28,11 +28,10 @@ import espressomd.lb import espressomd.observables import espressomd.shapes -import espressomd.lbboundaries import espressomd.accumulators import espressomd.math -required_features = ["LB_BOUNDARIES"] +required_features = ["WALBERLA"] espressomd.assert_features(required_features) system = espressomd.System(box_l=[10.0, 10.0, 5.0]) @@ -42,9 +41,9 @@ n_steps_warmup = 1000 n_steps = 800 -lb_fluid = espressomd.lb.LBFluid( - agrid=1.0, dens=1.0, 
visc=1.0, tau=0.01, - ext_force_density=[0, 0, 0.15], kT=1.0, seed=32) +lb_fluid = espressomd.lb.LBFluidWalberla( + agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=0.01, + ext_force_density=[0, 0, 0.15], kT=0.0) system.actors.add(lb_fluid) system.thermostat.set_lb(LB_fluid=lb_fluid, seed=23) ctp = espressomd.math.CylindricalTransformationParameters( @@ -64,8 +63,7 @@ direction=-1, radius=radius, length=20.0) -cylinder_boundary = espressomd.lbboundaries.LBBoundary(shape=cylinder_shape) -system.lbboundaries.add(cylinder_boundary) +lb_fluid.add_boundary_from_shape(cylinder_shape) # equilibrate fluid system.integrator.run(n_steps_warmup) diff --git a/samples/lbf.py b/samples/lbf.py index 2792d681ead..643fb172469 100644 --- a/samples/lbf.py +++ b/samples/lbf.py @@ -38,7 +38,7 @@ ======================================================= """) -required_features = ["EXTERNAL_FORCES"] +required_features = ["WALBERLA", "EXTERNAL_FORCES"] if args.gpu: print("Using GPU implementation") required_features.append("CUDA") @@ -59,13 +59,13 @@ particle = system.part.add(pos=[box_l / 2.0] * 3, fix=[True, True, True]) -lb_params = {'agrid': 1, 'dens': 1, 'visc': 1, 'tau': 0.01, +lb_params = {'agrid': 1, 'density': 1, 'kinematic_viscosity': 1, 'tau': 0.01, 'ext_force_density': [0, 0, -1.0 / (box_l**3)]} if args.gpu: - lbf = espressomd.lb.LBFluidGPU(**lb_params) + lbf = espressomd.lb.LBFluidWalberlaGPU(**lb_params) else: - lbf = espressomd.lb.LBFluid(**lb_params) + lbf = espressomd.lb.LBFluidWalberla(**lb_params) system.actors.add(lbf) system.thermostat.set_lb(LB_fluid=lbf, gamma=1.0) print(lbf.get_params()) diff --git a/samples/object_in_fluid/motivation.py b/samples/object_in_fluid/motivation.py index 5cf2d5653dd..52f5316ae82 100644 --- a/samples/object_in_fluid/motivation.py +++ b/samples/object_in_fluid/motivation.py @@ -20,14 +20,13 @@ """ import espressomd -import espressomd.lbboundaries import espressomd.shapes -required_features = ["LB_BOUNDARIES", "EXTERNAL_FORCES", 
"SOFT_SPHERE", - "MASS"] +required_features = ["WALBERLA", "EXTERNAL_FORCES", "SOFT_SPHERE", "MASS"] espressomd.assert_features(required_features) import os +import tqdm import argparse import warnings @@ -48,8 +47,8 @@ boxX = 22.0 boxY = 14.0 -boxZ = 15.0 -time_step = 0.1 +boxZ = 6.0 +time_step = 0.05 system = espressomd.System(box_l=(boxX, boxY, boxZ)) system.time_step = time_step @@ -65,92 +64,89 @@ # creating the RBCs cell0 = oif.OifCell(cell_type=cell_type, particle_type=0, origin=[5.0, 5.0, 3.0]) -cell1 = oif.OifCell(cell_type=cell_type, - particle_type=1, origin=[5.0, 5.0, 7.0]) # cell-wall interactions -system.non_bonded_inter[0, 10].soft_sphere.set_params( - a=0.0001, n=1.2, cutoff=0.1, offset=0.0) -system.non_bonded_inter[1, 10].soft_sphere.set_params( +system.non_bonded_inter[cell0.particle_type, 10].soft_sphere.set_params( a=0.0001, n=1.2, cutoff=0.1, offset=0.0) # fluid -lbf = espressomd.lb.LBFluid(agrid=1, dens=1.0, visc=1.5, tau=0.1, - ext_force_density=[0.002, 0.0, 0.0]) +lbf = espressomd.lb.LBFluidWalberla( + agrid=1., density=1., kinematic_viscosity=1.5, tau=system.time_step, + ext_force_density=[0.025, 0., 0.], single_precision=True) system.actors.add(lbf) system.thermostat.set_lb(LB_fluid=lbf, gamma=1.5) # creating boundaries and obstacles in the channel # OutputVtk writes a file -# lbboundaries created boundaries for fluid -# constraints created boundaries for the cells +# boundaries for the fluid are set up by marking LB nodes as boundaries, here with the help of shapes +# boundaries for the cells are created by creating constraints from the shapes -boundaries = [] +boundary_shapes = [] # bottom of the channel bottom_shape = espressomd.shapes.Rhomboid(corner=[0.0, 0.0, 0.0], a=[boxX, 0.0, 0.0], b=[0.0, boxY, 0.0], c=[0.0, 0.0, 1.0], direction=1) -boundaries.append(bottom_shape) +boundary_shapes.append(bottom_shape) output_vtk_rhomboid( bottom_shape, out_file=os.path.join(output_path, "wallBottom.vtk")) # top of the channel top_shape = 
espressomd.shapes.Rhomboid(corner=[0.0, 0.0, boxZ - 1], a=[boxX, 0.0, 0.0], b=[0.0, boxY, 0.0], c=[0.0, 0.0, 1.0], direction=1) -boundaries.append(top_shape) +boundary_shapes.append(top_shape) output_vtk_rhomboid( top_shape, out_file=os.path.join(output_path, "wallTop.vtk")) # front wall of the channel front_shape = espressomd.shapes.Rhomboid(corner=[0.0, 0.0, 0.0], a=[boxX, 0.0, 0.0], b=[0.0, 1.0, 0.0], c=[0.0, 0.0, boxZ], direction=1) -boundaries.append(front_shape) +boundary_shapes.append(front_shape) output_vtk_rhomboid( front_shape, out_file=os.path.join(output_path, "wallFront.vtk")) # back wall of the channel back_shape = espressomd.shapes.Rhomboid(corner=[0.0, boxY - 1.0, 0.0], a=[boxX, 0.0, 0.0], b=[0.0, 1.0, 0.0], c=[0.0, 0.0, boxZ], direction=1) -boundaries.append(back_shape) +boundary_shapes.append(back_shape) output_vtk_rhomboid( back_shape, out_file=os.path.join(output_path, "wallBack.vtk")) # obstacle - cylinder A cylA_shape = espressomd.shapes.Cylinder(center=[11.0, 2.0, boxZ / 2.], axis=[0.0, 0.0, 1.0], length=boxZ, radius=2.0, direction=1) -boundaries.append(cylA_shape) +boundary_shapes.append(cylA_shape) output_vtk_cylinder( cylA_shape, n=20, out_file=os.path.join(output_path, "cylinderA.vtk")) # obstacle - cylinder B cylB_shape = espressomd.shapes.Cylinder(center=[16.0, 8.0, boxZ / 2.], axis=[0.0, 0.0, 1.0], length=boxZ, radius=2.0, direction=1) -boundaries.append(cylB_shape) +boundary_shapes.append(cylB_shape) output_vtk_cylinder( cylB_shape, n=20, out_file=os.path.join(output_path, "cylinderB.vtk")) # obstacle - cylinder C cylC_shape = espressomd.shapes.Cylinder(center=[11.0, 12.0, boxZ / 2.], axis=[0.0, 0.0, 1.0], length=boxZ, radius=2.0, direction=1) -boundaries.append(cylC_shape) +boundary_shapes.append(cylC_shape) output_vtk_cylinder( cylC_shape, n=20, out_file=os.path.join(output_path, "cylinderC.vtk")) -for boundary in boundaries: - system.lbboundaries.add(espressomd.lbboundaries.LBBoundary(shape=boundary)) - 
system.constraints.add(shape=boundary, particle_type=10) +for shape in boundary_shapes: + lbf.add_boundary_from_shape(shape) + system.constraints.add(shape=shape, particle_type=10) + + +def write_cells_vtk(i): + filepath = os.path.join(output_path, "cell{cell_id}_{index}.vtk") + cell0.output_vtk_pos_folded(file_name=filepath.format(cell_id=0, index=i)) -maxCycle = 50 +maxCycle = 100 # main integration loop -cell0.output_vtk_pos_folded(file_name=os.path.join(output_path, "cell0_0.vtk")) -cell1.output_vtk_pos_folded(file_name=os.path.join(output_path, "cell1_0.vtk")) -for i in range(1, maxCycle): - system.integrator.run(steps=500) - cell0.output_vtk_pos_folded( - file_name=os.path.join(output_path, f"cell0_{i}.vtk")) - cell1.output_vtk_pos_folded( - file_name=os.path.join(output_path, f"cell1_{i}.vtk")) - print(f"time: {i * time_step:.1f}") +for i in tqdm.tqdm(range(maxCycle)): + write_cells_vtk(i) + system.integrator.run(steps=100) +write_cells_vtk(maxCycle) print("Simulation completed.") diff --git a/samples/visualization_lbboundaries.py b/samples/visualization_lbboundaries.py index 77b96fbd565..92ca3b59b7f 100644 --- a/samples/visualization_lbboundaries.py +++ b/samples/visualization_lbboundaries.py @@ -24,18 +24,17 @@ import espressomd import espressomd.lb import espressomd.shapes -import espressomd.lbboundaries import espressomd.visualization -required_features = ["LB_BOUNDARIES"] +required_features = ["WALBERLA"] espressomd.assert_features(required_features) system = espressomd.System(box_l=[10.0, 10.0, 5.0]) system.time_step = 0.01 system.cell_system.skin = 0.4 -lb_fluid = espressomd.lb.LBFluid( - agrid=1.0, dens=1.0, visc=1.0, tau=0.01, ext_force_density=[0, 0, 0.15]) +lb_fluid = espressomd.lb.LBFluidWalberla( + agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=0.01, ext_force_density=[0, 0, 0.15]) system.actors.add(lb_fluid) cylinder_shape = espressomd.shapes.Cylinder( @@ -44,8 +43,7 @@ direction=-1, radius=4.0, length=20.0) -cylinder_boundary = 
espressomd.lbboundaries.LBBoundary(shape=cylinder_shape) -system.lbboundaries.add(cylinder_boundary) +lb_fluid.add_boundary_from_shape(cylinder_shape) visualizer = espressomd.visualization.openGLLive( system, diff --git a/samples/visualization_poiseuille.py b/samples/visualization_poiseuille.py index 4cf9eda33b6..94c7f614cc2 100644 --- a/samples/visualization_poiseuille.py +++ b/samples/visualization_poiseuille.py @@ -24,12 +24,11 @@ import espressomd import espressomd.lb -import espressomd.lbboundaries import espressomd.shapes import espressomd.visualization import numpy as np -required_features = ["LB_BOUNDARIES", "EXTERNAL_FORCES"] +required_features = ["WALBERLA", "EXTERNAL_FORCES"] espressomd.assert_features(required_features) # System setup @@ -54,21 +53,20 @@ velocity_arrows_type_radii=[0.1], velocity_arrows_type_colors=[[0, 1, 0]]) -lbf = espressomd.lb.LBFluid(kT=0, agrid=1.0, dens=1.0, visc=1.0, tau=0.1, - ext_force_density=[0, 0.003, 0]) +lbf = espressomd.lb.LBFluidWalberla(kT=0, agrid=1.0, density=1.0, kinematic_viscosity=1.0, + tau=0.1, ext_force_density=[0, 0.003, 0]) system.actors.add(lbf) system.thermostat.set_lb(LB_fluid=lbf, gamma=1.5) # Setup boundaries -walls = [espressomd.lbboundaries.LBBoundary() for k in range(2)] -walls[0].set_params(shape=espressomd.shapes.Wall(normal=[1, 0, 0], dist=1.5)) -walls[1].set_params(shape=espressomd.shapes.Wall( - normal=[-1, 0, 0], dist=-14.5)) +wall_shapes = [None] * 2 +wall_shapes[0] = espressomd.shapes.Wall(normal=[1, 0, 0], dist=1.5) +wall_shapes[1] = espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-14.5) for i in range(100): system.part.add(pos=np.random.random(3) * system.box_l) -for wall in walls: - system.lbboundaries.add(wall) +for wall_shape in wall_shapes: + lbf.add_boundary_from_shape(wall_shape) visualizer.run(1) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 14175bfd6fb..40e23972f3c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,6 +40,10 @@ 
if(ESPRESSO_BUILD_WITH_SCAFACOS) add_subdirectory(scafacos) endif() +if(ESPRESSO_BUILD_WITH_WALBERLA) + add_subdirectory(walberla_bridge) +endif() + if(ESPRESSO_BUILD_WITH_PYTHON) add_subdirectory(script_interface) add_subdirectory(python) diff --git a/src/config/features.def b/src/config/features.def index 10e462e3615..4e27c01ae43 100644 --- a/src/config/features.def +++ b/src/config/features.def @@ -75,14 +75,7 @@ VIRTUAL_SITES_INERTIALESS_TRACERS implies VIRTUAL_SITES DPD /* Lattice-Boltzmann features */ -LB_BOUNDARIES -LB_BOUNDARIES_GPU requires CUDA LB_ELECTROHYDRODYNAMICS -ELECTROKINETICS implies EXTERNAL_FORCES, ELECTROSTATICS -ELECTROKINETICS requires CUDA -EK_BOUNDARIES implies ELECTROKINETICS, LB_BOUNDARIES_GPU, EXTERNAL_FORCES, ELECTROSTATICS -EK_BOUNDARIES requires CUDA -EK_DEBUG requires ELECTROKINETICS /* Interaction features */ TABULATED @@ -120,4 +113,6 @@ HDF5 external SCAFACOS external GSL external STOKESIAN_DYNAMICS external +WALBERLA external +WALBERLA_FFT external VALGRIND_MARKERS external diff --git a/src/config/myconfig-default.hpp b/src/config/myconfig-default.hpp index 5b3b0ad410f..4e8c8df611a 100644 --- a/src/config/myconfig-default.hpp +++ b/src/config/myconfig-default.hpp @@ -48,14 +48,6 @@ // Active matter #define ENGINE -// Hydrodynamics, Electrokinetics -#define LB_BOUNDARIES -#ifdef CUDA -#define LB_BOUNDARIES_GPU -#define ELECTROKINETICS -#define EK_BOUNDARIES -#endif - // Force/energy calculation #define EXCLUSIONS diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 04465777b7d..72830217e30 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -61,27 +61,13 @@ set_target_properties(espresso_core PROPERTIES CXX_CLANG_TIDY "${ESPRESSO_CXX_CLANG_TIDY}") if(ESPRESSO_BUILD_WITH_CUDA) - target_sources( - espresso_core - PRIVATE cuda_init.cpp cuda_interface.cpp - grid_based_algorithms/electrokinetics.cpp - grid_based_algorithms/lbgpu.cpp) + target_sources(espresso_core PRIVATE cuda_init.cpp 
cuda_interface.cpp) espresso_add_gpu_library( - espresso_cuda - SHARED - cuda_common_cuda.cu - cuda_init_cuda.cu - CudaHostAllocator.cu - magnetostatics/barnes_hut_gpu_cuda.cu + espresso_cuda SHARED cuda_common_cuda.cu cuda_init_cuda.cu + CudaHostAllocator.cu magnetostatics/barnes_hut_gpu_cuda.cu magnetostatics/dipolar_direct_sum_gpu_cuda.cu - electrostatics/mmm1d_gpu_cuda.cu - electrostatics/p3m_gpu_cuda.cu - electrostatics/p3m_gpu_error_cuda.cu - EspressoSystemInterface_cuda.cu - grid_based_algorithms/electrokinetics_cuda.cu - grid_based_algorithms/lbgpu_cuda.cu - grid_based_algorithms/fd-electrostatics_cuda.cu - virtual_sites/lb_inertialess_tracers_cuda.cu) + electrostatics/mmm1d_gpu_cuda.cu electrostatics/p3m_gpu_cuda.cu + electrostatics/p3m_gpu_error_cuda.cu EspressoSystemInterface_cuda.cu) add_library(espresso::cuda ALIAS espresso_cuda) target_link_libraries( espresso_cuda PRIVATE CUDA::cuda_driver CUDA::cudart CUDA::cufft @@ -89,8 +75,6 @@ if(ESPRESSO_BUILD_WITH_CUDA) target_include_directories( espresso_cuda PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}/virtual_sites - ${CMAKE_CURRENT_SOURCE_DIR}/grid_based_algorithms ${CMAKE_CURRENT_SOURCE_DIR}/electrostatics ${CMAKE_CURRENT_SOURCE_DIR}/magnetostatics) set_target_properties(espresso_cuda PROPERTIES CUDA_CLANG_TIDY diff --git a/src/core/analysis/statistics.cpp b/src/core/analysis/statistics.cpp index 27205a69f84..b48c0619676 100644 --- a/src/core/analysis/statistics.cpp +++ b/src/core/analysis/statistics.cpp @@ -28,7 +28,6 @@ #include "Particle.hpp" #include "cells.hpp" -#include "communication.hpp" #include "errorhandling.hpp" #include "grid.hpp" #include "grid_based_algorithms/lb_interface.hpp" @@ -84,8 +83,8 @@ Utils::Vector3d calc_linear_momentum(bool include_particles, return m + p.mass() * p.v(); }); } - if (include_lbfluid) { - momentum += lb_lbfluid_calc_fluid_momentum(); + if (include_lbfluid and lattice_switch != ActiveLB::NONE) { + momentum += LB::calc_fluid_momentum() * 
LB::get_lattice_speed(); } return momentum; } diff --git a/src/core/communication.cpp b/src/core/communication.cpp index 2cee973cc02..7c4a430306a 100644 --- a/src/core/communication.cpp +++ b/src/core/communication.cpp @@ -19,12 +19,18 @@ * along with this program. If not, see . */ +#include "config/config.hpp" + #include "communication.hpp" #include "errorhandling.hpp" #include "event.hpp" #include "grid.hpp" +#ifdef WALBERLA +#include +#endif + #include #include @@ -73,6 +79,10 @@ void init(std::shared_ptr mpi_env) { ErrorHandling::init_error_handling(mpiCallbacks()); +#ifdef WALBERLA + walberla::mpi_init(); +#endif + on_program_start(); } } // namespace Communication diff --git a/src/core/cuda_utils.cuh b/src/core/cuda_utils.cuh index a72be413dd6..6bdafb96a6e 100644 --- a/src/core/cuda_utils.cuh +++ b/src/core/cuda_utils.cuh @@ -27,7 +27,6 @@ #include -#include #include class cuda_runtime_error_cuda : public cuda_runtime_error { @@ -71,18 +70,6 @@ void cuda_check_errors_exit(const dim3 &block, const dim3 &grid, #define cuda_safe_mem(a) cuda_safe_mem_exit((a), __FILE__, __LINE__) -/** Calculate @c dim_grid for CUDA kernel calls. */ -inline dim3 calculate_dim_grid(unsigned const threads_x, - unsigned const blocks_per_grid_y, - unsigned const threads_per_block) { - assert(threads_x >= 1); - assert(blocks_per_grid_y >= 1); - assert(threads_per_block >= 1); - auto const threads_y = threads_per_block * blocks_per_grid_y; - auto const blocks_per_grid_x = (threads_x + threads_y - 1) / threads_y; - return make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); -} - #define KERNELCALL_shared(_function, _grid, _block, _stream, ...) 
\ _function<<<_grid, _block, _stream, stream[0]>>>(__VA_ARGS__); \ cuda_check_errors_exit(_grid, _block, #_function, __FILE__, __LINE__); diff --git a/src/core/electrostatics/coulomb.cpp b/src/core/electrostatics/coulomb.cpp index c6073e34791..0f4a5860306 100644 --- a/src/core/electrostatics/coulomb.cpp +++ b/src/core/electrostatics/coulomb.cpp @@ -30,7 +30,6 @@ #include "communication.hpp" #include "electrostatics/icc.hpp" #include "errorhandling.hpp" -#include "grid_based_algorithms/electrokinetics.hpp" #include "integrate.hpp" #include "npt.hpp" #include "partCfg_global.hpp" @@ -284,12 +283,6 @@ void calc_long_range_force(ParticleRange const &particles) { if (electrostatics_actor) { boost::apply_visitor(LongRangeForce(particles), *electrostatics_actor); } -#ifdef ELECTROKINETICS - /* Add fields from EK if enabled */ - if (this_node == 0) { - ek_calculate_electrostatic_coupling(); - } -#endif } double calc_energy_long_range(ParticleRange const &particles) { diff --git a/src/core/event.cpp b/src/core/event.cpp index bf1a3588d16..18dc0ef34bc 100644 --- a/src/core/event.cpp +++ b/src/core/event.cpp @@ -38,8 +38,6 @@ #include "electrostatics/icc.hpp" #include "errorhandling.hpp" #include "grid.hpp" -#include "grid_based_algorithms/electrokinetics.hpp" -#include "grid_based_algorithms/lb_boundaries.hpp" #include "grid_based_algorithms/lb_interface.hpp" #include "immersed_boundaries.hpp" #include "integrate.hpp" @@ -97,14 +95,12 @@ void on_integration_start(double time_step) { integrator_npt_sanity_checks(); #endif long_range_interactions_sanity_checks(); - lb_lbfluid_sanity_checks(time_step); + LB::sanity_checks(time_step); /********************************************/ /* end sanity checks */ /********************************************/ - lb_lbfluid_on_integration_start(); - #ifdef CUDA MPI_Bcast(gpu_get_global_particle_vars_pointer_host(), sizeof(CUDA_global_part_vars), MPI_BYTE, 0, comm_cart); @@ -168,12 +164,6 @@ void on_observable_calc() { } #endif /* DIPOLES 
*/ -#ifdef ELECTROKINETICS - if (ek_initialized) { - ek_integrate_electrostatics(); - } -#endif /* ELECTROKINETICS */ - clear_particle_node(); } @@ -248,13 +238,7 @@ void on_short_range_ia_change() { void on_constraint_change() { recalc_forces = true; } -void on_lbboundary_change() { -#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU) - LBBoundaries::lb_init_boundaries(); - - recalc_forces = true; -#endif -} +void on_lb_boundary_conditions_change() { recalc_forces = true; } void on_boxl_change(bool skip_method_adaption) { grid_changed_box_l(box_geo); @@ -272,16 +256,20 @@ void on_boxl_change(bool skip_method_adaption) { Dipoles::on_boxl_change(); #endif - lb_lbfluid_init(); -#ifdef LB_BOUNDARIES - LBBoundaries::lb_init_boundaries(); -#endif + LB::init(); } } void on_cell_structure_change() { clear_particle_node(); + if (lattice_switch == ActiveLB::WALBERLA_LB) { + throw std::runtime_error( + "LB does not currently support handling changes of the MD cell " + "geometry. Setup the cell system, skin and interactions before " + "activating the CPU LB."); + } + /* Now give methods a chance to react to the change in cell structure. * Most ES methods need to reinitialize, as they depend on skin, * node grid and so on. */ @@ -294,7 +282,11 @@ void on_cell_structure_change() { #endif } -void on_temperature_change() { lb_lbfluid_reinit_parameters(); } +void on_temperature_change() { + if (lattice_switch != ActiveLB::NONE) { + throw std::runtime_error("Temperature change not supported by LB"); + } +} void on_periodicity_change() { #ifdef ELECTROSTATICS @@ -323,7 +315,9 @@ void on_skin_change() { void on_thermostat_param_change() { reinit_thermo = true; } void on_timestep_change() { - lb_lbfluid_reinit_parameters(); + if (lattice_switch != ActiveLB::NONE) { + throw std::runtime_error("Time step change not supported by LB"); + } on_thermostat_param_change(); } @@ -349,7 +343,7 @@ unsigned global_ghost_flags() { /* Position and Properties are always requested. 
*/ unsigned data_parts = Cells::DATA_PART_POSITION | Cells::DATA_PART_PROPERTIES; - if (lattice_switch == ActiveLB::CPU) + if (lattice_switch == ActiveLB::WALBERLA_LB) data_parts |= Cells::DATA_PART_MOMENTUM; if (thermo_switch & THERMO_DPD) diff --git a/src/core/event.hpp b/src/core/event.hpp index 8f1f56c6af6..3fda162f9ad 100644 --- a/src/core/event.hpp +++ b/src/core/event.hpp @@ -130,8 +130,10 @@ void on_node_grid_change(); unsigned global_ghost_flags(); -/** called every time the walls for the lb fluid are changed */ -void on_lbboundary_change(); +/** @brief Called when the LB boundary conditions are changed + * (geometry, slip velocity, or both). + */ +void on_lb_boundary_conditions_change(); /** @brief Update particles with properties depending on other particles, * namely virtual sites and ICC charges. diff --git a/src/core/forces.cpp b/src/core/forces.cpp index 7a553d389a2..b2db558000e 100644 --- a/src/core/forces.cpp +++ b/src/core/forces.cpp @@ -37,7 +37,6 @@ #include "forcecap.hpp" #include "forces_inline.hpp" #include "galilei/ComFixed.hpp" -#include "grid_based_algorithms/electrokinetics.hpp" #include "grid_based_algorithms/lb_interface.hpp" #include "grid_based_algorithms/lb_particle_coupling.hpp" #include "immersed_boundaries.hpp" @@ -225,8 +224,10 @@ void force_calc(CellStructure &cell_structure, double time_step, double kT) { // Must be done here. 
Forces need to be ghost-communicated immersed_boundaries.volume_conservation(cell_structure); - lb_lbcoupling_calc_particle_lattice_ia(thermo_virtual, particles, - ghost_particles, time_step); + if (lattice_switch != ActiveLB::NONE) { + lb_lbcoupling_calc_particle_lattice_ia(thermo_virtual, particles, + ghost_particles, time_step); + } #ifdef CUDA copy_forces_from_GPU(particles, this_node); diff --git a/src/core/grid.cpp b/src/core/grid.cpp index 17ef5a4df28..5f8aca36b72 100644 --- a/src/core/grid.cpp +++ b/src/core/grid.cpp @@ -44,18 +44,6 @@ Utils::Vector3i node_grid{}; void init_node_grid() { grid_changed_n_nodes(); } -int map_position_node_array(const Utils::Vector3d &pos) { - auto const f_pos = folded_position(pos, box_geo); - - Utils::Vector3i im; - for (unsigned int i = 0; i < 3; i++) { - im[i] = static_cast(std::floor(f_pos[i] / local_geo.length()[i])); - im[i] = std::clamp(im[i], 0, node_grid[i] - 1); - } - - return Utils::Mpi::cart_rank(comm_cart, im); -} - Utils::Vector3i calc_node_pos(const boost::mpi::communicator &comm) { return Utils::Mpi::cart_coords<3>(comm, comm.rank()); } diff --git a/src/core/grid.hpp b/src/core/grid.hpp index a690695d7f6..2b8a17693e6 100644 --- a/src/core/grid.hpp +++ b/src/core/grid.hpp @@ -49,9 +49,6 @@ extern Utils::Vector3i node_grid; */ void init_node_grid(); -/** @brief Map a spatial position to the node grid */ -int map_position_node_array(const Utils::Vector3d &pos); - /** @brief Fill neighbor lists of node. * * Calculates the numbers of the nearest neighbors for a node. 
diff --git a/src/core/grid_based_algorithms/CMakeLists.txt b/src/core/grid_based_algorithms/CMakeLists.txt index a568c73a9bc..1f9c29d3a76 100644 --- a/src/core/grid_based_algorithms/CMakeLists.txt +++ b/src/core/grid_based_algorithms/CMakeLists.txt @@ -19,11 +19,15 @@ target_sources( espresso_core - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/halo.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lattice.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lb_boundaries.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lb_collective_interface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lb.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lb_interface.cpp + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/lb_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lb_interpolation.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lb_particle_coupling.cpp) + ${CMAKE_CURRENT_SOURCE_DIR}/lb_particle_coupling.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ek_container.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ek_reactions.cpp) + +if(ESPRESSO_BUILD_WITH_WALBERLA) + target_link_libraries(espresso_core PRIVATE espresso::walberla + ${WALBERLA_LIBS}) + target_sources(espresso_core + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/lb_walberla_instance.cpp) +endif() diff --git a/src/core/grid_based_algorithms/EKReactions.hpp b/src/core/grid_based_algorithms/EKReactions.hpp new file mode 100644 index 00000000000..b1b7905e920 --- /dev/null +++ b/src/core/grid_based_algorithms/EKReactions.hpp @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2022-2023 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef ESPRESSO_EKREACTIONS_HPP +#define ESPRESSO_EKREACTIONS_HPP + +#include +#include +#include + +template class EKReactions { + using container_type = std::vector>; + +public: + using value_type = typename container_type::value_type; + using iterator = typename container_type::iterator; + using const_iterator = typename container_type::const_iterator; + +private: + container_type m_ekreactions; + +public: + void add(std::shared_ptr const &c) { + assert(std::find(m_ekreactions.begin(), m_ekreactions.end(), c) == + m_ekreactions.end()); + + m_ekreactions.emplace_back(c); + } + void remove(std::shared_ptr const &c) { + assert(std::find(m_ekreactions.begin(), m_ekreactions.end(), c) != + m_ekreactions.end()); + m_ekreactions.erase( + std::remove(m_ekreactions.begin(), m_ekreactions.end(), c), + m_ekreactions.end()); + } + + iterator begin() { return m_ekreactions.begin(); } + iterator end() { return m_ekreactions.end(); } + const_iterator begin() const { return m_ekreactions.begin(); } + const_iterator end() const { return m_ekreactions.end(); } + [[nodiscard]] bool empty() const { return m_ekreactions.empty(); } +}; + +#endif diff --git a/src/core/grid_based_algorithms/OptionalCounter.hpp b/src/core/grid_based_algorithms/OptionalCounter.hpp deleted file mode 100644 index 1404e6481da..00000000000 --- a/src/core/grid_based_algorithms/OptionalCounter.hpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2020-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef OPTIONAL_COUNTER_HPP -#define OPTIONAL_COUNTER_HPP - -#include - -#include -#include - -/** Re-implementation of a boost::optional for a RNG counter. - * - * Workaround for a compiler error with Clang 9.0, boost 1.71 - * and CUDA 10.1 (see espressomd/espresso#3650). - */ -class OptionalCounter { -private: - Utils::Counter m_counter; - bool m_initialized; - -public: - OptionalCounter() : m_counter{}, m_initialized(false) {} - OptionalCounter(Utils::Counter const &counter) - : m_counter(counter), m_initialized(true) {} - OptionalCounter &operator=(Utils::Counter counter) { - m_counter = std::move(counter); - m_initialized = true; - return *this; - } - template - void serialize(Archive &ar, const unsigned int /* version */) { - ar &m_counter; - ar &m_initialized; - } - bool is_initialized() noexcept { return m_initialized; } - explicit operator bool() const noexcept { return m_initialized; } - bool operator!() const noexcept { return !m_initialized; } - Utils::Counter &operator*() { return m_counter; } - Utils::Counter *operator->() { return &m_counter; } -}; - -#endif diff --git a/src/core/grid_based_algorithms/ek_container.cpp b/src/core/grid_based_algorithms/ek_container.cpp new file mode 100644 index 00000000000..780a8eee574 --- /dev/null +++ b/src/core/grid_based_algorithms/ek_container.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2022-2023 The ESPResSo project + * + * This file is part of ESPResSo. 
+ * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "config/config.hpp" + +#include "ek_container.hpp" +#include "ek_reactions.hpp" +#include "errorhandling.hpp" +#include "lb_interface.hpp" +#include "lb_walberla_instance.hpp" + +#ifdef WALBERLA +#include +#endif // WALBERLA + +#include + +#ifdef WALBERLA +#include +#include +#include +#endif // WALBERLA + +namespace EK { + +#ifdef WALBERLA +EKContainer ek_container; +#endif // WALBERLA + +double get_tau() { +#ifdef WALBERLA + return ek_container.get_tau(); +#else + throw NoEKActive(); +#endif // WALBERLA +} + +int get_steps_per_md_step(double md_timestep) { + return static_cast(std::round(get_tau() / md_timestep)); +} + +void propagate() { +#ifdef WALBERLA + // first calculate the charge for the potential, for that get all the + // field-ids from the ekspecies pass the potential-field-id to the + // flux-kernels of the eks for this the integrate function has to be split + // with a public interface to diffusive and advective-flux this should also + // allow the back-coupling to the LB with a field-id + + if (ek_container.empty()) { + return; + } + + if (!ek_container.is_poisson_solver_set()) { + runtimeErrorMsg() << "EK requires a Poisson solver."; + return; + } + + ek_container.reset_charge(); + std::for_each(ek_container.begin(), ek_container.end(), [](auto const &ek) { + ek_container.add_charge(ek->get_density_id(), ek->get_valency(), 
+ ek->is_double_precision()); + }); + + ek_container.solve_poisson(); + + auto velocity_field_id = std::size_t{}; + auto force_field_id = std::size_t{}; + try { + auto const lbf = ::lb_walberla(); + velocity_field_id = lbf->get_velocity_field_id(); + force_field_id = lbf->get_force_field_id(); + } catch (std::runtime_error const &) { + } + + std::for_each(ek_container.begin(), ek_container.end(), + [velocity_field_id, force_field_id](auto const &ek) { + try { + ek->integrate(ek_container.get_potential_field_id(), + velocity_field_id, force_field_id); + } catch (std::runtime_error const &e) { + runtimeErrorMsg() << e.what(); + } + }); + + EK::perform_reactions(); + + for (auto const &species : ek_container) { + species->ghost_communication(); + } +#endif // WALBERLA +} + +} // namespace EK diff --git a/src/script_interface/object_container_mpi_guard.cpp b/src/core/grid_based_algorithms/ek_container.hpp similarity index 51% rename from src/script_interface/object_container_mpi_guard.cpp rename to src/core/grid_based_algorithms/ek_container.hpp index da62287b3f2..1892bb5adda 100644 --- a/src/script_interface/object_container_mpi_guard.cpp +++ b/src/core/grid_based_algorithms/ek_container.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2022 The ESPResSo project + * Copyright (C) 2022-2023 The ESPResSo project * * This file is part of ESPResSo. * @@ -17,22 +17,32 @@ * along with this program. If not, see . */ -#include "script_interface/object_container_mpi_guard.hpp" +#ifndef ESPRESSO_EK_CONTAINER_HPP +#define ESPRESSO_EK_CONTAINER_HPP -#include -#include +#include "config/config.hpp" + +#ifdef WALBERLA +#include +#include +#endif // WALBERLA -#include -#include #include -void object_container_mpi_guard(boost::string_ref const &name, - std::size_t n_elements, int world_size) { - if (world_size > 1 and n_elements) { - std::stringstream error_msg; - error_msg << "Non-empty object containers do not support checkpointing in " - << "MPI environments. 
Container " << name << " contains " - << n_elements << " elements."; - throw std::runtime_error(error_msg.str()); - } -} +struct NoEKActive : public std::exception { + const char *what() const noexcept override { return "EK not activated"; } +}; + +namespace EK { + +#ifdef WALBERLA +extern EKContainer ek_container; +#endif // WALBERLA + +double get_tau(); +int get_steps_per_md_step(double md_timestep); +void propagate(); + +} // namespace EK + +#endif diff --git a/src/core/virtual_sites/lb_inertialess_tracers.hpp b/src/core/grid_based_algorithms/ek_reactions.cpp similarity index 55% rename from src/core/virtual_sites/lb_inertialess_tracers.hpp rename to src/core/grid_based_algorithms/ek_reactions.cpp index 666cafa0432..d65fc813984 100644 --- a/src/core/virtual_sites/lb_inertialess_tracers.hpp +++ b/src/core/grid_based_algorithms/ek_reactions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2022 The ESPResSo project * * This file is part of ESPResSo. * @@ -16,22 +16,28 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/ -/// \file -/// \brief Main of the Bayreuth Immersed-Boundary implementation - -#ifndef VIRTUAL_SITES_LB_INERTIALESS_TRACERS_HPP -#define VIRTUAL_SITES_LB_INERTIALESS_TRACERS_HPP #include "config/config.hpp" -#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS +#ifdef WALBERLA + +#include "ek_reactions.hpp" + +#include + +namespace EK { + +EKReactions ek_reactions; + +void perform_reactions() { + if (ek_reactions.empty()) { + return; + } -#include "ParticleRange.hpp" + std::for_each(ek_reactions.begin(), ek_reactions.end(), + [](auto const &reaction) { reaction->perform_reaction(); }); +} -void IBM_UpdateParticlePositions(ParticleRange const &particles, - double time_step, int this_node); -void IBM_ForcesIntoFluid_CPU(); -void IBM_ForcesIntoFluid_GPU(ParticleRange const &particles, int this_node); +} // namespace EK -#endif // VIRTUAL_SITES_INERTIALESS_TRACERS -#endif +#endif // WALBERLA diff --git a/src/script_interface/lbboundaries/initialize.cpp b/src/core/grid_based_algorithms/ek_reactions.hpp similarity index 61% rename from src/script_interface/lbboundaries/initialize.cpp rename to src/core/grid_based_algorithms/ek_reactions.hpp index df2d93b5c23..91958140dcc 100644 --- a/src/script_interface/lbboundaries/initialize.cpp +++ b/src/core/grid_based_algorithms/ek_reactions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015-2022 The ESPResSo project + * Copyright (C) 2022 The ESPResSo project * * This file is part of ESPResSo. * @@ -17,17 +17,23 @@ * along with this program. If not, see . 
*/ -#include "initialize.hpp" +#ifndef ESPRESSO_EK_REACTIONS_HPP +#define ESPRESSO_EK_REACTIONS_HPP -#include "LBBoundaries.hpp" -#include "LBBoundary.hpp" +#include "config/config.hpp" -namespace ScriptInterface { -namespace LBBoundaries { +#ifdef WALBERLA -void initialize(Utils::Factory *om) { - om->register_new("LBBoundaries::LBBoundaries"); - om->register_new("LBBoundaries::LBBoundary"); -} -} /* namespace LBBoundaries */ -} /* namespace ScriptInterface */ +#include "EKReactions.hpp" +#include "walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp" + +namespace EK { + +extern EKReactions ek_reactions; + +void perform_reactions(); + +} // namespace EK + +#endif // WALBERLA +#endif diff --git a/src/core/grid_based_algorithms/electrokinetics.hpp b/src/core/grid_based_algorithms/electrokinetics.hpp deleted file mode 100644 index b9f611c2148..00000000000 --- a/src/core/grid_based_algorithms/electrokinetics.hpp +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef CORE_GRID_BASED_ALGORITHMS_ELECTROKINETICS_HPP -#define CORE_GRID_BASED_ALGORITHMS_ELECTROKINETICS_HPP - -#include "config/config.hpp" -#include "grid_based_algorithms/lb_boundaries.hpp" - -// note that we need to declare the ek_parameters struct and instantiate it for -// LB_GPU to compile when electrokinetics is not compiled in. This seemed more -// elegant than ifdeffing multiple versions of the kernel integrate. -#ifdef CUDA - -#define MAX_NUMBER_OF_SPECIES 10 - -/* Data structure holding parameters and memory pointers for the link flux - * system. */ -struct EKParameters { - float agrid; - float time_step; // MD time step - float lb_density; - unsigned int dim_x; - unsigned int dim_x_padded; - unsigned int dim_y; - unsigned int dim_z; - unsigned int number_of_nodes; - float viscosity; - float bulk_viscosity; - float gamma_odd; - float gamma_even; - float friction; - float T; - float prefactor; - float lb_ext_force_density[3]; - unsigned int number_of_species; - int reaction_species[3]; - float rho_reactant_reservoir; - float rho_product0_reservoir; - float rho_product1_reservoir; - float reaction_ct_rate; - float reaction_fraction_0; - float reaction_fraction_1; - float mass_reactant; - float mass_product0; - float mass_product1; - int stencil; - int number_of_boundary_nodes; - float fluctuation_amplitude; - bool fluctuations; - bool advection; - bool fluidcoupling_ideal_contribution; - bool es_coupling; - float *charge_potential_buffer; - float *electric_field; - float *charge_potential; - float *j; - float *lb_force_density_previous; -#ifdef EK_DEBUG - float *j_fluc; -#endif - float *rho[MAX_NUMBER_OF_SPECIES]; - int species_index[MAX_NUMBER_OF_SPECIES]; - float density[MAX_NUMBER_OF_SPECIES]; - float D[MAX_NUMBER_OF_SPECIES]; - float d[MAX_NUMBER_OF_SPECIES]; - float valency[MAX_NUMBER_OF_SPECIES]; - float ext_force_density[3][MAX_NUMBER_OF_SPECIES]; - char *node_is_catalyst; -}; - -#endif - -#ifdef ELECTROKINETICS - -/* Constants 
enumerating the links of a node in the link flux system EK_LINK_xyz - is the number of the link in direction (x, y, z), where x, y and z can be 0, - U or D representing 0 and one agrid in direction of or against the x, y or z - axis. The numbering differs from the one used in the LB since the LB - velocities are directed but the links are not. Links 0 - 8 represent - the odd LB velocities and numbers 13 - 21 represent the even LB velocities - (without the 0). In between there are the links connecting the corners, which - represent the 3rd shell not used in the LB but in the advection. The - following 13 constants are only defined for the sake of completeness.*/ - -#define EK_LINK_U00 0 -#define EK_LINK_0U0 1 -#define EK_LINK_00U 2 -#define EK_LINK_UU0 3 -#define EK_LINK_UD0 4 -#define EK_LINK_U0U 5 -#define EK_LINK_U0D 6 -#define EK_LINK_0UU 7 -#define EK_LINK_0UD 8 - -#define EK_LINK_UUU 9 -#define EK_LINK_UUD 10 -#define EK_LINK_UDU 11 -#define EK_LINK_UDD 12 - -#define EK_LINK_D00 13 -#define EK_LINK_0D0 14 -#define EK_LINK_00D 15 -#define EK_LINK_DD0 16 -#define EK_LINK_DU0 17 -#define EK_LINK_D0D 18 -#define EK_LINK_D0U 19 -#define EK_LINK_0DD 20 -#define EK_LINK_0DU 21 - -#define EK_LINK_DDD 22 -#define EK_LINK_DDU 23 -#define EK_LINK_DUD 24 -#define EK_LINK_DUU 25 - -extern EKParameters ek_parameters; -extern bool ek_initialized; - -void ek_integrate(); -void ek_integrate_electrostatics(); -void ek_print_parameters(); -void ek_print_lbpar(); -unsigned int ek_calculate_boundary_mass(); -int ek_print_vtk_density(int species, char *filename); -int ek_print_vtk_flux(int species, char *filename); -int ek_print_vtk_flux_fluc(int species, char *filename); -int ek_print_vtk_flux_link(int species, char *filename); -int ek_print_vtk_potential(char *filename); -int ek_print_vtk_particle_potential(char *filename); -int ek_print_vtk_lbforce_density(char *filename); -int ek_lb_print_vtk_density(char *filename); -int ek_lb_print_vtk_velocity(char *filename); -int 
ek_init(); -void ek_set_agrid(float agrid); -void ek_set_lb_density(float lb_density); -void ek_set_viscosity(float viscosity); -void ek_set_lb_ext_force_density(float lb_ext_force_dens_x, - float lb_ext_force_dens_y, - float lb_ext_force_dens_z); -void ek_set_friction(float friction); -void ek_set_T(float T); -void ek_set_prefactor(float prefactor); -void ek_set_electrostatics_coupling(bool electrostatics_coupling); -void ek_calculate_electrostatic_coupling(); -void ek_set_bulk_viscosity(float bulk_viscosity); -void ek_set_gamma_odd(float gamma_odd); -void ek_set_gamma_even(float gamma_even); -void ek_set_density(int species, float density); -void ek_set_D(int species, float D); -void ek_set_valency(int species, float valency); -void ek_set_ext_force_density(int species, float ext_force_density_x, - float ext_force_density_y, - float ext_force_density_z); -void ek_set_stencil(int stencil); -void ek_set_advection(bool advection); -void ek_set_fluidcoupling(bool ideal_contribution); -void ek_set_fluctuations(bool fluctuations); -void ek_set_fluctuation_amplitude(float fluctuation_amplitude); -void ek_set_rng_state(uint64_t counter); -int ek_node_get_density(int species, int x, int y, int z, double *density); -int ek_node_get_flux(int species, int x, int y, int z, double *flux); -int ek_node_get_potential(int x, int y, int z, double *potential); -int ek_node_set_density(int species, int x, int y, int z, double density); -float ek_calculate_net_charge(); -int ek_neutralize_system(int species); - -#ifdef EK_BOUNDARIES -void ek_gather_wallcharge_species_density(float *wallcharge_species_density, - int wallcharge_species); -void ek_init_species_density_wallcharge(float *wallcharge_species_density, - int wallcharge_species); -#endif - -#endif /* CUDA */ - -#endif /* CORE_GRID_BASED_ALGORITHMS_ELECTROKINETICS_HPP */ diff --git a/src/core/grid_based_algorithms/electrokinetics_cuda.cu b/src/core/grid_based_algorithms/electrokinetics_cuda.cu deleted file mode 100644 index 
8a401bf9688..00000000000 --- a/src/core/grid_based_algorithms/electrokinetics_cuda.cu +++ /dev/null @@ -1,3842 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "config/config.hpp" - -#ifdef CUDA /* Terminates at end of file */ -#ifdef ELECTROKINETICS /* Terminates at end of file */ - -#include "grid_based_algorithms/electrokinetics.hpp" - -#include "cuda_interface.hpp" -#include "cuda_utils.cuh" -#include "errorhandling.hpp" -#include "fd-electrostatics.cuh" -#include "grid_based_algorithms/lb_boundaries.hpp" -#include "grid_based_algorithms/lb_interface.hpp" -#include "grid_based_algorithms/lb_particle_coupling.hpp" -#include "grid_based_algorithms/lbgpu.cuh" -#include "grid_based_algorithms/lbgpu.hpp" -#include "integrate.hpp" - -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(OMPI_MPI_H) || defined(_MPI_H) -#error CU-file includes mpi.h! This should not happen! 
-#endif - -extern ActiveLB lattice_switch; -extern bool ek_initialized; - -// Used to limit register use for the pressure calculation -#define EK_LINK_U00_pressure 0 -#define EK_LINK_0U0_pressure 1 -#define EK_LINK_00U_pressure 2 -#define EK_LINK_D00_pressure 3 -#define EK_LINK_0D0_pressure 4 -#define EK_LINK_00D_pressure 5 - -#ifdef EK_BOUNDARIES -void LBBoundaries::lb_init_boundaries(); -#endif - -static constexpr unsigned int threads_per_block = 64; - -EKParameters ek_parameters = { - // agrid - -1.0, - // time_step - -1.0, - // lb_density - -1.0, - // dim_x - 0, - // dim_x_padded - 0, - // dim_y - 0, - // dim_z - 0, - // number_of_nodes - 0, - // viscosity - -1.0, - // bulk_viscosity - -1.0, - // gamma_odd - 0.0, - // gamma_even - 0.0, - // friction - 0.0, - // T - -1.0, - // prefactor - -1.0, - // lb_ext_force_density - {0.0, 0.0, 0.0}, - // number_of_species - 0, - // reaction_species - {-1, -1, -1}, - // rho_reactant_reservoir - -1.0, - // rho_product0_reservoir - -1.0, - // rho_product1_reservoir - -1.0, - // reaction_ct_rate - -1.0, - // reaction_fraction_0 - -1.0, - // reaction_fraction_1 - -1.0, - // mass_reactant - -1.0, - // mass_product0 - -1.0, - // mass_product1 - -1.0, - // stencil - 0, - // number_of_boundary_nodes - -1, - // fluctuation_amplitude - -1.0, - // fluctuation - false, - // advection - true, - // fluidcoupling_ideal_contribution - true, - // es_coupling - false, - // charge_potential_buffer - nullptr, - // electric_field - nullptr, - // charge_potential - nullptr, - // j - nullptr, - // lb_force_density_previous - nullptr, -#ifdef EK_DEBUG - // j_fluc - nullptr, -#endif - // rho - {}, - // species_index - {-1}, - // density - {}, - // D - {}, - // d - {}, - // valency - {}, - // ext_force_density - {}, - // node_is_catalyst - nullptr, -}; - -__device__ __constant__ EKParameters ek_parameters_gpu[1]; -float *charge_gpu; -LB_parameters_gpu *ek_lbparameters_gpu; -CUDA_particle_data *particle_data_gpu; -float *ek_lb_boundary_force; -char 
*ek_node_is_catalyst; -unsigned int old_number_of_species = 0; -unsigned int old_number_of_boundaries = 0; -Utils::Counter philox_counter = Utils::Counter(0); - -FdElectrostatics *electrostatics = nullptr; - -extern LB_parameters_gpu lbpar_gpu; -extern LB_node_force_density_gpu node_f, node_f_buf; -extern LB_nodes_gpu *current_nodes; -extern EKParameters *lb_ek_parameters; - -__device__ cufftReal ek_getNode(unsigned x, unsigned y, unsigned z) { - auto *field = - reinterpret_cast(ek_parameters_gpu->charge_potential); - return field[ek_parameters_gpu->dim_y * ek_parameters_gpu->dim_x_padded * z + - ek_parameters_gpu->dim_x_padded * y + x]; -} - -__device__ void ek_setNode(unsigned x, unsigned y, unsigned z, - cufftReal value) { - auto *field = - reinterpret_cast(ek_parameters_gpu->charge_potential); - field[ek_parameters_gpu->dim_y * ek_parameters_gpu->dim_x_padded * z + - ek_parameters_gpu->dim_x_padded * y + x] = value; -} - -__device__ cufftReal ek_getNode(unsigned i) { - auto const x = i % ek_parameters_gpu->dim_x; - i /= ek_parameters_gpu->dim_x; - auto const y = i % ek_parameters_gpu->dim_y; - auto const z = i / ek_parameters_gpu->dim_y; - return ek_getNode(x, y, z); -} - -__device__ void ek_setNode(unsigned i, cufftReal value) { - auto const x = i % ek_parameters_gpu->dim_x; - i /= ek_parameters_gpu->dim_x; - auto const y = i % ek_parameters_gpu->dim_y; - auto const z = i / ek_parameters_gpu->dim_y; - ek_setNode(x, y, z, value); -} - -__device__ unsigned int ek_getThreadIndex() { - - return blockIdx.y * gridDim.x * blockDim.x + blockDim.x * blockIdx.x + - threadIdx.x; -} - -__device__ void rhoindex_linear2cartesian(unsigned int index, - unsigned int *coord) { - - coord[0] = index % ek_parameters_gpu->dim_x; - index /= ek_parameters_gpu->dim_x; - coord[1] = index % ek_parameters_gpu->dim_y; - coord[2] = index / ek_parameters_gpu->dim_y; -} - -__device__ unsigned int -rhoindex_cartesian2linear(unsigned int x, unsigned int y, unsigned int z) { - - return z * 
ek_parameters_gpu->dim_y * ek_parameters_gpu->dim_x + - y * ek_parameters_gpu->dim_x + x; -} - -__device__ unsigned int rhoindex_cartesian2linear_padded(unsigned int x, - unsigned int y, - unsigned int z) { - - return z * ek_parameters_gpu->dim_y * ek_parameters_gpu->dim_x_padded + - y * ek_parameters_gpu->dim_x_padded + x; -} - -// TODO fluxindex fastest running might improve caching -__device__ unsigned int jindex_getByRhoLinear(unsigned int rho_index, - unsigned int c) { - - return c * ek_parameters_gpu->number_of_nodes + rho_index; -} - -__device__ void ek_displacement(float *dx, LB_nodes_gpu n, - unsigned int node_index, - LB_parameters_gpu *ek_lbparameters_gpu) { - - float rho = ek_lbparameters_gpu->rho * ek_lbparameters_gpu->agrid * - ek_lbparameters_gpu->agrid * ek_lbparameters_gpu->agrid; - - float mode[19]; - - for (unsigned i = 0; i < 19; i++) { - mode[i] = n.populations[node_index][i]; - } - - rho += mode[0] + mode[1] + mode[2] + mode[3] + mode[4] + mode[5] + mode[6] + - mode[7] + mode[8] + mode[9] + mode[10] + mode[11] + mode[12] + - mode[13] + mode[14] + mode[15] + mode[16] + mode[17] + mode[18]; - - dx[0] = (mode[1] - mode[2]) + (mode[7] - mode[8]) + (mode[9] - mode[10]) + - (mode[11] - mode[12]) + (mode[13] - mode[14]); - - dx[1] = (mode[3] - mode[4]) + (mode[7] - mode[8]) - (mode[9] - mode[10]) + - (mode[15] - mode[16]) + (mode[17] - mode[18]); - - dx[2] = (mode[5] - mode[6]) + (mode[11] - mode[12]) - (mode[13] - mode[14]) + - (mode[15] - mode[16]) - (mode[17] - mode[18]); - - // Velocity requires half the force_density in the previous time step - - dx[0] += 0.5f * ek_parameters_gpu->lb_force_density_previous[node_index]; - dx[1] += 0.5f * - ek_parameters_gpu - ->lb_force_density_previous[ek_parameters_gpu->number_of_nodes + - node_index]; - dx[2] += - 0.5f * - ek_parameters_gpu - ->lb_force_density_previous[2 * ek_parameters_gpu->number_of_nodes + - node_index]; - - dx[0] *= 1.0f / rho; - dx[1] *= 1.0f / rho; - dx[2] *= 1.0f / rho; -} - 
-__device__ void ek_diffusion_migration_lbforce_linkcentered_stencil( - unsigned int index, unsigned int index_padded, - unsigned int const *neighborindex, unsigned int const *neighborindex_padded, - unsigned int species_index, LB_node_force_density_gpu node_f, - LB_nodes_gpu lb_node) { - float flux, force; - - float agrid_inv = 1.0f / ek_parameters_gpu->agrid; - float sqrt2agrid_inv = 1.0f / (sqrtf(2.0f) * ek_parameters_gpu->agrid); - float sqrt2_inv = 1.0f / sqrtf(2.0f); - float twoT_inv = 1.0f / (2.0f * ek_parameters_gpu->T); - float D_inv = 1.0f / ek_parameters_gpu->D[species_index]; - float force_conv = - agrid_inv * ek_parameters_gpu->time_step * ek_parameters_gpu->time_step; - - // face in x - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U00]]) * - agrid_inv; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_U00]]) * - agrid_inv + - ek_parameters_gpu->ext_force_density[0][species_index]); - - flux += force * - (ek_parameters_gpu->rho[species_index][index] + - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U00]]) * - twoT_inv; - - flux *= ek_parameters_gpu->d[species_index] * agrid_inv; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_U00]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U00)], - flux * ek_parameters_gpu->time_step); - - if (ek_parameters_gpu->fluidcoupling_ideal_contribution) { - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv; - force *= force_conv; - - atomicAdd(&node_f.force_density[index][0], force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U00]][0], - force * 0.5f); - } else { - force = -1.0f * ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu - 
->charge_potential)[neighborindex_padded[EK_LINK_U00]] - - ((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded]) * - agrid_inv; - - force *= force_conv; - - atomicAdd(&node_f.force_density[index][0], - ek_parameters_gpu->rho[species_index][index] * - (force * 0.5f + - ek_parameters_gpu->ext_force_density[0][species_index] * - force_conv)); - } - - // face in y - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0U0]]) * - agrid_inv; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_0U0]]) * - agrid_inv + - ek_parameters_gpu->ext_force_density[1][species_index]); - - flux += force * - (ek_parameters_gpu->rho[species_index][index] + - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0U0]]) * - twoT_inv; - - flux *= ek_parameters_gpu->d[species_index] * agrid_inv; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_0U0]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0U0)], - flux * ek_parameters_gpu->time_step); - - if (ek_parameters_gpu->fluidcoupling_ideal_contribution) { - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv; - force *= force_conv; - - atomicAdd(&node_f.force_density[index][1], force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0U0]][1], - force * 0.5f); - } else { - force = -1.0f * ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_0U0]] - - ((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded]) * - agrid_inv; - - force *= force_conv; - - atomicAdd(&node_f.force_density[index][1], - ek_parameters_gpu->rho[species_index][index] * - (force * 0.5f + - ek_parameters_gpu->ext_force_density[1][species_index] 
* - force_conv)); - - atomicAdd( - &node_f.force_density[neighborindex[EK_LINK_0U0]][1], - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0U0]] * - force * 0.5f); - } - - // face in z - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_00U]]) * - agrid_inv; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_00U]]) * - agrid_inv + - ek_parameters_gpu->ext_force_density[2][species_index]); - - flux += force * - (ek_parameters_gpu->rho[species_index][index] + - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_00U]]) * - twoT_inv; - - flux *= ek_parameters_gpu->d[species_index] * agrid_inv; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_00U]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_00U)], - flux * ek_parameters_gpu->time_step); - - if (ek_parameters_gpu->fluidcoupling_ideal_contribution) { - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv; - force *= force_conv; - - atomicAdd(&node_f.force_density[index][2], force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_00U]][2], - force * 0.5f); - } else { - force = -1.0f * ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_00U]] - - ((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded]) * - agrid_inv; - - force *= force_conv; - - atomicAdd(&node_f.force_density[index][2], - ek_parameters_gpu->rho[species_index][index] * - (force * 0.5f + - ek_parameters_gpu->ext_force_density[2][species_index] * - force_conv)); - - atomicAdd( - &node_f.force_density[neighborindex[EK_LINK_00U]][2], - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_00U]] * - force * 0.5f); - 
} - - // edge in z - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UU0]]) * - sqrt2agrid_inv; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_UU0]]) * - sqrt2agrid_inv + - (ek_parameters_gpu->ext_force_density[0][species_index] + - ek_parameters_gpu->ext_force_density[1][species_index]) * - sqrt2_inv); - - flux += force * - (ek_parameters_gpu->rho[species_index][index] + - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UU0]]) * - twoT_inv; - - flux *= ek_parameters_gpu->d[species_index] * agrid_inv; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_UU0]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UU0)], - flux * ek_parameters_gpu->time_step); - - if (ek_parameters_gpu->fluidcoupling_ideal_contribution) { - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv; - force *= force_conv; - - atomicAdd(&node_f.force_density[index][0], force * 0.5f); - atomicAdd(&node_f.force_density[index][1], force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UU0]][0], - force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UU0]][1], - force * 0.5f); - } - - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UD0]]) * - sqrt2agrid_inv; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_UD0]]) * - sqrt2agrid_inv + - (ek_parameters_gpu->ext_force_density[0][species_index] - - ek_parameters_gpu->ext_force_density[1][species_index]) * - sqrt2_inv); - - flux += force * - 
(ek_parameters_gpu->rho[species_index][index] + - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UD0]]) * - twoT_inv; - - flux *= ek_parameters_gpu->d[species_index] * agrid_inv; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_UD0]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UD0)], - flux * ek_parameters_gpu->time_step); - - if (ek_parameters_gpu->fluidcoupling_ideal_contribution) { - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv; - force *= force_conv; - - atomicAdd(&node_f.force_density[index][0], force * 0.5f); - atomicAdd(&node_f.force_density[index][1], -force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UD0]][0], - force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UD0]][1], - -force * 0.5f); - } - - // edge in y - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0U]]) * - sqrt2agrid_inv; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_U0U]]) * - sqrt2agrid_inv + - (ek_parameters_gpu->ext_force_density[0][species_index] + - ek_parameters_gpu->ext_force_density[2][species_index]) * - sqrt2_inv); - - flux += force * - (ek_parameters_gpu->rho[species_index][index] + - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0U]]) * - twoT_inv; - - flux *= ek_parameters_gpu->d[species_index] * agrid_inv; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_U0U]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0U)], - flux * ek_parameters_gpu->time_step); - - if (ek_parameters_gpu->fluidcoupling_ideal_contribution) { - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv; - force *= 
force_conv; - - atomicAdd(&node_f.force_density[index][0], force * 0.5f); - atomicAdd(&node_f.force_density[index][2], force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0U]][0], - force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0U]][2], - force * 0.5f); - } - - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0D]]) * - sqrt2agrid_inv; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_U0D]]) * - sqrt2agrid_inv + - (ek_parameters_gpu->ext_force_density[0][species_index] - - ek_parameters_gpu->ext_force_density[2][species_index]) * - sqrt2_inv); - - flux += force * - (ek_parameters_gpu->rho[species_index][index] + - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0D]]) * - twoT_inv; - - flux *= ek_parameters_gpu->d[species_index] * agrid_inv; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_U0D]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0D)], - flux * ek_parameters_gpu->time_step); - - if (ek_parameters_gpu->fluidcoupling_ideal_contribution) { - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv; - force *= force_conv; - - atomicAdd(&node_f.force_density[index][0], force * 0.5f); - atomicAdd(&node_f.force_density[index][2], -force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0D]][0], - force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0D]][2], - -force * 0.5f); - } - - // edge in x - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UU]]) * - sqrt2agrid_inv; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal 
*)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_0UU]]) * - sqrt2agrid_inv + - (ek_parameters_gpu->ext_force_density[1][species_index] + - ek_parameters_gpu->ext_force_density[2][species_index]) * - sqrt2_inv); - - flux += force * - (ek_parameters_gpu->rho[species_index][index] + - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UU]]) * - twoT_inv; - - flux *= ek_parameters_gpu->d[species_index] * agrid_inv; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_0UU]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UU)], - flux * ek_parameters_gpu->time_step); - - if (ek_parameters_gpu->fluidcoupling_ideal_contribution) { - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv; - force *= force_conv; - - atomicAdd(&node_f.force_density[index][1], force * 0.5f); - atomicAdd(&node_f.force_density[index][2], force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UU]][1], - force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UU]][2], - force * 0.5f); - } - - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UD]]) * - sqrt2agrid_inv; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_0UD]]) * - sqrt2agrid_inv + - (ek_parameters_gpu->ext_force_density[1][species_index] - - ek_parameters_gpu->ext_force_density[2][species_index]) * - sqrt2_inv); - - flux += force * - (ek_parameters_gpu->rho[species_index][index] + - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UD]]) * - twoT_inv; - - flux *= ek_parameters_gpu->d[species_index] * agrid_inv; - - flux *= static_cast(!(lb_node.boundary[index] || - 
lb_node.boundary[neighborindex[EK_LINK_0UD]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UD)], - flux * ek_parameters_gpu->time_step); - - if (ek_parameters_gpu->fluidcoupling_ideal_contribution) { - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid * D_inv; - force *= force_conv; - - atomicAdd(&node_f.force_density[index][1], force * 0.5f); - atomicAdd(&node_f.force_density[index][2], -force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UD]][1], - force * 0.5f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UD]][2], - -force * 0.5f); - } -} - -__device__ void ek_diffusion_migration_lbforce_nodecentered_stencil( - unsigned int index, unsigned int index_padded, - unsigned int const *neighborindex, unsigned int const *neighborindex_padded, - unsigned int species_index, LB_node_force_density_gpu node_f, - LB_nodes_gpu lb_node) { - float flux, force; - - // face in x - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U00]]) / - ek_parameters_gpu->agrid; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_U00]]) / - ek_parameters_gpu->agrid + - ek_parameters_gpu->ext_force_density[0][species_index]); - - flux += - force * - (static_cast(force >= 0.0f) * - ek_parameters_gpu->rho[species_index][index] + - static_cast(force < 0.0f) * - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U00]]) / - ek_parameters_gpu->T; - - flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_U00]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U00)], - flux * ek_parameters_gpu->time_step); - - force = flux * ek_parameters_gpu->T * 
ek_parameters_gpu->agrid / - ek_parameters_gpu->D[species_index]; - - force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step * - ek_parameters_gpu->time_step; - - atomicAdd(&node_f.force_density[index][0], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U00]][0], force / 2.0f); - - // face in y - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0U0]]) / - ek_parameters_gpu->agrid; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_0U0]]) / - ek_parameters_gpu->agrid + - ek_parameters_gpu->ext_force_density[1][species_index]); - - flux += - force * - (static_cast(force >= 0.0f) * - ek_parameters_gpu->rho[species_index][index] + - static_cast(force < 0.0f) * - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0U0]]) / - ek_parameters_gpu->T; - - flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_0U0]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0U0)], - flux * ek_parameters_gpu->time_step); - - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid / - ek_parameters_gpu->D[species_index]; - - force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step * - ek_parameters_gpu->time_step; - - atomicAdd(&node_f.force_density[index][1], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0U0]][1], force / 2.0f); - - // face in z - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_00U]]) / - ek_parameters_gpu->agrid; - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - 
- ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_00U]]) / - ek_parameters_gpu->agrid + - ek_parameters_gpu->ext_force_density[2][species_index]); - - flux += - force * - (static_cast(force >= 0.0f) * - ek_parameters_gpu->rho[species_index][index] + - static_cast(force < 0.0f) * - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_00U]]) / - ek_parameters_gpu->T; - - flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_00U]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_00U)], - flux * ek_parameters_gpu->time_step); - - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid / - ek_parameters_gpu->D[species_index]; - - force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step * - ek_parameters_gpu->time_step; - - atomicAdd(&node_f.force_density[index][2], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_00U]][2], force / 2.0f); - - // edge in z - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UU0]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid); - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_UU0]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid) + - (ek_parameters_gpu->ext_force_density[0][species_index] + - ek_parameters_gpu->ext_force_density[1][species_index]) / - sqrtf(2.0f)); - - flux += - force * - (static_cast(force >= 0.0f) * - ek_parameters_gpu->rho[species_index][index] + - static_cast(force < 0.0f) * - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UU0]]) / - ek_parameters_gpu->T; - - flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid; - - flux *= 
static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_UU0]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UU0)], - flux * ek_parameters_gpu->time_step); - - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid / - ek_parameters_gpu->D[species_index]; - - force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step * - ek_parameters_gpu->time_step; - - atomicAdd(&node_f.force_density[index][0], force / 2.0f); - atomicAdd(&node_f.force_density[index][1], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UU0]][0], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UU0]][1], force / 2.0f); - - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UD0]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid); - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_UD0]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid) + - (ek_parameters_gpu->ext_force_density[0][species_index] - - ek_parameters_gpu->ext_force_density[1][species_index]) / - sqrtf(2.0f)); - - flux += - force * - (static_cast(force >= 0.0f) * - ek_parameters_gpu->rho[species_index][index] + - static_cast(force < 0.0f) * - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_UD0]]) / - ek_parameters_gpu->T; - - flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_UD0]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UD0)], - flux * ek_parameters_gpu->time_step); - - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid / - ek_parameters_gpu->D[species_index]; - - force *= powf(ek_parameters_gpu->agrid, -1) * 
ek_parameters_gpu->time_step * - ek_parameters_gpu->time_step; - - atomicAdd(&node_f.force_density[index][0], force / 2.0f); - atomicAdd(&node_f.force_density[index][1], -force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UD0]][0], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_UD0]][1], - -force / 2.0f); - - // edge in y - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0U]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid); - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_U0U]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid) + - (ek_parameters_gpu->ext_force_density[0][species_index] + - ek_parameters_gpu->ext_force_density[2][species_index]) / - sqrtf(2.0f)); - - flux += - force * - (static_cast(force >= 0.0f) * - ek_parameters_gpu->rho[species_index][index] + - static_cast(force < 0.0f) * - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0U]]) / - ek_parameters_gpu->T; - - flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_U0U]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0U)], - flux * ek_parameters_gpu->time_step); - - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid / - ek_parameters_gpu->D[species_index]; - - force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step * - ek_parameters_gpu->time_step; - - atomicAdd(&node_f.force_density[index][0], force / 2.0f); - atomicAdd(&node_f.force_density[index][2], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0U]][0], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0U]][2], force / 2.0f); - - flux = 
(ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0D]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid); - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_U0D]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid) + - (ek_parameters_gpu->ext_force_density[0][species_index] - - ek_parameters_gpu->ext_force_density[2][species_index]) / - sqrtf(2.0f)); - - flux += - force * - (static_cast(force >= 0.0f) * - ek_parameters_gpu->rho[species_index][index] + - static_cast(force < 0.0f) * - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_U0D]]) / - ek_parameters_gpu->T; - - flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_U0D]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0D)], - flux * ek_parameters_gpu->time_step); - - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid / - ek_parameters_gpu->D[species_index]; - - force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step * - ek_parameters_gpu->time_step; - - atomicAdd(&node_f.force_density[index][0], force / 2.0f); - atomicAdd(&node_f.force_density[index][2], -force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0D]][0], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_U0D]][2], - -force / 2.0f); - - // edge in x - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UU]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid); - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - 
->charge_potential)[neighborindex_padded[EK_LINK_0UU]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid) + - (ek_parameters_gpu->ext_force_density[1][species_index] + - ek_parameters_gpu->ext_force_density[2][species_index]) / - sqrtf(2.0f)); - - flux += - force * - (static_cast(force >= 0.0f) * - ek_parameters_gpu->rho[species_index][index] + - static_cast(force < 0.0f) * - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UU]]) / - ek_parameters_gpu->T; - - flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_0UU]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UU)], - flux * ek_parameters_gpu->time_step); - - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid / - ek_parameters_gpu->D[species_index]; - - force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step * - ek_parameters_gpu->time_step; - - atomicAdd(&node_f.force_density[index][1], force / 2.0f); - atomicAdd(&node_f.force_density[index][2], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UU]][1], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UU]][2], force / 2.0f); - - flux = (ek_parameters_gpu->rho[species_index][index] - - ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UD]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid); - - force = - (ek_parameters_gpu->valency[species_index] * - (((cufftReal *)ek_parameters_gpu->charge_potential)[index_padded] - - ((cufftReal *)ek_parameters_gpu - ->charge_potential)[neighborindex_padded[EK_LINK_0UD]]) / - (sqrtf(2.0f) * ek_parameters_gpu->agrid) + - (ek_parameters_gpu->ext_force_density[1][species_index] - - ek_parameters_gpu->ext_force_density[2][species_index]) / - sqrtf(2.0f)); - - flux += - force * - (static_cast(force >= 0.0f) * - ek_parameters_gpu->rho[species_index][index] + - static_cast(force < 0.0f) * - 
ek_parameters_gpu->rho[species_index][neighborindex[EK_LINK_0UD]]) / - ek_parameters_gpu->T; - - flux *= ek_parameters_gpu->d[species_index] / ek_parameters_gpu->agrid; - - flux *= static_cast(!(lb_node.boundary[index] || - lb_node.boundary[neighborindex[EK_LINK_0UD]])); - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UD)], - flux * ek_parameters_gpu->time_step); - - force = flux * ek_parameters_gpu->T * ek_parameters_gpu->agrid / - ek_parameters_gpu->D[species_index]; - - force *= powf(ek_parameters_gpu->agrid, -1) * ek_parameters_gpu->time_step * - ek_parameters_gpu->time_step; - - atomicAdd(&node_f.force_density[index][1], force / 2.0f); - atomicAdd(&node_f.force_density[index][2], -force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UD]][1], force / 2.0f); - atomicAdd(&node_f.force_density[neighborindex[EK_LINK_0UD]][2], - -force / 2.0f); -} - -__device__ void -ek_add_advection_to_flux(unsigned int index, unsigned int *coord, - unsigned int species_index, LB_nodes_gpu lb_node, - LB_parameters_gpu *ek_lbparameters_gpu) { - float dx[3]; - unsigned int di[3]; - unsigned int node; - - ek_displacement(dx, lb_node, index, ek_lbparameters_gpu); - - di[0] = 1 - static_cast(signbit(dx[0])); - di[1] = 1 - static_cast(signbit(dx[1])); - di[2] = 1 - static_cast(signbit(dx[2])); - - dx[0] = fabs(dx[0]); - dx[1] = fabs(dx[1]); - dx[2] = fabs(dx[2]); - - unsigned int target_node[3]; - unsigned int target_node_index; - int not_boundary; - - // face in x - node = rhoindex_cartesian2linear( - (coord[0] + di[0] - 1 + ek_parameters_gpu->dim_x) % - ek_parameters_gpu->dim_x, - coord[1], coord[2]); - - target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) % - ek_parameters_gpu->dim_x; - target_node[1] = coord[1]; - target_node[2] = coord[2]; - target_node_index = - rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]); - not_boundary = - (lb_node.boundary[index] || lb_node.boundary[target_node_index]) 
== 0; - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(node, EK_LINK_U00)], - (2 * static_cast(di[0]) - 1) * - ek_parameters_gpu->rho[species_index][index] * dx[0] * - (1.0f - dx[1]) * (1.0f - dx[2]) * - static_cast(not_boundary)); - - // face in y - node = rhoindex_cartesian2linear( - coord[0], - (coord[1] + di[1] - 1 + ek_parameters_gpu->dim_y) % - ek_parameters_gpu->dim_y, - coord[2]); - - target_node[0] = coord[0]; - target_node[1] = (coord[1] + 2 * di[1] - 1 + ek_parameters_gpu->dim_y) % - ek_parameters_gpu->dim_y; - target_node[2] = coord[2]; - target_node_index = - rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]); - not_boundary = - (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0; - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(node, EK_LINK_0U0)], - (2 * static_cast(di[1]) - 1) * - ek_parameters_gpu->rho[species_index][index] * (1.0f - dx[0]) * - dx[1] * (1.0f - dx[2]) * static_cast(not_boundary)); - - // face in z - node = rhoindex_cartesian2linear( - coord[0], coord[1], - (coord[2] + di[2] - 1 + ek_parameters_gpu->dim_z) % - ek_parameters_gpu->dim_z); - - target_node[0] = coord[0]; - target_node[1] = coord[1]; - target_node[2] = (coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) % - ek_parameters_gpu->dim_z; - target_node_index = - rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]); - not_boundary = - (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0; - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear(node, EK_LINK_00U)], - (2 * static_cast(di[2]) - 1) * - ek_parameters_gpu->rho[species_index][index] * (1.0f - dx[0]) * - (1.0f - dx[1]) * dx[2] * static_cast(not_boundary)); - - // edge in x - node = rhoindex_cartesian2linear( - coord[0], - (coord[1] + di[1] - 1 + ek_parameters_gpu->dim_y) % - ek_parameters_gpu->dim_y, - (coord[2] + (1 - di[1]) * (2 * di[2] - 1) + ek_parameters_gpu->dim_z) % - ek_parameters_gpu->dim_z); - - target_node[0] = 
coord[0]; - target_node[1] = (coord[1] + 2 * di[1] - 1 + ek_parameters_gpu->dim_y) % - ek_parameters_gpu->dim_y; - target_node[2] = (coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) % - ek_parameters_gpu->dim_z; - target_node_index = - rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]); - not_boundary = - (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0; - - atomicAdd( - &ek_parameters_gpu - ->j[jindex_getByRhoLinear(node, EK_LINK_0UU + (di[1] + di[2] == 1))], - (2 * static_cast(di[1]) - 1) * - ek_parameters_gpu->rho[species_index][index] * (1.0f - dx[0]) * - dx[1] * dx[2] * static_cast(not_boundary)); - - // edge in y - node = rhoindex_cartesian2linear( - (coord[0] + di[0] - 1 + ek_parameters_gpu->dim_x) % - ek_parameters_gpu->dim_x, - coord[1], - (coord[2] + (1 - di[0]) * (2 * di[2] - 1) + ek_parameters_gpu->dim_z) % - ek_parameters_gpu->dim_z); - - target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) % - ek_parameters_gpu->dim_x; - target_node[1] = coord[1]; - target_node[2] = (coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) % - ek_parameters_gpu->dim_z; - target_node_index = - rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]); - not_boundary = - (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0; - - atomicAdd( - &ek_parameters_gpu - ->j[jindex_getByRhoLinear(node, EK_LINK_U0U + (di[0] + di[2] == 1))], - (2 * static_cast(di[0]) - 1) * - ek_parameters_gpu->rho[species_index][index] * dx[0] * - (1.0f - dx[1]) * dx[2] * static_cast(not_boundary)); - - // edge in z - node = rhoindex_cartesian2linear( - (coord[0] + di[0] - 1 + ek_parameters_gpu->dim_x) % - ek_parameters_gpu->dim_x, - (coord[1] + (1 - di[0]) * (2 * di[1] - 1) + ek_parameters_gpu->dim_y) % - ek_parameters_gpu->dim_y, - coord[2]); - - target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) % - ek_parameters_gpu->dim_x; - target_node[1] = (coord[1] + 2 * di[1] - 1 + 
ek_parameters_gpu->dim_y) % - ek_parameters_gpu->dim_y; - target_node[2] = coord[2]; - target_node_index = - rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]); - not_boundary = - (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0; - - atomicAdd( - &ek_parameters_gpu - ->j[jindex_getByRhoLinear(node, EK_LINK_UU0 + (di[0] + di[1] == 1))], - (2 * static_cast(di[0]) - 1) * - ek_parameters_gpu->rho[species_index][index] * dx[0] * dx[1] * - (1.0f - dx[2]) * static_cast(not_boundary)); - - // corner - node = rhoindex_cartesian2linear( - (coord[0] + di[0] - 1 + ek_parameters_gpu->dim_x) % - ek_parameters_gpu->dim_x, - (coord[1] + (1 - di[0]) * (2 * di[1] - 1) + ek_parameters_gpu->dim_y) % - ek_parameters_gpu->dim_y, - (coord[2] + (1 - di[0]) * (2 * di[2] - 1) + ek_parameters_gpu->dim_z) % - ek_parameters_gpu->dim_z); - - target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) % - ek_parameters_gpu->dim_x; - target_node[1] = (coord[1] + 2 * di[1] - 1 + ek_parameters_gpu->dim_y) % - ek_parameters_gpu->dim_y; - target_node[2] = (coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) % - ek_parameters_gpu->dim_z; - target_node_index = - rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]); - not_boundary = - (lb_node.boundary[index] || lb_node.boundary[target_node_index]) == 0; - - atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear( - node, (1 - di[0]) * (EK_LINK_UUU + 2 * di[1] + di[2]) + - di[0] * (EK_LINK_UDD - 2 * di[1] - di[2]))], - (2 * static_cast(di[0]) - 1) * - ek_parameters_gpu->rho[species_index][index] * dx[0] * dx[1] * - dx[2] * static_cast(not_boundary)); -} - -__device__ float4 ek_random_wrapper_philox(unsigned int index, - unsigned int mode, - uint64_t philox_counter) { - // Split the 64 bit counter into two 32 bit ints. 
- auto const philox_counter_hi = static_cast(philox_counter >> 32); - auto const philox_counter_low = static_cast(philox_counter); - uint4 rnd_ints = - curand_Philox4x32_10(make_uint4(index, philox_counter_hi, 0, mode), - make_uint2(philox_counter_low, 0)); - float4 rnd_floats; - rnd_floats.w = static_cast(rnd_ints.w) * CURAND_2POW32_INV + - (CURAND_2POW32_INV / 2.0f); - rnd_floats.x = static_cast(rnd_ints.x) * CURAND_2POW32_INV + - (CURAND_2POW32_INV / 2.0f); - rnd_floats.y = static_cast(rnd_ints.y) * CURAND_2POW32_INV + - (CURAND_2POW32_INV / 2.0f); - rnd_floats.z = static_cast(rnd_ints.z) * CURAND_2POW32_INV + - (CURAND_2POW32_INV / 2.0f); - return rnd_floats; -} - -__device__ void ek_add_fluctuations_to_flux(unsigned int index, - unsigned int species_index, - unsigned int const *neighborindex, - LB_nodes_gpu lb_node, - uint64_t philox_counter) { - if (index < ek_parameters_gpu->number_of_nodes) { - float density = ek_parameters_gpu->rho[species_index][index]; - float *flux = ek_parameters_gpu->j; - float diffusion = ek_parameters_gpu->D[species_index]; - float time_step = ek_parameters_gpu->time_step; - float agrid = ek_parameters_gpu->agrid; - float4 random_floats; - float random; - -#ifdef EK_DEBUG - float *flux_fluc = ek_parameters_gpu->j_fluc; -#endif - float fluc = 0.0f; - - for (unsigned i = 0; i < 9; i++) { - - if (i % 4 == 0) { - random_floats = ek_random_wrapper_philox(index, i + 40, philox_counter); - random = (random_floats.w - 0.5f) * 2.0f; - } else if (i % 4 == 1) { - random = (random_floats.x - 0.5f) * 2.0f; - } else if (i % 4 == 2) { - random = (random_floats.y - 0.5f) * 2.0f; - } else if (i % 4 == 3) { - random = (random_floats.z - 0.5f) * 2.0f; - } - float H = 0.0f; - float HN = 0.0f; - float neighbor_density = - ek_parameters_gpu->rho[species_index][neighborindex[i]]; - - H = static_cast(density >= 0.0f) * min(density, 1.0f); - HN = static_cast(neighbor_density >= 0.0f) * - min(neighbor_density, 1.0f); - - float average_density = H * HN * 
(density + neighbor_density) / 2.0f; - - if (i > 2) { - fluc = 1.0f * - powf(2.0f * average_density * diffusion * time_step / - (agrid * agrid), - 0.5f) * - random * ek_parameters_gpu->fluctuation_amplitude / sqrtf(2.0f); - fluc *= static_cast( - !(lb_node.boundary[index] || lb_node.boundary[neighborindex[i]])); -#ifdef EK_DEBUG - flux_fluc[jindex_getByRhoLinear(index, i)] = fluc; -#endif - flux[jindex_getByRhoLinear(index, i)] += fluc; - } else { - fluc = 1.0f * - powf(2.0f * average_density * diffusion * time_step / - (agrid * agrid), - 0.5f) * - random * ek_parameters_gpu->fluctuation_amplitude; - fluc *= static_cast( - !(lb_node.boundary[index] || lb_node.boundary[neighborindex[i]])); -#ifdef EK_DEBUG - flux_fluc[jindex_getByRhoLinear(index, i)] = fluc; -#endif - flux[jindex_getByRhoLinear(index, i)] += fluc; - } - } - } -} - -__global__ void ek_calculate_quantities(unsigned int species_index, - LB_nodes_gpu lb_node, - LB_node_force_density_gpu node_f, - LB_parameters_gpu *ek_lbparameters_gpu, - uint64_t philox_counter) { - - unsigned int index = ek_getThreadIndex(); - - if (index < ek_parameters_gpu->number_of_nodes) { - - unsigned int coord[3]; - unsigned int neighborindex[9]; - unsigned int neighborindex_padded[9]; - unsigned int index_padded; - - rhoindex_linear2cartesian(index, coord); - - /* Calculate the diffusive fluxes between this node and its neighbors. Only - the 9 fluxes along the directions of the LB velocities c_i with i odd are - stored with a node to avoid redundancies. 
*/ - - neighborindex[EK_LINK_U00] = rhoindex_cartesian2linear( - (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1], coord[2]); - - neighborindex[EK_LINK_0U0] = rhoindex_cartesian2linear( - coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]); - - neighborindex[EK_LINK_00U] = rhoindex_cartesian2linear( - coord[0], coord[1], (coord[2] + 1) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_UU0] = rhoindex_cartesian2linear( - (coord[0] + 1) % ek_parameters_gpu->dim_x, - (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]); - - neighborindex[EK_LINK_UD0] = rhoindex_cartesian2linear( - (coord[0] + 1) % ek_parameters_gpu->dim_x, - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - coord[2]); - - neighborindex[EK_LINK_U0U] = rhoindex_cartesian2linear( - (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1], - (coord[2] + 1) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_U0D] = rhoindex_cartesian2linear( - (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1], - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_0UU] = rhoindex_cartesian2linear( - coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y, - (coord[2] + 1) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_0UD] = rhoindex_cartesian2linear( - coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y, - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - /* calculate the same indices respecting the FFT padding */ - - index_padded = - rhoindex_cartesian2linear_padded(coord[0], coord[1], coord[2]); - - neighborindex_padded[EK_LINK_U00] = rhoindex_cartesian2linear_padded( - (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1], coord[2]); - - neighborindex_padded[EK_LINK_0U0] = rhoindex_cartesian2linear_padded( - coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]); - - neighborindex_padded[EK_LINK_00U] = rhoindex_cartesian2linear_padded( - coord[0], coord[1], (coord[2] + 1) % ek_parameters_gpu->dim_z); 
- - neighborindex_padded[EK_LINK_UU0] = rhoindex_cartesian2linear_padded( - (coord[0] + 1) % ek_parameters_gpu->dim_x, - (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]); - - neighborindex_padded[EK_LINK_UD0] = rhoindex_cartesian2linear_padded( - (coord[0] + 1) % ek_parameters_gpu->dim_x, - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - coord[2]); - - neighborindex_padded[EK_LINK_U0U] = rhoindex_cartesian2linear_padded( - (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1], - (coord[2] + 1) % ek_parameters_gpu->dim_z); - - neighborindex_padded[EK_LINK_U0D] = rhoindex_cartesian2linear_padded( - (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1], - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex_padded[EK_LINK_0UU] = rhoindex_cartesian2linear_padded( - coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y, - (coord[2] + 1) % ek_parameters_gpu->dim_z); - - neighborindex_padded[EK_LINK_0UD] = rhoindex_cartesian2linear_padded( - coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y, - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - /* diffusive contribution to flux and LB force_density*/ - if (ek_parameters_gpu->stencil == 0) // link centered - ek_diffusion_migration_lbforce_linkcentered_stencil( - index, index_padded, neighborindex, neighborindex_padded, - species_index, node_f, lb_node); - else if (ek_parameters_gpu->stencil == 1) // node centered - ek_diffusion_migration_lbforce_nodecentered_stencil( - index, index_padded, neighborindex, neighborindex_padded, - species_index, node_f, lb_node); - - /* advective contribution to flux */ - if (ek_parameters_gpu->advection) - ek_add_advection_to_flux(index, coord, species_index, lb_node, - ek_lbparameters_gpu); - - /* fluctuation contribution to flux */ - if (ek_parameters_gpu->fluctuations) - ek_add_fluctuations_to_flux(index, species_index, neighborindex, lb_node, - philox_counter); - } -} - -__global__ void 
ek_propagate_densities(unsigned int species_index) { - - unsigned int index = ek_getThreadIndex(); - - if (index < ek_parameters_gpu->number_of_nodes) { - unsigned int neighborindex[13]; - unsigned int coord[3]; - - rhoindex_linear2cartesian(index, coord); - - /* Indices of the neighbors storing the other half - of the fluxes associated with this link */ - neighborindex[EK_LINK_D00 - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - coord[1], coord[2]); - - neighborindex[EK_LINK_0D0 - 13] = rhoindex_cartesian2linear( - coord[0], - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - coord[2]); - - neighborindex[EK_LINK_00D - 13] = rhoindex_cartesian2linear( - coord[0], coord[1], - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_DD0 - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - coord[2]); - - neighborindex[EK_LINK_DU0 - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]); - - neighborindex[EK_LINK_D0D - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - coord[1], - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_D0U - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - coord[1], (coord[2] + 1) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_0DD - 13] = rhoindex_cartesian2linear( - coord[0], - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_0DU - 13] = rhoindex_cartesian2linear( - coord[0], - (coord[1] - 1 + 
ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - (coord[2] + 1) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_DDD - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_DDU - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - (coord[2] + 1) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_DUD - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] + 1) % ek_parameters_gpu->dim_y, - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_DUU - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] + 1) % ek_parameters_gpu->dim_y, - (coord[2] + 1) % ek_parameters_gpu->dim_z); - - /* Calculate change of densities due to diffusive fluxes */ - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U00)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_D00 - 13], EK_LINK_U00)]; - - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0U0)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_0D0 - 13], EK_LINK_0U0)]; - - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_00U)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_00D - 13], EK_LINK_00U)]; - - 
ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UU0)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DD0 - 13], EK_LINK_UU0)]; - - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UD0)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DU0 - 13], EK_LINK_UD0)]; - - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0U)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_D0D - 13], EK_LINK_U0U)]; - - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_U0D)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_D0U - 13], EK_LINK_U0D)]; - - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UU)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_0DD - 13], EK_LINK_0UU)]; - - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_0UD)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_0DU - 13], EK_LINK_0UD)]; - - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UUU)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DDD - 13], EK_LINK_UUU)]; - - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UUD)]; - 
ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DDU - 13], EK_LINK_UUD)]; - - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UDU)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DUD - 13], EK_LINK_UDU)]; - - ek_parameters_gpu->rho[species_index][index] -= - ek_parameters_gpu->j[jindex_getByRhoLinear(index, EK_LINK_UDD)]; - ek_parameters_gpu->rho[species_index][index] += - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DUU - 13], EK_LINK_UDD)]; - } -} - -__global__ void ek_apply_boundaries(LB_nodes_gpu lbnode) { - - unsigned int index = ek_getThreadIndex(); - unsigned int neighborindex[22]; - unsigned int coord[3]; - - if (index < ek_parameters_gpu->number_of_nodes) { - if (lbnode.boundary[index]) { - - rhoindex_linear2cartesian(index, coord); - - /* Indices of the neighbors */ - neighborindex[EK_LINK_D00 - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - coord[1], coord[2]); - - neighborindex[EK_LINK_0D0 - 13] = rhoindex_cartesian2linear( - coord[0], - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - coord[2]); - - neighborindex[EK_LINK_00D - 13] = rhoindex_cartesian2linear( - coord[0], coord[1], - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_DD0 - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - coord[2]); - - neighborindex[EK_LINK_DU0 - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2]); - - neighborindex[EK_LINK_D0D - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + 
ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - coord[1], - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_D0U - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - coord[1], (coord[2] + 1) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_0DD - 13] = rhoindex_cartesian2linear( - coord[0], - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_0DU - 13] = rhoindex_cartesian2linear( - coord[0], - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - (coord[2] + 1) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_DDD - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_DDU - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y, - (coord[2] + 1) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_DUD - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] + 1) % ek_parameters_gpu->dim_y, - (coord[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - - neighborindex[EK_LINK_DUU - 13] = rhoindex_cartesian2linear( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x, - (coord[1] + 1) % ek_parameters_gpu->dim_y, - (coord[2] + 1) % ek_parameters_gpu->dim_z); - - /* Clear fluxes on links connecting a boundary node */ - for (unsigned i = 0; i < 13; i++) - ek_parameters_gpu->j[jindex_getByRhoLinear(index, i)] = 0.0f; - - ek_parameters_gpu->j[jindex_getByRhoLinear( - 
neighborindex[EK_LINK_D00 - 13], EK_LINK_U00)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_0D0 - 13], EK_LINK_0U0)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_00D - 13], EK_LINK_00U)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DD0 - 13], EK_LINK_UU0)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DU0 - 13], EK_LINK_UD0)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_D0D - 13], EK_LINK_U0U)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_D0U - 13], EK_LINK_U0D)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_0DD - 13], EK_LINK_0UU)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_0DU - 13], EK_LINK_0UD)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DDD - 13], EK_LINK_UUU)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DDU - 13], EK_LINK_UUD)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DUD - 13], EK_LINK_UDU)] = 0.0f; - ek_parameters_gpu->j[jindex_getByRhoLinear( - neighborindex[EK_LINK_DUU - 13], EK_LINK_UDD)] = 0.0f; - } - } -} - -__global__ void ek_clear_fluxes() { - unsigned int index = ek_getThreadIndex(); - - if (index < ek_parameters_gpu->number_of_nodes) { - for (unsigned i = 0; i < 13; i++) { - ek_parameters_gpu->j[jindex_getByRhoLinear(index, i)] = 0.0f; -#ifdef EK_DEBUG - ek_parameters_gpu->j_fluc[jindex_getByRhoLinear(index, i)] = 0.0f; -#endif - } - } -} - -__global__ void ek_init_species_density_homogeneous() { - unsigned int index = ek_getThreadIndex(); - unsigned int coord[3]; - - rhoindex_linear2cartesian(index, coord); - - if (index < ek_parameters_gpu->number_of_nodes) { - for (int i = 0; i < ek_parameters_gpu->number_of_species; i++) { - ek_parameters_gpu->rho[i][index] = - 
ek_parameters_gpu->density[i] * ek_parameters_gpu->agrid * - ek_parameters_gpu->agrid * ek_parameters_gpu->agrid; - } - } -} - -__global__ void ek_gather_species_charge_density() { - auto const index = ek_getThreadIndex(); - - if (index < ek_parameters_gpu->number_of_nodes) { - ek_setNode(index, 0.0f); - cufftReal tmp = 0.0f; - for (int i = 0; i < ek_parameters_gpu->number_of_species; i++) { - tmp += ek_parameters_gpu->valency[i] * ek_parameters_gpu->rho[i][index]; - } - ek_setNode(index, tmp / powf(ek_parameters_gpu->agrid, 3)); - } -} - -__global__ void -ek_gather_particle_charge_density(CUDA_particle_data *particle_data, - std::size_t number_of_particles, - LB_parameters_gpu *ek_lbparameters_gpu) { - unsigned int index = ek_getThreadIndex(); - unsigned int lowernode[3]; - float cellpos[3]; - float gridpos; - - if (index < number_of_particles) { - gridpos = particle_data[index].p[0] / ek_parameters_gpu->agrid - 0.5f; - lowernode[0] = static_cast(floorf(gridpos)); - cellpos[0] = gridpos - static_cast(lowernode[0]); - - gridpos = particle_data[index].p[1] / ek_parameters_gpu->agrid - 0.5f; - lowernode[1] = static_cast(floorf(gridpos)); - cellpos[1] = gridpos - static_cast(lowernode[1]); - - gridpos = particle_data[index].p[2] / ek_parameters_gpu->agrid - 0.5f; - lowernode[2] = static_cast(floorf(gridpos)); - cellpos[2] = gridpos - static_cast(lowernode[2]); - - lowernode[0] = (lowernode[0] + ek_lbparameters_gpu->dim[0]) % - ek_lbparameters_gpu->dim[0]; - lowernode[1] = (lowernode[1] + ek_lbparameters_gpu->dim[1]) % - ek_lbparameters_gpu->dim[1]; - lowernode[2] = (lowernode[2] + ek_lbparameters_gpu->dim[2]) % - ek_lbparameters_gpu->dim[2]; - - atomicAdd(&((cufftReal *)ek_parameters_gpu - ->charge_potential)[rhoindex_cartesian2linear_padded( - lowernode[0], lowernode[1], lowernode[2])], - particle_data[index].q * (1 - cellpos[0]) * (1 - cellpos[1]) * - (1 - cellpos[2])); - - atomicAdd(&((cufftReal *)ek_parameters_gpu - 
->charge_potential)[rhoindex_cartesian2linear_padded( - (lowernode[0] + 1) % ek_parameters_gpu->dim_x, lowernode[1], - lowernode[2])], - particle_data[index].q * cellpos[0] * (1 - cellpos[1]) * - (1 - cellpos[2])); - - atomicAdd(&((cufftReal *)ek_parameters_gpu - ->charge_potential)[rhoindex_cartesian2linear_padded( - lowernode[0], (lowernode[1] + 1) % ek_parameters_gpu->dim_y, - lowernode[2])], - particle_data[index].q * (1 - cellpos[0]) * cellpos[1] * - (1 - cellpos[2])); - - atomicAdd(&((cufftReal *)ek_parameters_gpu - ->charge_potential)[rhoindex_cartesian2linear_padded( - lowernode[0], lowernode[1], - (lowernode[2] + 1) % ek_parameters_gpu->dim_z)], - particle_data[index].q * (1 - cellpos[0]) * (1 - cellpos[1]) * - cellpos[2]); - - atomicAdd(&((cufftReal *)ek_parameters_gpu - ->charge_potential)[rhoindex_cartesian2linear_padded( - (lowernode[0] + 1) % ek_parameters_gpu->dim_x, - (lowernode[1] + 1) % ek_parameters_gpu->dim_y, lowernode[2])], - particle_data[index].q * cellpos[0] * cellpos[1] * - (1 - cellpos[2])); - - atomicAdd(&((cufftReal *)ek_parameters_gpu - ->charge_potential)[rhoindex_cartesian2linear_padded( - (lowernode[0] + 1) % ek_parameters_gpu->dim_x, lowernode[1], - (lowernode[2] + 1) % ek_parameters_gpu->dim_z)], - particle_data[index].q * cellpos[0] * (1 - cellpos[1]) * - cellpos[2]); - - atomicAdd(&((cufftReal *)ek_parameters_gpu - ->charge_potential)[rhoindex_cartesian2linear_padded( - lowernode[0], (lowernode[1] + 1) % ek_parameters_gpu->dim_y, - (lowernode[2] + 1) % ek_parameters_gpu->dim_z)], - particle_data[index].q * (1 - cellpos[0]) * cellpos[1] * - cellpos[2]); - - atomicAdd(&((cufftReal *)ek_parameters_gpu - ->charge_potential)[rhoindex_cartesian2linear_padded( - (lowernode[0] + 1) % ek_parameters_gpu->dim_x, - (lowernode[1] + 1) % ek_parameters_gpu->dim_y, - (lowernode[2] + 1) % ek_parameters_gpu->dim_z)], - particle_data[index].q * cellpos[0] * cellpos[1] * cellpos[2]); - } -} - -__global__ void ek_spread_particle_force( - 
CUDA_particle_data *particle_data, std::size_t number_of_particles, - float *particle_forces, LB_parameters_gpu *ek_lbparameters_gpu) { - - unsigned int index = ek_getThreadIndex(); - unsigned int lowernode[3]; - float cellpos[3]; - float gridpos; - - if (index < number_of_particles) { - gridpos = particle_data[index].p[0] / ek_parameters_gpu->agrid - 0.5f; - lowernode[0] = static_cast(floorf(gridpos)); - cellpos[0] = gridpos - static_cast(lowernode[0]); - - gridpos = particle_data[index].p[1] / ek_parameters_gpu->agrid - 0.5f; - lowernode[1] = static_cast(floorf(gridpos)); - cellpos[1] = gridpos - static_cast(lowernode[1]); - - gridpos = particle_data[index].p[2] / ek_parameters_gpu->agrid - 0.5f; - lowernode[2] = static_cast(floorf(gridpos)); - cellpos[2] = gridpos - static_cast(lowernode[2]); - - lowernode[0] = (lowernode[0] + ek_lbparameters_gpu->dim[0]) % - ek_lbparameters_gpu->dim[0]; - lowernode[1] = (lowernode[1] + ek_lbparameters_gpu->dim[1]) % - ek_lbparameters_gpu->dim[1]; - lowernode[2] = (lowernode[2] + ek_lbparameters_gpu->dim[2]) % - ek_lbparameters_gpu->dim[2]; - - float efield[3] = {0., 0., 0.}; - for (unsigned int dim = 0; dim < 3; ++dim) { - // 0 0 0 - efield[dim] += - ek_parameters_gpu->electric_field[3 * rhoindex_cartesian2linear( - lowernode[0], lowernode[1], - lowernode[2]) + - dim] * - (1 - cellpos[0]) * (1 - cellpos[1]) * (1 - cellpos[2]); - - // 0 0 1 - efield[dim] += - ek_parameters_gpu - ->electric_field[3 * rhoindex_cartesian2linear( - lowernode[0], lowernode[1], - (lowernode[2] + 1) % - ek_lbparameters_gpu->dim[2]) + - dim] * - (1 - cellpos[0]) * (1 - cellpos[1]) * cellpos[2]; - - // 0 1 0 - efield[dim] += - ek_parameters_gpu - ->electric_field[3 * rhoindex_cartesian2linear( - lowernode[0], - (lowernode[1] + 1) % - ek_lbparameters_gpu->dim[1], - lowernode[2]) + - dim] * - (1 - cellpos[0]) * cellpos[1] * (1 - cellpos[2]); - - // 0 1 1 - efield[dim] += - ek_parameters_gpu->electric_field - [3 * rhoindex_cartesian2linear( - lowernode[0], 
- (lowernode[1] + 1) % ek_lbparameters_gpu->dim[1], - (lowernode[2] + 1) % ek_lbparameters_gpu->dim[2]) + - dim] * - (1 - cellpos[0]) * cellpos[1] * cellpos[2]; - - // 1 0 0 - efield[dim] += - ek_parameters_gpu - ->electric_field[3 * rhoindex_cartesian2linear( - (lowernode[0] + 1) % - ek_lbparameters_gpu->dim[0], - lowernode[1], lowernode[2]) + - dim] * - cellpos[0] * (1 - cellpos[1]) * (1 - cellpos[2]); - - // 1 0 1 - efield[dim] += - ek_parameters_gpu->electric_field - [3 * rhoindex_cartesian2linear( - (lowernode[0] + 1) % ek_lbparameters_gpu->dim[0], - lowernode[1], - (lowernode[2] + 1) % ek_lbparameters_gpu->dim[2]) + - dim] * - cellpos[0] * (1 - cellpos[1]) * cellpos[2]; - - // 1 1 0 - efield[dim] += - ek_parameters_gpu->electric_field - [3 * rhoindex_cartesian2linear( - (lowernode[0] + 1) % ek_lbparameters_gpu->dim[0], - (lowernode[1] + 1) % ek_lbparameters_gpu->dim[1], - lowernode[2]) + - dim] * - cellpos[0] * cellpos[1] * (1 - cellpos[2]); - - // 1 1 1 - efield[dim] += - ek_parameters_gpu->electric_field - [3 * rhoindex_cartesian2linear( - (lowernode[0] + 1) % ek_lbparameters_gpu->dim[0], - (lowernode[1] + 1) % ek_lbparameters_gpu->dim[1], - (lowernode[2] + 1) % ek_lbparameters_gpu->dim[2]) + - dim] * - cellpos[0] * cellpos[1] * cellpos[2]; - } - particle_forces[3 * index + 0] += particle_data[index].q * efield[0]; - particle_forces[3 * index + 1] += particle_data[index].q * efield[1]; - particle_forces[3 * index + 2] += particle_data[index].q * efield[2]; - } -} - -__global__ void ek_calc_electric_field(const float *potential) { - unsigned int coord[3]; - const unsigned int index = ek_getThreadIndex(); - - if (index < ek_parameters_gpu->number_of_nodes) { - rhoindex_linear2cartesian(index, coord); - const float agrid_inv = 1.0f / ek_parameters_gpu->agrid; - - ek_parameters_gpu->electric_field[3 * index + 0] = - -0.5f * agrid_inv * - (potential[rhoindex_cartesian2linear_padded( - (coord[0] + 1) % ek_parameters_gpu->dim_x, coord[1], coord[2])] - - 
potential[rhoindex_cartesian2linear_padded( - (coord[0] - 1 + ek_parameters_gpu->dim_x) % - ek_parameters_gpu->dim_x, - coord[1], coord[2])]); - ek_parameters_gpu->electric_field[3 * index + 1] = - -0.5f * agrid_inv * - (potential[rhoindex_cartesian2linear_padded( - coord[0], (coord[1] + 1) % ek_parameters_gpu->dim_y, coord[2])] - - potential[rhoindex_cartesian2linear_padded( - coord[0], - (coord[1] - 1 + ek_parameters_gpu->dim_y) % - ek_parameters_gpu->dim_y, - coord[2])]); - ek_parameters_gpu->electric_field[3 * index + 2] = - -0.5f * agrid_inv * - (potential[rhoindex_cartesian2linear_padded( - coord[0], coord[1], (coord[2] + 1) % ek_parameters_gpu->dim_z)] - - potential[rhoindex_cartesian2linear_padded( - coord[0], coord[1], - (coord[2] - 1 + ek_parameters_gpu->dim_z) % - ek_parameters_gpu->dim_z)]); - } -} - -__global__ void ek_clear_boundary_densities(LB_nodes_gpu lbnode) { - - unsigned int index = ek_getThreadIndex(); - - if (index < ek_parameters_gpu->number_of_nodes) { - if (lbnode.boundary[index]) { - for (int i = 0; i < ek_parameters_gpu->number_of_species; i++) { - ek_parameters_gpu->rho[i][index] = 0.0f; - } - } - } -} - -__global__ void ek_calculate_system_charge(float *charge_gpu) { - - unsigned int index = ek_getThreadIndex(); - - if (index < ek_parameters_gpu->number_of_nodes) { - for (int i = 0; i < ek_parameters_gpu->number_of_species; i++) { - atomicAdd(charge_gpu, ek_parameters_gpu->rho[i][index] * - ek_parameters_gpu->valency[i]); - } - } -} - -// TODO delete ?? (it has the previous step setting now) -// This is not compatible with external LB force_densities! 
-__global__ void ek_clear_node_force(LB_node_force_density_gpu node_f) { - - unsigned int index = ek_getThreadIndex(); - - if (index < ek_parameters_gpu->number_of_nodes) { - ek_parameters_gpu->lb_force_density_previous[index] = - node_f.force_density[index][0]; - ek_parameters_gpu - ->lb_force_density_previous[ek_parameters_gpu->number_of_nodes + - index] = node_f.force_density[index][1]; - ek_parameters_gpu - ->lb_force_density_previous[2 * ek_parameters_gpu->number_of_nodes + - index] = node_f.force_density[index][2]; - - node_f.force_density[index] = {}; - } -} - -void ek_calculate_electrostatic_coupling() { - - if ((!ek_parameters.es_coupling) || (!ek_initialized)) - return; - - auto device_particles = gpu_get_particle_pointer(); - dim3 dim_grid = calculate_dim_grid( - static_cast(device_particles.size()), 4, threads_per_block); - - KERNELCALL(ek_spread_particle_force, dim_grid, threads_per_block, - device_particles.data(), device_particles.size(), - gpu_get_particle_force_pointer(), ek_lbparameters_gpu); -} - -void ek_integrate_electrostatics() { - - dim3 dim_grid = - calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); - - KERNELCALL(ek_gather_species_charge_density, dim_grid, threads_per_block); - - if (ek_parameters.es_coupling) { - cuda_safe_mem(cudaMemcpy( - ek_parameters.charge_potential_buffer, ek_parameters.charge_potential, - sizeof(cufftComplex) * ek_parameters.dim_z * ek_parameters.dim_y * - (ek_parameters.dim_x / 2 + 1), - cudaMemcpyDeviceToDevice)); - electrostatics->calculatePotential( - (cufftComplex *)ek_parameters.charge_potential_buffer); - KERNELCALL(ek_calc_electric_field, dim_grid, threads_per_block, - ek_parameters.charge_potential_buffer); - } - - auto device_particles = gpu_get_particle_pointer(); - // TODO make it an if number_of_charged_particles != 0 - if (not device_particles.empty()) { - dim_grid = calculate_dim_grid( - static_cast(device_particles.size()), 4, threads_per_block); - - particle_data_gpu = 
device_particles.data(); - - KERNELCALL(ek_gather_particle_charge_density, dim_grid, threads_per_block, - particle_data_gpu, device_particles.size(), ek_lbparameters_gpu); - } - - electrostatics->calculatePotential(); -} - -void ek_integrate() { - dim3 dim_grid = - calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); - - /* Clears the force on the nodes and must be called before fluxes are - calculated, since in the reaction set up the previous-step LB force is - added to the flux - (in ek_calculate_quantities / ek_displacement), which is copied in this - routine */ - - // KERNELCALL( ek_clear_node_force, dim_grid, threads_per_block, node_f ); - - /* Integrate diffusion-advection */ - for (unsigned i = 0; i < ek_parameters.number_of_species; i++) { - KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block); - KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, i, - *current_nodes, node_f, ek_lbparameters_gpu, - philox_counter.value()); - - KERNELCALL(ek_propagate_densities, dim_grid, threads_per_block, i); - } - - /* Integrate electrostatics */ - ek_integrate_electrostatics(); - - /* Integrate Navier-Stokes */ - lb_integrate_GPU(); - - philox_counter.increment(); -} - -#ifdef EK_BOUNDARIES -void ek_gather_wallcharge_species_density(float *wallcharge_species_density, - int wallcharge_species) { - if (wallcharge_species != -1) { - cuda_safe_mem(cudaMemcpy( - wallcharge_species_density, ek_parameters.rho[wallcharge_species], - ek_parameters.number_of_nodes * sizeof(float), cudaMemcpyDeviceToHost)); - } -} -void ek_init_species_density_wallcharge(float *wallcharge_species_density, - int wallcharge_species) { - dim3 dim_grid = - calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); - - KERNELCALL(ek_clear_boundary_densities, dim_grid, threads_per_block, - *current_nodes); - - if (wallcharge_species != -1) { - cuda_safe_mem(cudaMemcpy( - ek_parameters.rho[wallcharge_species], wallcharge_species_density, - 
ek_parameters.number_of_nodes * sizeof(float), cudaMemcpyHostToDevice)); - } -} -#endif - -void ek_init_species(int species) { - if (!ek_initialized) { - ek_init(); - } - - if (ek_parameters.species_index[species] == -1) { - ek_parameters.species_index[species] = - static_cast(ek_parameters.number_of_species); - ek_parameters.number_of_species++; - - cuda_safe_mem(cudaMalloc( - (void **)&ek_parameters.rho[ek_parameters.species_index[species]], - ek_parameters.number_of_nodes * sizeof(float))); - - ek_parameters.density[ek_parameters.species_index[species]] = 0.0; - ek_parameters.D[ek_parameters.species_index[species]] = 0.0; - ek_parameters.valency[ek_parameters.species_index[species]] = 0.0; - ek_parameters.ext_force_density[0][ek_parameters.species_index[species]] = - 0.0; - ek_parameters.ext_force_density[1][ek_parameters.species_index[species]] = - 0.0; - ek_parameters.ext_force_density[2][ek_parameters.species_index[species]] = - 0.0; - ek_parameters.d[ek_parameters.species_index[species]] = - ek_parameters.D[ek_parameters.species_index[species]] / - (1.0f + 2.0f * sqrt(2.0f)); - } -} - -int ek_init() { - if (ek_parameters.agrid < 0.0 || ek_parameters.viscosity < 0.0 || - ek_parameters.T < 0.0 || ek_parameters.prefactor < 0.0) { - - fprintf(stderr, "ERROR: invalid agrid, viscosity, T or prefactor\n"); - - return 1; - } - - if (!ek_initialized) { - for (auto &val : ek_parameters.species_index) { - val = -1; - } - - if (lattice_switch != ActiveLB::NONE) { - fprintf(stderr, - "ERROR: Electrokinetics automatically initializes the LB on the " - "GPU and can therefore not be used in conjunction with LB.\n"); - fprintf(stderr, "ERROR: Please run either electrokinetics or LB.\n"); - - return 1; - } - - lattice_switch = ActiveLB::GPU; - ek_initialized = true; - - lbpar_gpu.agrid = ek_parameters.agrid; - lbpar_gpu.viscosity = 1.0; // dummy values (real initialization later) - lbpar_gpu.bulk_viscosity = 1.0; // dummy values (real initialization later) - 
lb_lbcoupling_set_gamma(ek_parameters.friction); - - // Convert the density (given in MD units) to LB units - lbpar_gpu.rho = - (ek_parameters.lb_density < 0.0) - ? 1.0f - : ek_parameters.lb_density * Utils::int_pow<3>(lbpar_gpu.agrid); - - lbpar_gpu.is_TRT = true; - - lb_reinit_parameters_gpu(); - auto const time_step = static_cast(get_time_step()); - lbpar_gpu.viscosity = - ek_parameters.viscosity * time_step / Utils::sqr(lbpar_gpu.agrid); - lbpar_gpu.bulk_viscosity = - ek_parameters.bulk_viscosity * time_step / Utils::sqr(lbpar_gpu.agrid); - - lbpar_gpu.external_force_density = - ek_parameters.lb_ext_force_density[0] != 0.f || - ek_parameters.lb_ext_force_density[1] != 0.f || - ek_parameters.lb_ext_force_density[2] != 0.f; - lbpar_gpu.ext_force_density = - Utils::Vector3f(ek_parameters.lb_ext_force_density) * - Utils::sqr(lbpar_gpu.agrid * time_step); - - lb_reinit_parameters_gpu(); - lb_init_gpu(); - - ek_parameters.time_step = time_step; - ek_parameters.dim_x = lbpar_gpu.dim[0]; - ek_parameters.dim_x_padded = (ek_parameters.dim_x / 2 + 1) * 2; - ek_parameters.dim_y = lbpar_gpu.dim[1]; - ek_parameters.dim_z = lbpar_gpu.dim[2]; - ek_parameters.number_of_nodes = - ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z; - - cuda_safe_mem( - cudaMalloc((void **)&ek_parameters.j, - ek_parameters.number_of_nodes * 13 * sizeof(float))); -#ifdef EK_DEBUG - cuda_safe_mem( - cudaMalloc((void **)&ek_parameters.j_fluc, - ek_parameters.number_of_nodes * 13 * sizeof(float))); -#endif - - cuda_safe_mem(cudaMemcpyToSymbol(ek_parameters_gpu, &ek_parameters, - sizeof(EKParameters))); - - lb_get_para_pointer(&ek_lbparameters_gpu); - - cuda_safe_mem( - cudaMalloc((void **)&ek_parameters.lb_force_density_previous, - ek_parameters.number_of_nodes * 3 * sizeof(float))); - - if (ek_parameters.es_coupling) { - cuda_safe_mem(cudaMalloc((void **)&ek_parameters.charge_potential_buffer, - sizeof(cufftComplex) * ek_parameters.dim_z * - ek_parameters.dim_y * - (ek_parameters.dim_x / 
2 + 1))); - cuda_safe_mem( - cudaMalloc((void **)&ek_parameters.electric_field, - ek_parameters.number_of_nodes * 3 * sizeof(float))); - } - - cuda_safe_mem(cudaMalloc((void **)&charge_gpu, sizeof(float))); - - if (cudaGetLastError() != cudaSuccess) { - fprintf(stderr, "ERROR: Failed to allocate\n"); - return 1; - } - - cudaMallocHost((void **)&ek_parameters.node_is_catalyst, - sizeof(char) * ek_parameters.dim_z * ek_parameters.dim_y * - ek_parameters.dim_x); - - if (cudaGetLastError() != cudaSuccess) { - fprintf(stderr, "ERROR: Failed to allocate\n"); - return 1; - } - - // initialize electrostatics - delete electrostatics; - - FdElectrostatics::InputParameters es_parameters = { - ek_parameters.prefactor, int(ek_parameters.dim_x), - int(ek_parameters.dim_y), int(ek_parameters.dim_z), - ek_parameters.agrid}; - try { - electrostatics = new FdElectrostatics(es_parameters, stream[0]); - } catch (std::string e) { - std::cerr << "Error in initialization of electrokinetics electrostatics " - "solver: " - << e << std::endl; - return 1; - } - - ek_parameters.charge_potential = electrostatics->getGrid().grid; - cuda_safe_mem(cudaMemcpyToSymbol(ek_parameters_gpu, &ek_parameters, - sizeof(EKParameters))); - - // clear initial LB force and finish up - dim3 dim_grid = calculate_dim_grid( - ek_parameters.dim_z * ek_parameters.dim_y * ek_parameters.dim_x, 4, - threads_per_block); - KERNELCALL(ek_clear_node_force, dim_grid, threads_per_block, node_f); - - ek_initialized = true; - } else { - auto const not_close = [](float a, float b) { - return std::abs(a - b) > std::numeric_limits::epsilon(); - }; - if (not_close(lbpar_gpu.agrid, ek_parameters.agrid) || - not_close(lbpar_gpu.viscosity, ek_parameters.viscosity * - ek_parameters.time_step / - Utils::sqr(ek_parameters.agrid)) || - not_close(lbpar_gpu.bulk_viscosity, - ek_parameters.bulk_viscosity * ek_parameters.time_step / - Utils::sqr(ek_parameters.agrid)) || - not_close(static_cast(lb_lbcoupling_get_gamma()), - 
ek_parameters.friction) || - not_close(lbpar_gpu.rho, ek_parameters.lb_density * - Utils::int_pow<3>(ek_parameters.agrid))) { - fprintf(stderr, - "ERROR: The LB parameters on the GPU cannot be reinitialized.\n"); - - return 1; - } - cuda_safe_mem(cudaMemcpyToSymbol(ek_parameters_gpu, &ek_parameters, - sizeof(EKParameters))); - - dim3 dim_grid = - calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); - - KERNELCALL(ek_init_species_density_homogeneous, dim_grid, - threads_per_block); - -#ifdef EK_BOUNDARIES - LBBoundaries::lb_init_boundaries(); - lb_get_boundary_force_pointer(&ek_lb_boundary_force); - - cuda_safe_mem(cudaMemcpyToSymbol(ek_parameters_gpu, &ek_parameters, - sizeof(EKParameters))); -#endif - - ek_integrate_electrostatics(); - } - return 0; -} - -unsigned int ek_calculate_boundary_mass() { - std::vector bound_array(lbpar_gpu.number_of_nodes); - - lb_get_boundary_flags_GPU(bound_array.data()); - - unsigned int boundary_node_number = 0; - - for (unsigned j = 0; j < ek_parameters.number_of_nodes; j++) - if (bound_array[j] != 0) - boundary_node_number++; - - return boundary_node_number; -} - -void rhoindex_linear2cartesian_host(unsigned int index, unsigned int *coord) { - - coord[0] = index % ek_parameters.dim_x; - index /= ek_parameters.dim_x; - coord[1] = index % ek_parameters.dim_y; - coord[2] = index / ek_parameters.dim_y; -} - -unsigned int jindex_cartesian2linear_host(unsigned int x, unsigned int y, - unsigned int z, unsigned int c) { - x = (x + ek_parameters.dim_x) % ek_parameters.dim_x; - y = (y + ek_parameters.dim_y) % ek_parameters.dim_y; - z = (z + ek_parameters.dim_z) % ek_parameters.dim_z; - - return c * ek_parameters.number_of_nodes + - z * ek_parameters.dim_y * ek_parameters.dim_x + - y * ek_parameters.dim_x + x; -} - -unsigned int jindex_getByRhoLinear_host(unsigned int rho_index, - unsigned int c) { - - return c * ek_parameters.number_of_nodes + rho_index; -} - -unsigned int rhoindex_cartesian2linear_host(unsigned int x, 
unsigned int y, - unsigned int z) { - - return z * ek_parameters.dim_y * ek_parameters.dim_x + - y * ek_parameters.dim_x + x; -} - -int ek_lb_print_vtk_velocity(char *filename) { - - FILE *fp = fopen(filename, "w"); - - if (fp == nullptr) { - return 1; - } - - std::vector host_values(lbpar_gpu.number_of_nodes); - lb_get_values_GPU(host_values.data()); - auto const lattice_speed = lbpar_gpu.agrid / lbpar_gpu.tau; - fprintf(fp, "\ -# vtk DataFile Version 2.0\n\ -velocity\n\ -ASCII\n\ -\n\ -DATASET STRUCTURED_POINTS\n\ -DIMENSIONS %u %u %u\n\ -ORIGIN %f %f %f\n\ -SPACING %f %f %f\n\ -\nPOINT_DATA %u\n\ -SCALARS velocity float 3\n\ -LOOKUP_TABLE default\n", - lbpar_gpu.dim[0], lbpar_gpu.dim[1], lbpar_gpu.dim[2], - lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid * 0.5f, - lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid, lbpar_gpu.agrid, - lbpar_gpu.agrid, lbpar_gpu.number_of_nodes); - - for (unsigned i = 0; i < lbpar_gpu.number_of_nodes; i++) { - fprintf(fp, "%e %e %e\n", host_values[i].v[0] * lattice_speed, - host_values[i].v[1] * lattice_speed, - host_values[i].v[2] * lattice_speed); - } - - fclose(fp); - - return 0; -} - -int ek_lb_print_vtk_density(char *filename) { - - FILE *fp = fopen(filename, "w"); - - if (fp == nullptr) { - return 1; - } - - std::vector host_values(lbpar_gpu.number_of_nodes); - lb_get_values_GPU(host_values.data()); - - fprintf(fp, "\ -# vtk DataFile Version 2.0\n\ -density_lb\n\ -ASCII\n\ -\n\ -DATASET STRUCTURED_POINTS\n\ -DIMENSIONS %u %u %u\n\ -ORIGIN %f %f %f\n\ -SPACING %f %f %f\n\ -\n\ -POINT_DATA %u\n\ -SCALARS density_lb float 1\n\ -LOOKUP_TABLE default\n", - lbpar_gpu.dim[0], lbpar_gpu.dim[1], lbpar_gpu.dim[2], - lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid * 0.5f, - lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid, lbpar_gpu.agrid, - lbpar_gpu.agrid, lbpar_gpu.number_of_nodes); - auto const agrid = lb_lbfluid_get_agrid(); - for (unsigned i = 0; i < lbpar_gpu.number_of_nodes; i++) { - fprintf(fp, "%e\n", host_values[i].rho / agrid / agrid / agrid); - } - - 
fclose(fp); - - return 0; -} - -int ek_print_vtk_density(int species, char *filename) { - - if (ek_parameters.species_index[species] == -1) { - return 1; - } - - FILE *fp = fopen(filename, "w"); - - if (fp == nullptr) { - return 1; - } - - std::vector densities(ek_parameters.number_of_nodes); - - cuda_safe_mem(cudaMemcpy( - densities.data(), ek_parameters.rho[ek_parameters.species_index[species]], - densities.size() * sizeof(float), cudaMemcpyDeviceToHost)); - - fprintf(fp, "\ -# vtk DataFile Version 2.0\n\ -density_%d\n\ -ASCII\n\ -\n\ -DATASET STRUCTURED_POINTS\n\ -DIMENSIONS %u %u %u\n\ -ORIGIN %f %f %f\n\ -SPACING %f %f %f\n\ -\n\ -POINT_DATA %u\n\ -SCALARS density_%d float 1\n\ -LOOKUP_TABLE default\n", - species, ek_parameters.dim_x, ek_parameters.dim_y, - ek_parameters.dim_z, ek_parameters.agrid * 0.5f, - ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f, - ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, - ek_parameters.number_of_nodes, species); - - for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { - fprintf(fp, "%e\n", densities[i] / Utils::int_pow<3>(ek_parameters.agrid)); - } - - fclose(fp); - - return 0; -} - -int ek_node_get_density(int species, int x, int y, int z, double *density) { - - if (ek_parameters.species_index[species] == -1) { - return 1; - } - - std::vector densities(ek_parameters.number_of_nodes); - - cuda_safe_mem(cudaMemcpy( - densities.data(), ek_parameters.rho[ek_parameters.species_index[species]], - densities.size() * sizeof(float), cudaMemcpyDeviceToHost)); - - auto const index = - static_cast(z) * ek_parameters.dim_y * ek_parameters.dim_x + - static_cast(y) * ek_parameters.dim_x + static_cast(x); - *density = densities[index] / Utils::int_pow<3>(ek_parameters.agrid); - - return 0; -} - -int ek_node_get_flux(int species, int x, int y, int z, double *flux) { - - if (ek_parameters.species_index[species] == -1) { - return 1; - } - - float flux_local_cartesian[3]; // temporary variable for converting 
fluxes - // into Cartesian coordinates for output - unsigned int coord[3]; - - coord[0] = static_cast(x); - coord[1] = static_cast(y); - coord[2] = static_cast(z); - - std::vector fluxes(ek_parameters.number_of_nodes * 13); - - dim3 dim_grid = - calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); - - KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block); - KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, - static_cast(ek_parameters.species_index[species]), - *current_nodes, node_f, ek_lbparameters_gpu, - philox_counter.value()); - reset_LB_force_densities_GPU(false); - -#ifdef EK_BOUNDARIES - KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, *current_nodes); -#endif - - cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j, - fluxes.size() * sizeof(float), - cudaMemcpyDeviceToHost)); - - auto const i = rhoindex_cartesian2linear_host(coord[0], coord[1], coord[2]); - - flux_local_cartesian[0] = - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U00)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UD0)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1], - coord[2], EK_LINK_D00 - 13)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1, - coord[2], 
EK_LINK_DD0 - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1, - coord[2], EK_LINK_DU0 - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)]; - - flux_local_cartesian[1] = - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0U0)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UD0)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0], coord[1] - 1, - coord[2], EK_LINK_0D0 - 13)]; - - flux_local_cartesian[1] += - 0.5f * 
fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1, - coord[2], EK_LINK_DD0 - 13)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1, - coord[2], EK_LINK_DU0 - 13)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)]; - - flux_local_cartesian[2] = - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_00U)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1], coord[2] - 1, EK_LINK_00D - 
13)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)]; - - flux[0] = flux_local_cartesian[0] / - (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid)); - flux[1] = flux_local_cartesian[1] / - (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid)); - flux[2] = flux_local_cartesian[2] / - (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid)); - - return 0; -} - -int ek_node_set_density(int species, int x, int y, int z, double density) { - - if (ek_parameters.species_index[species] == -1) { - return 1; - } - - auto const index = - static_cast(z) * ek_parameters.dim_y * ek_parameters.dim_x + - static_cast(y) * ek_parameters.dim_x + static_cast(x); - float num_particles = - static_cast(density) * Utils::int_pow<3>(ek_parameters.agrid); - - cuda_safe_mem(cudaMemcpy( - &ek_parameters.rho[ek_parameters.species_index[species]][index], - &num_particles, sizeof(float), cudaMemcpyHostToDevice)); - - return 0; -} - -int 
ek_print_vtk_flux(int species, char *filename) { - - if (ek_parameters.species_index[species] == -1) { - return 1; - } - - FILE *fp = fopen(filename, "w"); - - if (fp == nullptr) { - return 1; - } - - float flux_local_cartesian[3]; // temporary variable for converting fluxes - // into Cartesian coordinates for output - - unsigned int coord[3]; - - std::vector fluxes(ek_parameters.number_of_nodes * 13); - - dim3 dim_grid = - calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); - - KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block); - KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, - static_cast(ek_parameters.species_index[species]), - *current_nodes, node_f, ek_lbparameters_gpu, - philox_counter.value()); - reset_LB_force_densities_GPU(false); - -#ifdef EK_BOUNDARIES - KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, *current_nodes); -#endif - - cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j, - fluxes.size() * sizeof(float), - cudaMemcpyDeviceToHost)); - - fprintf(fp, "\ -# vtk DataFile Version 2.0\n\ -flux_%d\n\ -ASCII\n\ -\n\ -DATASET STRUCTURED_POINTS\n\ -DIMENSIONS %u %u %u\n\ -ORIGIN %f %f %f\n\ -SPACING %f %f %f\n\ -\n\ -POINT_DATA %u\n\ -SCALARS flux_%d float 3\n\ -LOOKUP_TABLE default\n", - species, ek_parameters.dim_x, ek_parameters.dim_y, - ek_parameters.dim_z, ek_parameters.agrid * 0.5f, - ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f, - ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, - ek_parameters.number_of_nodes, species); - - for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { - rhoindex_linear2cartesian_host(i, coord); - - flux_local_cartesian[0] = - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U00)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UD0)]; - flux_local_cartesian[0] += - 0.5f * 
fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1], - coord[2], EK_LINK_D00 - 13)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1, - coord[2], EK_LINK_DD0 - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1, - coord[2], EK_LINK_DU0 - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)]; - - flux_local_cartesian[1] = - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0U0)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, 
EK_LINK_UD0)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0], coord[1] - 1, - coord[2], EK_LINK_0D0 - 13)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1, - coord[2], EK_LINK_DD0 - 13)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1, - coord[2], EK_LINK_DU0 - 13)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)]; - - flux_local_cartesian[2] = - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_00U)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)]; - flux_local_cartesian[2] -= 
- 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1], coord[2] - 1, EK_LINK_00D - 13)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)]; - - fprintf(fp, "%e %e %e\n", - flux_local_cartesian[0] / - (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid)), - flux_local_cartesian[1] / - 
(ek_parameters.time_step * Utils::sqr(ek_parameters.agrid)), - flux_local_cartesian[2] / - (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid))); - } - - fclose(fp); - - return 0; -} - -int ek_print_vtk_flux_fluc(int species, char *filename) { -#ifndef EK_DEBUG - return 1; -#else - if (ek_parameters.species_index[species] == -1) { - return 1; - } - - FILE *fp = fopen(filename, "w"); - float flux_local_cartesian[3]; // temporary variable for converting fluxes - // into cartesian coordinates for output - - unsigned int coord[3]; - - if (fp == nullptr) { - return 1; - } - - std::vector fluxes(ek_parameters.number_of_nodes * 13); - - dim3 dim_grid = - calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); - - KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block); - KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, - static_cast(ek_parameters.species_index[species]), - *current_nodes, node_f, ek_lbparameters_gpu, - philox_counter.value()); - reset_LB_force_densities_GPU(false); - -#ifdef EK_BOUNDARIES - KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, *current_nodes); -#endif - - cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j_fluc, - fluxes.size() * sizeof(float), - cudaMemcpyDeviceToHost)); - - fprintf(fp, "\ -# vtk DataFile Version 2.0\n\ -flux_fluc_%d\n\ -ASCII\n\ -\n\ -DATASET STRUCTURED_POINTS\n\ -DIMENSIONS %u %u %u\n\ -ORIGIN %f %f %f\n\ -SPACING %f %f %f\n\ -\n\ -POINT_DATA %u\n\ -SCALARS flux_fluc_%d float 4\n\ -LOOKUP_TABLE default\n", - species, ek_parameters.dim_x, ek_parameters.dim_y, - ek_parameters.dim_z, ek_parameters.agrid * 0.5f, - ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f, - ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, - ek_parameters.number_of_nodes, species); - - for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { - - float flux_local_linksum = 0; - rhoindex_linear2cartesian_host(i, coord); - - flux_local_cartesian[0] = - 0.5f * 
fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U00)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UD0)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1], - coord[2], EK_LINK_D00 - 13)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1, - coord[2], EK_LINK_DD0 - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1, - coord[2], EK_LINK_DU0 - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)]; - - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)]; - flux_local_cartesian[0] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 
13)]; - - flux_local_cartesian[1] = - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0U0)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UU0)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UD0)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0], coord[1] - 1, - coord[2], EK_LINK_0D0 - 13)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] - 1, - coord[2], EK_LINK_DD0 - 13)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_cartesian2linear_host(coord[0] - 1, coord[1] + 1, - coord[2], EK_LINK_DU0 - 13)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)]; - - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)]; - flux_local_cartesian[1] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)]; - flux_local_cartesian[1] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 
1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)]; - - flux_local_cartesian[2] = - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_00U)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0U)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_U0D)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UU)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_0UD)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUU)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UUD)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDU)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_getByRhoLinear_host(i, EK_LINK_UDD)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1], coord[2] - 1, EK_LINK_00D - 13)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] - 1, EK_LINK_D0D - 13)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1], coord[2] + 1, EK_LINK_D0U - 13)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] - 1, EK_LINK_0DD - 13)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0], coord[1] - 1, coord[2] + 1, EK_LINK_0DU - 13)]; - - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] - 1, EK_LINK_DDD - 13)]; - flux_local_cartesian[2] -= - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] - 1, coord[2] + 1, EK_LINK_DDU - 13)]; - flux_local_cartesian[2] += - 0.5f * fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] - 1, EK_LINK_DUD - 13)]; - flux_local_cartesian[2] -= - 0.5f * 
fluxes[jindex_cartesian2linear_host( - coord[0] - 1, coord[1] + 1, coord[2] + 1, EK_LINK_DUU - 13)]; - - for (int j = 0; j < 13; j++) { - flux_local_linksum += fluxes[jindex_getByRhoLinear_host(i, j)]; - } - - fprintf( - fp, "%e %e %e %e\n", - flux_local_cartesian[0] / (ek_parameters.agrid * ek_parameters.agrid), - flux_local_cartesian[1] / (ek_parameters.agrid * ek_parameters.agrid), - flux_local_cartesian[2] / (ek_parameters.agrid * ek_parameters.agrid), - flux_local_linksum / (ek_parameters.agrid * ek_parameters.agrid)); - } - - fclose(fp); - - return 0; -#endif // EK_DEBUG -} - -int ek_print_vtk_flux_link(int species, char *filename) { - - if (ek_parameters.species_index[species] == -1) { - return 1; - } - - FILE *fp = fopen(filename, "w"); - - if (fp == nullptr) { - return 1; - } - - unsigned int coord[3]; - - std::vector fluxes(ek_parameters.number_of_nodes * 13); - - dim3 dim_grid = - calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); - - KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block); - KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, - static_cast(ek_parameters.species_index[species]), - *current_nodes, node_f, ek_lbparameters_gpu, - philox_counter.value()); - reset_LB_force_densities_GPU(false); - -#ifdef EK_BOUNDARIES - KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, *current_nodes); -#endif - - cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j, - fluxes.size() * sizeof(float), - cudaMemcpyDeviceToHost)); - - fprintf(fp, "\ -# vtk DataFile Version 2.0\n\ -flux_link_%d\n\ -ASCII\n\ -\n\ -DATASET STRUCTURED_POINTS\n\ -DIMENSIONS %u %u %u\n\ -ORIGIN %f %f %f\n\ -SPACING %f %f %f\n\ -\n\ -POINT_DATA %u\n\ -SCALARS flux_link_%d float 13\n\ -LOOKUP_TABLE default\n", - species, ek_parameters.dim_x, ek_parameters.dim_y, - ek_parameters.dim_z, ek_parameters.agrid * 0.5f, - ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f, - ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, - 
ek_parameters.number_of_nodes, species); - - for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { - rhoindex_linear2cartesian_host(i, coord); - - fprintf(fp, "%e %e %e %e %e %e %e %e %e %e %e %e %e\n", - fluxes[jindex_getByRhoLinear_host(i, 0)], - fluxes[jindex_getByRhoLinear_host(i, 1)], - fluxes[jindex_getByRhoLinear_host(i, 2)], - fluxes[jindex_getByRhoLinear_host(i, 3)], - fluxes[jindex_getByRhoLinear_host(i, 4)], - fluxes[jindex_getByRhoLinear_host(i, 5)], - fluxes[jindex_getByRhoLinear_host(i, 6)], - fluxes[jindex_getByRhoLinear_host(i, 7)], - fluxes[jindex_getByRhoLinear_host(i, 8)], - fluxes[jindex_getByRhoLinear_host(i, 9)], - fluxes[jindex_getByRhoLinear_host(i, 10)], - fluxes[jindex_getByRhoLinear_host(i, 11)], - fluxes[jindex_getByRhoLinear_host(i, 12)]); - } - - fclose(fp); - - return 0; -} - -int ek_node_get_potential(int x, int y, int z, double *potential) { - auto const index = static_cast(z) * ek_parameters.dim_y * - ek_parameters.dim_x_padded + - static_cast(y) * ek_parameters.dim_x_padded + - static_cast(x); - float pot; - - cuda_safe_mem(cudaMemcpy(&pot, &ek_parameters.charge_potential[index], - 1 * sizeof(cufftReal), cudaMemcpyDeviceToHost)); - - *potential = pot; - return 0; -} - -int ek_print_vtk_potential(char *filename) { - - FILE *fp = fopen(filename, "w"); - - if (fp == nullptr) { - return 1; - } - - std::vector potential(ek_parameters.number_of_nodes); - - cuda_safe_mem(cudaMemcpy2D( - potential.data(), ek_parameters.dim_x * sizeof(cufftReal), - ek_parameters.charge_potential, - ek_parameters.dim_x_padded * sizeof(cufftReal), - ek_parameters.dim_x * sizeof(cufftReal), - ek_parameters.dim_z * ek_parameters.dim_y, cudaMemcpyDeviceToHost)); - - fprintf(fp, "\ -# vtk DataFile Version 2.0\n\ -potential\n\ -ASCII\n\ -\n\ -DATASET STRUCTURED_POINTS\n\ -DIMENSIONS %u %u %u\n\ -ORIGIN %f %f %f\n\ -SPACING %f %f %f\n\ -\n\ -POINT_DATA %u\n\ -SCALARS potential float 1\n\ -LOOKUP_TABLE default\n", - ek_parameters.dim_x, 
ek_parameters.dim_y, ek_parameters.dim_z, - ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f, - ek_parameters.agrid * 0.5f, ek_parameters.agrid, ek_parameters.agrid, - ek_parameters.agrid, ek_parameters.number_of_nodes); - - for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { - fprintf(fp, "%e\n", potential[i]); - } - - fclose(fp); - - return 0; -} - -int ek_print_vtk_particle_potential(char *filename) { - - FILE *fp = fopen(filename, "w"); - - if (fp == nullptr) { - return 1; - } - - std::vector potential(ek_parameters.number_of_nodes); - - cuda_safe_mem(cudaMemcpy2D( - potential.data(), ek_parameters.dim_x * sizeof(cufftReal), - ek_parameters.charge_potential_buffer, - ek_parameters.dim_x_padded * sizeof(cufftReal), - ek_parameters.dim_x * sizeof(cufftReal), - ek_parameters.dim_z * ek_parameters.dim_y, cudaMemcpyDeviceToHost)); - - fprintf(fp, "\ -# vtk DataFile Version 2.0\n\ -potential\n\ -ASCII\n\ -\n\ -DATASET STRUCTURED_POINTS\n\ -DIMENSIONS %u %u %u\n\ -ORIGIN %f %f %f\n\ -SPACING %f %f %f\n\ -\n\ -POINT_DATA %u\n\ -SCALARS potential float 1\n\ -LOOKUP_TABLE default\n", - ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z, - ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f, - ek_parameters.agrid * 0.5f, ek_parameters.agrid, ek_parameters.agrid, - ek_parameters.agrid, ek_parameters.number_of_nodes); - - for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { - fprintf(fp, "%e\n", potential[i]); - } - - fclose(fp); - - return 0; -} - -int ek_print_vtk_lbforce_density(char *filename) { -#if !defined(VIRTUAL_SITES_INERTIALESS_TRACERS) && !defined(EK_DEBUG) - throw std::runtime_error("Please rebuild ESPResSo with EK_DEBUG"); -#else - - FILE *fp = fopen(filename, "w"); - - if (fp == nullptr) { - return 1; - } - - std::vector lbforce_density(ek_parameters.number_of_nodes * 3); - - cuda_safe_mem(cudaMemcpy(lbforce_density.data(), node_f.force_density_buf, - ek_parameters.number_of_nodes * 3 * sizeof(float), - 
cudaMemcpyDeviceToHost)); - - fprintf(fp, "\ -# vtk DataFile Version 2.0\n\ -lbforce\n\ -ASCII\n\ -\n\ -DATASET STRUCTURED_POINTS\n\ -DIMENSIONS %u %u %u\n\ -ORIGIN %f %f %f\n\ -SPACING %f %f %f\n\ -\n\ -POINT_DATA %u\n\ -SCALARS lbforce float 3\n\ -LOOKUP_TABLE default\n", - ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z, - ek_parameters.agrid * 0.5f, ek_parameters.agrid * 0.5f, - ek_parameters.agrid * 0.5f, ek_parameters.agrid, ek_parameters.agrid, - ek_parameters.agrid, ek_parameters.number_of_nodes); - - auto const norm = (Utils::int_pow<2>(ek_parameters.time_step) * - Utils::int_pow<4>(ek_parameters.agrid)); - for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { - fprintf(fp, "%e %e %e\n", lbforce_density[i] / norm, - lbforce_density[i + ek_parameters.number_of_nodes] / norm, - lbforce_density[i + 2 * ek_parameters.number_of_nodes] / norm); - } - - fclose(fp); - - return 0; -#endif -} - -void ek_print_parameters() { - - printf("ek_parameters {\n"); - - printf(" float agrid = %f;\n", ek_parameters.agrid); - printf(" float time_step = %f;\n", ek_parameters.time_step); - printf(" float lb_density = %f;\n", ek_parameters.lb_density); - printf(" unsigned int dim_x = %d;\n", ek_parameters.dim_x); - printf(" unsigned int dim_y = %d;\n", ek_parameters.dim_y); - printf(" unsigned int dim_z = %d;\n", ek_parameters.dim_z); - printf(" unsigned int number_of_nodes = %d;\n", - ek_parameters.number_of_nodes); - printf(" float viscosity = %f;\n", ek_parameters.viscosity); - printf(" float bulk_viscosity = %f;\n", ek_parameters.bulk_viscosity); - printf(" float gamma_odd = %f;\n", ek_parameters.gamma_odd); - printf(" float gamma_even = %f;\n", ek_parameters.gamma_even); - printf(" float friction = %f;\n", ek_parameters.friction); - printf(" float T = %f;\n", ek_parameters.T); - printf(" float prefactor = %f;\n", ek_parameters.prefactor); - printf(" float lb_ext_force_density[] = {%f, %f, %f};\n", - ek_parameters.lb_ext_force_density[0], - 
ek_parameters.lb_ext_force_density[1], - ek_parameters.lb_ext_force_density[2]); - printf(" unsigned int number_of_species = %d;\n", - ek_parameters.number_of_species); - printf(" int reaction_species[] = {%d, %d, %d};\n", - ek_parameters.reaction_species[0], ek_parameters.reaction_species[1], - ek_parameters.reaction_species[2]); - printf(" float rho_reactant_reservoir = %f;\n", - ek_parameters.rho_reactant_reservoir); - printf(" float rho_product0_reservoir = %f;\n", - ek_parameters.rho_product0_reservoir); - printf(" float rho_product1_reservoir = %f;\n", - ek_parameters.rho_product1_reservoir); - printf(" float reaction_ct_rate = %f;\n", ek_parameters.reaction_ct_rate); - printf(" float reaction_fraction_0 = %f;\n", - ek_parameters.reaction_fraction_0); - printf(" float reaction_fraction_1 = %f;\n", - ek_parameters.reaction_fraction_0); - printf(" float* j = %p;\n", (void *)ek_parameters.j); - - printf(" float* rho[] = {%p, %p, %p, %p, %p, %p, %p, %p, %p, %p};\n", - (void *)ek_parameters.rho[0], (void *)ek_parameters.rho[1], - (void *)ek_parameters.rho[2], (void *)ek_parameters.rho[3], - (void *)ek_parameters.rho[4], (void *)ek_parameters.rho[5], - (void *)ek_parameters.rho[6], (void *)ek_parameters.rho[7], - (void *)ek_parameters.rho[8], (void *)ek_parameters.rho[9]); - - printf(" int species_index[] = {%d, %d, %d, %d, %d, %d, %d, %d, %d, %d};\n", - ek_parameters.species_index[0], ek_parameters.species_index[1], - ek_parameters.species_index[2], ek_parameters.species_index[3], - ek_parameters.species_index[4], ek_parameters.species_index[5], - ek_parameters.species_index[6], ek_parameters.species_index[7], - ek_parameters.species_index[8], ek_parameters.species_index[9]); - - printf(" float density = {%f, %f, %f, %f, %f, %f, %f, %f, %f, %f};\n", - ek_parameters.density[0], ek_parameters.density[1], - ek_parameters.density[2], ek_parameters.density[3], - ek_parameters.density[4], ek_parameters.density[5], - ek_parameters.density[6], ek_parameters.density[7], - 
ek_parameters.density[8], ek_parameters.density[9]); - - printf(" float D[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f, %f};\n", - ek_parameters.D[0], ek_parameters.D[1], ek_parameters.D[2], - ek_parameters.D[3], ek_parameters.D[4], ek_parameters.D[5], - ek_parameters.D[6], ek_parameters.D[7], ek_parameters.D[8], - ek_parameters.D[9]); - - printf(" float d[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f, %f};\n", - ek_parameters.d[0], ek_parameters.d[1], ek_parameters.d[2], - ek_parameters.d[3], ek_parameters.d[4], ek_parameters.d[5], - ek_parameters.d[6], ek_parameters.d[7], ek_parameters.d[8], - ek_parameters.d[9]); - - printf(" float valency[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f, %f};\n", - ek_parameters.valency[0], ek_parameters.valency[1], - ek_parameters.valency[2], ek_parameters.valency[3], - ek_parameters.valency[4], ek_parameters.valency[5], - ek_parameters.valency[6], ek_parameters.valency[7], - ek_parameters.valency[8], ek_parameters.valency[9]); - - printf(" float ext_force_density[0][] = {%f, %f, %f, %f, %f, %f, %f, %f, " - "%f, %f};\n", - ek_parameters.ext_force_density[0][0], - ek_parameters.ext_force_density[0][1], - ek_parameters.ext_force_density[0][2], - ek_parameters.ext_force_density[0][3], - ek_parameters.ext_force_density[0][4], - ek_parameters.ext_force_density[0][5], - ek_parameters.ext_force_density[0][6], - ek_parameters.ext_force_density[0][7], - ek_parameters.ext_force_density[0][8], - ek_parameters.ext_force_density[0][9]); - - printf(" float ext_force_density[1][] = {%f, %f, %f, %f, %f, %f, %f, %f, " - "%f, %f};\n", - ek_parameters.ext_force_density[1][0], - ek_parameters.ext_force_density[1][1], - ek_parameters.ext_force_density[1][2], - ek_parameters.ext_force_density[1][3], - ek_parameters.ext_force_density[1][4], - ek_parameters.ext_force_density[1][5], - ek_parameters.ext_force_density[1][6], - ek_parameters.ext_force_density[1][7], - ek_parameters.ext_force_density[1][8], - ek_parameters.ext_force_density[1][9]); - - printf(" float 
ext_force_density[2][] = {%f, %f, %f, %f, %f, %f, %f, %f, " - "%f, %f};\n", - ek_parameters.ext_force_density[2][0], - ek_parameters.ext_force_density[2][1], - ek_parameters.ext_force_density[2][2], - ek_parameters.ext_force_density[2][3], - ek_parameters.ext_force_density[2][4], - ek_parameters.ext_force_density[2][5], - ek_parameters.ext_force_density[2][6], - ek_parameters.ext_force_density[2][7], - ek_parameters.ext_force_density[2][8], - ek_parameters.ext_force_density[2][9]); - - printf("}\n"); -} - -void ek_print_lbpar() { - - printf("lbpar_gpu {\n"); - - printf(" float rho = %f;\n", lbpar_gpu.rho); - printf(" float mu = %f;\n", lbpar_gpu.mu); - printf(" float viscosity = %f;\n", lbpar_gpu.viscosity); - printf(" float gamma_shear = %f;\n", lbpar_gpu.gamma_shear); - printf(" float gamma_bulk = %f;\n", lbpar_gpu.gamma_bulk); - printf(" float gamma_odd = %f;\n", lbpar_gpu.gamma_odd); - printf(" float gamma_even = %f;\n", lbpar_gpu.gamma_even); - printf(" float agrid = %f;\n", lbpar_gpu.agrid); - printf(" float tau = %f;\n", lbpar_gpu.tau); - printf(" float bulk_viscosity = %f;\n", lbpar_gpu.bulk_viscosity); - printf(" unsigned int dim_x = %u;\n", lbpar_gpu.dim[0]); - printf(" unsigned int dim_y = %u;\n", lbpar_gpu.dim[1]); - printf(" unsigned int dim_z = %u;\n", lbpar_gpu.dim[2]); - printf(" unsigned int number_of_nodes = %u;\n", lbpar_gpu.number_of_nodes); - printf(" bool external_force_density = %d;\n", - static_cast(lbpar_gpu.external_force_density)); - printf(" float ext_force_density[3] = {%f, %f, %f};\n", - lbpar_gpu.ext_force_density[0], lbpar_gpu.ext_force_density[1], - lbpar_gpu.ext_force_density[2]); - - printf("}\n"); -} - -inline void ek_setter_throw_if_initialized() { - if (ek_initialized) - throw std::runtime_error( - "Electrokinetics parameters cannot be set after initialisation"); -}; - -void ek_set_agrid(float agrid) { - ek_setter_throw_if_initialized(); - ek_parameters.agrid = agrid; -} - -void ek_set_lb_density(float lb_density) { - 
ek_setter_throw_if_initialized(); - ek_parameters.lb_density = lb_density; -} - -void ek_set_prefactor(float prefactor) { - ek_setter_throw_if_initialized(); - ek_parameters.prefactor = prefactor; -} - -void ek_set_electrostatics_coupling(bool electrostatics_coupling) { - ek_setter_throw_if_initialized(); - ek_parameters.es_coupling = electrostatics_coupling; -} - -void ek_set_viscosity(float viscosity) { - ek_setter_throw_if_initialized(); - ek_parameters.viscosity = viscosity; -} - -void ek_set_lb_ext_force_density(float lb_ext_force_dens_x, - float lb_ext_force_dens_y, - float lb_ext_force_dens_z) { - ek_setter_throw_if_initialized(); - ek_parameters.lb_ext_force_density[0] = lb_ext_force_dens_x; - ek_parameters.lb_ext_force_density[1] = lb_ext_force_dens_y; - ek_parameters.lb_ext_force_density[2] = lb_ext_force_dens_z; -} - -void ek_set_friction(float friction) { - ek_setter_throw_if_initialized(); - ek_parameters.friction = friction; -} - -void ek_set_bulk_viscosity(float bulk_viscosity) { - ek_setter_throw_if_initialized(); - ek_parameters.bulk_viscosity = bulk_viscosity; -} - -void ek_set_gamma_odd(float gamma_odd) { - ek_setter_throw_if_initialized(); - ek_parameters.gamma_odd = gamma_odd; -} - -void ek_set_gamma_even(float gamma_even) { - - ek_setter_throw_if_initialized(); - ek_parameters.gamma_even = gamma_even; -} - -void ek_set_stencil(int stencil) { - ek_setter_throw_if_initialized(); - if (!ek_parameters.fluidcoupling_ideal_contribution) - throw std::runtime_error( - "Combination of stencil and fluid coupling not implmented."); - ek_parameters.stencil = stencil; -} - -void ek_set_advection(bool advection) { - ek_setter_throw_if_initialized(); - ek_parameters.advection = advection; -} - -void ek_set_fluctuations(bool fluctuations) { - ek_setter_throw_if_initialized(); - ek_parameters.fluctuations = fluctuations; -} - -void ek_set_fluctuation_amplitude(float fluctuation_amplitude) { - ek_setter_throw_if_initialized(); - 
ek_parameters.fluctuation_amplitude = fluctuation_amplitude; -} - -void ek_set_fluidcoupling(bool ideal_contribution) { - ek_setter_throw_if_initialized(); - if (ek_parameters.stencil != 0) - throw std::runtime_error( - "Combination of stencil and fluid coupling not implemented."); - ek_parameters.fluidcoupling_ideal_contribution = ideal_contribution; -} - -void ek_set_T(float T) { - ek_setter_throw_if_initialized(); - ek_parameters.T = T; -} - -void ek_set_density(int species, float density) { - ek_init_species(species); - ek_parameters.density[ek_parameters.species_index[species]] = density; -} - -void ek_set_D(int species, float D) { - ek_init_species(species); - ek_parameters.D[ek_parameters.species_index[species]] = D; - ek_parameters.d[ek_parameters.species_index[species]] = - D / (1.0f + 2.0f * sqrt(2.0f)); -} - -void ek_set_valency(int species, float valency) { - ek_init_species(species); - ek_parameters.valency[ek_parameters.species_index[species]] = valency; -} - -void ek_set_ext_force_density(int species, float ext_force_density_x, - float ext_force_density_y, - float ext_force_density_z) { - ek_init_species(species); - ek_parameters.ext_force_density[0][ek_parameters.species_index[species]] = - ext_force_density_x; - ek_parameters.ext_force_density[1][ek_parameters.species_index[species]] = - ext_force_density_y; - ek_parameters.ext_force_density[2][ek_parameters.species_index[species]] = - ext_force_density_z; -} - -struct ek_charge_of_particle { - __host__ __device__ float operator()(CUDA_particle_data particle) { - return particle.q; - }; -}; - -float ek_get_particle_charge() { - auto device_particles = gpu_get_particle_pointer(); - float particle_charge = thrust::transform_reduce( - thrust::device_ptr(device_particles.begin()), - thrust::device_ptr(device_particles.end()), - ek_charge_of_particle(), 0.0f, thrust::plus()); - return particle_charge; -} - -float ek_calculate_net_charge() { - cuda_safe_mem(cudaMemset(charge_gpu, 0, sizeof(float))); - - 
dim3 dim_grid = - calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); - - KERNELCALL(ek_calculate_system_charge, dim_grid, threads_per_block, - charge_gpu); - - float charge; - cuda_safe_mem( - cudaMemcpy(&charge, charge_gpu, sizeof(float), cudaMemcpyDeviceToHost)); - - if (ek_parameters.es_coupling) - charge += ek_get_particle_charge(); - - return charge; -} - -int ek_neutralize_system(int species) { - int species_index = ek_parameters.species_index[species]; - - if (species_index == -1) - return 1; - - if (ek_parameters.valency[species_index] == 0.0f) - return 2; - - float compensating_species_density = 0.0f; - -#ifndef EK_BOUNDARIES - for (unsigned i = 0; i < ek_parameters.number_of_species; i++) - compensating_species_density += - ek_parameters.density[i] * ek_parameters.valency[i]; - - compensating_species_density = - ek_parameters.density[species_index] - - compensating_species_density / ek_parameters.valency[species_index]; - - if (ek_parameters.es_coupling) { - float particle_charge = ek_get_particle_charge(); - compensating_species_density -= particle_charge / - ek_parameters.valency[species_index] / - Utils::int_pow<3>(ek_parameters.agrid) / - float(ek_parameters.number_of_nodes); - } - -#else - float charge = ek_calculate_net_charge(); - - compensating_species_density = - ek_parameters.density[species_index] - - (charge / ek_parameters.valency[species_index]) / - (Utils::int_pow<3>(ek_parameters.agrid) * - float(static_cast(ek_parameters.number_of_nodes) - - ek_parameters.number_of_boundary_nodes)); -#endif // EK_BOUNDARIES - - if (compensating_species_density < 0.0f) - return 3; - - ek_parameters.density[species_index] = compensating_species_density; - - return 0; -} - -void ek_set_rng_state(uint64_t counter) { - if (ek_initialized) - philox_counter = Utils::Counter(counter); -} - -#endif /* ELECTROKINETICS */ - -#endif /* CUDA */ diff --git a/src/core/grid_based_algorithms/fd-electrostatics.cuh 
b/src/core/grid_based_algorithms/fd-electrostatics.cuh deleted file mode 100644 index 0bc81ec49b6..00000000000 --- a/src/core/grid_based_algorithms/fd-electrostatics.cuh +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef CORE_GRID_BASED_ALGORITHMS_FD_ELECTROSTATICS_HPP -#define CORE_GRID_BASED_ALGORITHMS_FD_ELECTROSTATICS_HPP - -#include - -class FdElectrostatics { -public: - struct InputParameters { - float prefactor; - int dim_x, dim_y, dim_z; - float agrid; - }; - - struct Parameters : public InputParameters { - Parameters() = default; - Parameters(InputParameters &inputParameters) - : InputParameters(inputParameters) { - charge_potential = nullptr; - greensfcn = nullptr; - dim_x_padded = (inputParameters.dim_x / 2 + 1) * 2; - } - - cufftComplex *charge_potential; - cufftReal *greensfcn; - int dim_x_padded; - }; - - struct Grid { - float *grid; - int dim_x; - int dim_y; - int dim_z; - float agrid; - }; - - ~FdElectrostatics(); - FdElectrostatics(InputParameters inputParameters, cudaStream_t stream); - void calculatePotential(); - void calculatePotential(cufftComplex *charge_potential); - Grid getGrid(); - -private: - Parameters parameters; - cudaStream_t cuda_stream; - cufftHandle plan_fft; - cufftHandle plan_ifft; - bool initialized; -}; - -#endif diff --git 
a/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu b/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu deleted file mode 100644 index ce543341304..00000000000 --- a/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -// TODO: throw exceptions upon errors initialization - -#include "grid_based_algorithms/fd-electrostatics.cuh" - -#include "cuda_utils.cuh" - -#include - -#include -#include - -#include -#include -#include - -#if defined(OMPI_MPI_H) || defined(_MPI_H) -#error CU-file includes mpi.h! This should not happen! 
-#endif - -static constexpr unsigned int threads_per_block = 64; - -__device__ cufftReal fde_getNode(int x, int y, int z); -__device__ cufftReal fde_getNode(int i); -__device__ void fde_setNode(int x, int y, int z, cufftReal value); -__device__ void fde_setNode(int i, cufftReal value); - -__global__ void createGreensfcn(); -__global__ void multiplyGreensfcn(cufftComplex *charge_potential); - -__device__ __constant__ FdElectrostatics::Parameters fde_parameters_gpu[1]; - -__device__ unsigned int fde_getThreadIndex() { - - return blockIdx.y * gridDim.x * blockDim.x + blockDim.x * blockIdx.x + - threadIdx.x; -} - -__device__ cufftReal fde_getNode(int x, int y, int z) { - auto *field = - reinterpret_cast(fde_parameters_gpu->charge_potential); - return field[fde_parameters_gpu->dim_y * fde_parameters_gpu->dim_x_padded * - z + - fde_parameters_gpu->dim_x_padded * y + x]; -} - -__device__ void fde_setNode(int x, int y, int z, cufftReal value) { - auto *field = - reinterpret_cast(fde_parameters_gpu->charge_potential); - field[fde_parameters_gpu->dim_y * fde_parameters_gpu->dim_x_padded * z + - fde_parameters_gpu->dim_x_padded * y + x] = value; -} - -__device__ cufftReal fde_getNode(int i) { - int x = i % fde_parameters_gpu->dim_x_padded; - i /= fde_parameters_gpu->dim_x_padded; - int y = i % fde_parameters_gpu->dim_y; - int z = i / fde_parameters_gpu->dim_y; - return fde_getNode(x, y, z); -} - -__device__ void fde_setNode(int i, cufftReal value) { - int x = i % fde_parameters_gpu->dim_x_padded; - i /= fde_parameters_gpu->dim_x_padded; - int y = i % fde_parameters_gpu->dim_y; - int z = i / fde_parameters_gpu->dim_y; - fde_setNode(x, y, z, value); -} - -FdElectrostatics::~FdElectrostatics() { - cufftDestroy(plan_ifft); - cufftDestroy(plan_fft); - - cuda_safe_mem(cudaFree(parameters.greensfcn)); - cuda_safe_mem(cudaFree(parameters.charge_potential)); -} - -FdElectrostatics::FdElectrostatics(InputParameters inputParameters, - cudaStream_t stream) - : 
parameters(inputParameters), cuda_stream(stream) { - cuda_safe_mem(cudaMalloc((void **)¶meters.charge_potential, - sizeof(cufftComplex) * parameters.dim_z * - parameters.dim_y * (parameters.dim_x / 2 + 1))); - - cuda_safe_mem(cudaMalloc((void **)¶meters.greensfcn, - sizeof(cufftReal) * parameters.dim_z * - parameters.dim_y * (parameters.dim_x / 2 + 1))); - - if (cudaGetLastError() != cudaSuccess) { - throw std::runtime_error("Failed to allocate"); - } - - cuda_safe_mem( - cudaMemcpyToSymbol(fde_parameters_gpu, ¶meters, sizeof(Parameters))); - - dim3 dim_grid = calculate_dim_grid( - static_cast(parameters.dim_z * parameters.dim_y * - (parameters.dim_x / 2 + 1)), - 4, threads_per_block); - KERNELCALL_stream(createGreensfcn, dim_grid, threads_per_block, stream); - - /* create 3D FFT plans */ - - if (cufftPlan3d(&plan_fft, parameters.dim_z, parameters.dim_y, - parameters.dim_x, CUFFT_R2C) != CUFFT_SUCCESS) { - throw std::runtime_error("Unable to create fft plan"); - } - - if (cufftSetStream(plan_fft, cuda_stream) != CUFFT_SUCCESS) { - throw std::runtime_error("Unable to assign FFT to cuda stream"); - } - - if (cufftPlan3d(&plan_ifft, parameters.dim_z, parameters.dim_y, - parameters.dim_x, CUFFT_C2R) != CUFFT_SUCCESS) { - throw std::runtime_error("Unable to create ifft plan"); - } - - if (cufftSetStream(plan_ifft, cuda_stream) != CUFFT_SUCCESS) { - throw std::runtime_error("Unable to assign FFT to cuda stream"); - } - - initialized = true; -} - -__global__ void createGreensfcn() { - unsigned int index = fde_getThreadIndex(); - unsigned int tmp; - unsigned int coord[3]; - - coord[0] = index % (fde_parameters_gpu->dim_x / 2 + 1); - tmp = index / (fde_parameters_gpu->dim_x / 2 + 1); - coord[1] = tmp % fde_parameters_gpu->dim_y; - coord[2] = tmp / fde_parameters_gpu->dim_y; - - if (index < fde_parameters_gpu->dim_z * fde_parameters_gpu->dim_y * - (fde_parameters_gpu->dim_x / 2 + 1)) { - - if (index == 0) { - // setting 0th Fourier mode to 0 enforces charge neutrality - 
fde_parameters_gpu->greensfcn[index] = 0.0f; - } else { - constexpr cufftReal two_pi = 2.0f * Utils::pi(); - fde_parameters_gpu->greensfcn[index] = - -2.0f * two_pi * fde_parameters_gpu->prefactor * - fde_parameters_gpu->agrid * fde_parameters_gpu->agrid * 0.5f / - (cos(two_pi * static_cast(coord[0]) / - static_cast(fde_parameters_gpu->dim_x)) + - cos(two_pi * static_cast(coord[1]) / - static_cast(fde_parameters_gpu->dim_y)) + - cos(two_pi * static_cast(coord[2]) / - static_cast(fde_parameters_gpu->dim_z)) - - 3.0f) / - static_cast(fde_parameters_gpu->dim_x * - fde_parameters_gpu->dim_y * - fde_parameters_gpu->dim_z); - } - - // fde_parameters_gpu->greensfcn[index] = 0.0f; //TODO delete - } -} - -__global__ void multiplyGreensfcn(cufftComplex *charge_potential) { - - unsigned int index = fde_getThreadIndex(); - - if (index < fde_parameters_gpu->dim_z * fde_parameters_gpu->dim_y * - (fde_parameters_gpu->dim_x / 2 + 1)) { - charge_potential[index].x *= fde_parameters_gpu->greensfcn[index]; - charge_potential[index].y *= fde_parameters_gpu->greensfcn[index]; - } -} - -void FdElectrostatics::calculatePotential() { - calculatePotential(parameters.charge_potential); -} - -void FdElectrostatics::calculatePotential(cufftComplex *charge_potential) { - - if (cufftExecR2C(plan_fft, (cufftReal *)charge_potential, charge_potential) != - CUFFT_SUCCESS) { - - fprintf(stderr, "ERROR: Unable to execute FFT plan\n"); - } - - dim3 dim_grid = calculate_dim_grid( - static_cast(parameters.dim_z * parameters.dim_y * - (parameters.dim_x / 2 + 1)), - 4, threads_per_block); - - KERNELCALL(multiplyGreensfcn, dim_grid, threads_per_block, charge_potential); - - if (cufftExecC2R(plan_ifft, charge_potential, - (cufftReal *)charge_potential) != CUFFT_SUCCESS) { - - fprintf(stderr, "ERROR: Unable to execute iFFT plan\n"); - } -} - -FdElectrostatics::Grid FdElectrostatics::getGrid() { - Grid g = {(float *)parameters.charge_potential, parameters.dim_x, - parameters.dim_y, parameters.dim_z, 
parameters.agrid}; - return g; -} diff --git a/src/core/grid_based_algorithms/halo.cpp b/src/core/grid_based_algorithms/halo.cpp deleted file mode 100644 index 03291c95b18..00000000000 --- a/src/core/grid_based_algorithms/halo.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - * Max-Planck-Institute for Polymer Research, Theory Group - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -/** \file - * - * Halo scheme for parallelization of lattice algorithms. - * Implementation of file \ref halo.hpp. 
- * - */ - -#include "config/config.hpp" - -#include "communication.hpp" -#include "grid.hpp" -#include "grid_based_algorithms/lattice.hpp" -#include "halo.hpp" - -#include - -#include -#include -#include - -/** Predefined fieldtype for double-precision LB */ -static std::shared_ptr fieldtype_double = - std::make_shared(static_cast(sizeof(double))); - -/** Set halo region to a given value - * @param[out] dest pointer to the halo buffer - * @param value integer value to write into the halo buffer - * @param type halo field layout description - */ -void halo_dtset(char *dest, int value, std::shared_ptr type) { - auto const vblocks = type->vblocks; - auto const vstride = type->vstride; - auto const vskip = type->vskip; - auto const &lens = type->lengths; - auto const &disps = type->disps; - auto const extent = type->extent; - auto const block_size = static_cast(vskip) * static_cast(extent); - - for (int i = 0; i < vblocks; i++) { - for (int j = 0; j < vstride; j++) { - for (std::size_t k = 0; k < disps.size(); k++) - memset(dest + disps[k], value, lens[k]); - } - dest += block_size; - } -} - -void halo_dtcopy(char *r_buffer, char *s_buffer, int count, - std::shared_ptr type); - -void halo_copy_vector(char *r_buffer, char *s_buffer, int count, - std::shared_ptr type, bool vflag) { - - auto const vblocks = type->vblocks; - auto const vstride = type->vstride; - auto const extent = type->extent; - - auto block_size = static_cast(type->vskip); - if (vflag) { - block_size *= static_cast(type->subtype->extent); - } - - for (int i = 0; i < count; i++, s_buffer += extent, r_buffer += extent) { - char *dest = r_buffer, *src = s_buffer; - for (int j = 0; j < vblocks; j++, dest += block_size, src += block_size) { - halo_dtcopy(dest, src, vstride, type->subtype); - } - } -} - -/** Copy lattice data with layout described by @p type. 
- * @param r_buffer data destination - * @param s_buffer data source - * @param count amount of data to copy - * @param type field layout type - */ -void halo_dtcopy(char *r_buffer, char *s_buffer, int count, - std::shared_ptr type) { - - if (type->subtype) { - halo_copy_vector(r_buffer, s_buffer, count, type, type->vflag); - } else { - - for (int i = 0; i < count; - i++, s_buffer += type->extent, r_buffer += type->extent) { - if (!type->count) { - memmove(r_buffer, s_buffer, type->extent); - } else { - for (int j = 0; j < type->count; j++) { - memmove(r_buffer + type->disps[j], s_buffer + type->disps[j], - type->lengths[j]); - } - } - } - } -} - -void prepare_halo_communication(HaloCommunicator &hc, const Lattice &lattice, - MPI_Datatype datatype, - const Utils::Vector3i &local_node_grid) { - - const auto &grid = lattice.grid; - const auto &period = lattice.halo_grid; - - for (int n = 0; n < hc.num; n++) { - MPI_Type_free(&(hc.halo_info[n].datatype)); - } - - int const num = 2 * 3; /* two communications in each space direction */ - hc.num = num; - hc.halo_info.resize(num); - - auto const extent = static_cast(fieldtype_double->extent); - - auto const node_neighbors = calc_node_neighbors(comm_cart); - - int cnt = 0; - for (int dir = 0; dir < 3; dir++) { - for (int lr = 0; lr < 2; lr++) { - - HaloInfo &hinfo = hc.halo_info[cnt]; - - int nblocks = 1; - for (int k = dir + 1; k < 3; k++) { - nblocks *= period[k]; - } - int stride = 1; - for (int k = 0; k < dir; k++) { - stride *= period[k]; - } - int skip = 1; - for (int k = 0; k < dir + 1 && k < 2; k++) { - skip *= period[k]; - } - - if (lr == 0) { - /* send to left, recv from right */ - hinfo.s_offset = extent * static_cast(stride * 1); - hinfo.r_offset = extent * static_cast(stride * (grid[dir] + 1)); - } else { - /* send to right, recv from left */ - hinfo.s_offset = extent * static_cast(stride * grid[dir]); - hinfo.r_offset = extent * static_cast(stride * 0); - } - - hinfo.source_node = node_neighbors[2 * dir + 1 - 
lr]; - hinfo.dest_node = node_neighbors[2 * dir + lr]; - - hinfo.fieldtype = std::make_shared(nblocks, stride, skip, true, - fieldtype_double); - - MPI_Type_vector(nblocks, stride, skip, datatype, &hinfo.datatype); - MPI_Type_commit(&hinfo.datatype); - - if (!box_geo.periodic(dir) && - (local_geo.boundary()[2 * dir + lr] != 0 || - local_geo.boundary()[2 * dir + 1 - lr] != 0)) { - if (local_node_grid[dir] == 1) { - hinfo.type = HALO_OPEN; - } else if (lr == 0) { - if (local_geo.boundary()[2 * dir + lr] == 1) { - hinfo.type = HALO_RECV; - } else { - hinfo.type = HALO_SEND; - } - } else { - if (local_geo.boundary()[2 * dir + lr] == -1) { - hinfo.type = HALO_RECV; - } else { - hinfo.type = HALO_SEND; - } - } - } else { - if (local_node_grid[dir] == 1) { - hc.halo_info[cnt].type = HALO_LOCL; - } else { - hc.halo_info[cnt].type = HALO_SENDRECV; - } - } - cnt++; - } - } -} - -void release_halo_communication(HaloCommunicator &hc) { - for (int n = 0; n < hc.num; n++) { - MPI_Type_free(&(hc.halo_info[n].datatype)); - } -} - -void halo_communication(const HaloCommunicator &hc, char *const base) { - - std::shared_ptr fieldtype; - MPI_Datatype datatype; - MPI_Request request; - MPI_Status status; - - for (int n = 0; n < hc.num; n++) { - int s_node, r_node; - int comm_type = hc.halo_info[n].type; - char *s_buffer = static_cast(base) + hc.halo_info[n].s_offset; - char *r_buffer = static_cast(base) + hc.halo_info[n].r_offset; - - switch (comm_type) { - - case HALO_LOCL: - fieldtype = hc.halo_info[n].fieldtype; - halo_dtcopy(r_buffer, s_buffer, 1, fieldtype); - break; - - case HALO_SENDRECV: - datatype = hc.halo_info[n].datatype; - s_node = hc.halo_info[n].source_node; - r_node = hc.halo_info[n].dest_node; - MPI_Sendrecv(s_buffer, 1, datatype, r_node, REQ_HALO_SPREAD, r_buffer, 1, - datatype, s_node, REQ_HALO_SPREAD, comm_cart, &status); - break; - - case HALO_SEND: - datatype = hc.halo_info[n].datatype; - fieldtype = hc.halo_info[n].fieldtype; - r_node = hc.halo_info[n].dest_node; 
- MPI_Isend(s_buffer, 1, datatype, r_node, REQ_HALO_SPREAD, comm_cart, - &request); - halo_dtset(r_buffer, 0, fieldtype); - MPI_Wait(&request, &status); - break; - - case HALO_RECV: - datatype = hc.halo_info[n].datatype; - s_node = hc.halo_info[n].source_node; - MPI_Irecv(r_buffer, 1, datatype, s_node, REQ_HALO_SPREAD, comm_cart, - &request); - MPI_Wait(&request, &status); - break; - - case HALO_OPEN: - fieldtype = hc.halo_info[n].fieldtype; - /** \todo this does not work for the n_i - \ */ - halo_dtset(r_buffer, 0, fieldtype); - break; - } - } -} diff --git a/src/core/grid_based_algorithms/halo.hpp b/src/core/grid_based_algorithms/halo.hpp deleted file mode 100644 index 989442605e3..00000000000 --- a/src/core/grid_based_algorithms/halo.hpp +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - * Max-Planck-Institute for Polymer Research, Theory Group - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef CORE_GRID_BASED_ALGORITHMS_HALO_HPP -#define CORE_GRID_BASED_ALGORITHMS_HALO_HPP -/** \file - * - * Halo scheme for parallelization of lattice algorithms. - * Header file for \ref halo.cpp. 
- * - */ - -#include "grid_based_algorithms/lattice.hpp" - -#include - -#include - -#include -#include - -/** \name Types of halo communications */ -/**@{*/ -#define HALO_LOCL \ - 0 /**< Tag for local exchange of halo regions on the same processor */ -#define HALO_SENDRECV \ - 1 /**< Tag for halo exchange between different processors */ -#define HALO_SEND 2 /**< Tag for halo send only */ -#define HALO_RECV 3 /**< Tag for halo receive only */ -#define HALO_OPEN 4 /**< Tag for halo open boundary */ -/**@}*/ - -/** \name Tags for halo communications */ -/**@{*/ -#define REQ_HALO_SPREAD 501 /**< Tag for halo update */ -#define REQ_HALO_CHECK 599 /**< Tag for consistency check of halo regions */ -/**@}*/ - -/** Layout of the lattice data. - * The description is similar to MPI datatypes but a bit more compact. - */ -struct FieldType { - FieldType(int new_extent) - : count(0), disps({}), lengths({}), extent(new_extent), vblocks(0), - vstride(0), vskip(0), vflag(false), subtype(nullptr) {} - FieldType(int new_vblocks, int new_vstride, int new_vskip, bool new_vflag, - std::shared_ptr oldtype) - : count(oldtype->count), disps(oldtype->disps), lengths(oldtype->lengths), - extent(0), vblocks(new_vblocks), vstride(new_vstride), vskip(new_vskip), - vflag(new_vflag), subtype(oldtype) { - if (vflag) { - extent = oldtype->extent * ((vblocks - 1) * vskip + vstride); - } else { - extent = oldtype->extent * vstride + (vblocks - 1) * vskip; - } - } - int count; /**< number of subtypes in fieldtype */ - std::vector disps; /**< displacements of the subtypes */ - std::vector lengths; /**< lengths of the subtypes */ - int extent; /**< extent of the complete fieldtype including gaps */ - int vblocks; /**< number of blocks in field vectors */ - int vstride; /**< size of strides in field vectors */ - int vskip; /**< displacement between strides in field vectors */ - bool vflag; - std::shared_ptr subtype; -}; - -/** Structure describing a Halo region */ -struct HaloInfo { - - int type; /**< 
type of halo communication */ - - int source_node; /**< index of processor which sends halo data */ - int dest_node; /**< index of processor receiving halo data */ - - unsigned long s_offset; /**< offset for send buffer */ - unsigned long r_offset; /**< offset for receive buffer */ - - std::shared_ptr - fieldtype; /**< type layout of the data being exchanged */ - MPI_Datatype datatype; /**< MPI datatype of data being communicated */ -}; - -/** Structure holding a set of \ref HaloInfo which comprise a certain - * parallelization scheme */ -class HaloCommunicator { -public: - HaloCommunicator(int num) : num(num) {} - - int num; /**< number of halo communications in the scheme */ - - std::vector halo_info; /**< set of halo communications */ -}; - -/** Preparation of the halo parallelization scheme. Sets up the - * necessary data structures for \ref halo_communication - * @param[in,out] hc halo communicator being created - * @param[in] lattice lattice the communication is created for - * @param datatype MPI datatype for the lattice data - * @param local_node_grid Number of nodes in each spatial dimension - */ -void prepare_halo_communication(HaloCommunicator &hc, const Lattice &lattice, - MPI_Datatype datatype, - const Utils::Vector3i &local_node_grid); - -/** Frees data structures associated with a halo communicator - * @param[in,out] hc halo communicator to be released - */ -void release_halo_communication(HaloCommunicator &hc); - -/** Perform communication according to the parallelization scheme - * described by the halo communicator - * @param[in] hc halo communicator describing the parallelization scheme - * @param[in] base base plane of local node - */ -void halo_communication(const HaloCommunicator &hc, char *base); - -#endif /* CORE_GRID_BASED_ALGORITHMS_HALO_HPP */ diff --git a/src/core/grid_based_algorithms/lattice.cpp b/src/core/grid_based_algorithms/lattice.cpp deleted file mode 100644 index e2d41b0ddc2..00000000000 --- 
a/src/core/grid_based_algorithms/lattice.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - * Max-Planck-Institute for Polymer Research, Theory Group - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "grid_based_algorithms/lattice.hpp" - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -Lattice::Lattice(double agrid, double offset, int halo_size, - Utils::Vector3d const &local_box, - Utils::Vector3d const &my_right, - Utils::Vector3d const &box_length, - Utils::Vector3i const &node_pos, - Utils::Vector3i const &node_grid) - : agrid(agrid), halo_size(halo_size), offset(offset), node_grid(node_grid), - local_box(local_box), my_right(my_right) { - /* determine the number of local lattice nodes */ - auto const epsilon = std::numeric_limits::epsilon(); - for (int d = 0; d < 3; d++) { - grid[d] = static_cast(round(local_box[d] / agrid)); - global_grid[d] = node_grid[d] * grid[d]; - local_index_offset[d] = node_pos[d] * grid[d]; - } - - // sanity checks - for (int dir = 0; dir < 3; dir++) { - // check if local_box_l is compatible with lattice spacing - auto diff = fabs(local_box[dir] - grid[dir] * agrid); - if (diff > epsilon * box_length[dir]) { - throw std::runtime_error( - "Lattice spacing agrid[" + 
std::to_string(dir) + - "]=" + std::to_string(agrid) + " is incompatible with local_box_l[" + - std::to_string(dir) + "]=" + std::to_string(local_box[dir]) + - " ( box_l[" + std::to_string(dir) + - "]=" + std::to_string(box_length[dir]) + - " ). Mismatch: " + std::to_string(diff)); - } - } - - /* determine the number of total nodes including halo */ - halo_grid = grid + Utils::Vector3i::broadcast(2 * halo_size); - halo_grid_volume = Utils::product(halo_grid); - halo_offset = - Utils::get_linear_index(halo_size, halo_size, halo_size, halo_grid); -} - -bool Lattice::is_local(Utils::Vector3i const &index) const noexcept { - auto const x = index * agrid; - return x >= my_right - local_box and x < my_right; -} - -void Lattice::map_position_to_lattice(const Utils::Vector3d &pos, - Utils::Vector &node_index, - Utils::Vector6d &delta) const { - Utils::Vector3i ind{}; - auto const epsilon = std::numeric_limits::epsilon(); - - /* determine the elementary lattice cell containing the particle - and the relative position of the particle in this cell */ - for (int dir = 0; dir < 3; dir++) { - auto const lpos = pos[dir] - (my_right[dir] - local_box[dir]); - auto const rel = lpos / agrid + offset; - ind[dir] = static_cast(floor(rel)); - - /* surrounding elementary cell is not completely inside this box, - adjust if this is due to round off errors */ - if (ind[dir] < 0) { - if (fabs(rel) < epsilon) { - ind[dir] = 0; - } else { - throw std::runtime_error("position outside local LB domain"); - } - } else if (ind[dir] > grid[dir]) { - if (lpos - local_box[dir] < epsilon * local_box[dir]) - ind[dir] = grid[dir]; - else - throw std::runtime_error("position outside local LB domain"); - } - - delta[3 + dir] = rel - ind[dir]; // delta_x/a - delta[dir] = 1.0 - delta[3 + dir]; - } - auto const slice_x = static_cast(halo_grid[0]); - auto const slice_xy = static_cast(halo_grid[1]) * slice_x; - node_index[0] = Utils::get_linear_index(ind, halo_grid); - node_index[1] = node_index[0] + 1u; - 
node_index[2] = node_index[0] + slice_x; - node_index[3] = node_index[0] + slice_x + 1u; - node_index[4] = node_index[0] + slice_xy; - node_index[5] = node_index[0] + slice_xy + 1u; - node_index[6] = node_index[0] + slice_xy + slice_x; - node_index[7] = node_index[0] + slice_xy + slice_x + 1u; -} - -Utils::Vector3i -Lattice::local_index(Utils::Vector3i const &global_index) const noexcept { - return global_index - local_index_offset + - Utils::Vector3i::broadcast(halo_size); -} diff --git a/src/core/grid_based_algorithms/lattice.hpp b/src/core/grid_based_algorithms/lattice.hpp deleted file mode 100644 index 32c6f32051d..00000000000 --- a/src/core/grid_based_algorithms/lattice.hpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - * Max-Planck-Institute for Polymer Research, Theory Group - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -/** \file - * - * Lattice class definition. - * Contains the lattice layout. - * For parallelization purposes, it is assumed that a halo region - * surrounds the local lattice sites. 
- */ - -#ifndef CORE_LB_LATTICE_HPP -#define CORE_LB_LATTICE_HPP - -#include - -#include - -class Lattice { -public: - using index_t = int; - - /** number of local lattice sites in each direction (excluding halo) */ - Utils::Vector3i grid; - Utils::Vector3i global_grid; - double agrid; /**< lattice constant */ - - /** number of lattice sites in each direction (including halo) */ - Utils::Vector3i halo_grid; - index_t halo_size; /**< halo size in all directions */ - - double offset; /**< global offset */ - /** global index of the local domain origin */ - Utils::Vector3i local_index_offset; - /** global domain partition */ - Utils::Vector3i node_grid; - /** dimensions of the local domain */ - Utils::Vector3d local_box; - /** global position of the top right corner of the local domain */ - Utils::Vector3d my_right; - - /** total number of lattice sites (including halo) */ - index_t halo_grid_volume; - /** offset for number of halo sites stored in front of the local - * lattice sites - */ - index_t halo_offset; - - Lattice() = default; - /** @brief %Lattice constructor. - * - * This function initializes the variables describing the lattice - * layout. Important: The lattice data is not allocated here! - * - * @param agrid lattice spacing - * @param offset lattice offset - * @param halo_size halo size - * @param local_box dimensions of the local box - * @param myright right (top, back) corner of the local box - * @param box_length lengths of the local box - * @param node_pos position of this node in the domain decomposition - * @param node_grid node_grid of domain decomposition - */ - Lattice(double agrid, double offset, int halo_size, - const Utils::Vector3d &local_box, const Utils::Vector3d &myright, - const Utils::Vector3d &box_length, Utils::Vector3i const &node_pos, - Utils::Vector3i const &node_grid); - - /** Map a spatial position to the surrounding lattice sites. 
- * - * This function takes a global spatial position and determines the - * surrounding elementary cell of the lattice for this position. - * The distance fraction in each direction is also calculated. - * - * Remarks: - * - The spatial position has to be in the local domain - * - The lattice sites of the elementary cell are returned as local indices - * - * @param[in] pos spatial position - * @param[out] node_index local indices of the surrounding lattice sites - * @param[out] delta distance fraction of %p pos from the surrounding - * elementary cell, 6 directions - */ - void map_position_to_lattice(Utils::Vector3d const &pos, - Utils::Vector &node_index, - Utils::Vector6d &delta) const; - - /** - * @brief Determine if given global index is node-local. - * @param index Global lattice index. - */ - bool is_local(Utils::Vector3i const &index) const noexcept; - /** - * @brief Calculate the node-local index. - * @param global_index Index into global lattice. - */ - Utils::Vector3i - local_index(Utils::Vector3i const &global_index) const noexcept; -}; - -#endif /* CORE_LB_LATTICE_HPP */ diff --git a/src/core/grid_based_algorithms/lb-d3q19.hpp b/src/core/grid_based_algorithms/lb-d3q19.hpp deleted file mode 100644 index 39c25971670..00000000000 --- a/src/core/grid_based_algorithms/lb-d3q19.hpp +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - * Max-Planck-Institute for Polymer Research, Theory Group - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -/** \file - * %Lattice Boltzmann D3Q19 model. - */ - -#ifndef D3Q19_H -#define D3Q19_H - -#include - -#include -#include - -namespace D3Q19 { - -static constexpr std::size_t n_vel = 19; - -/** Velocity sub-lattice of the D3Q19 model */ -static constexpr const std::array c = {{{{0, 0, 0}}, - {{1, 0, 0}}, - {{-1, 0, 0}}, - {{0, 1, 0}}, - {{0, -1, 0}}, - {{0, 0, 1}}, - {{0, 0, -1}}, - {{1, 1, 0}}, - {{-1, -1, 0}}, - {{1, -1, 0}}, - {{-1, 1, 0}}, - {{1, 0, 1}}, - {{-1, 0, -1}}, - {{1, 0, -1}}, - {{-1, 0, 1}}, - {{0, 1, 1}}, - {{0, -1, -1}}, - {{0, 1, -1}}, - {{0, -1, 1}}}}; - -/** Coefficients for pseudo-equilibrium distribution of the D3Q19 model */ -static constexpr const std::array, 19> coefficients = { - {{{1. / 3., 1., 3. / 2., -1. / 2.}}, - {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}}, - {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}}, - {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}}, - {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}}, - {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}}, - {{1. / 18., 1. / 6., 1. / 4., -1. / 12.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. / 24.}}, - {{1. / 36., 1. / 12., 1. / 8., -1. 
/ 24.}}}}; - -/** Coefficients in the functional for the equilibrium distribution */ -static constexpr const std::array w = { - {1. / 3., 1. / 18., 1. / 18., 1. / 18., 1. / 18., 1. / 18., 1. / 18., - 1. / 36., 1. / 36., 1. / 36., 1. / 36., 1. / 36., 1. / 36., 1. / 36., - 1. / 36., 1. / 36., 1. / 36., 1. / 36., 1. / 36.}}; - -/* the following values are the (weighted) lengths of the vectors */ -static constexpr const std::array w_k = { - {1.0, 1. / 3., 1. / 3., 1. / 3., 2. / 3., 4. / 9., 4. / 3., 1. / 9., - 1. / 9., 1. / 9., 2. / 3., 2. / 3., 2. / 3., 2. / 9., 2. / 9., 2. / 9., - 2.0, 4. / 9., 4. / 3.}}; - -template -static constexpr const T c_sound_sq = static_cast(1. / 3.); - -} // namespace D3Q19 - -#undef GCC_EXTERN_STATEMENT - -#endif /* D3Q19_H */ diff --git a/src/core/grid_based_algorithms/lb.cpp b/src/core/grid_based_algorithms/lb.cpp deleted file mode 100644 index fe3bce0959d..00000000000 --- a/src/core/grid_based_algorithms/lb.cpp +++ /dev/null @@ -1,1353 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - * Max-Planck-Institute for Polymer Research, Theory Group - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -/** \file - * %Lattice Boltzmann algorithm for hydrodynamic degrees of freedom. 
- * - * Includes fluctuating LB and coupling to MD particles via frictional - * momentum transfer. - * - * The corresponding header file is lb.hpp. - */ - -#include "grid_based_algorithms/lb.hpp" - -#include "cell_system/CellStructureType.hpp" -#include "communication.hpp" -#include "errorhandling.hpp" -#include "event.hpp" -#include "grid.hpp" -#include "grid_based_algorithms/lb_boundaries.hpp" -#include "halo.hpp" -#include "lb-d3q19.hpp" -#include "random.hpp" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using Utils::get_linear_index; - -namespace { -/** Basis of the mode space as described in @cite dunweg07a */ -extern constexpr const std::array, 19> e_ki = { - {{{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, - {{0, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0}}, - {{0, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1}}, - {{0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, -1, -1, 1, 1, -1, -1, 1}}, - {{-1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, - {{0, 1, 1, -1, -1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1}}, - {{-0, 1, 1, 1, 1, -2, -2, 2, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1}}, - {{0, 0, 0, 0, 0, 0, 0, 1, 1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0}}, - {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, -1, -1, 0, 0, 0, 0}}, - {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, -1, -1}}, - {{0, -2, 2, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0}}, - {{0, 0, 0, -2, 2, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1}}, - {{0, 0, 0, 0, 0, -2, 2, 0, 0, 0, 0, 1, -1, -1, 1, 1, -1, -1, 1}}, - {{0, -0, 0, 0, 0, 0, 0, 1, -1, 1, -1, -1, 1, -1, 1, 0, 0, 0, 0}}, - {{0, 0, 0, -0, 0, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, -1, 1, -1, 1}}, - {{0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 1, -1, -1, 1, -1, 1, 1, -1}}, - 
{{1, -2, -2, -2, -2, -2, -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, - {{0, -1, -1, 1, 1, -0, -0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1}}, - {{0, -1, -1, -1, -1, 2, 2, 2, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1}}}}; - -/** Transposed version of @ref e_ki */ -extern constexpr const std::array, 19> e_ki_transposed = { - {{{1, 0, 0, 0, -1, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0}}, - {{1, 1, 0, 0, 0, 1, 1, 0, 0, 0, -2, 0, 0, -0, 0, 0, -2, -1, -1}}, - {{1, -1, 0, 0, 0, 1, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, -2, -1, -1}}, - {{1, 0, 1, 0, 0, -1, 1, 0, 0, 0, 0, -2, 0, 0, -0, 0, -2, 1, -1}}, - {{1, 0, -1, 0, 0, -1, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, -2, 1, -1}}, - {{1, 0, 0, 1, 0, 0, -2, 0, 0, 0, 0, 0, -2, 0, 0, -0, -2, -0, 2}}, - {{1, 0, 0, -1, 0, 0, -2, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, -0, 2}}, - {{1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 2}}, - {{1, -1, -1, 0, 1, 0, 2, 1, 0, 0, -1, -1, 0, -1, -1, 0, 1, 0, 2}}, - {{1, 1, -1, 0, 1, 0, 2, -1, 0, 0, 1, -1, 0, 1, -1, 0, 1, 0, 2}}, - {{1, -1, 1, 0, 1, 0, 2, -1, 0, 0, -1, 1, 0, -1, 1, 0, 1, 0, 2}}, - {{1, 1, 0, 1, 1, 1, -1, 0, 1, 0, 1, 0, 1, -1, 0, 1, 1, 1, -1}}, - {{1, -1, 0, -1, 1, 1, -1, 0, 1, 0, -1, 0, -1, 1, 0, -1, 1, 1, -1}}, - {{1, 1, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, -1, 0, -1, 1, 1, -1}}, - {{1, -1, 0, 1, 1, 1, -1, 0, -1, 0, -1, 0, 1, 1, 0, 1, 1, 1, -1}}, - {{1, 0, 1, 1, 1, -1, -1, 0, 0, 1, 0, 1, 1, 0, -1, -1, 1, -1, -1}}, - {{1, 0, -1, -1, 1, -1, -1, 0, 0, 1, 0, -1, -1, 0, 1, 1, 1, -1, -1}}, - {{1, 0, 1, -1, 1, -1, -1, 0, 0, -1, 0, 1, -1, 0, -1, 1, 1, -1, -1}}, - {{1, 0, -1, 1, 1, -1, -1, 0, 0, -1, 0, -1, 1, 0, 1, -1, 1, -1, -1}}}}; -} // namespace - -void lb_on_param_change(LBParam param) { - switch (param) { - case LBParam::AGRID: - lb_init(lbpar); - break; - case LBParam::DENSITY: - lb_reinit_fluid(lbfields, lblattice, lbpar); - break; - case LBParam::VISCOSITY: - case LBParam::EXT_FORCE_DENSITY: - lb_initialize_fields(lbfields, lbpar, lblattice); - case LBParam::BULKVISC: - case LBParam::KT: - case 
LBParam::GAMMA_ODD: - case LBParam::GAMMA_EVEN: - case LBParam::TAU: - break; - } - lb_reinit_parameters(lbpar); -} - -#ifdef ADDITIONAL_CHECKS -static void lb_check_halo_regions(const LB_Fluid &lb_fluid, - const Lattice &lb_lattice); -#endif // ADDITIONAL_CHECKS - -boost::optional> rng_counter_fluid; - -LB_Parameters lbpar = { - // density - 0.0, - // viscosity - 0.0, - // bulk_viscosity - -1.0, - // agrid - -1.0, - // tau - -1.0, - // ext_force_density - {0.0, 0.0, 0.0}, - // gamma_odd - 0., - // gamma_even - 0., - // gamma_shear - 0., - // gamma_bulk - 0., - // is_TRT - false, - // phi - {}, - // Thermal energy - 0.0}; - -Lattice lblattice; - -using LB_FluidData = boost::multi_array; -static LB_FluidData lbfluid_a; -static LB_FluidData lbfluid_b; - -/** Span of the velocity populations of the fluid (pre-collision populations). - */ -LB_Fluid lbfluid; -/** Span of the velocity populations of the fluid (post-collision populations). - */ -static LB_Fluid lbfluid_post; - -std::vector lbfields; - -HaloCommunicator update_halo_comm = HaloCommunicator(0); - -/** - * @brief Initialize fluid nodes. - * @param[out] lb_fields Vector containing the fluid nodes - * @param[in] lb_parameters Parameters for the LB - * @param[in] lb_lattice Lattice instance - */ -void lb_initialize_fields(std::vector &lb_fields, - LB_Parameters const &lb_parameters, - Lattice const &lb_lattice) { - lb_fields.resize(lb_lattice.halo_grid_volume); - for (auto &field : lb_fields) { - field.force_density = lb_parameters.ext_force_density; -#ifdef LB_BOUNDARIES - field.boundary = false; -#endif // LB_BOUNDARIES - } - on_lbboundary_change(); -} - -/** (Re-)allocate memory for the fluid and initialize pointers. 
*/ -void lb_realloc_fluid(LB_FluidData &lb_fluid_a, LB_FluidData &lb_fluid_b, - const Lattice::index_t halo_grid_volume, - LB_Fluid &lb_fluid, LB_Fluid &lb_fluid_post) { - const std::array size = {{D3Q19::n_vel, halo_grid_volume}}; - - lb_fluid_a.resize(size); - lb_fluid_b.resize(size); - - using Utils::Span; - for (int i = 0; i < size[0]; i++) { - lb_fluid[i] = Span(lb_fluid_a[i].origin(), size[1]); - lb_fluid_post[i] = Span(lb_fluid_b[i].origin(), size[1]); - } -} - -void lb_set_equilibrium_populations(const Lattice &lb_lattice, - const LB_Parameters &lb_parameters) { - for (Lattice::index_t index = 0; index < lb_lattice.halo_grid_volume; - ++index) { - lb_set_population_from_density_momentum_density_stress( - index, lb_parameters.density, Utils::Vector3d{} /*momentum density*/, - Utils::Vector6d{} /*stress*/); - } -} - -void lb_init(const LB_Parameters &lb_parameters) { - if (lb_parameters.agrid <= 0.0) { - runtimeErrorMsg() - << "Lattice Boltzmann agrid not set when initializing fluid"; - } - if (check_runtime_errors(comm_cart)) - return; - - /* initialize the local lattice domain */ - try { - lblattice = Lattice(lb_parameters.agrid, 0.5 /*offset*/, 1 /*halo size*/, - local_geo.length(), local_geo.my_right(), - box_geo.length(), calc_node_pos(comm_cart), node_grid); - } catch (const std::runtime_error &e) { - runtimeErrorMsg() << e.what(); - return; - } - - /* allocate memory for data structures */ - lb_realloc_fluid(lbfluid_a, lbfluid_b, lblattice.halo_grid_volume, lbfluid, - lbfluid_post); - - lb_initialize_fields(lbfields, lbpar, lblattice); - - /* prepare the halo communication */ - lb_prepare_communication(update_halo_comm, lblattice); - - /* initialize derived parameters */ - lb_reinit_parameters(lbpar); - - lb_set_equilibrium_populations(lblattice, lbpar); - -#ifdef LB_BOUNDARIES - LBBoundaries::lb_init_boundaries(); -#endif -} - -void lb_reinit_fluid(std::vector &lb_fields, - Lattice const &lb_lattice, - LB_Parameters const &lb_parameters) { - 
lb_set_equilibrium_populations(lb_lattice, lb_parameters); - lb_initialize_fields(lb_fields, lb_parameters, lb_lattice); -} - -void lb_reinit_parameters(LB_Parameters &lb_parameters) { - if (lb_parameters.viscosity > 0.0) { - /* Eq. (80) @cite dunweg07a. */ - lb_parameters.gamma_shear = 1. - 2. / (6. * lb_parameters.viscosity + 1.); - } - - if (lb_parameters.bulk_viscosity > 0.0) { - /* Eq. (81) @cite dunweg07a. */ - lb_parameters.gamma_bulk = - 1. - 2. / (9. * lb_parameters.bulk_viscosity + 1.); - } - - if (lb_parameters.is_TRT) { - lb_parameters.gamma_bulk = lb_parameters.gamma_shear; - lb_parameters.gamma_even = lb_parameters.gamma_shear; - lb_parameters.gamma_odd = -(7.0 * lb_parameters.gamma_even + 1.0) / - (lb_parameters.gamma_even + 7.0); - // gamma_odd = lb_parameters.gamma_shear; //uncomment for BGK - } - - // lb_parameters.gamma_shear = 0.0; //uncomment for special case of BGK - // lb_parameters.gamma_bulk = 0.0; - // gamma_odd = 0.0; - // gamma_even = 0.0; - - if (lb_parameters.kT > 0.0) { - /* Eq. (51) @cite dunweg07a. - * Note that the modes are not normalized as in the paper here! */ - double mu = lb_parameters.kT / D3Q19::c_sound_sq * - lb_parameters.tau * lb_parameters.tau / - (lb_parameters.agrid * lb_parameters.agrid); - - for (int i = 0; i < 4; i++) - lb_parameters.phi[i] = 0.0; - lb_parameters.phi[4] = - sqrt(mu * D3Q19::w_k[4] * (1. - Utils::sqr(lb_parameters.gamma_bulk))); - for (int i = 5; i < 10; i++) - lb_parameters.phi[i] = sqrt(mu * D3Q19::w_k[i] * - (1. 
- Utils::sqr(lb_parameters.gamma_shear))); - for (int i = 10; i < 16; i++) - lb_parameters.phi[i] = - sqrt(mu * D3Q19::w_k[i] * (1 - Utils::sqr(lb_parameters.gamma_odd))); - for (int i = 16; i < 19; i++) - lb_parameters.phi[i] = - sqrt(mu * D3Q19::w_k[i] * (1 - Utils::sqr(lb_parameters.gamma_even))); - } else { - for (int i = 0; i < D3Q19::n_vel; i++) - lb_parameters.phi[i] = 0.0; - } -} - -/** Halo communication for push scheme */ -static void halo_push_communication(LB_Fluid &lb_fluid, - const Lattice &lb_lattice) { - Lattice::index_t index; - int x, y, z, count; - int rnode, snode; - double *buffer; - MPI_Status status; - - auto const yperiod = lb_lattice.halo_grid[0]; - auto const zperiod = lb_lattice.halo_grid[0] * lb_lattice.halo_grid[1]; - - auto const node_neighbors = calc_node_neighbors(comm_cart); - - /*************** - * X direction * - ***************/ - count = 5 * lb_lattice.halo_grid[1] * lb_lattice.halo_grid[2]; - std::vector sbuf(count); - std::vector rbuf(count); - - /* send to right, recv from left i = 1, 7, 9, 11, 13 */ - snode = node_neighbors[1]; - rnode = node_neighbors[0]; - - buffer = sbuf.data(); - index = get_linear_index(lb_lattice.grid[0] + 1, 0, 0, lb_lattice.halo_grid); - for (z = 0; z < lb_lattice.halo_grid[2]; z++) { - for (y = 0; y < lb_lattice.halo_grid[1]; y++) { - buffer[0] = lb_fluid[1][index]; - buffer[1] = lb_fluid[7][index]; - buffer[2] = lb_fluid[9][index]; - buffer[3] = lb_fluid[11][index]; - buffer[4] = lb_fluid[13][index]; - buffer += 5; - - index += yperiod; - } - } - - MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, REQ_HALO_SPREAD, - rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD, - comm_cart, &status); - - buffer = rbuf.data(); - index = get_linear_index(1, 0, 0, lb_lattice.halo_grid); - for (z = 0; z < lb_lattice.halo_grid[2]; z++) { - for (y = 0; y < lb_lattice.halo_grid[1]; y++) { - lb_fluid[1][index] = buffer[0]; - lb_fluid[7][index] = buffer[1]; - lb_fluid[9][index] = buffer[2]; - lb_fluid[11][index] 
= buffer[3]; - lb_fluid[13][index] = buffer[4]; - buffer += 5; - - index += yperiod; - } - } - - /* send to left, recv from right i = 2, 8, 10, 12, 14 */ - snode = node_neighbors[0]; - rnode = node_neighbors[1]; - - buffer = sbuf.data(); - index = get_linear_index(0, 0, 0, lb_lattice.halo_grid); - for (z = 0; z < lb_lattice.halo_grid[2]; z++) { - for (y = 0; y < lb_lattice.halo_grid[1]; y++) { - buffer[0] = lb_fluid[2][index]; - buffer[1] = lb_fluid[8][index]; - buffer[2] = lb_fluid[10][index]; - buffer[3] = lb_fluid[12][index]; - buffer[4] = lb_fluid[14][index]; - buffer += 5; - - index += yperiod; - } - } - - MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, REQ_HALO_SPREAD, - rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD, - comm_cart, &status); - - buffer = rbuf.data(); - index = get_linear_index(lb_lattice.grid[0], 0, 0, lb_lattice.halo_grid); - for (z = 0; z < lb_lattice.halo_grid[2]; z++) { - for (y = 0; y < lb_lattice.halo_grid[1]; y++) { - lb_fluid[2][index] = buffer[0]; - lb_fluid[8][index] = buffer[1]; - lb_fluid[10][index] = buffer[2]; - lb_fluid[12][index] = buffer[3]; - lb_fluid[14][index] = buffer[4]; - buffer += 5; - - index += yperiod; - } - } - - /*************** - * Y direction * - ***************/ - count = 5 * lb_lattice.halo_grid[0] * lb_lattice.halo_grid[2]; - sbuf.resize(count); - rbuf.resize(count); - - /* send to right, recv from left i = 3, 7, 10, 15, 17 */ - snode = node_neighbors[3]; - rnode = node_neighbors[2]; - - buffer = sbuf.data(); - index = get_linear_index(0, lb_lattice.grid[1] + 1, 0, lb_lattice.halo_grid); - for (z = 0; z < lb_lattice.halo_grid[2]; z++) { - for (x = 0; x < lb_lattice.halo_grid[0]; x++) { - buffer[0] = lb_fluid[3][index]; - buffer[1] = lb_fluid[7][index]; - buffer[2] = lb_fluid[10][index]; - buffer[3] = lb_fluid[15][index]; - buffer[4] = lb_fluid[17][index]; - buffer += 5; - - ++index; - } - index += zperiod - lb_lattice.halo_grid[0]; - } - - MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, 
REQ_HALO_SPREAD, - rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD, - comm_cart, &status); - - buffer = rbuf.data(); - index = get_linear_index(0, 1, 0, lb_lattice.halo_grid); - for (z = 0; z < lb_lattice.halo_grid[2]; z++) { - for (x = 0; x < lb_lattice.halo_grid[0]; x++) { - lb_fluid[3][index] = buffer[0]; - lb_fluid[7][index] = buffer[1]; - lb_fluid[10][index] = buffer[2]; - lb_fluid[15][index] = buffer[3]; - lb_fluid[17][index] = buffer[4]; - buffer += 5; - - ++index; - } - index += zperiod - lb_lattice.halo_grid[0]; - } - - /* send to left, recv from right i = 4, 8, 9, 16, 18 */ - snode = node_neighbors[2]; - rnode = node_neighbors[3]; - - buffer = sbuf.data(); - index = get_linear_index(0, 0, 0, lb_lattice.halo_grid); - for (z = 0; z < lb_lattice.halo_grid[2]; z++) { - for (x = 0; x < lb_lattice.halo_grid[0]; x++) { - buffer[0] = lb_fluid[4][index]; - buffer[1] = lb_fluid[8][index]; - buffer[2] = lb_fluid[9][index]; - buffer[3] = lb_fluid[16][index]; - buffer[4] = lb_fluid[18][index]; - buffer += 5; - - ++index; - } - index += zperiod - lb_lattice.halo_grid[0]; - } - - MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, REQ_HALO_SPREAD, - rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD, - comm_cart, &status); - - buffer = rbuf.data(); - index = get_linear_index(0, lb_lattice.grid[1], 0, lb_lattice.halo_grid); - for (z = 0; z < lb_lattice.halo_grid[2]; z++) { - for (x = 0; x < lb_lattice.halo_grid[0]; x++) { - lb_fluid[4][index] = buffer[0]; - lb_fluid[8][index] = buffer[1]; - lb_fluid[9][index] = buffer[2]; - lb_fluid[16][index] = buffer[3]; - lb_fluid[18][index] = buffer[4]; - buffer += 5; - - ++index; - } - index += zperiod - lb_lattice.halo_grid[0]; - } - - /*************** - * Z direction * - ***************/ - count = 5 * lb_lattice.halo_grid[0] * lb_lattice.halo_grid[1]; - sbuf.resize(count); - rbuf.resize(count); - - /* send to right, recv from left i = 5, 11, 14, 15, 18 */ - snode = node_neighbors[5]; - rnode = node_neighbors[4]; - - 
buffer = sbuf.data(); - index = get_linear_index(0, 0, lb_lattice.grid[2] + 1, lb_lattice.halo_grid); - for (y = 0; y < lb_lattice.halo_grid[1]; y++) { - for (x = 0; x < lb_lattice.halo_grid[0]; x++) { - buffer[0] = lb_fluid[5][index]; - buffer[1] = lb_fluid[11][index]; - buffer[2] = lb_fluid[14][index]; - buffer[3] = lb_fluid[15][index]; - buffer[4] = lb_fluid[18][index]; - buffer += 5; - - ++index; - } - } - - MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, REQ_HALO_SPREAD, - rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD, - comm_cart, &status); - - buffer = rbuf.data(); - index = get_linear_index(0, 0, 1, lb_lattice.halo_grid); - for (y = 0; y < lb_lattice.halo_grid[1]; y++) { - for (x = 0; x < lb_lattice.halo_grid[0]; x++) { - lb_fluid[5][index] = buffer[0]; - lb_fluid[11][index] = buffer[1]; - lb_fluid[14][index] = buffer[2]; - lb_fluid[15][index] = buffer[3]; - lb_fluid[18][index] = buffer[4]; - buffer += 5; - - ++index; - } - } - - /* send to left, recv from right i = 6, 12, 13, 16, 17 */ - snode = node_neighbors[4]; - rnode = node_neighbors[5]; - - buffer = sbuf.data(); - index = get_linear_index(0, 0, 0, lb_lattice.halo_grid); - for (y = 0; y < lb_lattice.halo_grid[1]; y++) { - for (x = 0; x < lb_lattice.halo_grid[0]; x++) { - buffer[0] = lb_fluid[6][index]; - buffer[1] = lb_fluid[12][index]; - buffer[2] = lb_fluid[13][index]; - buffer[3] = lb_fluid[16][index]; - buffer[4] = lb_fluid[17][index]; - buffer += 5; - - ++index; - } - } - - MPI_Sendrecv(sbuf.data(), count, MPI_DOUBLE, snode, REQ_HALO_SPREAD, - rbuf.data(), count, MPI_DOUBLE, rnode, REQ_HALO_SPREAD, - comm_cart, &status); - - buffer = rbuf.data(); - index = get_linear_index(0, 0, lb_lattice.grid[2], lb_lattice.halo_grid); - for (y = 0; y < lb_lattice.halo_grid[1]; y++) { - for (x = 0; x < lb_lattice.halo_grid[0]; x++) { - lb_fluid[6][index] = buffer[0]; - lb_fluid[12][index] = buffer[1]; - lb_fluid[13][index] = buffer[2]; - lb_fluid[16][index] = buffer[3]; - lb_fluid[17][index] = 
buffer[4]; - buffer += 5; - - ++index; - } - } -} - -/***********************************************************************/ - -/** Performs basic sanity checks. */ -void lb_sanity_checks(const LB_Parameters &lb_parameters) { - if (lb_parameters.agrid <= 0.0) { - runtimeErrorMsg() << "Lattice Boltzmann agrid not set"; - } - if (lb_parameters.tau <= 0.0) { - runtimeErrorMsg() << "Lattice Boltzmann time step not set"; - } - if (lb_parameters.density <= 0.0) { - runtimeErrorMsg() << "Lattice Boltzmann fluid density not set"; - } - if (lb_parameters.viscosity <= 0.0) { - runtimeErrorMsg() << "Lattice Boltzmann fluid viscosity not set"; - } -} - -uint64_t lb_fluid_get_rng_state() { - assert(rng_counter_fluid); - return rng_counter_fluid->value(); -} - -void mpi_set_lb_fluid_counter(uint64_t counter) { - rng_counter_fluid = Utils::Counter(counter); -} - -REGISTER_CALLBACK(mpi_set_lb_fluid_counter) - -void lb_fluid_set_rng_state(uint64_t counter) { - mpi_call(mpi_set_lb_fluid_counter, counter); - mpi_set_lb_fluid_counter(counter); -} - -/***********************************************************************/ - -/** Set up the structures for exchange of the halo regions. 
- * See also \ref halo.cpp - */ -void lb_prepare_communication(HaloCommunicator &halo_comm, - const Lattice &lb_lattice) { - HaloCommunicator comm = HaloCommunicator(0); - - /* since the data layout is a structure of arrays, we have to - * generate a communication for this structure: first we generate - * the communication for one of the arrays (the 0-th velocity - * population), then we replicate this communication for the other - * velocity indices by constructing appropriate vector - * datatypes */ - - /* prepare the communication for a single velocity */ - prepare_halo_communication(comm, lb_lattice, MPI_DOUBLE, node_grid); - - halo_comm.num = comm.num; - halo_comm.halo_info.resize(comm.num); - - /* replicate the halo structure */ - for (int i = 0; i < comm.num; i++) { - HaloInfo &hinfo = halo_comm.halo_info[i]; - - hinfo.source_node = comm.halo_info[i].source_node; - hinfo.dest_node = comm.halo_info[i].dest_node; - hinfo.s_offset = comm.halo_info[i].s_offset; - hinfo.r_offset = comm.halo_info[i].r_offset; - hinfo.type = comm.halo_info[i].type; - - /* generate the vector datatype for the structure of lattices we - * have to use hvector here because the extent of the subtypes - * does not span the full lattice and hence we cannot get the - * correct vskip out of them */ - - MPI_Aint lower; - MPI_Aint extent; - MPI_Type_get_extent(MPI_DOUBLE, &lower, &extent); - MPI_Type_create_hvector(D3Q19::n_vel, 1, - lb_lattice.halo_grid_volume * extent, - comm.halo_info[i].datatype, &hinfo.datatype); - MPI_Type_commit(&hinfo.datatype); - - hinfo.fieldtype = std::make_shared( - D3Q19::n_vel, 1, - static_cast(lb_lattice.halo_grid_volume * sizeof(double)), false, - comm.halo_info[i].fieldtype); - } - - release_halo_communication(comm); -} - -/***********************************************************************/ -/** \name Mapping between hydrodynamic fields and particle populations */ -/***********************************************************************/ -/**@{*/ 
-template -std::array normalize_modes(const std::array &modes) { - auto normalized_modes = modes; - for (int i = 0; i < modes.size(); i++) { - normalized_modes[i] /= D3Q19::w_k[i]; - } - return normalized_modes; -} - -/** - * @brief Transform modes to populations. - */ -template -std::array lb_calc_n_from_m(const std::array &modes) { - auto ret = Utils::matrix_vector_product( - normalize_modes(modes)); - std::transform(ret.begin(), ret.end(), ::D3Q19::w.begin(), ret.begin(), - std::multiplies()); - return ret; -} - -Utils::Vector19d lb_get_population_from_density_momentum_density_stress( - double density, Utils::Vector3d const &momentum_density, - Utils::Vector6d const &stress) { - std::array modes{ - {density, momentum_density[0], momentum_density[1], momentum_density[2], - stress[0], stress[1], stress[2], stress[3], stress[4], stress[5]}}; - - return Utils::Vector19d{lb_calc_n_from_m(modes)}; -} - -void lb_set_population_from_density_momentum_density_stress( - Lattice::index_t const index, double density, - Utils::Vector3d const &momentum_density, Utils::Vector6d const &stress) { - auto const population = - lb_get_population_from_density_momentum_density_stress( - density, momentum_density, stress); - lb_set_population(index, population); -} -/**@}*/ - -std::array lb_calc_modes(Lattice::index_t index, - const LB_Fluid &lb_fluid) { - return Utils::matrix_vector_product( - LB_Fluid_Ref(index, lb_fluid)); -} - -template -std::array lb_relax_modes(const std::array &modes, - const Utils::Vector &force_density, - const LB_Parameters ¶meters) { - using Utils::sqr; - using Utils::Vector; - - /* re-construct the real density - * remember that the populations are stored as differences to their - * equilibrium value */ - auto const density = modes[0] + parameters.density; - auto const momentum_density = - Vector{modes[1], modes[2], modes[3]} + T{0.5} * force_density; - auto const momentum_density2 = momentum_density.norm2(); - - /* equilibrium part of the stress modes */ - 
auto const stress_eq = - Vector{momentum_density2, - (sqr(momentum_density[0]) - sqr(momentum_density[1])), - (momentum_density2 - 3.0 * sqr(momentum_density[2])), - momentum_density[0] * momentum_density[1], - momentum_density[0] * momentum_density[2], - momentum_density[1] * momentum_density[2]} / - density; - - return {{modes[0], modes[1], modes[2], modes[3], - /* relax the stress modes */ - stress_eq[0] + parameters.gamma_bulk * (modes[4] - stress_eq[0]), - stress_eq[1] + parameters.gamma_shear * (modes[5] - stress_eq[1]), - stress_eq[2] + parameters.gamma_shear * (modes[6] - stress_eq[2]), - stress_eq[3] + parameters.gamma_shear * (modes[7] - stress_eq[3]), - stress_eq[4] + parameters.gamma_shear * (modes[8] - stress_eq[4]), - stress_eq[5] + parameters.gamma_shear * (modes[9] - stress_eq[5]), - /* relax the ghost modes (project them out) */ - /* ghost modes have no equilibrium part due to orthogonality */ - parameters.gamma_odd * modes[10], parameters.gamma_odd * modes[11], - parameters.gamma_odd * modes[12], parameters.gamma_odd * modes[13], - parameters.gamma_odd * modes[14], parameters.gamma_odd * modes[15], - parameters.gamma_even * modes[16], parameters.gamma_even * modes[17], - parameters.gamma_even * modes[18]}}; -} - -template -std::array lb_thermalize_modes( - Lattice::index_t index, const std::array &modes, - const LB_Parameters &lb_parameters, - boost::optional> const &rng_counter) { - if (lb_parameters.kT > 0.0) { - using Utils::uniform; - using rng_type = r123::Philox4x64; - using ctr_type = rng_type::ctr_type; - - const ctr_type c{ - {rng_counter->value(), static_cast(RNGSalt::FLUID)}}; - const T rootdensity = - std::sqrt(std::fabs(modes[0] + lb_parameters.density)); - auto const pref = std::sqrt(12.) 
* rootdensity; - - const ctr_type noise[4] = { - rng_type{}(c, {{static_cast(index), 0ul}}), - rng_type{}(c, {{static_cast(index), 1ul}}), - rng_type{}(c, {{static_cast(index), 2ul}}), - rng_type{}(c, {{static_cast(index), 3ul}})}; - - auto rng = [&](int i) { return uniform(noise[i / 4][i % 4]) - 0.5; }; - - return {/* conserved modes */ - {modes[0], modes[1], modes[2], modes[3], - /* stress modes */ - modes[4] + pref * lb_parameters.phi[4] * rng(0), - modes[5] + pref * lb_parameters.phi[5] * rng(1), - modes[6] + pref * lb_parameters.phi[6] * rng(2), - modes[7] + pref * lb_parameters.phi[7] * rng(3), - modes[8] + pref * lb_parameters.phi[8] * rng(4), - modes[9] + pref * lb_parameters.phi[9] * rng(5), - - /* ghost modes */ - modes[10] + pref * lb_parameters.phi[10] * rng(6), - modes[11] + pref * lb_parameters.phi[11] * rng(7), - modes[12] + pref * lb_parameters.phi[12] * rng(8), - modes[13] + pref * lb_parameters.phi[13] * rng(9), - modes[14] + pref * lb_parameters.phi[14] * rng(10), - modes[15] + pref * lb_parameters.phi[15] * rng(11), - modes[16] + pref * lb_parameters.phi[16] * rng(12), - modes[17] + pref * lb_parameters.phi[17] * rng(13), - modes[18] + pref * lb_parameters.phi[18] * rng(14)}}; - } - return modes; -} - -template -std::array lb_apply_forces(const std::array &modes, - const LB_Parameters &lb_parameters, - Utils::Vector const &f) { - auto const density = modes[0] + lb_parameters.density; - - /* hydrodynamic momentum density is redefined when external forces present */ - auto const u = - Utils::Vector3d{modes[1], modes[2], modes[3]} + T{0.5} * f / density; - - auto const C = std::array{ - {(1. + lb_parameters.gamma_shear) * u[0] * f[0] + - 1. / 3. * (lb_parameters.gamma_bulk - lb_parameters.gamma_shear) * - (u * f), - 1. / 2. * (1. + lb_parameters.gamma_shear) * (u[0] * f[1] + u[1] * f[0]), - (1. + lb_parameters.gamma_shear) * u[1] * f[1] + - 1. / 3. * (lb_parameters.gamma_bulk - lb_parameters.gamma_shear) * - (u * f), - 1. / 2. * (1. 
+ lb_parameters.gamma_shear) * (u[0] * f[2] + u[2] * f[0]), - 1. / 2. * (1. + lb_parameters.gamma_shear) * (u[1] * f[2] + u[2] * f[1]), - (1. + lb_parameters.gamma_shear) * u[2] * f[2] + - 1. / 3. * (lb_parameters.gamma_bulk - lb_parameters.gamma_shear) * - (u * f)}}; - - return {{modes[0], - /* update momentum modes */ - modes[1] + f[0], modes[2] + f[1], modes[3] + f[2], - /* update stress modes */ - modes[4] + C[0] + C[2] + C[5], modes[5] + C[0] - C[2], - modes[6] + C[0] + C[2] - 2. * C[5], modes[7] + C[1], modes[8] + C[3], - modes[9] + C[4], modes[10], modes[11], modes[12], modes[13], - modes[14], modes[15], modes[16], modes[17], modes[18]}}; -} - -/** - * @brief Relative index for the next node for each lattice velocity. - * - * @param lb_lattice The lattice parameters. - * @param c Lattice velocities. - */ -auto lb_next_offsets(const Lattice &lb_lattice, - std::array const &c) { - const Utils::Vector3 strides = { - {1, lb_lattice.halo_grid[0], - static_cast(lb_lattice.halo_grid[0]) * - static_cast(lb_lattice.halo_grid[1])}}; - - std::array offsets; - boost::transform(c, offsets.begin(), - [&strides](auto const &ci) { return strides * ci; }); - - return offsets; -} - -template -void lb_stream(LB_Fluid &lb_fluid, const std::array &populations, - std::size_t index, - std::array const &offsets) { - for (int i = 0; i < populations.size(); i++) { - lb_fluid[i][index + offsets[i]] = populations[i]; - } -} - -/* Collisions and streaming (push scheme) */ -void lb_integrate() { - ESPRESSO_PROFILER_CXX_MARK_FUNCTION; - /* loop over all lattice cells (halo excluded) */ -#ifdef LB_BOUNDARIES - for (auto &lbboundary : LBBoundaries::lbboundaries) { - (*lbboundary).reset_force(); - } -#endif // LB_BOUNDARIES - - auto const next_offsets = lb_next_offsets(lblattice, D3Q19::c); - - Lattice::index_t index = lblattice.halo_offset; - for (int z = 1; z <= lblattice.grid[2]; z++) { - for (int y = 1; y <= lblattice.grid[1]; y++) { - for (int x = 1; x <= lblattice.grid[0]; x++) { - // 
as we only want to apply this to non-boundary nodes we can throw out - // the if-clause if we have a non-bounded domain -#ifdef LB_BOUNDARIES - if (!lbfields[index].boundary) -#endif // LB_BOUNDARIES - { - /* calculate modes locally */ - auto const modes = lb_calc_modes(index, lbfluid); - - /* deterministic collisions */ - auto const relaxed_modes = - lb_relax_modes(modes, lbfields[index].force_density, lbpar); - - /* fluctuating hydrodynamics */ - auto const thermalized_modes = lb_thermalize_modes( - index, relaxed_modes, lbpar, rng_counter_fluid); - - /* apply forces */ - auto const modes_with_forces = lb_apply_forces( - thermalized_modes, lbpar, lbfields[index].force_density); - -#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS - // Safeguard the node forces so that we can later use them for the IBM - // particle update - lbfields[index].force_density_buf = lbfields[index].force_density; -#endif - - /* reset the force density */ - lbfields[index].force_density = lbpar.ext_force_density; - - /* transform back to populations and streaming */ - auto const populations = lb_calc_n_from_m(modes_with_forces); - lb_stream(lbfluid_post, populations, index, next_offsets); - } - - ++index; /* next node */ - } - index += 2; /* skip halo region */ - } - index += 2 * lblattice.halo_grid[0]; /* skip halo region */ - } - - /* exchange halo regions */ - halo_push_communication(lbfluid_post, lblattice); - -#ifdef LB_BOUNDARIES - /* boundary conditions for links */ - lb_bounce_back(lbfluid_post, lbpar, lbfields); -#endif // LB_BOUNDARIES - - /* swap the pointers for old and new population fields */ - std::swap(lbfluid, lbfluid_post); - - halo_communication(update_halo_comm, - reinterpret_cast(lbfluid[0].data())); - -#ifdef ADDITIONAL_CHECKS - lb_check_halo_regions(lbfluid, lblattice); -#endif -} - -#ifdef ADDITIONAL_CHECKS -int compare_buffers(std::array const &buff_a, - std::array const &buff_b) { - if (buff_a != buff_b) { - runtimeErrorMsg() << "Halo buffers are not identical"; - return 
ES_ERROR; - } - return ES_OK; -} - -void log_buffer_diff(std::ostream &out, int dir, Lattice::index_t index, int x, - int y, int z) { - out << "buffers differ in dir=" << dir << " at node index=" << index; - if (x != -1) - out << " x=" << x; - if (y != -1) - out << " y=" << y; - if (z != -1) - out << " z=" << z; - out << "\n"; -} - -/** Check consistency of the halo regions. - * Test whether the halo regions have been exchanged correctly. - */ -void lb_check_halo_regions(const LB_Fluid &lb_fluid, - const Lattice &lb_lattice) { - Lattice::index_t index; - std::size_t i; - int x, y, z, s_node, r_node; - std::array s_buffer; - std::array r_buffer; - - auto const node_neighbors = calc_node_neighbors(comm_cart); - - if (box_geo.periodic(0)) { - for (z = 0; z < lb_lattice.halo_grid[2]; ++z) { - for (y = 0; y < lb_lattice.halo_grid[1]; ++y) { - index = get_linear_index(0, y, z, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - - s_node = node_neighbors[1]; - r_node = node_neighbors[0]; - if (n_nodes > 1) { - comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, - REQ_HALO_CHECK, r_buffer); - index = - get_linear_index(lb_lattice.grid[0], y, z, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer); - } else { - index = - get_linear_index(lb_lattice.grid[0], y, z, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer)) { - log_buffer_diff(std::cerr, 0, index, -1, y, z); - } - } - - index = get_linear_index(lb_lattice.grid[0] + 1, y, z, - lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - - s_node = node_neighbors[0]; - r_node = node_neighbors[1]; - if (n_nodes > 1) { - comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, - REQ_HALO_CHECK, r_buffer); - index = get_linear_index(1, y, z, 
lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer); - } else { - index = get_linear_index(1, y, z, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer)) { - log_buffer_diff(std::cerr, 0, index, -1, y, z); - } - } - } - } - } - - if (box_geo.periodic(1)) { - for (z = 0; z < lb_lattice.halo_grid[2]; ++z) { - for (x = 0; x < lb_lattice.halo_grid[0]; ++x) { - index = get_linear_index(x, 0, z, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - - s_node = node_neighbors[3]; - r_node = node_neighbors[2]; - if (n_nodes > 1) { - comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, - REQ_HALO_CHECK, r_buffer); - index = - get_linear_index(x, lb_lattice.grid[1], z, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer); - } else { - index = - get_linear_index(x, lb_lattice.grid[1], z, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer)) { - log_buffer_diff(std::cerr, 1, index, x, -1, z); - } - } - } - for (x = 0; x < lb_lattice.halo_grid[0]; ++x) { - index = get_linear_index(x, lb_lattice.grid[1] + 1, z, - lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - - s_node = node_neighbors[2]; - r_node = node_neighbors[3]; - if (n_nodes > 1) { - comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, - REQ_HALO_CHECK, r_buffer); - index = get_linear_index(x, 1, z, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer); - } else { - index = get_linear_index(x, 1, z, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - r_buffer[i] = lb_fluid[i][index]; - if 
(compare_buffers(s_buffer, r_buffer)) { - log_buffer_diff(std::cerr, 1, index, x, -1, z); - } - } - } - } - } - - if (box_geo.periodic(2)) { - for (y = 0; y < lb_lattice.halo_grid[1]; ++y) { - for (x = 0; x < lb_lattice.halo_grid[0]; ++x) { - index = get_linear_index(x, y, 0, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - - s_node = node_neighbors[5]; - r_node = node_neighbors[4]; - if (n_nodes > 1) { - comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, - REQ_HALO_CHECK, r_buffer); - index = - get_linear_index(x, y, lb_lattice.grid[2], lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer); - } else { - index = - get_linear_index(x, y, lb_lattice.grid[2], lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer)) { - log_buffer_diff(std::cerr, 2, index, x, y, lb_lattice.grid[2]); - } - } - } - } - for (y = 0; y < lb_lattice.halo_grid[1]; ++y) { - for (x = 0; x < lb_lattice.halo_grid[0]; ++x) { - index = get_linear_index(x, y, lb_lattice.grid[2] + 1, - lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - - s_node = node_neighbors[4]; - r_node = node_neighbors[5]; - if (n_nodes > 1) { - comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, - REQ_HALO_CHECK, r_buffer); - index = get_linear_index(x, y, 1, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer); - } else { - index = get_linear_index(x, y, 1, lb_lattice.halo_grid); - for (i = 0; i < D3Q19::n_vel; i++) - r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer)) { - log_buffer_diff(std::cerr, 2, index, x, y, -1); - } - } - } - } - } -} -#endif // ADDITIONAL_CHECKS - -double lb_calc_density(std::array const &modes, - const LB_Parameters 
&lb_parameters) { - return modes[0] + lb_parameters.density; -} - -Utils::Vector3d lb_calc_momentum_density(std::array const &modes, - Utils::Vector3d const &force_density) { - return Utils::Vector3d{{modes[1] + 0.5 * force_density[0], - modes[2] + 0.5 * force_density[1], - modes[3] + 0.5 * force_density[2]}}; -} - -Utils::Vector6d lb_calc_pressure_tensor(std::array const &modes, - Utils::Vector3d const &force_density, - const LB_Parameters &lb_parameters) { - auto const momentum_density = lb_calc_momentum_density(modes, force_density); - auto const density = lb_calc_density(modes, lb_parameters); - using Utils::sqr; - auto const momentum_density2 = sqr(momentum_density[0]) + - sqr(momentum_density[1]) + - sqr(momentum_density[2]); - /* equilibrium part of the stress modes */ - Utils::Vector6d modes_from_stress_eq{}; - modes_from_stress_eq[0] = momentum_density2 / density; - modes_from_stress_eq[1] = - (sqr(momentum_density[0]) - sqr(momentum_density[1])) / density; - modes_from_stress_eq[2] = - (momentum_density2 - 3.0 * sqr(momentum_density[2])) / density; - modes_from_stress_eq[3] = momentum_density[0] * momentum_density[1] / density; - modes_from_stress_eq[4] = momentum_density[0] * momentum_density[2] / density; - modes_from_stress_eq[5] = momentum_density[1] * momentum_density[2] / density; - - /* Now we must predict the outcome of the next collision */ - /* We immediately average pre- and post-collision. 
*/ - - Utils::Vector6d avg_modes; - avg_modes[0] = - modes_from_stress_eq[0] + (0.5 + 0.5 * lb_parameters.gamma_bulk) * - (modes[4] - modes_from_stress_eq[0]); - avg_modes[1] = - modes_from_stress_eq[1] + (0.5 + 0.5 * lb_parameters.gamma_shear) * - (modes[5] - modes_from_stress_eq[1]); - avg_modes[2] = - modes_from_stress_eq[2] + (0.5 + 0.5 * lb_parameters.gamma_shear) * - (modes[6] - modes_from_stress_eq[2]); - avg_modes[3] = - modes_from_stress_eq[3] + (0.5 + 0.5 * lb_parameters.gamma_shear) * - (modes[7] - modes_from_stress_eq[3]); - avg_modes[4] = - modes_from_stress_eq[4] + (0.5 + 0.5 * lb_parameters.gamma_shear) * - (modes[8] - modes_from_stress_eq[4]); - avg_modes[5] = - modes_from_stress_eq[5] + (0.5 + 0.5 * lb_parameters.gamma_shear) * - (modes[9] - modes_from_stress_eq[5]); - - // Transform the stress tensor components according to the modes that - // correspond to those used by U. Schiller. In terms of populations this - // expression then corresponds exactly to those in eq. (116)-(121) in - // @cite dunweg07a, when these are written out in populations. - // But to ensure this, the expression in Schiller's modes has to be different! 
- - Utils::Vector6d stress; - stress[0] = - (2.0 * (modes[0] + avg_modes[0]) + avg_modes[2] + 3.0 * avg_modes[1]) / - 6.0; // xx - stress[1] = avg_modes[3]; // xy - stress[2] = - (2.0 * (modes[0] + avg_modes[0]) + avg_modes[2] - 3.0 * avg_modes[1]) / - 6.0; // yy - stress[3] = avg_modes[4]; // xz - stress[4] = avg_modes[5]; // yz - stress[5] = (modes[0] + avg_modes[0] - avg_modes[2]) / 3.0; // zz - return stress; -} - -#ifdef LB_BOUNDARIES -void lb_bounce_back(LB_Fluid &lb_fluid, const LB_Parameters &lb_parameters, - const std::vector &lb_fields) { - auto const next = lb_next_offsets(lblattice, D3Q19::c); - static constexpr int reverse[] = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, - 9, 12, 11, 14, 13, 16, 15, 18, 17}; - - /* bottom-up sweep */ - for (int z = 0; z < lblattice.grid[2] + 2; z++) { - for (int y = 0; y < lblattice.grid[1] + 2; y++) { - for (int x = 0; x < lblattice.grid[0] + 2; x++) { - auto const k = get_linear_index(x, y, z, lblattice.halo_grid); - - if (lb_fields[k].boundary) { - Utils::Vector3d boundary_force = {}; - for (int i = 0; i < 19; i++) { - auto const ci = D3Q19::c[i]; - - if (x - ci[0] > 0 && x - ci[0] < lblattice.grid[0] + 1 && - y - ci[1] > 0 && y - ci[1] < lblattice.grid[1] + 1 && - z - ci[2] > 0 && z - ci[2] < lblattice.grid[2] + 1) { - if (!lb_fields[k - next[i]].boundary) { - auto const population_shift = - -lb_parameters.density * 2 * D3Q19::w[i] * - (ci * lb_fields[k].slip_velocity) / - D3Q19::c_sound_sq; - - boundary_force += (2 * lb_fluid[i][k] + population_shift) * ci; - lb_fluid[reverse[i]][k - next[i]] = - lb_fluid[i][k] + population_shift; - } else { - lb_fluid[reverse[i]][k - next[i]] = lb_fluid[i][k] = 0.0; - } - } - } - LBBoundaries::lbboundaries[lb_fields[k].boundary - 1]->force() += - boundary_force; - } - } - } - } -} -#endif // LB_BOUNDARIES - -/** Calculate the local fluid momentum. - * The calculation is implemented explicitly for the special case of D3Q19. 
- * @param[in] index Local lattice site - * @param[in] lb_fluid Populations of the fluid - * @retval The local fluid momentum. - */ -Utils::Vector3d lb_calc_local_momentum_density(Lattice::index_t index, - const LB_Fluid &lb_fluid) { - return {{lb_fluid[1][index] - lb_fluid[2][index] + lb_fluid[7][index] - - lb_fluid[8][index] + lb_fluid[9][index] - lb_fluid[10][index] + - lb_fluid[11][index] - lb_fluid[12][index] + lb_fluid[13][index] - - lb_fluid[14][index], - lb_fluid[3][index] - lb_fluid[4][index] + lb_fluid[7][index] - - lb_fluid[8][index] - lb_fluid[9][index] + lb_fluid[10][index] + - lb_fluid[15][index] - lb_fluid[16][index] + lb_fluid[17][index] - - lb_fluid[18][index], - lb_fluid[5][index] - lb_fluid[6][index] + lb_fluid[11][index] - - lb_fluid[12][index] - lb_fluid[13][index] + lb_fluid[14][index] + - lb_fluid[15][index] - lb_fluid[16][index] - lb_fluid[17][index] + - lb_fluid[18][index]}}; -} - -/** Calculate momentum of the LB fluid. - * @param[in] lb_parameters LB parameters - * @param[in] lb_fields Hydrodynamic fields of the fluid - * @param[in] lb_lattice The underlying lattice - */ -Utils::Vector3d -mpi_lb_calc_fluid_momentum_local(LB_Parameters const &lb_parameters, - std::vector const &lb_fields, - Lattice const &lb_lattice) { - Utils::Vector3d momentum_density{}, momentum{}, result{}; - - for (int x = 1; x <= lb_lattice.grid[0]; x++) { - for (int y = 1; y <= lb_lattice.grid[1]; y++) { - for (int z = 1; z <= lb_lattice.grid[2]; z++) { - auto const index = get_linear_index(x, y, z, lb_lattice.halo_grid); - - momentum_density = lb_calc_local_momentum_density(index, lbfluid); - momentum += momentum_density + .5 * lb_fields[index].force_density; - } - } - } - - momentum *= lb_parameters.agrid / lb_parameters.tau; - boost::mpi::reduce(::comm_cart, momentum, result, std::plus<>(), 0); - return result; -} - -void lb_collect_boundary_forces(double *result) { -#ifdef LB_BOUNDARIES - auto const lbb_data_len = 3 * LBBoundaries::lbboundaries.size(); - 
std::vector boundary_forces(lbb_data_len); - std::size_t i = 0; - for (auto it = LBBoundaries::lbboundaries.begin(); - it != LBBoundaries::lbboundaries.end(); ++it, i++) - for (std::size_t j = 0; j < 3; j++) - boundary_forces[3 * i + j] = (**it).force()[j]; - - boost::mpi::reduce(comm_cart, boundary_forces.data(), - static_cast(lbb_data_len), result, std::plus<>(), 0); -#endif -} diff --git a/src/core/grid_based_algorithms/lb.hpp b/src/core/grid_based_algorithms/lb.hpp deleted file mode 100644 index f7dc8eae44d..00000000000 --- a/src/core/grid_based_algorithms/lb.hpp +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - * Max-Planck-Institute for Polymer Research, Theory Group - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef SRC_CORE_GRID_BASED_ALGORITHMS_LB_HPP -#define SRC_CORE_GRID_BASED_ALGORITHMS_LB_HPP -/** \file - * - * %Lattice Boltzmann algorithm for hydrodynamic degrees of freedom. - * - * For performance reasons it is clever to do streaming and collision at the - * same time because every fluid node has to be read and written only once. - * This increases mainly cache efficiency. 
- * - * The hydrodynamic fields, corresponding to density, velocity and pressure, - * are stored in @ref LB_FluidNode in the array @ref lbfields, the populations - * in @ref LB_Fluid in the array @ref lbfluid which is constructed as - * 2 x (Nx x Ny x Nz) x 19 array. - * - * Implementation in lb.cpp. - */ - -#include "config/config.hpp" -#include "grid_based_algorithms/lattice.hpp" -#include "grid_based_algorithms/lb-d3q19.hpp" -#include "grid_based_algorithms/lb_constants.hpp" - -#include "halo.hpp" - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -/** Counter for the RNG */ -extern boost::optional> rng_counter_fluid; - -/** Data structure for fluid on a local lattice site */ -struct LB_FluidNode { -#ifdef LB_BOUNDARIES - /** flag indicating whether this site belongs to a boundary */ - int boundary; - Utils::Vector3d slip_velocity = {}; -#endif // LB_BOUNDARIES - - /** local force density */ - Utils::Vector3d force_density; -#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS - // For particle update, we need the force on the nodes in LBM - // Yet, ESPResSo resets the force immediately after the LBM update - // Therefore we save it here - Utils::Vector3d force_density_buf; -#endif -}; - -/** Data structure holding the parameters for the Lattice Boltzmann system. */ -struct LB_Parameters { - /** number density (LB units) */ - double density; - - /** kinematic viscosity (LB units) */ - double viscosity; - - /** bulk viscosity (LB units) */ - double bulk_viscosity; - - /** lattice spacing */ - double agrid; - - /** time step for fluid propagation (MD units) - * Note: Has to be larger than MD time step! 
*/ - double tau; - - /** external force density applied to the fluid at each lattice site (LB - * Units) */ - Utils::Vector3d ext_force_density; - - /** relaxation of the odd kinetic modes */ - double gamma_odd; - /** relaxation of the even kinetic modes */ - double gamma_even; - /** relaxation rate of shear modes */ - double gamma_shear; - /** relaxation rate of bulk modes */ - double gamma_bulk; - - /** Flag determining whether lbpar.gamma_shear, gamma_odd, and gamma_even are - * calculated from lbpar.gamma_shear in such a way to yield a TRT LB with - * minimized slip at bounce-back boundaries - */ - bool is_TRT; - - /** \name Derived parameters */ - /**@{*/ - /** amplitudes of the fluctuations of the modes */ - Utils::Vector19d phi; - /**@}*/ - /** Thermal energy */ - double kT; - - template void serialize(Archive &ar, long int) { - ar &density &viscosity &bulk_viscosity &agrid &tau &ext_force_density - &gamma_odd &gamma_even &gamma_shear &gamma_bulk &is_TRT &phi &kT; - } -}; - -/** %Lattice Boltzmann parameters. 
*/ -extern LB_Parameters lbpar; - -/** The underlying lattice */ -extern Lattice lblattice; - -/** Communicator for halo exchange between processors */ -extern HaloCommunicator update_halo_comm; - -void lb_init(const LB_Parameters &lb_parameters); - -void lb_reinit_fluid(std::vector &lb_fields, - const Lattice &lb_lattice, - const LB_Parameters &lb_parameters); - -void lb_reinit_parameters(LB_Parameters &lb_parameters); - -using LB_Fluid = std::array, 19>; -extern LB_Fluid lbfluid; - -class LB_Fluid_Ref { -public: - LB_Fluid_Ref(std::size_t index, const LB_Fluid &lb_fluid) - : m_index(index), m_lb_fluid(lb_fluid) {} - template const auto &get() const { - return m_lb_fluid[I][m_index]; - } - -private: - const std::size_t m_index; - const LB_Fluid &m_lb_fluid; -}; - -namespace Utils { - -template auto get(const LB_Fluid_Ref &lb_fluid) { - return lb_fluid.get(); -} - -} // namespace Utils - -/** Hydrodynamic fields of the fluid */ -extern std::vector lbfields; - -/** Integrate the lattice-Boltzmann system for one time step. - * This function performs the collision step and the streaming step. - * If external force densities are present, they are applied prior to the - * collisions. If boundaries are present, it also applies the boundary - * conditions. - */ -void lb_integrate(); - -void lb_sanity_checks(const LB_Parameters &lb_parameters); - -/** Sets the equilibrium distributions. 
- * @param index Index of the local site - * @param density local fluid density - * @param momentum_density local fluid flux density - * @param stress local fluid stress - */ -void lb_set_population_from_density_momentum_density_stress( - Lattice::index_t index, double density, - Utils::Vector3d const &momentum_density, Utils::Vector6d const &stress); - -double lb_calc_density(std::array const &modes, - const LB_Parameters &lb_parameters); -Utils::Vector3d lb_calc_momentum_density(std::array const &modes, - Utils::Vector3d const &force_density); -Utils::Vector6d lb_calc_pressure_tensor(std::array const &modes, - Utils::Vector3d const &force_density, - const LB_Parameters &lb_parameters); - -/** Calculation of hydrodynamic modes. - * - * @param[in] index Number of the node to calculate the modes for - * @param[in] lb_fluid Populations of the fluid - * @retval Array containing the modes. - */ -std::array lb_calc_modes(Lattice::index_t index, - const LB_Fluid &lb_fluid); - -/** - * @brief Get the populations as a function of density, flux density and stress. - * @param density fluid density - * @param momentum_density fluid flux density - * @param stress fluid stress - * @return 19 populations (including equilibrium density contribution). 
- */ -Utils::Vector19d lb_get_population_from_density_momentum_density_stress( - double density, Utils::Vector3d const &momentum_density, - Utils::Vector6d const &stress); - -inline Utils::Vector19d lb_get_population(Lattice::index_t index) { - Utils::Vector19d pop{}; - for (int i = 0; i < D3Q19::n_vel; ++i) { - pop[i] = lbfluid[i][index] + D3Q19::coefficients[i][0] * lbpar.density; - } - return pop; -} - -inline void lb_set_population(Lattice::index_t index, - const Utils::Vector19d &pop) { - for (int i = 0; i < D3Q19::n_vel; ++i) { - lbfluid[i][index] = pop[i] - D3Q19::coefficients[i][0] * lbpar.density; - } -} - -uint64_t lb_fluid_get_rng_state(); -void lb_fluid_set_rng_state(uint64_t counter); -void lb_prepare_communication(HaloCommunicator &halo_comm, - const Lattice &lb_lattice); - -#ifdef LB_BOUNDARIES -/** Bounce back boundary conditions. - * The populations that have propagated into a boundary node - * are bounced back to the node they came from. This results - * in no slip boundary conditions, cf. @cite ladd01a. 
- */ -void lb_bounce_back(LB_Fluid &lbfluid, const LB_Parameters &lb_parameters, - const std::vector &lb_fields); - -#endif /* LB_BOUNDARIES */ - -Utils::Vector3d -mpi_lb_calc_fluid_momentum_local(LB_Parameters const &lb_parameters, - std::vector const &lb_fields, - Lattice const &lb_lattice); -void lb_collect_boundary_forces(double *result); -void lb_initialize_fields(std::vector &fields, - LB_Parameters const &lb_parameters, - Lattice const &lb_lattice); -void lb_on_param_change(LBParam param); - -#ifdef ADDITIONAL_CHECKS -void log_buffer_diff(std::ostream &out, int dir, Lattice::index_t index, int x, - int y, int z); -#endif // ADDITIONAL_CHECKS - -#endif // SRC_CORE_GRID_BASED_ALGORITHMS_LB_HPP diff --git a/src/core/grid_based_algorithms/lb_boundaries.cpp b/src/core/grid_based_algorithms/lb_boundaries.cpp deleted file mode 100644 index 12a19f9d423..00000000000 --- a/src/core/grid_based_algorithms/lb_boundaries.cpp +++ /dev/null @@ -1,317 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - * Max-Planck-Institute for Polymer Research, Theory Group, - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -/** \file - * - * Boundary conditions for lattice Boltzmann fluid dynamics. - * Source file for \ref lb_boundaries.hpp. 
- */ - -#include "grid_based_algorithms/lb_boundaries.hpp" - -#include "communication.hpp" -#include "errorhandling.hpp" -#include "event.hpp" -#include "grid.hpp" -#include "grid_based_algorithms/electrokinetics.hpp" -#include "grid_based_algorithms/lattice.hpp" -#include "grid_based_algorithms/lb.hpp" -#include "grid_based_algorithms/lb_interface.hpp" -#include "grid_based_algorithms/lbgpu.hpp" -#include "lbboundaries/LBBoundary.hpp" - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace LBBoundaries { - -std::vector> lbboundaries; -#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU) - -void add(const std::shared_ptr &b) { - assert(std::find(lbboundaries.begin(), lbboundaries.end(), b) == - lbboundaries.end()); - lbboundaries.emplace_back(b); - - on_lbboundary_change(); -} - -void remove(const std::shared_ptr &b) { - assert(std::find(lbboundaries.begin(), lbboundaries.end(), b) != - lbboundaries.end()); - lbboundaries.erase(std::remove(lbboundaries.begin(), lbboundaries.end(), b), - lbboundaries.end()); - - on_lbboundary_change(); -} - -bool sanity_check_mach_limit() { - // Boundary velocities are stored in MD units, therefore we need to scale them - // in order to get lattice units. - auto const conv_fac = 1. 
/ lb_lbfluid_get_lattice_speed(); - auto constexpr mach_limit = 0.2; - return std::any_of(lbboundaries.begin(), lbboundaries.end(), - [conv_fac, mach_limit](auto const &b) { - return (b->velocity() * conv_fac).norm() >= mach_limit; - }); -} - -#if defined(EK_BOUNDARIES) -static void ek_init_boundaries() { - int number_of_boundnodes = 0; - - std::vector host_wallcharge_species_density; - float node_wallcharge = 0.0f; - int wallcharge_species = -1, charged_boundaries = 0; - bool node_charged = false; - - for (auto &lbboundary : lbboundaries) { - lbboundary->set_net_charge(0.0); - } - - if (ek_initialized) { - host_wallcharge_species_density.resize(ek_parameters.number_of_nodes); - for (auto &lbboundary : lbboundaries) { - if (lbboundary->charge_density() != 0.0f) { - charged_boundaries = 1; - break; - } - } - - for (int n = 0; n < int(ek_parameters.number_of_species); n++) - if (ek_parameters.valency[n] != 0.0f) { - wallcharge_species = n; - break; - } - - ek_gather_wallcharge_species_density(host_wallcharge_species_density.data(), - wallcharge_species); - - if (wallcharge_species == -1 && charged_boundaries) { - runtimeErrorMsg() - << "no charged species available to create wall charge\n"; - } - - auto const node_volume = Utils::int_pow<3>(ek_parameters.agrid); - for (int z = 0; z < int(lbpar_gpu.dim[2]); z++) { - for (int y = 0; y < int(lbpar_gpu.dim[1]); y++) { - for (int x = 0; x < int(lbpar_gpu.dim[0]); x++) { - auto const pos = static_cast(lbpar_gpu.agrid) * - (Utils::Vector3d{1. * x, 1. * y, 1. 
* z} + - Utils::Vector3d::broadcast(0.5)); - node_charged = false; - node_wallcharge = 0.0f; - - std::vector> boundaries; - std::copy_if(lbboundaries.begin(), lbboundaries.end(), - std::back_inserter(boundaries), [&pos](auto const lbb) { - return lbb->shape().is_inside(pos); - }); - for (auto lbb : boundaries) { - if ((*lbb).charge_density() != 0.0f) { - node_charged = true; - auto const node_charge = (*lbb).charge_density() * node_volume; - node_wallcharge += node_charge; - (*lbb).set_net_charge((*lbb).net_charge() + node_charge); - } - } - if (not boundaries.empty()) { - number_of_boundnodes++; - } - ek_parameters.number_of_boundary_nodes = number_of_boundnodes; - - if (wallcharge_species != -1) { - if (node_charged) - host_wallcharge_species_density[ek_parameters.dim_y * - ek_parameters.dim_x * z + - ek_parameters.dim_x * y + x] = - node_wallcharge / ek_parameters.valency[wallcharge_species]; - } - } - } - } - ek_init_species_density_wallcharge(host_wallcharge_species_density.data(), - wallcharge_species); - } -} -#endif // defined(EK_BOUNDARIES) - -/** Initialize boundary conditions for all constraints in the system. */ -void lb_init_boundaries() { - if (lattice_switch == ActiveLB::GPU) { - if (this_node != 0) { - return; - } -#if defined(CUDA) -#if defined(LB_BOUNDARIES_GPU) -#if defined(EK_BOUNDARIES) - ek_init_boundaries(); -#endif - unsigned number_of_boundnodes = 0; - std::vector host_boundary_node_list; - std::vector host_boundary_index_list; - std::size_t size_of_index; - - for (unsigned z = 0; z < lbpar_gpu.dim[2]; z++) { - for (unsigned y = 0; y < lbpar_gpu.dim[1]; y++) { - for (unsigned x = 0; x < lbpar_gpu.dim[0]; x++) { - auto const pos = static_cast(lbpar_gpu.agrid) * - (Utils::Vector3d{1. * x, 1. * y, 1. 
* z} + - Utils::Vector3d::broadcast(0.5)); - - // take last boundary containing the node - auto const boundary = boost::find_if( - lbboundaries | boost::adaptors::reversed, - [&pos](auto const lbb) { return lbb->shape().is_inside(pos); }); - - if (boundary != boost::rend(lbboundaries)) { - size_of_index = (number_of_boundnodes + 1) * sizeof(int); - host_boundary_node_list.resize(size_of_index); - host_boundary_index_list.resize(size_of_index); - host_boundary_node_list[number_of_boundnodes] = - static_cast(x + lbpar_gpu.dim[0] * y + - lbpar_gpu.dim[0] * lbpar_gpu.dim[1] * z); - host_boundary_index_list[number_of_boundnodes] = static_cast( - std::distance(lbboundaries.begin(), boundary.base())); - number_of_boundnodes++; - } - } - } - } - lbpar_gpu.number_of_boundnodes = number_of_boundnodes; - /* call of cuda fkt */ - std::vector boundary_velocity(3 * (lbboundaries.size() + 1)); - int n = 0; - for (auto lbb = lbboundaries.begin(); lbb != lbboundaries.end(); - ++lbb, n++) { - boundary_velocity[3 * n + 0] = static_cast((**lbb).velocity()[0]); - boundary_velocity[3 * n + 1] = static_cast((**lbb).velocity()[1]); - boundary_velocity[3 * n + 2] = static_cast((**lbb).velocity()[2]); - } - - boundary_velocity[3 * lbboundaries.size() + 0] = 0.0f; - boundary_velocity[3 * lbboundaries.size() + 1] = 0.0f; - boundary_velocity[3 * lbboundaries.size() + 2] = 0.0f; - - lb_init_boundaries_GPU(lbboundaries.size(), number_of_boundnodes, - host_boundary_node_list.data(), - host_boundary_index_list.data(), - boundary_velocity.data()); - -#else // defined (LB_BOUNDARIES_GPU) - if (not lbboundaries.empty()) { - runtimeErrorMsg() - << "LB boundaries not empty for GPU LB but LB_BOUNDARIES_GPU not " - "compiled in. 
Activate in myconfig.hpp."; - } -#endif // defined (LB_BOUNDARIES_GPU) -#endif // defined (CUDA) - } else if (lattice_switch == ActiveLB::CPU) { -#if defined(LB_BOUNDARIES) - using Utils::get_linear_index; - boost::for_each(lbfields, [](auto &f) { f.boundary = 0; }); - - auto const node_pos = calc_node_pos(comm_cart); - auto const offset = Utils::hadamard_product(node_pos, lblattice.grid); - auto const vel_conv = 1. / lb_lbfluid_get_lattice_speed(); - - for (int z = 0; z < lblattice.grid[2] + 2; z++) { - for (int y = 0; y < lblattice.grid[1] + 2; y++) { - for (int x = 0; x < lblattice.grid[0] + 2; x++) { - auto const pos = - (offset + Utils::Vector3d{x - 0.5, y - 0.5, z - 0.5}) * - lblattice.agrid; - - auto const boundary = boost::find_if( - lbboundaries | boost::adaptors::reversed, - [&pos](auto const lbb) { return lbb->shape().is_inside(pos); }); - auto const index = get_linear_index(x, y, z, lblattice.halo_grid); - if (boundary != boost::rend(lbboundaries)) { - auto &node = lbfields[index]; - node.boundary = static_cast( - std::distance(lbboundaries.begin(), boundary.base())); - node.slip_velocity = (*boundary)->velocity() * vel_conv; - } else { - lbfields[index].boundary = 0; - } - } - } - } -#else // defined(LB_BOUNDARIES) - if (not lbboundaries.empty()) { - runtimeErrorMsg() - << "LB boundaries not empty for CPU LB but LB_BOUNDARIES not " - "compiled in. 
Activate in myconfig.hpp."; - } -#endif // defined(LB_BOUNDARIES) - } -} - -#if defined(LB_BOUNDARIES) -static void lb_collect_boundary_forces_local() { - lb_collect_boundary_forces(nullptr); -} - -REGISTER_CALLBACK(lb_collect_boundary_forces_local) -#endif - -Utils::Vector3d lbboundary_get_force(LBBoundary const *lbb) { - Utils::Vector3d force{}; - auto const it = - boost::find_if(lbboundaries, [lbb](std::shared_ptr const &i) { - return i.get() == lbb; - }); - if (it == lbboundaries.end()) - throw std::runtime_error("You probably tried to get the force of an " - "lbboundary that was not added to " - "system.lbboundaries."); - std::vector forces(3 * lbboundaries.size()); - if (lattice_switch == ActiveLB::GPU) { -#if defined(LB_BOUNDARIES_GPU) - lb_gpu_get_boundary_forces(forces); -#endif - } else if (lattice_switch == ActiveLB::CPU) { -#if defined(LB_BOUNDARIES) - mpi_call(lb_collect_boundary_forces_local); - lb_collect_boundary_forces(forces.data()); -#endif - } - auto const container_index = std::distance(lbboundaries.begin(), it); - force[0] = forces[3 * container_index + 0]; - force[1] = forces[3 * container_index + 1]; - force[2] = forces[3 * container_index + 2]; - return force; -} - -#endif // defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU) - -} // namespace LBBoundaries diff --git a/src/core/grid_based_algorithms/lb_boundaries.hpp b/src/core/grid_based_algorithms/lb_boundaries.hpp deleted file mode 100644 index 3dcf7e35f60..00000000000 --- a/src/core/grid_based_algorithms/lb_boundaries.hpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - * Max-Planck-Institute for Polymer Research, Theory Group - * - * This file is part of ESPResSo. 
- * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -/** \file - * - * Boundary conditions for lattice Boltzmann fluid dynamics. - * Header file for \ref lb_boundaries.cpp. - * - * In the current version only simple bounce back walls are implemented. Thus - * after the streaming step, in all wall nodes all populations are bounced - * back from where they came from. - * - */ - -#ifndef LBBOUNDARIES_H -#define LBBOUNDARIES_H - -#include "lbboundaries/LBBoundary.hpp" - -#include "config/config.hpp" - -#include - -#include -#include -#include - -namespace LBBoundaries { -using LB_Fluid = std::array, 19>; - -extern std::vector> lbboundaries; -#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU) - -/** Initializes the constraints in the system. - * This function determines the lattice sites which belong to boundaries - * and marks them with a corresponding flag. - */ -void lb_init_boundaries(); - -void add(const std::shared_ptr &); -void remove(const std::shared_ptr &); - -/** - * @brief Check the boundary velocities. - * Sanity check if the velocity defined at LB boundaries is within the Mach - * number limit of the scheme, i.e. u < 0.2. 
- */ -bool sanity_check_mach_limit(); - -#endif // (LB_BOUNDARIES) || (LB_BOUNDARIES_GPU) -} // namespace LBBoundaries -#endif /* LB_BOUNDARIES_H */ diff --git a/src/core/grid_based_algorithms/lb_collective_interface.cpp b/src/core/grid_based_algorithms/lb_collective_interface.cpp deleted file mode 100644 index f1a9765c5c1..00000000000 --- a/src/core/grid_based_algorithms/lb_collective_interface.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "MpiCallbacks.hpp" -#include "communication.hpp" -#include "config/config.hpp" -#include "grid.hpp" -#include "lb.hpp" -#include "lb_constants.hpp" -#include "lb_interpolation.hpp" - -#include -#include - -#include - -using Utils::get_linear_index; - -/* LB CPU callback interface */ -namespace detail { - -template -void lb_set(Utils::Vector3i const &index, Kernel kernel) { - if (lblattice.is_local(index)) { - kernel(index); - } -} - -template -auto lb_calc(Utils::Vector3i const &index, Kernel kernel) { - using R = decltype(kernel(index)); - if (lblattice.is_local(index)) { - return boost::optional(kernel(index)); - } - return boost::optional(); -} - -template -auto lb_calc_for_pos(Utils::Vector3d const &pos, Kernel kernel) { - using R = decltype(kernel(pos)); - if (map_position_node_array(pos) == this_node) { - return boost::optional(kernel(pos)); - } - return boost::optional(); -} - -template -auto lb_calc_fluid_kernel(Utils::Vector3i const &index, Kernel kernel) { - return lb_calc(index, [&](auto index) { - auto const linear_index = - get_linear_index(lblattice.local_index(index), lblattice.halo_grid); - auto const force_density = lbfields[linear_index].force_density; - auto const modes = lb_calc_modes(linear_index, lbfluid); - return kernel(modes, force_density); - }); -} -} // namespace detail - -boost::optional -mpi_lb_get_interpolated_velocity(Utils::Vector3d const &pos) { - return detail::lb_calc_for_pos(pos, [&](auto pos) { - return lb_lbinterpolation_get_interpolated_velocity(pos); - }); -} - -REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_interpolated_velocity) - -boost::optional -mpi_lb_get_interpolated_density(Utils::Vector3d const &pos) { - return detail::lb_calc_for_pos(pos, [&](auto pos) { - return lb_lbinterpolation_get_interpolated_density(pos); - }); -} - -REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_interpolated_density) - -auto mpi_lb_get_density(Utils::Vector3i const &index) { - return detail::lb_calc_fluid_kernel(index, - [&](auto const 
&modes, auto const &) { - return lb_calc_density(modes, lbpar); - }); -} - -REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_density) - -auto mpi_lb_get_populations(Utils::Vector3i const &index) { - return detail::lb_calc(index, [&](auto index) { - auto const linear_index = - get_linear_index(lblattice.local_index(index), lblattice.halo_grid); - return lb_get_population(linear_index); - }); -} - -REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_populations) - -boost::optional mpi_lb_get_boundary_flag(Utils::Vector3i const &index) { - return detail::lb_calc(index, [&](auto index) { -#ifdef LB_BOUNDARIES - auto const linear_index = - get_linear_index(lblattice.local_index(index), lblattice.halo_grid); - return lbfields[linear_index].boundary; -#else - return 0; -#endif - }); -} - -REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_boundary_flag) - -void mpi_lb_set_population(Utils::Vector3i const &index, - Utils::Vector19d const &population) { - detail::lb_set(index, [&](auto index) { - auto const linear_index = - get_linear_index(lblattice.local_index(index), lblattice.halo_grid); - lb_set_population(linear_index, population); - }); -} - -REGISTER_CALLBACK(mpi_lb_set_population) - -void mpi_lb_set_force_density(Utils::Vector3i const &index, - Utils::Vector3d const &force_density) { - detail::lb_set(index, [&](auto index) { - auto const linear_index = - get_linear_index(lblattice.local_index(index), lblattice.halo_grid); - lbfields[linear_index].force_density = force_density; - }); -} - -REGISTER_CALLBACK(mpi_lb_set_force_density) - -auto mpi_lb_get_momentum_density(Utils::Vector3i const &index) { - return detail::lb_calc_fluid_kernel( - index, [&](auto const &modes, auto const &force_density) { - return lb_calc_momentum_density(modes, force_density); - }); -} - -REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_momentum_density) - -auto mpi_lb_get_pressure_tensor(Utils::Vector3i const &index) { - return detail::lb_calc_fluid_kernel( - index, [&](auto const &modes, auto const &force_density) { - return 
lb_calc_pressure_tensor(modes, force_density, lbpar); - }); -} - -REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_pressure_tensor) - -void mpi_bcast_lb_params_local(LBParam field, LB_Parameters const ¶ms) { - lbpar = params; - lb_on_param_change(field); -} - -REGISTER_CALLBACK(mpi_bcast_lb_params_local) - -/** @brief Broadcast a parameter for lattice Boltzmann. - * @param[in] field References the parameter field to be broadcasted. - * The references are defined in lb.hpp - */ -void mpi_bcast_lb_params(LBParam field) { - mpi_call(mpi_bcast_lb_params_local, field, lbpar); - lb_on_param_change(field); -} diff --git a/src/core/grid_based_algorithms/lb_collective_interface.hpp b/src/core/grid_based_algorithms/lb_collective_interface.hpp deleted file mode 100644 index 1375278ce5a..00000000000 --- a/src/core/grid_based_algorithms/lb_collective_interface.hpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ -#ifndef LB_COLLECTIVE_INTERFACE_HPP -#define LB_COLLECTIVE_INTERFACE_HPP - -#include "grid_based_algorithms/lb_constants.hpp" - -#include -#include - -/* collective getter functions */ -boost::optional -mpi_lb_get_interpolated_velocity(Utils::Vector3d const &pos); -boost::optional -mpi_lb_get_interpolated_density(Utils::Vector3d const &pos); -boost::optional mpi_lb_get_density(Utils::Vector3i const &index); -boost::optional -mpi_lb_get_populations(Utils::Vector3i const &index); -boost::optional mpi_lb_get_boundary_flag(Utils::Vector3i const &index); -boost::optional -mpi_lb_get_momentum_density(Utils::Vector3i const &index); -boost::optional -mpi_lb_get_pressure_tensor(Utils::Vector3i const &index); - -/* collective setter functions */ -void mpi_lb_set_population(Utils::Vector3i const &index, - Utils::Vector19d const &population); -void mpi_lb_set_force_density(Utils::Vector3i const &index, - Utils::Vector3d const &force_density); - -/* collective sync functions */ -void mpi_bcast_lb_params(LBParam field); - -#endif diff --git a/src/core/grid_based_algorithms/lb_constants.hpp b/src/core/grid_based_algorithms/lb_constants.hpp deleted file mode 100644 index e7864bda377..00000000000 --- a/src/core/grid_based_algorithms/lb_constants.hpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2019-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. 
If not, see . - */ -/** \file - * Constants and enumerators for LB. - */ - -#ifndef LB_CONSTANTS_HPP -#define LB_CONSTANTS_HPP - -/** @brief Parameter fields for lattice Boltzmann - * - * Determine what actions have to take place upon change of the respective - * parameter. - */ -enum class LBParam { - DENSITY, /**< fluid density */ - VISCOSITY, /**< fluid kinematic viscosity */ - AGRID, /**< grid constant for fluid lattice */ - EXT_FORCE_DENSITY, /**< external force density acting on the fluid */ - BULKVISC, /**< fluid bulk viscosity */ - KT, /**< thermal energy */ - GAMMA_ODD, /**< Relaxation constant for odd modes */ - GAMMA_EVEN, /**< Relaxation constant for even modes */ - TAU /**< LB time step */ -}; - -#endif /* LB_CONSTANTS_HPP */ diff --git a/src/core/grid_based_algorithms/lb_interface.cpp b/src/core/grid_based_algorithms/lb_interface.cpp index dd9b32d9430..c573b88ee1e 100644 --- a/src/core/grid_based_algorithms/lb_interface.cpp +++ b/src/core/grid_based_algorithms/lb_interface.cpp @@ -16,29 +16,26 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/ -#include "lb_interface.hpp" + +#include "grid_based_algorithms/lb_interface.hpp" +#include "grid_based_algorithms/lb_walberla_instance.hpp" + #include "BoxGeometry.hpp" #include "MpiCallbacks.hpp" #include "communication.hpp" #include "config/config.hpp" -#include "electrokinetics.hpp" #include "errorhandling.hpp" #include "grid.hpp" -#include "halo.hpp" -#include "lb-d3q19.hpp" -#include "lb.hpp" -#include "lb_boundaries.hpp" -#include "lb_collective_interface.hpp" -#include "lb_constants.hpp" -#include "lb_interpolation.hpp" -#include "lbgpu.hpp" #include +#include +#include +#include + #include -#include +#include #include -#include #include #include #include @@ -46,361 +43,55 @@ ActiveLB lattice_switch = ActiveLB::NONE; -ActiveLB lb_lbfluid_get_lattice_switch() { return lattice_switch; } +namespace LB { -struct NoLBActive : public std::exception { - const char *what() const noexcept override { return "LB not activated"; } -}; +ActiveLB get_lattice_switch() { return lattice_switch; } -void lb_lbfluid_integrate() { - if (lattice_switch == ActiveLB::CPU) { - lb_integrate(); - } else if (lattice_switch == ActiveLB::GPU and this_node == 0) { -#ifdef CUDA -#ifdef ELECTROKINETICS - if (ek_initialized) { - ek_integrate(); - } else { -#endif - lb_integrate_GPU(); -#ifdef ELECTROKINETICS - } -#endif -#endif - } +int get_steps_per_md_step(double md_timestep) { + return static_cast(std::round(get_tau() / md_timestep)); } -void lb_lbfluid_propagate() { - if (lattice_switch != ActiveLB::NONE) { - lb_lbfluid_integrate(); - if (lb_lbfluid_get_kT() > 0.0) { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - rng_counter_fluid_gpu->increment(); -#endif - } else if (lattice_switch == ActiveLB::CPU) { - rng_counter_fluid->increment(); - } - } - } -} +void init() {} -/** - * @brief Check the boundary velocities. 
- */ -inline void lb_boundary_mach_check() { -#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU) - if (LBBoundaries::sanity_check_mach_limit()) { - runtimeErrorMsg() << "Lattice velocity exceeds the Mach number limit"; - } +void propagate() { + if (lattice_switch == ActiveLB::WALBERLA_LB) { +#ifdef WALBERLA + lb_walberla()->integrate(); #endif -} - -void lb_lbfluid_sanity_checks(double time_step) { - if (lattice_switch == ActiveLB::GPU && this_node == 0) { -#ifdef CUDA - lb_GPU_sanity_checks(); - lb_boundary_mach_check(); - if (time_step > 0.) - check_tau_time_step_consistency(lb_lbfluid_get_tau(), time_step); -#endif - } - if (lattice_switch == ActiveLB::CPU) { - lb_sanity_checks(lbpar); - lb_boundary_mach_check(); - if (time_step > 0.) - check_tau_time_step_consistency(lb_lbfluid_get_tau(), time_step); } } -void lb_lbfluid_on_integration_start() { - if (lattice_switch == ActiveLB::CPU) { - halo_communication(update_halo_comm, - reinterpret_cast(lbfluid[0].data())); - } -} +void sanity_checks(double time_step) { + if (lattice_switch == ActiveLB::NONE) + return; -/** (Re-)initialize the fluid. */ -void lb_lbfluid_reinit_parameters() { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - if (this_node == 0) - lb_reinit_parameters_gpu(); + if (lattice_switch == ActiveLB::WALBERLA_LB) { +#ifdef WALBERLA + lb_sanity_checks(*lb_walberla(), *lb_walberla_params(), time_step); #endif - } else if (lattice_switch == ActiveLB::CPU) { - lb_reinit_parameters(lbpar); } } -/** Perform a full initialization of the lattice Boltzmann system. - * All derived parameters and the fluid are reset to their default values. 
- */ -void lb_lbfluid_init() { - if (lattice_switch == ActiveLB::GPU && this_node == 0) { -#ifdef CUDA - lb_init_gpu(); +void lebc_sanity_checks(unsigned int shear_direction, + unsigned int shear_plane_normal) { + if (lattice_switch == ActiveLB::WALBERLA_LB) { +#ifdef WALBERLA + lb_walberla()->check_lebc(shear_direction, shear_plane_normal); #endif - } else if (lattice_switch == ActiveLB::CPU) { - lb_init(lbpar); - } -} - -uint64_t lb_lbfluid_get_rng_state() { - if (lattice_switch == ActiveLB::CPU) { - return lb_fluid_get_rng_state(); - } - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - return lb_fluid_get_rng_state_gpu(); -#endif - } - throw NoLBActive(); -} - -void lb_lbfluid_set_rng_state(uint64_t counter) { - if (lattice_switch == ActiveLB::CPU) { - lb_fluid_set_rng_state(counter); - } else if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - lb_fluid_set_rng_state_gpu(counter); -#endif - } else { - throw NoLBActive(); - } -} - -void lb_lbfluid_set_density(double density) { - if (density <= 0) - throw std::invalid_argument("Density has to be > 0. 
but got " + - std::to_string(density)); - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - lbpar_gpu.rho = static_cast(density); - lb_reinit_fluid_gpu(); - lb_lbfluid_reinit_parameters(); -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - lbpar.density = density; - mpi_bcast_lb_params(LBParam::DENSITY); - } else { - throw NoLBActive(); - } -} - -double lb_lbfluid_get_density() { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - return static_cast(lbpar_gpu.rho); -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { - return lbpar.density; - } - throw NoLBActive(); -} - -void lb_lbfluid_set_viscosity(double viscosity) { - if (viscosity <= 0) - throw std::invalid_argument("Viscosity has to be >0."); - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - lbpar_gpu.viscosity = static_cast(viscosity); - lb_lbfluid_reinit_parameters(); -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - lbpar.viscosity = viscosity; - mpi_bcast_lb_params(LBParam::VISCOSITY); - } else { - throw NoLBActive(); - } -} - -double lb_lbfluid_get_viscosity() { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - return static_cast(lbpar_gpu.viscosity); -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { - return lbpar.viscosity; - } - throw NoLBActive(); -} - -void lb_lbfluid_set_bulk_viscosity(double bulk_viscosity) { - if (bulk_viscosity <= 0) - throw std::invalid_argument("Bulk viscosity has to be >0. 
but got " + - std::to_string(bulk_viscosity)); - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - lbpar_gpu.bulk_viscosity = static_cast(bulk_viscosity); - lbpar_gpu.is_TRT = false; - lb_lbfluid_reinit_parameters(); -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - lbpar.bulk_viscosity = bulk_viscosity; - lbpar.is_TRT = false; - mpi_bcast_lb_params(LBParam::BULKVISC); - } else { - throw NoLBActive(); - } -} - -double lb_lbfluid_get_bulk_viscosity() { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - return lbpar_gpu.bulk_viscosity; -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { - return lbpar.bulk_viscosity; - } - throw NoLBActive(); -} - -void lb_lbfluid_set_gamma_odd(double gamma_odd) { - if (fabs(gamma_odd) > 1) - throw std::invalid_argument("Gamma odd has to be <= 1."); - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - lbpar_gpu.gamma_odd = static_cast(gamma_odd); - lbpar_gpu.is_TRT = false; - lb_lbfluid_reinit_parameters(); -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - lbpar.gamma_odd = gamma_odd; - lbpar.is_TRT = false; - mpi_bcast_lb_params(LBParam::GAMMA_ODD); - } else { - throw NoLBActive(); - } -} - -double lb_lbfluid_get_gamma_odd() { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - return lbpar_gpu.gamma_odd; -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { - return lbpar.gamma_odd; - } - throw NoLBActive(); -} - -void lb_lbfluid_set_gamma_even(double gamma_even) { - if (fabs(gamma_even) > 1) - throw std::invalid_argument("gamma_even has to be <= 1."); - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - lbpar_gpu.gamma_even = static_cast(gamma_even); - lbpar_gpu.is_TRT = false; - lb_lbfluid_reinit_parameters(); -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - lbpar.gamma_even = gamma_even; - lbpar.is_TRT = false; - mpi_bcast_lb_params(LBParam::DENSITY); - } else { - throw NoLBActive(); } } -double lb_lbfluid_get_gamma_even() { - if (lattice_switch 
== ActiveLB::GPU) { -#ifdef CUDA - return lbpar_gpu.gamma_even; -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { - return lbpar.gamma_even; - } - throw NoLBActive(); -} - -void lb_lbfluid_set_agrid(double agrid) { - if (agrid <= 0) - throw std::invalid_argument("agrid has to be > 0."); - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - lb_set_agrid_gpu(agrid); - lb_init_gpu(); -#if defined(LB_BOUNDARIES_GPU) - LBBoundaries::lb_init_boundaries(); +double get_agrid() { + if (lattice_switch == ActiveLB::WALBERLA_LB) { +#ifdef WALBERLA + return lb_walberla_params()->get_agrid(); #endif -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - lbpar.agrid = agrid; - mpi_bcast_lb_params(LBParam::AGRID); - } else { - throw NoLBActive(); - } -} - -double lb_lbfluid_get_agrid() { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - return lbpar_gpu.agrid; -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { - return lbpar.agrid; } throw NoLBActive(); } -void lb_lbfluid_set_ext_force_density(const Utils::Vector3d &force_density) { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - lbpar_gpu.ext_force_density[0] = static_cast(force_density[0]); - lbpar_gpu.ext_force_density[1] = static_cast(force_density[1]); - lbpar_gpu.ext_force_density[2] = static_cast(force_density[2]); - lbpar_gpu.external_force_density = force_density[0] != 0. || - force_density[1] != 0. 
|| - force_density[2] != 0.; - lb_reinit_extern_nodeforce_GPU(&lbpar_gpu); - -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - lbpar.ext_force_density = force_density; - mpi_bcast_lb_params(LBParam::EXT_FORCE_DENSITY); - } else { - throw NoLBActive(); - } -} - -const Utils::Vector3d lb_lbfluid_get_ext_force_density() { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - return {static_cast(lbpar_gpu.ext_force_density[0]), - static_cast(lbpar_gpu.ext_force_density[1]), - static_cast(lbpar_gpu.ext_force_density[2])}; -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { - return lbpar.ext_force_density; - } - throw NoLBActive(); -} - -void lb_lbfluid_set_tau(double tau) { - if (tau <= 0.) - throw std::invalid_argument("LB tau has to be positive."); - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - lbpar_gpu.tau = static_cast(tau); - lb_lbfluid_reinit_parameters(); -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - lbpar.tau = tau; - mpi_bcast_lb_params(LBParam::TAU); - } else { - throw NoLBActive(); - } -} - void check_tau_time_step_consistency(double tau, double time_step) { - // use float epsilon since tau may be a float (GPU LB) + // use float epsilon since tau may be a float auto const eps = static_cast(std::numeric_limits::epsilon()); if ((tau - time_step) / (tau + time_step) < -eps) throw std::invalid_argument("LB tau (" + std::to_string(tau) + @@ -415,766 +106,95 @@ void check_tau_time_step_consistency(double tau, double time_step) { std::to_string(factor)); } -double lb_lbfluid_get_tau() { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - return lbpar_gpu.tau; -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { - return lbpar.tau; +double get_tau() { +#ifdef WALBERLA + if (lattice_switch == ActiveLB::WALBERLA_LB) { + return lb_walberla_params()->get_tau(); } - throw NoLBActive(); -} - -void lb_lbfluid_set_lattice_switch(ActiveLB local_lattice_switch) { - switch (local_lattice_switch) { - case 
ActiveLB::NONE: - case ActiveLB::CPU: - case ActiveLB::GPU: - break; - default: - throw std::invalid_argument("Invalid lattice switch."); - } - mpi_set_lattice_switch(local_lattice_switch); -} - -void lb_lbfluid_set_kT(double kT) { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - lbpar_gpu.kT = static_cast(kT); #endif - } else if (lattice_switch == ActiveLB::CPU) { - lbpar.kT = kT; - mpi_bcast_lb_params(LBParam::KT); - } else { - throw NoLBActive(); - } + throw NoLBActive(); } -double lb_lbfluid_get_kT() { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - return static_cast(lbpar_gpu.kT); +double get_kT() { + if (lattice_switch == ActiveLB::WALBERLA_LB) { +#ifdef WALBERLA + return lb_walberla()->get_kT(); #endif } - if (lattice_switch == ActiveLB::CPU) { - return lbpar.kT; - } throw NoLBActive(); } -double lb_lbfluid_get_lattice_speed() { - return lb_lbfluid_get_agrid() / lb_lbfluid_get_tau(); -} +double get_lattice_speed() { return get_agrid() / get_tau(); } -void lb_lbfluid_print_vtk_boundary(const std::string &filename) { - std::fstream cpfile; - cpfile.open(filename, std::ios::out); +#ifdef WALBERLA +namespace Walberla { - if (!cpfile) { - throw std::runtime_error("Could not open '" + filename + "' for writing."); - } +static Utils::Vector3d get_momentum() { return lb_walberla()->get_momentum(); } - auto const vtk_writer = [&](std::string const &label, - auto const &write_boundaries) { - using Utils::Vector3d; - cpfile.precision(6); - cpfile << std::fixed; - auto constexpr vtk_format = Vector3d::formatter(" "); - auto const agrid = lb_lbfluid_get_agrid(); - auto const grid_size = lb_lbfluid_get_shape(); - auto const origin = Vector3d::broadcast(0.5) * agrid; - cpfile << "# vtk DataFile Version 2.0\n" - << label << "\n" - << "ASCII\n" - << "DATASET STRUCTURED_POINTS\n" - << "DIMENSIONS " << vtk_format << grid_size << "\n" - << "ORIGIN " << vtk_format << origin << "\n" - << "SPACING " << vtk_format << Vector3d::broadcast(agrid) << "\n" - << 
"POINT_DATA " << Utils::product(grid_size) << "\n" - << "SCALARS boundary float 1\n" - << "LOOKUP_TABLE default\n"; - write_boundaries(); - }; - - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - std::vector bound_array(lbpar_gpu.number_of_nodes); - lb_get_boundary_flags_GPU(bound_array.data()); - vtk_writer("lbboundaries", [&]() { - for (unsigned int j = 0; j < lbpar_gpu.number_of_nodes; ++j) { - cpfile << bound_array[j] << "\n"; - } - }); -#endif // CUDA - } else { - vtk_writer("lbboundaries", [&]() { - auto const grid_size = lb_lbfluid_get_shape(); - Utils::Vector3i pos; - for (pos[2] = 0; pos[2] < grid_size[2]; pos[2]++) - for (pos[1] = 0; pos[1] < grid_size[1]; pos[1]++) - for (pos[0] = 0; pos[0] < grid_size[0]; pos[0]++) - cpfile << lb_lbnode_get_boundary(pos) << "\n"; - }); - } - cpfile.close(); +static boost::optional +get_velocity_at_pos(Utils::Vector3d pos) { + return lb_walberla()->get_velocity_at_pos(pos); } -void lb_lbfluid_print_vtk_velocity(const std::string &filename, - std::vector bb1, std::vector bb2) { - std::fstream cpfile; - cpfile.open(filename, std::ios::out); - - if (!cpfile) { - throw std::runtime_error("Could not open '" + filename + "' for writing."); - } - - auto bb_low = Utils::Vector3i{}; - auto bb_high = lb_lbfluid_get_shape(); - - auto const vtk_writer = [&](std::string const &label, auto const &get_vel) { - using Utils::Vector3d; - cpfile.precision(6); - cpfile << std::fixed; - auto constexpr vtk_format = Vector3d::formatter(" "); - auto const agrid = lb_lbfluid_get_agrid(); - auto const bb_dim = bb_high - bb_low; - auto const origin = (bb_low + Vector3d::broadcast(0.5)) * agrid; - auto const lattice_speed = lb_lbfluid_get_lattice_speed(); - cpfile << "# vtk DataFile Version 2.0\n" - << label << "\n" - << "ASCII\n" - << "DATASET STRUCTURED_POINTS\n" - << "DIMENSIONS " << vtk_format << bb_dim << "\n" - << "ORIGIN " << vtk_format << origin << "\n" - << "SPACING " << vtk_format << Vector3d::broadcast(agrid) << "\n" - << 
"POINT_DATA " << Utils::product(bb_dim) << "\n" - << "SCALARS velocity float 3\n" - << "LOOKUP_TABLE default\n"; - - Utils::Vector3i pos; - for (pos[2] = bb_low[2]; pos[2] < bb_high[2]; pos[2]++) - for (pos[1] = bb_low[1]; pos[1] < bb_high[1]; pos[1]++) - for (pos[0] = bb_low[0]; pos[0] < bb_high[0]; pos[0]++) - cpfile << vtk_format << get_vel(pos) * lattice_speed << "\n"; - }; - - int it = 0; - for (auto val1 = bb1.begin(), val2 = bb2.begin(); - val1 != bb1.end() && val2 != bb2.end(); ++val1, ++val2) { - if (*val1 == -1 || *val2 == -1) { - break; - } - auto const lower = std::min(*val1, *val2); - auto const upper = std::max(*val1, *val2); - if (lower < 0 or upper >= bb_high[it]) { - throw std::runtime_error( - "Tried to access index " + std::to_string(lower) + " and index " + - std::to_string(upper) + " on dimension " + std::to_string(it) + - " that has size " + std::to_string(bb_high[it])); - } - bb_low[it] = lower; - bb_high[it] = upper; - it++; - } +REGISTER_CALLBACK_ONE_RANK(get_velocity_at_pos) - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - host_values.resize(lbpar_gpu.number_of_nodes); - lb_get_values_GPU(host_values.data()); - auto const box_l = lb_lbfluid_get_shape(); - vtk_writer("lbfluid_gpu", [&box_l](Utils::Vector3i const &pos) { - auto const j = box_l[0] * box_l[1] * pos[2] + box_l[0] * pos[1] + pos[0]; - return Utils::Vector3d{host_values[j].v}; - }); -#endif // CUDA - } else { - vtk_writer("lbfluid_cpu", lb_lbnode_get_velocity); - } - cpfile.close(); -} - -void lb_lbfluid_print_boundary(const std::string &filename) { - std::fstream cpfile; - cpfile.open(filename, std::ios::out); - - if (!cpfile) { - throw std::runtime_error("Could not open '" + filename + "' for writing."); - } - - using Utils::Vector3d; - auto constexpr vtk_format = Vector3d::formatter(" "); - cpfile.precision(6); - cpfile << std::fixed; - - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - std::vector bound_array(lbpar_gpu.number_of_nodes); - 
lb_get_boundary_flags_GPU(bound_array.data()); - auto const agrid = lb_lbfluid_get_agrid(); - Utils::Vector3d pos; - for (unsigned int j = 0; j < lbpar_gpu.number_of_nodes; ++j) { - auto const k = j / lbpar_gpu.dim[0]; - auto const l = k / lbpar_gpu.dim[1]; - pos[0] = (static_cast(j % lbpar_gpu.dim[0]) + 0.5) * agrid; - pos[1] = (static_cast(k % lbpar_gpu.dim[1]) + 0.5) * agrid; - pos[2] = (static_cast(l) + 0.5) * agrid; - cpfile << vtk_format << pos << " " << bound_array[j] << "\n"; - } -#endif // CUDA - } else { - auto const shift = Vector3d{{0.5, 0.5, 0.5}}; - auto const agrid = lb_lbfluid_get_agrid(); - auto const grid_size = lb_lbfluid_get_shape(); - Utils::Vector3i pos; - for (pos[2] = 0; pos[2] < grid_size[2]; pos[2]++) - for (pos[1] = 0; pos[1] < grid_size[1]; pos[1]++) - for (pos[0] = 0; pos[0] < grid_size[0]; pos[0]++) { - auto const flag = (lb_lbnode_get_boundary(pos) != 0) ? 1 : 0; - cpfile << vtk_format << (pos + shift) * agrid << " " << flag << "\n"; - } - } - cpfile.close(); +static boost::optional +get_interpolated_density_at_pos(Utils::Vector3d pos) { + return lb_walberla()->get_interpolated_density_at_pos(pos); } -void lb_lbfluid_print_velocity(const std::string &filename) { - std::fstream cpfile; - cpfile.open(filename, std::ios::out); - - if (!cpfile) { - throw std::runtime_error("Could not open '" + filename + "' for writing."); - } - - using Utils::Vector3d; - auto constexpr vtk_format = Vector3d::formatter(" "); - cpfile.precision(6); - cpfile << std::fixed; +REGISTER_CALLBACK_ONE_RANK(get_interpolated_density_at_pos) - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - std::vector host_values(lbpar_gpu.number_of_nodes); - lb_get_values_GPU(host_values.data()); - auto const agrid = lb_lbfluid_get_agrid(); - auto const lattice_speed = - static_cast(lb_lbfluid_get_lattice_speed()); - Utils::Vector3d pos; - for (unsigned int j = 0; j < lbpar_gpu.number_of_nodes; ++j) { - auto const k = j / lbpar_gpu.dim[0]; - auto const l = k / 
lbpar_gpu.dim[1]; - pos[0] = (static_cast(j % lbpar_gpu.dim[0]) + 0.5) * agrid; - pos[1] = (static_cast(k % lbpar_gpu.dim[1]) + 0.5) * agrid; - pos[2] = (static_cast(l) + 0.5) * agrid; - auto const velocity = Utils::Vector3f(host_values[j].v) * lattice_speed; - cpfile << vtk_format << pos << " " << vtk_format << velocity << "\n"; - } -#endif // CUDA - } else { - auto const shift = Vector3d{{0.5, 0.5, 0.5}}; - auto const agrid = lb_lbfluid_get_agrid(); - auto const grid_size = lb_lbfluid_get_shape(); - auto const lattice_speed = lb_lbfluid_get_lattice_speed(); - Utils::Vector3i pos; - for (pos[2] = 0; pos[2] < grid_size[2]; pos[2]++) - for (pos[1] = 0; pos[1] < grid_size[1]; pos[1]++) - for (pos[0] = 0; pos[0] < grid_size[0]; pos[0]++) - cpfile << vtk_format << (pos + shift) * agrid << " " << vtk_format - << lb_lbnode_get_velocity(pos) * lattice_speed << "\n"; - } - - cpfile.close(); +static Utils::VectorXd<9> get_pressure_tensor() { + return lb_walberla()->get_pressure_tensor(); } -/** Handle for a LB checkpoint file. 
*/ -class LBCheckpointFile { -private: - bool m_binary; - -public: - std::fstream stream; +REGISTER_CALLBACK_REDUCTION(get_pressure_tensor, std::plus<>()) - LBCheckpointFile(std::string const &filename, std::ios_base::openmode mode, - bool binary) { - m_binary = binary; - auto flags = mode; - if (m_binary) - flags |= std::ios_base::binary; - stream.open(filename, flags); - } - - ~LBCheckpointFile() = default; - - template void write(std::vector const &vector) { - if (m_binary) { - stream.write(reinterpret_cast(vector.data()), - vector.size() * sizeof(T)); - } else { - for (auto const &value : vector) { - stream << value << "\n"; - } - } - } - - template - void write(Utils::Vector const &vector) { - if (m_binary) { - stream.write(reinterpret_cast(vector.data()), - N * sizeof(T)); - } else { - stream << Utils::Vector::formatter(" ") << vector << "\n"; - } - } - - template void read(Utils::Vector &vector) { - if (m_binary) { - stream.read(reinterpret_cast(vector.data()), N * sizeof(T)); - } else { - for (auto &value : vector) { - stream >> value; - } - } - } - - template void read(std::vector &vector) { - if (m_binary) { - stream.read(reinterpret_cast(vector.data()), - vector.size() * sizeof(T)); - } else { - for (auto &value : vector) { - stream >> value; - } - } - } -}; - -void lb_lbfluid_save_checkpoint(const std::string &filename, bool binary) { - auto const err_msg = std::string("Error while writing LB checkpoint: "); - - // open file and set exceptions - LBCheckpointFile cpfile(filename, std::ios_base::out, binary); - if (!cpfile.stream) { - throw std::runtime_error(err_msg + "could not open file " + filename); - } - cpfile.stream.exceptions(std::ios_base::failbit | std::ios_base::badbit); +} // namespace Walberla +#endif // WALBERLA - try { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - if (!binary) { - cpfile.stream.precision(8); - cpfile.stream << std::fixed; - } - - auto const grid_size = lb_lbfluid_get_shape(); - auto const data_length = 
lbpar_gpu.number_of_nodes * D3Q19::n_vel; - cpfile.write(grid_size); - - std::vector host_checkpoint_vd(data_length); - lb_save_checkpoint_GPU(host_checkpoint_vd.data()); - cpfile.write(host_checkpoint_vd); -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - if (!binary) { - cpfile.stream.precision(16); - cpfile.stream << std::fixed; - } - - auto const grid_size = lb_lbfluid_get_shape(); - cpfile.write(grid_size); - - for (int i = 0; i < grid_size[0]; i++) { - for (int j = 0; j < grid_size[1]; j++) { - for (int k = 0; k < grid_size[2]; k++) { - auto const ind = Utils::Vector3i{{i, j, k}}; - auto const pop = mpi_call(::Communication::Result::one_rank, - mpi_lb_get_populations, ind); - cpfile.write(pop); - } - } - } - } - } catch (std::ios_base::failure const &) { - cpfile.stream.close(); - throw std::runtime_error(err_msg + "could not write data to " + filename); - } catch (std::runtime_error const &) { - cpfile.stream.close(); - throw; - } -} - -void lb_lbfluid_load_checkpoint(const std::string &filename, bool binary) { - auto const err_msg = std::string("Error while reading LB checkpoint: "); - - // open file and set exceptions - LBCheckpointFile cpfile(filename, std::ios_base::in, binary); - if (!cpfile.stream) { - throw std::runtime_error(err_msg + "could not open file " + filename); - } - cpfile.stream.exceptions(std::ios_base::failbit | std::ios_base::badbit); - - // check the grid size in the checkpoint header matches the current grid size - auto const check_header = [&](Utils::Vector3i const &expected_grid_size) { - Utils::Vector3i grid_size; - cpfile.read(grid_size); - if (grid_size != expected_grid_size) { - std::stringstream message; - message << " grid dimensions mismatch," - << " read [" << grid_size << "]," - << " expected [" << expected_grid_size << "]."; - throw std::runtime_error(err_msg + message.str()); - } - }; - - try { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - auto const gridsize = lb_lbfluid_get_shape(); - auto 
const data_length = lbpar_gpu.number_of_nodes * D3Q19::n_vel; - std::vector host_checkpoint_vd(data_length); - check_header(gridsize); - - cpfile.read(host_checkpoint_vd); - lb_load_checkpoint_GPU(host_checkpoint_vd.data()); -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - auto const gridsize = lb_lbfluid_get_shape(); - mpi_bcast_lb_params(LBParam::DENSITY); - check_header(gridsize); - - Utils::Vector19d pop; - for (int i = 0; i < gridsize[0]; i++) { - for (int j = 0; j < gridsize[1]; j++) { - for (int k = 0; k < gridsize[2]; k++) { - auto const ind = Utils::Vector3i{{i, j, k}}; - cpfile.read(pop); - lb_lbnode_set_pop(ind, pop); - } - } - } - } else { - throw std::runtime_error( - "To load an LB checkpoint one needs to have already " - "initialized the LB fluid with the same grid size."); - } - // check EOF - if (!binary) { - if (cpfile.stream.peek() == '\n') { - std::ignore = cpfile.stream.get(); - } - } - if (cpfile.stream.peek() != EOF) { - throw std::runtime_error(err_msg + "extra data found, expected EOF."); - } - } catch (std::ios_base::failure const &) { - auto const eof_error = cpfile.stream.eof(); - cpfile.stream.close(); - if (eof_error) { - throw std::runtime_error(err_msg + "EOF found."); - } - throw std::runtime_error(err_msg + "incorrectly formatted data."); - } catch (std::runtime_error const &) { - cpfile.stream.close(); - throw; - } -} - -Utils::Vector3i lb_lbfluid_get_shape() { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - return {static_cast(lbpar_gpu.dim[0]), - static_cast(lbpar_gpu.dim[1]), - static_cast(lbpar_gpu.dim[2])}; -#endif - } - if (lattice_switch == ActiveLB::CPU) { - return lblattice.global_grid; - } - throw NoLBActive(); -} - -bool lb_lbnode_is_index_valid(Utils::Vector3i const &ind) { - auto const limit = lb_lbfluid_get_shape(); - return ind < limit && ind >= Utils::Vector3i{}; -} - -double lb_lbnode_get_density(const Utils::Vector3i &ind) { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - auto 
const single_nodeindex = calculate_node_index(lbpar_gpu, ind); - static LB_rho_v_pi_gpu host_print_values; - lb_print_node_GPU(single_nodeindex, &host_print_values); - return host_print_values.rho; -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { +Utils::VectorXd<9> const get_pressure_tensor() { + if (lattice_switch == ActiveLB::WALBERLA_LB) { +#ifdef WALBERLA return ::Communication::mpiCallbacks().call( - ::Communication::Result::one_rank, mpi_lb_get_density, ind); + ::Communication::Result::reduction, std::plus<>(), + Walberla::get_pressure_tensor); +#endif } throw NoLBActive(); } -const Utils::Vector3d lb_lbnode_get_velocity(const Utils::Vector3i &ind) { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - static LB_rho_v_pi_gpu host_print_values; - auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind); - lb_print_node_GPU(single_nodeindex, &host_print_values); - return {static_cast(host_print_values.v[0]), - static_cast(host_print_values.v[1]), - static_cast(host_print_values.v[2])}; +Utils::Vector3d calc_fluid_momentum() { + if (lattice_switch == ActiveLB::WALBERLA_LB) { +#ifdef WALBERLA + return Walberla::get_momentum(); #endif } - if (lattice_switch == ActiveLB::CPU) { - auto const density = ::Communication::mpiCallbacks().call( - ::Communication::Result::one_rank, mpi_lb_get_density, ind); - auto const momentum_density = ::Communication::mpiCallbacks().call( - ::Communication::Result::one_rank, mpi_lb_get_momentum_density, ind); - return momentum_density / density; - } throw NoLBActive(); } -const Utils::Vector6d -lb_lbnode_get_pressure_tensor(const Utils::Vector3i &ind) { - // Add equilibrium pressure to the diagonal (in LB units) - auto const p0 = lb_lbfluid_get_density() * D3Q19::c_sound_sq; - - auto tensor = lb_lbnode_get_pressure_tensor_neq(ind); - tensor[0] += p0; - tensor[2] += p0; - tensor[5] += p0; - - return tensor; -} - -const Utils::Vector6d -lb_lbnode_get_pressure_tensor_neq(const Utils::Vector3i &ind) { - if 
(lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - Utils::Vector6d tensor{}; - static LB_rho_v_pi_gpu host_print_values; - auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind); - lb_print_node_GPU(single_nodeindex, &host_print_values); - for (int i = 0; i < 6; i++) { - tensor[i] = static_cast(host_print_values.pi[i]); - } - return tensor; -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { +Utils::Vector3d const get_interpolated_velocity(Utils::Vector3d const &pos) { + if (lattice_switch == ActiveLB::WALBERLA_LB) { +#ifdef WALBERLA + auto const folded_pos = folded_position(pos, box_geo); return mpi_call(::Communication::Result::one_rank, - mpi_lb_get_pressure_tensor, ind); - } - throw NoLBActive(); -} - -const Utils::Vector6d lb_lbfluid_get_pressure_tensor() { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - auto const stress_tmp = stress_tensor_GPU(); - Utils::Vector6d tensor(stress_tmp.begin(), stress_tmp.end()); - - // Normalize - tensor /= static_cast(lbpar_gpu.number_of_nodes); - - // Add equilibrium pressure to the diagonal (in LB units) - double const p0 = lb_lbfluid_get_density() * D3Q19::c_sound_sq; - - tensor[0] += p0; - tensor[2] += p0; - tensor[5] += p0; - return tensor; + Walberla::get_velocity_at_pos, folded_pos / get_agrid()); #endif } - if (lattice_switch == ActiveLB::CPU) { - auto const grid_size = lb_lbfluid_get_shape(); - Utils::Vector6d tensor{}; - for (int i = 0; i < grid_size[0]; i++) { - for (int j = 0; j < grid_size[1]; j++) { - for (int k = 0; k < grid_size[2]; k++) { - const Utils::Vector3i node{{i, j, k}}; - tensor += lb_lbnode_get_pressure_tensor(node); - } - } - } - - tensor /= static_cast(Utils::product(grid_size)); - return tensor; - } - throw NoLBActive(); -} - -int lb_lbnode_get_boundary(const Utils::Vector3i &ind) { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - unsigned int host_flag; - auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind); - 
lb_get_boundary_flag_GPU(single_nodeindex, &host_flag); - return static_cast(host_flag); -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { - return mpi_call(::Communication::Result::one_rank, mpi_lb_get_boundary_flag, - ind); - } - throw NoLBActive(); -} - -const Utils::Vector19d lb_lbnode_get_pop(const Utils::Vector3i &ind) { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - float population[D3Q19::n_vel]; - - lb_lbfluid_get_population(ind, population); - Utils::Vector19d p_pop; - for (std::size_t i = 0; i < D3Q19::n_vel; ++i) - p_pop[i] = static_cast(population[i]); - return p_pop; -#endif // CUDA - } - if (lattice_switch == ActiveLB::CPU) { - return mpi_call(::Communication::Result::one_rank, mpi_lb_get_populations, - ind); - } throw NoLBActive(); } -void lb_lbnode_set_density(const Utils::Vector3i &ind, double p_density) { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind); - auto const host_density = static_cast(p_density); - lb_set_node_rho_GPU(single_nodeindex, host_density); -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - auto const tensor = lb_lbnode_get_pressure_tensor(ind); - auto const momentum_density = - lb_lbnode_get_velocity(ind) * lb_lbnode_get_density(ind); - auto const population = - lb_get_population_from_density_momentum_density_stress( - p_density, momentum_density, tensor); - mpi_call_all(mpi_lb_set_population, ind, population); - } else { - throw NoLBActive(); - } -} - -void lb_lbnode_set_velocity(const Utils::Vector3i &ind, - const Utils::Vector3d &u) { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - float host_velocity[3]; - host_velocity[0] = static_cast(u[0]); - host_velocity[1] = static_cast(u[1]); - host_velocity[2] = static_cast(u[2]); - auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind); - lb_set_node_velocity_GPU(single_nodeindex, host_velocity); -#endif // CUDA - } else if (lattice_switch == 
ActiveLB::CPU) { - auto const density = lb_lbnode_get_density(ind); - auto const momentum_density = u * density; - auto const tensor = lb_lbnode_get_pressure_tensor(ind); - auto const population = - lb_get_population_from_density_momentum_density_stress( - density, momentum_density, tensor); - mpi_call_all(mpi_lb_set_population, ind, population); - mpi_call_all(mpi_lb_set_force_density, ind, Utils::Vector3d{}); - } else { - throw NoLBActive(); - } -} - -void lb_lbnode_set_pop(const Utils::Vector3i &ind, - const Utils::Vector19d &p_pop) { - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - float population[D3Q19::n_vel]; - - for (std::size_t i = 0; i < D3Q19::n_vel; ++i) - population[i] = static_cast(p_pop[i]); - - lb_lbfluid_set_population(ind, population); -#endif // CUDA - } else if (lattice_switch == ActiveLB::CPU) { - mpi_call_all(mpi_lb_set_population, ind, p_pop); - } else { - throw NoLBActive(); - } -} - -Utils::Vector3d lb_lbfluid_calc_fluid_momentum() { - Utils::Vector3d momentum{}; - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - if (::comm_cart.rank() == 0) { - lb_calc_fluid_momentum_GPU(momentum.data()); - } -#endif - } else if (lattice_switch == ActiveLB::CPU) { - momentum = mpi_lb_calc_fluid_momentum_local(lbpar, lbfields, lblattice); - } - return momentum; -} - -const Utils::Vector3d -lb_lbfluid_get_interpolated_velocity(const Utils::Vector3d &pos) { - auto const folded_pos = folded_position(pos, box_geo); - auto const interpolation_order = lb_lbinterpolation_get_interpolation_order(); - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - Utils::Vector3d interpolated_u{}; - switch (interpolation_order) { - case (InterpolationOrder::linear): - lb_get_interpolated_velocity_gpu<8>(folded_pos.data(), - interpolated_u.data(), 1); - break; - case (InterpolationOrder::quadratic): - lb_get_interpolated_velocity_gpu<27>(folded_pos.data(), - interpolated_u.data(), 1); - break; - } - return interpolated_u; +double 
get_interpolated_density(Utils::Vector3d const &pos) { + if (lattice_switch == ActiveLB::WALBERLA_LB) { +#ifdef WALBERLA + auto const folded_pos = folded_position(pos, box_geo); + return mpi_call(::Communication::Result::one_rank, + Walberla::get_interpolated_density_at_pos, + folded_pos / get_agrid()); #endif } - if (lattice_switch == ActiveLB::CPU) { - switch (interpolation_order) { - case (InterpolationOrder::quadratic): - throw std::runtime_error("The non-linear interpolation scheme is not " - "implemented for the CPU LB."); - case (InterpolationOrder::linear): - return mpi_call(::Communication::Result::one_rank, - mpi_lb_get_interpolated_velocity, folded_pos); - } - } throw NoLBActive(); } -double lb_lbfluid_get_interpolated_density(const Utils::Vector3d &pos) { - auto const folded_pos = folded_position(pos, box_geo); - auto const interpolation_order = lb_lbinterpolation_get_interpolation_order(); - if (lattice_switch == ActiveLB::GPU) { - throw std::runtime_error( - "Density interpolation is not implemented for the GPU LB."); - } - if (lattice_switch == ActiveLB::CPU) { - switch (interpolation_order) { - case (InterpolationOrder::quadratic): - throw std::runtime_error("The non-linear interpolation scheme is not " - "implemented for the CPU LB."); - case (InterpolationOrder::linear): - return mpi_call(::Communication::Result::one_rank, - mpi_lb_get_interpolated_density, folded_pos); - } - } - throw NoLBActive(); -} - -void mpi_set_lattice_switch_local(ActiveLB lattice_switch) { - ::lattice_switch = lattice_switch; -} - -REGISTER_CALLBACK(mpi_set_lattice_switch_local) - -void mpi_set_lattice_switch(ActiveLB lattice_switch) { - mpi_call_all(mpi_set_lattice_switch_local, lattice_switch); -} +} // namespace LB diff --git a/src/core/grid_based_algorithms/lb_interface.hpp b/src/core/grid_based_algorithms/lb_interface.hpp index 3dc14ecc537..ad8a811f322 100644 --- a/src/core/grid_based_algorithms/lb_interface.hpp +++ b/src/core/grid_based_algorithms/lb_interface.hpp 
@@ -20,251 +20,106 @@ #define CORE_LB_INTERFACE #include "config/config.hpp" -#include "grid_based_algorithms/lattice.hpp" #include #include -#include +#include #include /** @brief LB implementation currently active. */ -enum class ActiveLB : int { NONE, CPU, GPU }; +enum class ActiveLB : int { NONE, WALBERLA_LB }; /** @brief Switch determining the type of lattice dynamics. */ extern ActiveLB lattice_switch; -/** - * @brief Propagate the LB fluid. - */ -void lb_lbfluid_propagate(); - -/** - * @brief Event handler for integration start. - */ -void lb_lbfluid_on_integration_start(); +struct NoLBActive : public std::exception { + const char *what() const noexcept override { return "LB not activated"; } +}; -/** - * @brief Perform a full initialization of the lattice-Boltzmann system. - * All derived parameters and the fluid are reset to their default values. - */ -void lb_lbfluid_init(); - -/** - * @brief Reinitialize the derived parameters for the lattice-Boltzmann system. - * The current state of the fluid is unchanged. - */ -void lb_lbfluid_reinit_parameters(); - -/** - * @brief Get the current counter of the Philox RNG. - */ -uint64_t lb_lbfluid_get_rng_state(); - -/** - * @brief Set the current counter of the Philox RNG. - */ -void lb_lbfluid_set_rng_state(uint64_t counter); +namespace LB { /** * @brief Get the global variable @ref lattice_switch. */ -ActiveLB lb_lbfluid_get_lattice_switch(); +ActiveLB get_lattice_switch(); -/** - * @brief Set the global variable @ref lattice_switch. - */ -void lb_lbfluid_set_lattice_switch(ActiveLB local_lattice_switch); +int get_steps_per_md_step(double md_timestep); /** - * @brief Set the LB time step. - */ -void lb_lbfluid_set_tau(double p_tau); - -/** - * @brief Check if tau is an integer multiple of time_step, throws if not - */ -void check_tau_time_step_consistency(double tau, double time_s); - -/** - * @brief Set the global LB density. 
- */ -void lb_lbfluid_set_density(double p_dens); - -/** - * @brief Set the global LB viscosity. - */ -void lb_lbfluid_set_viscosity(double p_visc); - -/** - * @brief Set the global LB bulk viscosity. - */ -void lb_lbfluid_set_bulk_viscosity(double p_bulk_visc); - -/** - * @brief Set the global LB relaxation parameter for odd modes. - */ -void lb_lbfluid_set_gamma_odd(double p_gamma_odd); - -/** - * @brief Set the global LB relaxation parameter for even modes. - */ -void lb_lbfluid_set_gamma_even(double p_gamma_even); - -/** - * @brief Set the global LB lattice spacing. + * @brief Propagate the LB fluid. */ -void lb_lbfluid_set_agrid(double p_agrid); +void propagate(); /** - * @brief Set the external force density acting on the LB fluid. + * @brief Perform a full initialization of the lattice-Boltzmann system. + * All derived parameters and the fluid are reset to their default values. */ -void lb_lbfluid_set_ext_force_density(const Utils::Vector3d &force_density); +void init(); /** - * @brief Set the LB fluid thermal energy. + * @brief Check if tau is an integer multiple of time_step, throws if not */ -void lb_lbfluid_set_kT(double kT); +void check_tau_time_step_consistency(double tau, double time_step); /** * @brief Perform LB parameter and boundary velocity checks. */ -void lb_lbfluid_sanity_checks(double time_step); +void sanity_checks(double time_step); /** - * @brief Set the LB density for a single node. + * @brief Perform LB LEbc parameter checks. */ -void lb_lbnode_set_density(const Utils::Vector3i &ind, double density); +void lebc_sanity_checks(unsigned int shear_direction, + unsigned int shear_plane_normal); /** * @brief Set the LB fluid velocity for a single node. */ -void lb_lbnode_set_velocity(const Utils::Vector3i &ind, - const Utils::Vector3d &u); - -/** - * @brief Set the LB fluid populations for a single node. 
- */ -void lb_lbnode_set_pop(const Utils::Vector3i &ind, const Utils::Vector19d &pop); +void set_velocity(Utils::Vector3i const &ind, Utils::Vector3d const &u); /** * @brief Get the LB time step. */ -double lb_lbfluid_get_tau(); +double get_tau(); /** * @brief Get the LB grid spacing. */ -double lb_lbfluid_get_agrid(); - -/** - * @brief Get the global LB relaxation parameter for odd modes. - */ -double lb_lbfluid_get_gamma_odd(); - -/** - * @brief Get the global LB relaxation parameter for even modes. - */ -double lb_lbfluid_get_gamma_even(); - -/** - * @brief Get the global LB bulk viscosity. - */ -double lb_lbfluid_get_bulk_viscosity(); - -/** - * @brief Get the global LB viscosity. - */ -double lb_lbfluid_get_viscosity(); - -/** - * @brief Get the global LB density. - */ -double lb_lbfluid_get_density(); - -/** - * @brief Get the external force density acting on the LB fluid. - */ -const Utils::Vector3d lb_lbfluid_get_ext_force_density(); +double get_agrid(); /** * @brief Get the thermal energy parameter of the LB fluid. */ -double lb_lbfluid_get_kT(); +double get_kT(); /** * @brief Get the lattice speed (agrid/tau). */ -double lb_lbfluid_get_lattice_speed(); - -/** - * @brief Get the LB fluid density for a single node. - */ -double lb_lbnode_get_density(const Utils::Vector3i &ind); - -/** - * @brief Get the LB fluid velocity for a single node. - */ -const Utils::Vector3d lb_lbnode_get_velocity(const Utils::Vector3i &ind); -const Utils::Vector6d lb_lbnode_get_pressure_tensor(const Utils::Vector3i &ind); -const Utils::Vector6d -lb_lbnode_get_pressure_tensor_neq(const Utils::Vector3i &ind); +double get_lattice_speed(); /** @brief Calculate the average pressure tensor of all nodes by accumulating * over all nodes and dividing by the number of nodes. * Returns the lower triangle of the LB pressure tensor. */ -const Utils::Vector6d lb_lbfluid_get_pressure_tensor(); - -/** - * @brief Get the LB fluid boundary bool for a single node. 
- */ -int lb_lbnode_get_boundary(const Utils::Vector3i &ind); - -/** - * @brief Get the LB fluid populations for a single node. - */ -const Utils::Vector19d lb_lbnode_get_pop(const Utils::Vector3i &ind); - -/* IO routines */ -void lb_lbfluid_print_vtk_boundary(const std::string &filename); -void lb_lbfluid_print_vtk_velocity(const std::string &filename, - std::vector = {-1, -1, -1}, - std::vector = {-1, -1, -1}); - -void lb_lbfluid_print_boundary(const std::string &filename); -void lb_lbfluid_print_velocity(const std::string &filename); - -void lb_lbfluid_save_checkpoint(const std::string &filename, bool binary); -void lb_lbfluid_load_checkpoint(const std::string &filename, bool binary); - -/** - * @brief Checks whether the given node index is within the LB lattice. - */ -bool lb_lbnode_is_index_valid(const Utils::Vector3i &ind); - -/** - * @brief returns the shape of the LB fluid lattice - */ -Utils::Vector3i lb_lbfluid_get_shape(); +Utils::VectorXd<9> const get_pressure_tensor(); -Utils::Vector3d lb_lbfluid_calc_fluid_momentum(); +Utils::Vector3d calc_fluid_momentum(); /** * @brief Calculates the interpolated fluid velocity on the head node process. * @param pos Position at which the velocity is to be calculated. * @retval interpolated fluid velocity. */ -const Utils::Vector3d -lb_lbfluid_get_interpolated_velocity(const Utils::Vector3d &pos); +Utils::Vector3d const get_interpolated_velocity(Utils::Vector3d const &pos); /** * @brief Calculates the interpolated fluid density on the head node process. * @param pos Position at which the density is to be calculated. * @retval interpolated fluid density. 
*/ -double lb_lbfluid_get_interpolated_density(const Utils::Vector3d &pos); +double get_interpolated_density(Utils::Vector3d const &pos); -void mpi_set_lattice_switch(ActiveLB lattice_switch); +} // namespace LB #endif diff --git a/src/core/grid_based_algorithms/lb_interpolation.cpp b/src/core/grid_based_algorithms/lb_interpolation.cpp index baeaac1446d..05cacf3b2f4 100644 --- a/src/core/grid_based_algorithms/lb_interpolation.cpp +++ b/src/core/grid_based_algorithms/lb_interpolation.cpp @@ -17,122 +17,44 @@ * along with this program. If not, see . */ -#include "lb_interpolation.hpp" +#include "grid_based_algorithms/lb_interpolation.hpp" +#include "grid_based_algorithms/lb_interface.hpp" +#include "grid_based_algorithms/lb_walberla_instance.hpp" #include "communication.hpp" #include "config/config.hpp" -#include "grid_based_algorithms/lattice.hpp" -#include "lb.hpp" #include -#include -#include +#include #include -namespace { -InterpolationOrder interpolation_order = InterpolationOrder::linear; -} - -void mpi_set_interpolation_order_local(InterpolationOrder const &order) { - interpolation_order = order; -} - -REGISTER_CALLBACK(mpi_set_interpolation_order_local) - -void lb_lbinterpolation_set_interpolation_order( - InterpolationOrder const &order) { - mpi_call_all(mpi_set_interpolation_order_local, order); -} - -InterpolationOrder lb_lbinterpolation_get_interpolation_order() { - return interpolation_order; -} - -namespace { -template -void lattice_interpolation(Lattice const &lattice, Utils::Vector3d const &pos, - Op &&op) { - Utils::Vector node_index{}; - Utils::Vector6d delta{}; - - /* determine elementary lattice cell surrounding the particle - and the relative position of the particle in this cell */ - lattice.map_position_to_lattice(pos, node_index, delta); - for (int z = 0; z < 2; z++) { - for (int y = 0; y < 2; y++) { - for (int x = 0; x < 2; x++) { - auto &index = node_index[(z * 2 + y) * 2 + x]; - auto const w = delta[3 * x + 0] * delta[3 * y + 1] * delta[3 
* z + 2]; - - op(index, w); - } - } - } -} - -Utils::Vector3d node_u(Lattice::index_t index) { -#ifdef LB_BOUNDARIES - if (lbfields[index].boundary) { - return lbfields[index].slip_velocity; - } -#endif // LB_BOUNDARIES - auto const modes = lb_calc_modes(index, lbfluid); - auto const local_density = lbpar.density + modes[0]; - return Utils::Vector3d{modes[1], modes[2], modes[3]} / local_density; -} - -double node_dens(Lattice::index_t index) { -#ifdef LB_BOUNDARIES - if (lbfields[index].boundary) { - return lbpar.density; - } -#endif // LB_BOUNDARIES - auto const modes = lb_calc_modes(index, lbfluid); - return lbpar.density + modes[0]; -} - -} // namespace - const Utils::Vector3d lb_lbinterpolation_get_interpolated_velocity(const Utils::Vector3d &pos) { - Utils::Vector3d interpolated_u{}; - - /* Calculate fluid velocity at particle's position. - This is done by linear interpolation (eq. (11) @cite ahlrichs99a) */ - lattice_interpolation(lblattice, pos, - [&interpolated_u](Lattice::index_t index, double w) { - interpolated_u += w * node_u(index); - }); - - return interpolated_u; -} - -double lb_lbinterpolation_get_interpolated_density(const Utils::Vector3d &pos) { - double interpolated_dens = 0.; - - /* Calculate fluid density at the position. - This is done by linear interpolation (eq. (11) @cite ahlrichs99a) */ - lattice_interpolation(lblattice, pos, - [&interpolated_dens](Lattice::index_t index, double w) { - interpolated_dens += w * node_dens(index); - }); - - return interpolated_dens; + /* calculate fluid velocity at particle's position + this is done by linear interpolation + (Eq. 
(11) Ahlrichs and Duenweg, JCP 111(17):8225 (1999)) */ + if (lattice_switch == ActiveLB::WALBERLA_LB) { +#ifdef WALBERLA + auto res = lb_walberla()->get_velocity_at_pos(pos / LB::get_agrid(), true); + if (!res) { + std::cout << this_node << ": position: [" << pos << "]\n"; + throw std::runtime_error( + "Interpolated velocity could not be obtained from Walberla"); + } + return *res; +#endif + } + throw std::runtime_error("No LB active."); } void lb_lbinterpolation_add_force_density( const Utils::Vector3d &pos, const Utils::Vector3d &force_density) { - switch (interpolation_order) { - case (InterpolationOrder::quadratic): - throw std::runtime_error("The non-linear interpolation scheme is not " - "implemented for the CPU LB."); - case (InterpolationOrder::linear): - lattice_interpolation(lblattice, pos, - [&force_density](Lattice::index_t index, double w) { - auto &field = lbfields[index]; - field.force_density += w * force_density; - }); - break; - } + if (lattice_switch == ActiveLB::WALBERLA_LB) { +#ifdef WALBERLA + if (!lb_walberla()->add_force_at_pos(pos / LB::get_agrid(), force_density)) + throw std::runtime_error("Could not apply force to lb."); +#endif + } else + throw std::runtime_error("No LB active."); } diff --git a/src/core/grid_based_algorithms/lb_interpolation.hpp b/src/core/grid_based_algorithms/lb_interpolation.hpp index 173a4ddbfd1..26de6289db6 100644 --- a/src/core/grid_based_algorithms/lb_interpolation.hpp +++ b/src/core/grid_based_algorithms/lb_interpolation.hpp @@ -21,26 +21,6 @@ #include -/** - * @brief Interpolation order for the LB fluid interpolation. - * @note For the CPU LB only linear interpolation is available. - */ -enum class InterpolationOrder { linear, quadratic }; - -/** - * @brief Set the interpolation order for the LB. 
- */ -void lb_lbinterpolation_set_interpolation_order( - InterpolationOrder const &interpolation_order); - -// MPI callback exposed for unit testing only -void mpi_set_interpolation_order_local(InterpolationOrder const &order); - -/** - * @brief Get the interpolation order for the LB. - */ -InterpolationOrder lb_lbinterpolation_get_interpolation_order(); - /** * @brief Calculates the fluid velocity at a given position of the * lattice. @@ -48,18 +28,11 @@ InterpolationOrder lb_lbinterpolation_get_interpolation_order(); * position is not within the local lattice. */ const Utils::Vector3d -lb_lbinterpolation_get_interpolated_velocity(const Utils::Vector3d &p); - -/** - * @brief Calculates the fluid density at a given position of the lattice. - * @note It can lead to undefined behaviour if the - * position is not within the local lattice. - */ -double lb_lbinterpolation_get_interpolated_density(const Utils::Vector3d &p); +lb_lbinterpolation_get_interpolated_velocity(const Utils::Vector3d &pos); /** * @brief Add a force density to the fluid at the given position. */ -void lb_lbinterpolation_add_force_density(const Utils::Vector3d &p, +void lb_lbinterpolation_add_force_density(const Utils::Vector3d &pos, const Utils::Vector3d &force_density); #endif diff --git a/src/core/grid_based_algorithms/lb_particle_coupling.cpp b/src/core/grid_based_algorithms/lb_particle_coupling.cpp index 9923d0f54f2..a9de5d30392 100644 --- a/src/core/grid_based_algorithms/lb_particle_coupling.cpp +++ b/src/core/grid_based_algorithms/lb_particle_coupling.cpp @@ -16,7 +16,6 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/ -#include "lb_particle_coupling.hpp" #include "LocalBox.hpp" #include "Particle.hpp" #include "cells.hpp" @@ -24,16 +23,16 @@ #include "config/config.hpp" #include "errorhandling.hpp" #include "grid.hpp" -#include "grid_based_algorithms/OptionalCounter.hpp" -#include "integrate.hpp" -#include "lb_interface.hpp" -#include "lb_interpolation.hpp" -#include "lbgpu.hpp" #include "random.hpp" +#include "grid_based_algorithms/lb_interface.hpp" +#include "grid_based_algorithms/lb_interpolation.hpp" +#include "grid_based_algorithms/lb_particle_coupling.hpp" + #include #include #include +#include #include @@ -43,7 +42,7 @@ #include #include -static LB_Particle_Coupling lb_particle_coupling; +LB_Particle_Coupling lb_particle_coupling; void mpi_bcast_lb_particle_coupling_local() { boost::mpi::broadcast(comm_cart, lb_particle_coupling, 0); @@ -71,20 +70,14 @@ void lb_lbcoupling_deactivate() { void lb_lbcoupling_set_gamma(double gamma) { lb_particle_coupling.gamma = gamma; - mpi_bcast_lb_particle_coupling(); } double lb_lbcoupling_get_gamma() { return lb_particle_coupling.gamma; } bool lb_lbcoupling_is_seed_required() { - if (lattice_switch == ActiveLB::CPU) { + if (lattice_switch == ActiveLB::WALBERLA_LB) { return not lb_particle_coupling.rng_counter_coupling.is_initialized(); } -#ifdef CUDA - if (lattice_switch == ActiveLB::GPU) { - return not rng_counter_coupling_gpu.is_initialized(); - } -#endif return false; } @@ -93,79 +86,54 @@ uint64_t lb_coupling_get_rng_state_cpu() { } uint64_t lb_lbcoupling_get_rng_state() { - if (lattice_switch == ActiveLB::CPU) { + if (lattice_switch == ActiveLB::WALBERLA_LB) { return lb_coupling_get_rng_state_cpu(); } - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - return lb_coupling_get_rng_state_gpu(); -#endif - } - return {}; + throw std::runtime_error("No LB active"); } void lb_lbcoupling_set_rng_state(uint64_t counter) { - if (lattice_switch == ActiveLB::CPU) { + if (lattice_switch == ActiveLB::WALBERLA_LB) { 
lb_particle_coupling.rng_counter_coupling = Utils::Counter(counter); - mpi_bcast_lb_particle_coupling(); - } else if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - lb_coupling_set_rng_state_gpu(counter); -#endif - } + } else + throw std::runtime_error("No LB active"); } -namespace { -/** - * @brief Add a force to the lattice force density. - * @param pos Position of the force - * @param force Force in MD units. - * @param time_step MD time step. - */ void add_md_force(Utils::Vector3d const &pos, Utils::Vector3d const &force, double time_step) { /* transform momentum transfer to lattice units (eq. (12) @cite ahlrichs99a) */ - auto const delta_j = -(time_step / lb_lbfluid_get_lattice_speed()) * force; + auto const delta_j = -(time_step / LB::get_lattice_speed()) * force; lb_lbinterpolation_add_force_density(pos, delta_j); } -} // namespace -/** Coupling of a single particle to viscous fluid with Stokesian friction. - * - * Section II.C. @cite ahlrichs99a - * - * @param[in] p The coupled particle. - * @param[in] pos Local position of particle or its ghost. - * @param[in] f_random Additional force to be included. - * - * @return The viscous coupling force plus @p f_random. - */ -Utils::Vector3d lb_viscous_coupling(Particle const &p, - Utils::Vector3d const &pos, - Utils::Vector3d const &f_random) { - /* calculate fluid velocity at particle's position - this is done by linear interpolation (eq. 
(11) @cite ahlrichs99a) */ - auto const interpolated_u = - lb_lbinterpolation_get_interpolated_velocity(pos) * - lb_lbfluid_get_lattice_speed(); - - Utils::Vector3d v_drift = interpolated_u; +Utils::Vector3d lb_particle_coupling_drift_vel_offset(const Particle &p) { + Utils::Vector3d vel_offset{}; #ifdef ENGINE if (p.swimming().swimming) { - v_drift += p.swimming().v_swim * p.calc_director(); + vel_offset += p.swimming().v_swim * p.calc_director(); } #endif #ifdef LB_ELECTROHYDRODYNAMICS - v_drift += p.mu_E(); + vel_offset += p.mu_E(); #endif + return vel_offset; +} - /* calculate viscous force (eq. (9) @cite ahlrichs99a) */ - auto const force = -lb_lbcoupling_get_gamma() * (p.v() - v_drift) + f_random; +Utils::Vector3d lb_drag_force(Particle const &p, + Utils::Vector3d const &shifted_pos, + Utils::Vector3d const &vel_offset) { + /* calculate fluid velocity at particle's position + this is done by linear interpolation (eq. (11) @cite ahlrichs99a) */ + auto const interpolated_u = + lb_lbinterpolation_get_interpolated_velocity(shifted_pos) * + LB::get_lattice_speed(); - return force; + Utils::Vector3d v_drift = interpolated_u + vel_offset; + /* calculate viscous force (eq. (9) @cite ahlrichs99a) */ + return -lb_lbcoupling_get_gamma() * (p.v() - v_drift); } /** @@ -178,27 +146,22 @@ Utils::Vector3d lb_viscous_coupling(Particle const &p, */ inline bool in_local_domain(Utils::Vector3d const &pos, double halo = 0.) { auto const halo_vec = Utils::Vector3d::broadcast(halo); + auto const lower_corner = local_geo.my_left() - halo_vec; + auto const upper_corner = local_geo.my_right() + halo_vec; - return in_box( - pos, {local_geo.my_left() - halo_vec, local_geo.my_right() + halo_vec}); + return pos >= lower_corner and pos < upper_corner; } -/** - * @brief Check if a position is within the local LB domain - * plus halo. - * - * @param pos Position to check - * - * @return True iff the point is inside of the domain. 
- */ bool in_local_halo(Utils::Vector3d const &pos) { - auto const halo = 0.5 * lb_lbfluid_get_agrid(); + auto const halo = 0.5 * LB::get_agrid(); return in_local_domain(pos, halo); } -/** @brief Return a vector of positions shifted by +,- box length in each - ** coordinate */ +/** + * @brief Return a vector of positions shifted by +,- box length in each + * coordinate + */ std::vector positions_in_halo(Utils::Vector3d pos, const BoxGeometry &box) { std::vector res; @@ -208,6 +171,16 @@ std::vector positions_in_halo(Utils::Vector3d pos, Utils::Vector3d shift{{double(i), double(j), double(k)}}; Utils::Vector3d pos_shifted = pos + Utils::hadamard_product(box.length(), shift); + + if (box_geo.type() == BoxType::LEES_EDWARDS) { + auto le = box_geo.lees_edwards_bc(); + auto normal_shift = (pos_shifted - pos)[le.shear_plane_normal]; + if (normal_shift > std::numeric_limits::epsilon()) + pos_shifted[le.shear_direction] += le.pos_offset; + if (normal_shift < -std::numeric_limits::epsilon()) + pos_shifted[le.shear_direction] -= le.pos_offset; + } + if (in_local_halo(pos_shifted)) { res.push_back(pos_shifted); } @@ -217,16 +190,20 @@ std::vector positions_in_halo(Utils::Vector3d pos, return res; } -/** @brief Return if locally there exists a physical particle - ** for a given (ghost) particle */ +/** + * @brief Return if locally there exists a physical particle + * for a given (ghost) particle + */ bool is_ghost_for_local_particle(const Particle &p) { return !cell_structure.get_local_particle(p.id())->is_ghost(); } -/** @brief Determine if a given particle should be coupled. - ** In certain cases, there may be more than one ghost for the same particle. - ** To make sure, that these are only coupled once, ghosts' ids are stored - ** in an unordered_set. */ +/** + * @brief Determine if a given particle should be coupled. + * In certain cases, there may be more than one ghost for the same particle. 
+ * To make sure, that these are only coupled once, ghosts' ids are stored + * in an unordered_set. + */ bool should_be_coupled(const Particle &p, std::unordered_set &coupled_ghost_particles) { // always couple physical particles @@ -247,10 +224,10 @@ bool should_be_coupled(const Particle &p, void add_swimmer_force(Particle const &p, double time_step) { if (p.swimming().swimming) { // calculate source position - const double direction = - double(p.swimming().push_pull) * p.swimming().dipole_length; + auto const magnitude = p.swimming().dipole_length; + auto const direction = static_cast(p.swimming().push_pull); auto const director = p.calc_director(); - auto const source_position = p.pos() + direction * director; + auto const source_position = p.pos() + direction * magnitude * director; auto const force = p.swimming().f_swim * director; // couple positions including shifts by one box length to add forces @@ -262,99 +239,88 @@ void add_swimmer_force(Particle const &p, double time_step) { } #endif +Utils::Vector3d lb_particle_coupling_noise(bool enabled, int part_id, + const OptionalCounter &rng_counter) { + if (enabled) { + if (rng_counter) { + return Random::noise_uniform(rng_counter->value(), 0, + part_id); + } + throw std::runtime_error( + "Access to uninitialized LB particle coupling RNG counter"); + } + return {}; +} + +void couple_particle(Particle &p, bool couple_virtual, double noise_amplitude, + const OptionalCounter &rng_counter, double time_step) { + + if (p.is_virtual() and not couple_virtual) + return; + + // Calculate coupling force + Utils::Vector3d coupling_force = {}; + for (auto pos : positions_in_halo(p.pos(), box_geo)) { + if (in_local_halo(pos)) { + auto const drag_force = + lb_drag_force(p, pos, lb_particle_coupling_drift_vel_offset(p)); + auto const random_force = + noise_amplitude * lb_particle_coupling_noise(noise_amplitude > 0.0, + p.id(), rng_counter); + coupling_force = drag_force + random_force; + break; + } + } + + // couple positions 
including shifts by one box length to add + // forces to ghost layers + for (auto pos : positions_in_halo(p.pos(), box_geo)) { + if (in_local_domain(pos)) { + /* Particle is in our LB volume, so this node + * is responsible to adding its force */ + p.force() += coupling_force; + } + add_md_force(pos, coupling_force, time_step); + } + +#ifdef ENGINE + add_swimmer_force(p, time_step); +#endif +} + void lb_lbcoupling_calc_particle_lattice_ia(bool couple_virtual, const ParticleRange &particles, const ParticleRange &more_particles, double time_step) { ESPRESSO_PROFILER_CXX_MARK_FUNCTION; - if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - if (lb_particle_coupling.couple_to_md && this_node == 0) { - switch (lb_lbinterpolation_get_interpolation_order()) { - case (InterpolationOrder::linear): - lb_calc_particle_lattice_ia_gpu<8>( - couple_virtual, lb_lbcoupling_get_gamma(), time_step); - break; - case (InterpolationOrder::quadratic): - lb_calc_particle_lattice_ia_gpu<27>( - couple_virtual, lb_lbcoupling_get_gamma(), time_step); - break; - } - } -#endif - } else if (lattice_switch == ActiveLB::CPU) { + if (lattice_switch == ActiveLB::WALBERLA_LB) { if (lb_particle_coupling.couple_to_md) { - switch (lb_lbinterpolation_get_interpolation_order()) { - case (InterpolationOrder::quadratic): - throw std::runtime_error("The non-linear interpolation scheme is not " - "implemented for the CPU LB."); - case (InterpolationOrder::linear): { - auto const kT = lb_lbfluid_get_kT(); - /* Eq. (16) @cite ahlrichs99a. - * The factor 12 comes from the fact that we use random numbers - * from -0.5 to 0.5 (equally distributed) which have variance 1/12. - * time_step comes from the discretization. - */ - auto const noise_amplitude = - (kT > 0.) ? std::sqrt(12. * 2. 
* lb_lbcoupling_get_gamma() * kT / - time_step) - : 0.0; - - auto f_random = [noise_amplitude](int id) -> Utils::Vector3d { - if (noise_amplitude > 0.0) { - return Random::noise_uniform( - lb_particle_coupling.rng_counter_coupling->value(), 0, id); - } - return {}; - }; - - auto couple_particle = [&](Particle &p) -> void { - if (p.is_virtual() and !couple_virtual) - return; - - // Calculate coupling force - Utils::Vector3d force = {}; - for (auto pos : positions_in_halo(p.pos(), box_geo)) { - if (in_local_halo(pos)) { - force = lb_viscous_coupling(p, pos, - noise_amplitude * f_random(p.id())); - break; - } - } - - // couple positions including shifts by one box length to add - // forces to ghost layers - for (auto pos : positions_in_halo(p.pos(), box_geo)) { - if (in_local_domain(pos)) { - /* if the particle is in our LB volume, this node - * is responsible to adding its force */ - p.force() += force; - } - add_md_force(pos, force, time_step); - } - -#ifdef ENGINE - add_swimmer_force(p, time_step); -#endif - }; - - std::unordered_set coupled_ghost_particles; - - /* Couple particles ranges */ - for (auto &p : particles) { - if (should_be_coupled(p, coupled_ghost_particles)) { - couple_particle(p); - } + auto const kT = LB::get_kT() * Utils::sqr(LB::get_lattice_speed()); + /* Eq. (16) @cite ahlrichs99a. + * The factor 12 comes from the fact that we use random numbers + * from -0.5 to 0.5 (equally distributed) which have variance 1/12. + * time_step comes from the discretization. + */ + auto const noise_amplitude = + (kT > 0.) + ? std::sqrt(12. * 2. 
* lb_lbcoupling_get_gamma() * kT / time_step) + : 0.0; + + std::unordered_set coupled_ghost_particles; + + /* Couple particles ranges */ + for (auto &p : particles) { + if (should_be_coupled(p, coupled_ghost_particles)) { + couple_particle(p, couple_virtual, noise_amplitude, + lb_particle_coupling.rng_counter_coupling, time_step); } + } - for (auto &p : more_particles) { - if (should_be_coupled(p, coupled_ghost_particles)) { - couple_particle(p); - } + for (auto &p : more_particles) { + if (should_be_coupled(p, coupled_ghost_particles)) { + couple_particle(p, couple_virtual, noise_amplitude, + lb_particle_coupling.rng_counter_coupling, time_step); } - - break; - } } } } @@ -362,13 +328,9 @@ void lb_lbcoupling_calc_particle_lattice_ia(bool couple_virtual, void lb_lbcoupling_propagate() { if (lattice_switch != ActiveLB::NONE) { - if (lb_lbfluid_get_kT() > 0.0) { - if (lattice_switch == ActiveLB::CPU) { + if (LB::get_kT() > 0.0) { + if (lattice_switch == ActiveLB::WALBERLA_LB) { lb_particle_coupling.rng_counter_coupling->increment(); - } else if (lattice_switch == ActiveLB::GPU) { -#ifdef CUDA - rng_counter_coupling_gpu->increment(); -#endif } } } diff --git a/src/core/grid_based_algorithms/lb_particle_coupling.hpp b/src/core/grid_based_algorithms/lb_particle_coupling.hpp index 362c330858b..c93a98ca008 100644 --- a/src/core/grid_based_algorithms/lb_particle_coupling.hpp +++ b/src/core/grid_based_algorithms/lb_particle_coupling.hpp @@ -19,14 +19,22 @@ #ifndef LB_PARTICLE_COUPLING_HPP #define LB_PARTICLE_COUPLING_HPP -#include "BoxGeometry.hpp" -#include "OptionalCounter.hpp" +#include "Particle.hpp" #include "ParticleRange.hpp" +#include "grid.hpp" +#include +#include + +#include #include +#include #include #include +#include + +using OptionalCounter = boost::optional>; /** Calculate particle lattice interactions. * So far, only viscous coupling with Stokesian friction is implemented. 
@@ -58,6 +66,69 @@ void lb_lbcoupling_activate(); */ void lb_lbcoupling_deactivate(); +/** + * @brief Check if a position is within the local LB domain plus halo. + * + * @param pos Position to check + * + * @return True iff the point is inside of the domain. + */ +bool in_local_halo(Utils::Vector3d const &pos); + +/** @brief Determine if a given particle should be coupled. + * In certain cases, there may be more than one ghost for the same particle. + * To make sure, that these are only coupled once, ghosts' ids are stored + * in an unordered_set. + */ +bool should_be_coupled(const Particle &p, + std::unordered_set &coupled_ghost_particles); + +/** + * @brief Add a force to the lattice force density. + * @param pos Position of the force + * @param force Force in MD units. + * @param time_step MD time step. + */ +void add_md_force(Utils::Vector3d const &pos, Utils::Vector3d const &force, + double time_step); + +Utils::Vector3d lb_particle_coupling_noise(bool enabled, int part_id, + const OptionalCounter &rng_counter); + +// internal function exposed for unit testing +std::vector positions_in_halo(Utils::Vector3d pos, + const BoxGeometry &box); + +// internal function exposed for unit testing +void couple_particle(Particle &p, bool couple_virtual, double noise_amplitude, + const OptionalCounter &rng_counter, double time_step); + +// internal function exposed for unit testing +void add_swimmer_force(Particle const &p, double time_step); + +/** + * @brief Calculate particle drift velocity offset due to ENGINE and + * ELECTROHYDRODYNAMICS. + */ +Utils::Vector3d lb_particle_coupling_drift_vel_offset(const Particle &p); + +void mpi_bcast_lb_particle_coupling(); + +/** @brief Calculate drag force on a single particle. + * + * See section II.C. 
@cite ahlrichs99a + * + * @param[in] p The coupled particle + * @param[in] shifted_pos The particle position with optional shift + * @param[in] vel_offset Velocity offset to be added to interpolated LB + * velocity before calculating the force + * + * @return The viscous coupling force + */ +Utils::Vector3d lb_drag_force(Particle const &p, + Utils::Vector3d const &shifted_pos, + Utils::Vector3d const &vel_offset); + struct LB_Particle_Coupling { OptionalCounter rng_counter_coupling = {}; /** @brief Friction coefficient for the particle coupling. */ @@ -74,32 +145,7 @@ struct LB_Particle_Coupling { } }; -// expose functions that are also used to couple lb_inertialess_tracers -template -using Box = std::pair, Utils::Vector>; - -/** - * @brief Check if a position is in a box. - * - * The left boundary belong to the box, the - * right one does not. Periodic boundaries are - * not considered. - * - * @param pos Position to check - * @param box Box to check - * - * @return True iff the point is inside of the box. - */ -template -bool in_box(Utils::Vector const &pos, Box const &box) { - return (pos >= box.first) and (pos < box.second); -} - -bool in_local_halo(Utils::Vector3d const &pos); -std::vector positions_in_halo(Utils::Vector3d pos, - const BoxGeometry &box); -bool is_ghost_for_local_particle(const Particle &p); -bool should_be_coupled(const Particle &p, - std::unordered_set &coupled_ghost_particles); +// internal global exposed for unit testing +extern LB_Particle_Coupling lb_particle_coupling; #endif diff --git a/src/core/grid_based_algorithms/lb_walberla_instance.cpp b/src/core/grid_based_algorithms/lb_walberla_instance.cpp new file mode 100644 index 00000000000..c5f2ecab70c --- /dev/null +++ b/src/core/grid_based_algorithms/lb_walberla_instance.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2019-2023 The ESPResSo project + * + * This file is part of ESPResSo. 
+ * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include "config/config.hpp" + +#ifdef WALBERLA +#include "lb_walberla_instance.hpp" + +#include "communication.hpp" +#include "errorhandling.hpp" +#include "grid.hpp" +#include "integrate.hpp" +#include "lb_interface.hpp" + +#include + +#include + +#include +#include + +#include +#include +#include + +static std::weak_ptr lb_walberla_instance; +static std::shared_ptr lb_walberla_params_instance; + +std::shared_ptr lb_walberla() { + auto lb_walberla_instance_handle = ::lb_walberla_instance.lock(); + if (not lb_walberla_instance_handle) { + throw std::runtime_error( + "Attempted access to uninitialized LBWalberla instance."); + } + return lb_walberla_instance_handle; +} + +std::shared_ptr lb_walberla_params() { + if (not ::lb_walberla_params_instance) { + throw std::runtime_error( + "Attempted access to uninitialized LBWalberlaParams instance."); + } + return ::lb_walberla_params_instance; +} + +void lb_sanity_checks(LBWalberlaBase const &lb_fluid, + LBWalberlaParams const &lb_params, double md_time_step) { + auto const agrid = lb_params.get_agrid(); + auto const tau = lb_params.get_tau(); + // waLBerla and ESPResSo must agree on domain decomposition + auto [lb_my_left, lb_my_right] = lb_fluid.get_lattice().get_local_domain(); + lb_my_left *= agrid; + lb_my_right *= agrid; + auto const my_left = local_geo.my_left(); + auto const my_right = 
local_geo.my_right(); + auto const tol = agrid / 1E6; + if ((lb_my_left - my_left).norm2() > tol or + (lb_my_right - my_right).norm2() > tol) { + runtimeErrorMsg() << "\nMPI rank " << this_node << ": " + << "left ESPResSo: [" << my_left << "], " + << "left waLBerla: [" << lb_my_left << "]" + << "\nMPI rank " << this_node << ": " + << "right ESPResSo: [" << my_right << "], " + << "right waLBerla: [" << lb_my_right << "]"; + throw std::runtime_error( + "waLBerla and ESPResSo disagree about domain decomposition."); + } + // LB time step and MD time step must agree + if (md_time_step > 0.) { + LB::check_tau_time_step_consistency(tau, md_time_step); + } +} + +void activate_lb_walberla(std::shared_ptr lb_fluid, + std::shared_ptr lb_params) { + if (::lattice_switch != ActiveLB::NONE) { + throw std::runtime_error("Cannot add a second LB instance"); + } + lb_sanity_checks(*lb_fluid, *lb_params, get_time_step()); + auto const &lebc = ::box_geo.lees_edwards_bc(); + lb_fluid->check_lebc(lebc.shear_direction, lebc.shear_plane_normal); + ::lb_walberla_instance = std::weak_ptr{lb_fluid}; + ::lb_walberla_params_instance = lb_params; + ::lattice_switch = ActiveLB::WALBERLA_LB; +} + +void deactivate_lb_walberla() { + ::lb_walberla_instance.reset(); + ::lb_walberla_params_instance.reset(); + ::lattice_switch = ActiveLB::NONE; +} + +#endif diff --git a/src/core/grid_based_algorithms/lb_walberla_instance.hpp b/src/core/grid_based_algorithms/lb_walberla_instance.hpp new file mode 100644 index 00000000000..90bf1c65a8e --- /dev/null +++ b/src/core/grid_based_algorithms/lb_walberla_instance.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2019-2023 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef GRID_BASED_ALGORITHMS_LBWALBERLA_INSTANCE_HPP +#define GRID_BASED_ALGORITHMS_LBWALBERLA_INSTANCE_HPP + +#include "config/config.hpp" + +#ifdef WALBERLA + +#include + +#include + +struct LBWalberlaParams { + LBWalberlaParams(double agrid, double tau) : m_agrid(agrid), m_tau(tau) {} + double get_agrid() const { return m_agrid; }; + double get_tau() const { return m_tau; }; + +private: + double m_agrid; + double m_tau; +}; + +/** @brief Access the per-MPI-node waLBerla LB instance */ +std::shared_ptr lb_walberla(); + +/** @brief Access the waLBerla parameters */ +std::shared_ptr lb_walberla_params(); + +void lb_sanity_checks(LBWalberlaBase const &lb_fluid, + LBWalberlaParams const &lb_params, double md_time_step); + +/** @brief Register a waLBerla LB instance and update lattice switch. */ +void activate_lb_walberla(std::shared_ptr lb_fluid, + std::shared_ptr lb_params); + +/** @brief De-register a waLBerla LB instance and update lattice switch. */ +void deactivate_lb_walberla(); + +#endif // WALBERLA + +#endif diff --git a/src/core/grid_based_algorithms/lbboundaries/LBBoundary.hpp b/src/core/grid_based_algorithms/lbboundaries/LBBoundary.hpp deleted file mode 100644 index ead5c0d7497..00000000000 --- a/src/core/grid_based_algorithms/lbboundaries/LBBoundary.hpp +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. 
- * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef LBBOUNDARIES_LBBOUNDARY_HPP -#define LBBOUNDARIES_LBBOUNDARY_HPP - -#include "config/config.hpp" - -#include -#include - -#include - -#include - -namespace LBBoundaries { -#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU) -class LBBoundary; -Utils::Vector3d lbboundary_get_force(LBBoundary const *lbb); -void lb_init_boundaries(); -#endif -class LBBoundary { -public: - LBBoundary() - : m_shape(std::make_shared()), - m_velocity(Utils::Vector3d{0, 0, 0}), - m_force(Utils::Vector3d{0, 0, 0}) { -#ifdef EK_BOUNDARIES - m_charge_density = 0.0; - m_net_charge = 0.0; -#endif - } - - /* Calculate distance from the lbboundary */ - void calc_dist(const Utils::Vector3d &pos, double &dist, - Utils::Vector3d &vec) const { - m_shape->calculate_dist(pos, dist, vec); - } - - void set_shape(std::shared_ptr const &shape) { - m_shape = shape; - } - - void set_velocity(const Utils::Vector3d &velocity) { - m_velocity = velocity; -#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU) - lb_init_boundaries(); -#endif - } - void reset_force() { m_force = Utils::Vector3d{0, 0, 0}; } - - Shapes::Shape const &shape() const { return *m_shape; } - Utils::Vector3d &velocity() { return m_velocity; } - Utils::Vector3d &force() { return m_force; } - Utils::Vector3d get_force() const { -#if defined(LB_BOUNDARIES) || defined(LB_BOUNDARIES_GPU) - return 
lbboundary_get_force(this); -#else - throw std::runtime_error("Needs LB_BOUNDARIES or LB_BOUNDARIES_GPU."); -#endif - } - -#ifdef EK_BOUNDARIES // TODO: ugly. Better would be a class EKBoundaries, - // deriving from LBBoundaries, but that requires completely - // different initialization infrastructure. - void set_charge_density(double charge_density) { - m_charge_density = static_cast(charge_density); - } - void set_net_charge(double net_charge) { - m_net_charge = static_cast(net_charge); - } - - float &charge_density() { return m_charge_density; } - float &net_charge() { return m_net_charge; } -#endif - -private: - /** Private data members */ - std::shared_ptr m_shape; - Utils::Vector3d m_velocity; - Utils::Vector3d m_force; - -#ifdef EK_BOUNDARIES // TODO: ugly. Better would be a class EKBoundaries, - // deriving from LBBoundaries, but that requires completely - // different initialization infrastructure. - float m_charge_density; - float m_net_charge; -#endif -}; - -} /* namespace LBBoundaries */ - -#endif diff --git a/src/core/grid_based_algorithms/lbgpu.cpp b/src/core/grid_based_algorithms/lbgpu.cpp deleted file mode 100644 index 083327d0ce1..00000000000 --- a/src/core/grid_based_algorithms/lbgpu.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ -/** \file - * %Lattice Boltzmann on GPUs. - * - * The corresponding header file is lbgpu.hpp. - */ - -#include "config/config.hpp" - -#ifdef CUDA - -#include "lbgpu.hpp" - -#include "communication.hpp" -#include "cuda_interface.hpp" -#include "errorhandling.hpp" -#include "grid.hpp" -#include "integrate.hpp" -#include "lb-d3q19.hpp" - -#include - -#include -#include -#include - -LB_parameters_gpu lbpar_gpu = { - // rho - 0.f, - // mu - 0.f, - // viscosity - 0.f, - // gamma_shear - 0.f, - // gamma_bulk - 0.f, - // gamma_odd - 0.f, - // gamma_even - 0.f, - // is_TRT - false, - // bulk_viscosity - -1.f, - // agrid - -1.f, - // tau - -1.f, - // dim - {{{0u, 0u, 0u}}}, - // number_of_nodes - 0u, -#ifdef LB_BOUNDARIES_GPU - // number_of_boundnodes - 0u, -#endif - // external_force_density - false, - // ext_force_density - {{{0.f, 0.f, 0.f}}}, - // Thermal energy - 0.f}; - -/** this is the array that stores the hydrodynamic fields for the output */ -std::vector host_values(0); - -#ifdef ELECTROKINETICS -bool ek_initialized = false; -#endif - -/** (Re-)initialize the fluid according to the given value of rho. */ -void lb_reinit_fluid_gpu() { - lb_reinit_parameters_gpu(); - if (lbpar_gpu.number_of_nodes != 0u) { - lb_reinit_GPU(&lbpar_gpu); - lb_reinit_extern_nodeforce_GPU(&lbpar_gpu); - } -} - -/** (Re-)initialize the fluid. - * See @cite dunweg07a and @cite dhumieres09a. - */ -void lb_reinit_parameters_gpu() { - lbpar_gpu.mu = 0.f; - - if (lbpar_gpu.viscosity > 0.f && lbpar_gpu.agrid > 0.f && - lbpar_gpu.tau > 0.f) { - /* Eq. (80) @cite dunweg07a. */ - lbpar_gpu.gamma_shear = 1.f - 2.f / (6.f * lbpar_gpu.viscosity + 1.f); - } - - if (lbpar_gpu.bulk_viscosity > 0.f) { - /* Eq. (81) @cite dunweg07a. 
*/ - lbpar_gpu.gamma_bulk = 1.f - 2.f / (9.f * lbpar_gpu.bulk_viscosity + 1.f); - } - - // By default, gamma_even and gamma_odd are chosen such that the MRT becomes - // a TRT with ghost mode relaxation factors that minimize unphysical wall - // slip at bounce-back boundaries. For the relation between the gammas - // achieving this, consult @cite dhumieres09a. - // Note that the relaxation operator in ESPResSo is defined as - // m* = m_eq + gamma * (m - m_eq) - // as opposed to this reference, where - // m* = m + lambda * (m - m_eq) - - if (lbpar_gpu.is_TRT) { - lbpar_gpu.gamma_bulk = lbpar_gpu.gamma_shear; - lbpar_gpu.gamma_even = lbpar_gpu.gamma_shear; - lbpar_gpu.gamma_odd = - -(7.f * lbpar_gpu.gamma_even + 1.f) / (lbpar_gpu.gamma_even + 7.f); - } - - if (lbpar_gpu.kT > 0.f) { /* fluctuating hydrodynamics ? */ - - /* Eq. (51) @cite dunweg07a.*/ - /* Note that the modes are not normalized as in the paper here! */ - lbpar_gpu.mu = lbpar_gpu.kT * Utils::sqr(lbpar_gpu.tau) / - D3Q19::c_sound_sq / Utils::sqr(lbpar_gpu.agrid); - } - - lb_set_agrid_gpu(lbpar_gpu.agrid); - -#ifdef ELECTROKINETICS - if (ek_initialized) { - lbpar_gpu.tau = static_cast(get_time_step()); - } -#endif - - reinit_parameters_GPU(&lbpar_gpu); -} - -/** Performs a full initialization of the lattice Boltzmann system. - * All derived parameters and the fluid are reset to their default values. 
- */ -void lb_init_gpu_local() { - if (this_node == 0) { - /* set parameters for transfer to gpu */ - lb_reinit_parameters_gpu(); - lb_init_GPU(lbpar_gpu); - } - gpu_init_particle_comm(this_node); - cuda_bcast_global_part_params(); -} - -REGISTER_CALLBACK(lb_init_gpu_local) - -void lb_init_gpu() { mpi_call_all(lb_init_gpu_local); } - -void lb_GPU_sanity_checks() { - if (this_node == 0) { - if (lbpar_gpu.agrid < 0.f) { - runtimeErrorMsg() << "Lattice Boltzmann agrid not set"; - } - if (lbpar_gpu.tau < 0.f) { - runtimeErrorMsg() << "Lattice Boltzmann time step not set"; - } - if (lbpar_gpu.rho < 0.f) { - runtimeErrorMsg() << "Lattice Boltzmann fluid density not set"; - } - if (lbpar_gpu.viscosity < 0.f) { - runtimeErrorMsg() << "Lattice Boltzmann fluid viscosity not set"; - } - } -} - -void lb_set_agrid_gpu(double agrid) { - lbpar_gpu.agrid = static_cast(agrid); - - lbpar_gpu.dim[0] = - static_cast(round(box_geo.length()[0] / agrid)); - lbpar_gpu.dim[1] = - static_cast(round(box_geo.length()[1] / agrid)); - lbpar_gpu.dim[2] = - static_cast(round(box_geo.length()[2] / agrid)); - - Utils::Vector box_from_dim( - Utils::Vector(lbpar_gpu.dim) * agrid); - Utils::Vector box_lf(box_geo.length()); - - auto const rel_difference_vec = - Utils::hadamard_division(box_lf - box_from_dim, box_lf); - auto const commensurable = std::all_of( - rel_difference_vec.begin(), rel_difference_vec.end(), [](auto d) { - return std::abs(d) < std::numeric_limits::epsilon(); - }); - if (not commensurable) { - runtimeErrorMsg() << "Lattice spacing agrid=" << agrid - << " is incompatible with one of the box dimensions: " - << "[" << box_geo.length() << "]"; - } - lbpar_gpu.number_of_nodes = - std::accumulate(lbpar_gpu.dim.begin(), lbpar_gpu.dim.end(), 1u, - std::multiplies()); -} - -#endif // CUDA diff --git a/src/core/grid_based_algorithms/lbgpu.cuh b/src/core/grid_based_algorithms/lbgpu.cuh deleted file mode 100644 index 352bed60596..00000000000 --- a/src/core/grid_based_algorithms/lbgpu.cuh +++ 
/dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -/** \file - * %Lattice Boltzmann on GPUs. - * - * Implementation in lbgpu_cuda.cu. - */ - -#ifndef LBGPU_CUH -#define LBGPU_CUH - -#include "config/config.hpp" - -#ifdef CUDA -#include - -#include - -/** Velocity densities for the lattice Boltzmann system. 
*/ -struct LB_nodes_gpu { - /** velocity density of the node */ - Utils::Array *populations = nullptr; - unsigned int *boundary = nullptr; - Utils::Array *boundary_velocity = nullptr; -}; - -__device__ __inline__ float -calc_mode_x_from_n(Utils::Array const &populations, int x) { - switch (x) { - case 0: - return populations[0] + populations[1] + populations[2] + populations[3] + - populations[4] + populations[5] + populations[6] + populations[7] + - populations[8] + populations[9] + populations[10] + populations[11] + - populations[12] + populations[13] + populations[14] + - populations[15] + populations[16] + populations[17] + - populations[18]; - case 1: - return (populations[1] - populations[2]) + - (populations[7] - populations[8]) + - (populations[9] - populations[10]) + - (populations[11] - populations[12]) + - (populations[13] - populations[14]); - case 2: - return (populations[3] - populations[4]) + - (populations[7] - populations[8]) - - (populations[9] - populations[10]) + - (populations[15] - populations[16]) + - (populations[17] - populations[18]); - case 3: - return (populations[5] - populations[6]) + - (populations[11] - populations[12]) - - (populations[13] - populations[14]) + - (populations[15] - populations[16]) - - (populations[17] - populations[18]); - case 4: - return -populations[0] + populations[7] + populations[8] + populations[9] + - populations[10] + populations[11] + populations[12] + - populations[13] + populations[14] + populations[15] + - populations[16] + populations[17] + populations[18]; - case 5: - return (populations[1] + populations[2]) - - (populations[3] + populations[4]) + - (populations[11] + populations[12]) + - (populations[13] + populations[14]) - - (populations[15] + populations[16]) - - (populations[17] + populations[18]); - case 6: - return (populations[1] + populations[2]) + - (populations[3] + populations[4]) - - (populations[11] + populations[12]) - - (populations[13] + populations[14]) - - (populations[15] + 
populations[16]) - - (populations[17] + populations[18]) - - 2.0f * ((populations[5] + populations[6]) - - (populations[7] + populations[8]) - - (populations[9] + populations[10])); - case 7: - return (populations[7] + populations[8]) - - (populations[9] + populations[10]); - case 8: - return (populations[11] + populations[12]) - - (populations[13] + populations[14]); - case 9: - return (populations[15] + populations[16]) - - (populations[17] + populations[18]); - case 10: - return -2.0f * (populations[1] - populations[2]) + - (populations[7] - populations[8]) + - (populations[9] - populations[10]) + - (populations[11] - populations[12]) + - (populations[13] - populations[14]); - case 11: - return -2.0f * (populations[3] - populations[4]) + - (populations[7] - populations[8]) - - (populations[9] - populations[10]) + - (populations[15] - populations[16]) + - (populations[17] - populations[18]); - case 12: - return -2.0f * (populations[5] - populations[6]) + - (populations[11] - populations[12]) - - (populations[13] - populations[14]) + - (populations[15] - populations[16]) - - (populations[17] - populations[18]); - case 13: - return (populations[7] - populations[8]) + - (populations[9] - populations[10]) - - (populations[11] - populations[12]) - - (populations[13] - populations[14]); - case 14: - return (populations[7] - populations[8]) - - (populations[9] - populations[10]) - - (populations[15] - populations[16]) - - (populations[17] - populations[18]); - case 15: - return (populations[11] - populations[12]) - - (populations[13] - populations[14]) - - (populations[15] - populations[16]) + - (populations[17] - populations[18]); - case 16: - return populations[0] + populations[7] + populations[8] + populations[9] + - populations[10] + populations[11] + populations[12] + - populations[13] + populations[14] + populations[15] + - populations[16] + populations[17] + populations[18] - - 2.0f * ((populations[1] + populations[2]) + - (populations[3] + populations[4]) + - 
(populations[5] + populations[6])); - case 17: - return -(populations[1] + populations[2]) + - (populations[3] + populations[4]) + - (populations[11] + populations[12]) + - (populations[13] + populations[14]) - - (populations[15] + populations[16]) - - (populations[17] + populations[18]); - case 18: - return -(populations[1] + populations[2]) - - (populations[3] + populations[4]) - - (populations[11] + populations[12]) - - (populations[13] + populations[14]) - - (populations[15] + populations[16]) - - (populations[17] + populations[18]) + - 2.0f * ((populations[5] + populations[6]) + - (populations[7] + populations[8]) + - (populations[9] + populations[10])); - } - return 0.0; -} - -/** - * @param[in] node_index Node index around (8) particle - * @param[out] mode Local register values mode - * @param[in] n_a Local node residing in array a - */ -__device__ __inline__ void -calc_mass_and_momentum_mode(Utils::Array &mode, LB_nodes_gpu n_a, - unsigned int node_index) { - /* mass mode */ - mode[0] = calc_mode_x_from_n(n_a.populations[node_index], 0); - - /* momentum modes */ - mode[1] = calc_mode_x_from_n(n_a.populations[node_index], 1); - - mode[2] = calc_mode_x_from_n(n_a.populations[node_index], 2); - - mode[3] = calc_mode_x_from_n(n_a.populations[node_index], 3); -} - -struct LB_boundaries_gpu { - /** For each fluid node this array contains either - * 0 if the node is not a boundary, or the index of - * the boundary in LBBoundaries::lbboundaries minus one. - */ - unsigned int *index = nullptr; - /** If the node is a boundary node, this contains the - * velocity of the boundary - */ - Utils::Array *velocity = nullptr; -}; - -inline __device__ float4 random_wrapper_philox(unsigned int index, - unsigned int mode, - uint64_t philox_counter) { - // Split the 64 bit counter into two 32 bit ints. 
- auto const philox_counter_hi = static_cast(philox_counter >> 32); - auto const philox_counter_low = static_cast(philox_counter); - uint4 rnd_ints = - curand_Philox4x32_10(make_uint4(index, philox_counter_hi, 0, mode), - make_uint2(philox_counter_low, 0)); - float4 rnd_floats; - rnd_floats.w = static_cast(rnd_ints.w) * CURAND_2POW32_INV + - (CURAND_2POW32_INV / 2.0f); - rnd_floats.x = static_cast(rnd_ints.x) * CURAND_2POW32_INV + - (CURAND_2POW32_INV / 2.0f); - rnd_floats.y = static_cast(rnd_ints.y) * CURAND_2POW32_INV + - (CURAND_2POW32_INV / 2.0f); - rnd_floats.z = static_cast(rnd_ints.z) * CURAND_2POW32_INV + - (CURAND_2POW32_INV / 2.0f); - return rnd_floats; -} - -#endif // CUDA -#endif diff --git a/src/core/grid_based_algorithms/lbgpu.hpp b/src/core/grid_based_algorithms/lbgpu.hpp deleted file mode 100644 index 7f3d27b7c4c..00000000000 --- a/src/core/grid_based_algorithms/lbgpu.hpp +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -/** \file - * %Lattice Boltzmann implementation on GPUs. - * - * Implementation in lbgpu.cpp. 
- */ - -#ifndef LBGPU_HPP -#define LBGPU_HPP - -#include "config/config.hpp" - -#ifdef CUDA -#include "OptionalCounter.hpp" - -#include -#include - -#include -#include -#include - -/* For the D3Q19 model most functions have a separate implementation - * where the coefficients and the velocity vectors are hardcoded - * explicitly. This saves a lot of multiplications with 1's and 0's - * thus making the code more efficient. */ -#define LBQ 19 - -/** Parameters for the lattice Boltzmann system for GPU. */ -struct LB_parameters_gpu { - /** number density (LB units) */ - float rho; - /** mu (LJ units) */ - float mu; - /** viscosity (LJ) units */ - float viscosity; - /** relaxation rate of shear modes */ - float gamma_shear; - /** relaxation rate of bulk modes */ - float gamma_bulk; - /** relaxation rate of odd modes */ - float gamma_odd; - /** relaxation rate of even modes */ - float gamma_even; - /** flag determining whether gamma_shear, gamma_odd, and gamma_even are - * calculated from gamma_shear in such a way to yield a TRT LB with minimized - * slip at bounce-back boundaries - */ - bool is_TRT; - - float bulk_viscosity; - - /** lattice spacing (LJ units) */ - float agrid; - - /** time step for fluid propagation (LJ units) - * Note: Has to be larger than MD time step! - */ - float tau; - - Utils::Array dim; - - unsigned int number_of_nodes; -#ifdef LB_BOUNDARIES_GPU - unsigned int number_of_boundnodes; -#endif - - bool external_force_density; - - Utils::Array ext_force_density; - - // Thermal energy - float kT; -}; - -/* this structure is almost duplicated for memory efficiency. 
When the stress - tensor element are needed at every timestep, this features should be - explicitly switched on */ -struct LB_rho_v_pi_gpu { - /** density of the node */ - float rho; - /** velocity of the node */ - Utils::Array v; - /** pressure tensor */ - Utils::Array pi; -}; - -struct LB_node_force_density_gpu { - Utils::Array *force_density; -#if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) || defined(EK_DEBUG) - - // We need the node forces for the velocity interpolation at the virtual - // particles' position. However, LBM wants to reset them immediately - // after the LBM update. This variable keeps a backup - Utils::Array *force_density_buf; -#endif -}; - -/************************************************************/ -/** \name Exported Variables */ -/************************************************************/ -/**@{*/ - -/** Switch indicating momentum exchange between particles and fluid */ -extern LB_parameters_gpu lbpar_gpu; -extern std::vector host_values; -#ifdef ELECTROKINETICS -extern LB_node_force_density_gpu node_f; -extern bool ek_initialized; -#endif -extern OptionalCounter rng_counter_fluid_gpu; -extern OptionalCounter rng_counter_coupling_gpu; - -/**@}*/ - -/************************************************************/ -/** \name Exported Functions */ -/************************************************************/ -/**@{*/ -/** Conserved quantities for the lattice Boltzmann system. */ -struct LB_rho_v_gpu { - - /** density of the node */ - float rho; - /** velocity of the node */ - - Utils::Array v; -}; -void lb_GPU_sanity_checks(); - -void lb_get_boundary_force_pointer(float **pointer_address); -void lb_get_para_pointer(LB_parameters_gpu **pointer_address); - -/** Perform a full initialization of the lattice Boltzmann system. - * All derived parameters and the fluid are reset to their default values. - */ -void lb_init_gpu(); - -/** (Re-)initialize the derived parameters for the lattice Boltzmann system. 
- * The current state of the fluid is unchanged. - */ -void lb_reinit_parameters_gpu(); - -/** (Re-)initialize the fluid. */ -void lb_reinit_fluid_gpu(); - -/** Reset the forces on the fluid nodes */ -void reset_LB_force_densities_GPU(bool buffer = true); - -void lb_init_GPU(const LB_parameters_gpu &lbpar_gpu); - -/** Integrate the lattice-Boltzmann system for one time step. */ -void lb_integrate_GPU(); - -void lb_get_values_GPU(LB_rho_v_pi_gpu *host_values); -void lb_print_node_GPU(unsigned single_nodeindex, - LB_rho_v_pi_gpu *host_print_values); -#ifdef LB_BOUNDARIES_GPU -void lb_init_boundaries_GPU(std::size_t n_lb_boundaries, - unsigned number_of_boundnodes, - int *host_boundary_node_list, - int *host_boundary_index_list, - float *lb_bounday_velocity); -#endif - -void lb_set_agrid_gpu(double agrid); - -template -void lb_calc_particle_lattice_ia_gpu(bool couple_virtual, double friction, - double time_step); - -void lb_calc_fluid_mass_GPU(double *mass); -void lb_calc_fluid_momentum_GPU(double *host_mom); -void lb_get_boundary_flag_GPU(unsigned int single_nodeindex, - unsigned int *host_flag); -void lb_get_boundary_flags_GPU(unsigned int *host_bound_array); - -void lb_set_node_velocity_GPU(unsigned single_nodeindex, float *host_velocity); -void lb_set_node_rho_GPU(unsigned single_nodeindex, float host_rho); - -void reinit_parameters_GPU(LB_parameters_gpu *lbpar_gpu); -void lb_reinit_extern_nodeforce_GPU(LB_parameters_gpu *lbpar_gpu); -void lb_reinit_GPU(LB_parameters_gpu *lbpar_gpu); -void lb_gpu_get_boundary_forces(std::vector &forces); -void lb_save_checkpoint_GPU(float *host_checkpoint_vd); -void lb_load_checkpoint_GPU(float const *host_checkpoint_vd); - -void lb_lbfluid_set_population(const Utils::Vector3i &, float[LBQ]); -void lb_lbfluid_get_population(const Utils::Vector3i &, float[LBQ]); - -template -void lb_get_interpolated_velocity_gpu(double const *positions, - double *velocities, int length); -void linear_velocity_interpolation(double const *positions, 
double *velocities, - int length); -void quadratic_velocity_interpolation(double const *positions, - double *velocities, int length); -Utils::Array stress_tensor_GPU(); -uint64_t lb_fluid_get_rng_state_gpu(); -void lb_fluid_set_rng_state_gpu(uint64_t counter); -uint64_t lb_coupling_get_rng_state_gpu(); -void lb_coupling_set_rng_state_gpu(uint64_t counter); - -/** Calculate the node index from its coordinates */ -inline unsigned int calculate_node_index(LB_parameters_gpu const &lbpar_gpu, - Utils::Vector3i const &coord) { - return static_cast( - Utils::get_linear_index(coord, Utils::Vector3i(lbpar_gpu.dim))); -} -/**@}*/ - -#endif /* CUDA */ - -#endif /* LBGPU_HPP */ diff --git a/src/core/grid_based_algorithms/lbgpu_cuda.cu b/src/core/grid_based_algorithms/lbgpu_cuda.cu deleted file mode 100644 index bd291f646b7..00000000000 --- a/src/core/grid_based_algorithms/lbgpu_cuda.cu +++ /dev/null @@ -1,2703 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -/** \file - * %Lattice Boltzmann on GPUs. - * - * The corresponding header file is lbgpu.cuh. 
- */ - -#include "config/config.hpp" - -#ifdef CUDA - -#include "grid_based_algorithms/OptionalCounter.hpp" -#include "grid_based_algorithms/lb-d3q19.hpp" -#include "grid_based_algorithms/lb_boundaries.hpp" -#include "grid_based_algorithms/lbgpu.cuh" -#include "grid_based_algorithms/lbgpu.hpp" - -#include "cuda_interface.hpp" -#include "cuda_utils.cuh" -#include "errorhandling.hpp" -#include "lbgpu.hpp" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/** struct for hydrodynamic fields: this is for internal use - * (i.e. stores values in LB units) and should not be used for - * printing values - */ -static LB_rho_v_gpu *device_rho_v = nullptr; - -/** struct for hydrodynamic fields: this is the interface - * and stores values in MD units. It should not be used - * as an input for any LB calculations. TODO: in the future, - * one might want to have several structures for printing - * separately rho, v, pi without having to compute/store - * the complete set. 
- */ -static LB_rho_v_pi_gpu *print_rho_v_pi = nullptr; - -/** @name structs for velocity densities */ -/**@{*/ -static LB_nodes_gpu nodes_a; -static LB_nodes_gpu nodes_b; -/**@}*/ - -/** boundary information */ -static LB_boundaries_gpu boundaries; - -/** struct for node force density */ -LB_node_force_density_gpu node_f = { - // force_density - nullptr, -#if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) || defined(EK_DEBUG) - // force_density_buf - nullptr -#endif -}; - -#ifdef LB_BOUNDARIES_GPU -/** @brief Force on the boundary nodes */ -static float *lb_boundary_force = nullptr; -#endif - -/** @brief Whether LB GPU was initialized */ -static bool *device_gpu_lb_initialized = nullptr; - -/** @brief Direction of data transfer between @ref nodes_a and @ref nodes_b - * during integration in @ref lb_integrate_GPU - */ -static bool intflag = true; -LB_nodes_gpu *current_nodes = nullptr; - -/** Parameters residing in constant memory */ -__device__ __constant__ LB_parameters_gpu para[1]; - -static constexpr float sqrt12 = 3.4641016151377544f; -static constexpr unsigned int threads_per_block = 64; -OptionalCounter rng_counter_coupling_gpu; -OptionalCounter rng_counter_fluid_gpu; - -/** Transformation from 1d array-index to xyz - * @param[in] index Node index / thread index - */ -template __device__ uint3 index_to_xyz(T index) { - auto const x = index % para->dim[0]; - index /= para->dim[0]; - auto const y = index % para->dim[1]; - index /= para->dim[1]; - auto const z = index; - return {x, y, z}; -} - -/** Transformation from xyz to 1d array-index - * @param[in] x,y,z The xyz array - */ -template __device__ T xyz_to_index(T x, T y, T z) { - return x + - static_cast(para->dim[0]) * (y + static_cast(para->dim[1]) * z); -} - -/** Calculate modes from the populations (space-transform). - * @param[in] populations Populations of one node. - * @param[out] mode Modes corresponding to given @p populations. 
- */ -__device__ void calc_m_from_n(Utils::Array const &populations, - Utils::Array &mode) { - /** - * The following convention and equations from @cite dunweg09a are used: - * The \f$\hat{c}_i\f$ are given by: - * - * \f{align*}{ - * c_{ 0} &= ( 0, 0, 0) \\ - * c_{ 1} &= ( 1, 0, 0) \\ - * c_{ 2} &= (-1, 0, 0) \\ - * c_{ 3} &= ( 0, 1, 0) \\ - * c_{ 4} &= ( 0,-1, 0) \\ - * c_{ 5} &= ( 0, 0, 1) \\ - * c_{ 6} &= ( 0, 0,-1) \\ - * c_{ 7} &= ( 1, 1, 0) \\ - * c_{ 8} &= (-1,-1, 0) \\ - * c_{ 9} &= ( 1,-1, 0) \\ - * c_{10} &= (-1, 1, 0) \\ - * c_{11} &= ( 1, 0, 1) \\ - * c_{12} &= (-1, 0,-1) \\ - * c_{13} &= ( 1, 0,-1) \\ - * c_{14} &= (-1, 0, 1) \\ - * c_{15} &= ( 0, 1, 1) \\ - * c_{16} &= ( 0,-1,-1) \\ - * c_{17} &= ( 0, 1,-1) \\ - * c_{18} &= ( 0,-1, 1) - * \f} - * - * The basis vectors (modes) are constructed as follows (eq. (111)): - * \f[m_k = \sum_{i} e_{ki} n_{i}\f] where the \f$e_{ki}\f$ form a - * linear transformation (matrix) that is given by (modified from table 1): - * - * \f{align*}{ - * e_{ 0,i} &= 1 \\ - * e_{ 1,i} &= \hat{c}_{i,x} \\ - * e_{ 2,i} &= \hat{c}_{i,y} \\ - * e_{ 3,i} &= \hat{c}_{i,z} \\ - * e_{ 4,i} &= \hat{c}_{i}^2 - 1 \\ - * e_{ 5,i} &= \hat{c}_{i,x}^2 - \hat{c}_{i,y}^2 \\ - * e_{ 6,i} &= \hat{c}_{i}^2 - 3 \hat{c}_{i,z}^2 \\ - * e_{ 7,i} &= \hat{c}_{i,x} \hat{c}_{i,y} \\ - * e_{ 8,i} &= \hat{c}_{i,x} \hat{c}_{i,z} \\ - * e_{ 9,i} &= \hat{c}_{i,y} \hat{c}_{i,z} \\ - * e_{10,i} &= (3 \hat{c}_{i}^2 - 5) \hat{c}_{i,x} \\ - * e_{11,i} &= (3 \hat{c}_{i}^2 - 5) \hat{c}_{i,y} \\ - * e_{12,i} &= (3 \hat{c}_{i}^2 - 5) \hat{c}_{i,z} \\ - * e_{13,i} &= (\hat{c}_{i,y}^2 - \hat{c}_{i,z}^2) \hat{c}_{i,x} \\ - * e_{14,i} &= (\hat{c}_{i,x}^2 - \hat{c}_{i,z}^2) \hat{c}_{i,y} \\ - * e_{15,i} &= (\hat{c}_{i,x}^2 - \hat{c}_{i,y}^2) \hat{c}_{i,z} \\ - * e_{16,i} &= 3 \hat{c}_{i}^4 - 6 \hat{c}_{i}^2 + 1 \\ - * e_{17,i} &= (2 \hat{c}_{i}^2 - 3) (\hat{c}_{i,x}^2 - \hat{c}_{i,y}^2) \\ - * e_{18,i} &= (2 \hat{c}_{i}^2 - 3) (\hat{c}_{i}^2 - 3 \hat{c}_{i,z}^2) - * \f} 
- * - * Such that the transformation matrix is given by: - * - * \code{.cpp} - * {{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - * { 0, 1,-1, 0, 0, 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 0, 0, 0, 0}, - * { 0, 0, 0, 1,-1, 0, 0, 1,-1,-1, 1, 0, 0, 0, 0, 1,-1, 1,-1}, - * { 0, 0, 0, 0, 0, 1,-1, 0, 0, 0, 0, 1,-1,-1, 1, 1,-1,-1, 1}, - * {-1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - * { 0, 1, 1,-1,-1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,-1,-1,-1,-1}, - * { 0, 1, 1, 1, 1,-2,-2, 2, 2, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1}, - * { 0, 0, 0, 0, 0, 0, 0, 1, 1,-1,-1, 0, 0, 0, 0, 0, 0, 0, 0}, - * { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,-1,-1, 0, 0, 0, 0}, - * { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,-1,-1}, - * { 0,-2, 2, 0, 0, 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 0, 0, 0, 0}, - * { 0, 0, 0,-2, 2, 0, 0, 1,-1,-1, 1, 0, 0, 0, 0, 1,-1, 1,-1}, - * { 0, 0, 0, 0, 0,-2, 2, 0, 0, 0, 0, 1,-1,-1, 1, 1,-1,-1, 1}, - * { 0, 0, 0, 0, 0, 0, 0, 1,-1, 1,-1,-1, 1,-1, 1, 0, 0, 0, 0}, - * { 0, 0, 0, 0, 0, 0, 0, 1,-1,-1, 1, 0, 0, 0, 0,-1, 1,-1, 1}, - * { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,-1,-1, 1,-1, 1, 1,-1}, - * { 1,-2,-2,-2,-2,-2,-2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - * { 0,-1,-1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,-1,-1,-1,-1}, - * { 0,-1,-1,-1,-1, 2, 2, 2, 2, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1}} - * \endcode - * - * with weights - * - * \f[q^{c_{i}} = ( 1/3, 1/18, 1/18, 1/18, - * 1/18, 1/18, 1/18, 1/36, - * 1/36, 1/36, 1/36, 1/36, - * 1/36, 1/36, 1/36, 1/36, - * 1/36, 1/36, 1/36 )\f] - * - * which makes the transformation satisfy the following - * orthogonality condition (eq. 
(109)): - * \f[\sum_{i} q^{c_{i}} e_{ki} e_{li} = w_{k} \delta_{kl}\f] - * where the weights are: - * - * \f[w_{i} = ( 1, 1/3, 1/3, 1/3, - * 2/3, 4/9, 4/3, 1/9, - * 1/9, 1/9, 2/3, 2/3, - * 2/3, 2/9, 2/9, 2/9, - * 2, 4/9, 4/3 )\f] - */ - for (int i = 0; i < 19; ++i) { - mode[i] = calc_mode_x_from_n(populations, i); - } -} - -__device__ void reset_LB_force_densities(unsigned int index, - LB_node_force_density_gpu node_f, - bool buffer = true) { -#if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) || defined(EK_DEBUG) - // Store backup of the node forces - if (buffer) { - node_f.force_density_buf[index] = node_f.force_density[index]; - } -#endif - - if (para->external_force_density) { - node_f.force_density[index] = para->ext_force_density; - } else { - node_f.force_density[index] = {}; - } -} - -__global__ void -reset_LB_force_densities_kernel(LB_node_force_density_gpu node_f, - bool buffer = true) { - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - if (index < para->number_of_nodes) - reset_LB_force_densities(index, node_f, buffer); -} - -void reset_LB_force_densities_GPU(bool buffer) { - dim3 dim_grid = - calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); - - KERNELCALL(reset_LB_force_densities_kernel, dim_grid, threads_per_block, - node_f, buffer); -} - -/** - * @param[in] modes Local register values modes - * @param[in] index Node index / thread index - * @param[in] node_f Local node force - * @param[out] d_v Local device values - */ -__device__ void update_rho_v(Utils::Array const &modes, - unsigned int index, - LB_node_force_density_gpu const &node_f, - LB_rho_v_gpu *d_v) { - float Rho_tot = 0.0f; - Utils::Array u_tot = {}; - - /* re-construct the real density - * remember that the populations are stored as differences to their - * equilibrium value */ - - d_v[index].rho = modes[0] + para->rho; - Rho_tot += modes[0] + para->rho; - u_tot[0] += modes[1]; - u_tot[1] += modes[2]; - u_tot[2] += 
modes[3]; - - /** If forces are present, the momentum density is redefined to - * include one half-step of the force action. See the - * Chapman-Enskog expansion in @cite ladd01a. - */ - - u_tot[0] += 0.5f * node_f.force_density[index][0]; - u_tot[1] += 0.5f * node_f.force_density[index][1]; - u_tot[2] += 0.5f * node_f.force_density[index][2]; - - u_tot[0] /= Rho_tot; - u_tot[1] /= Rho_tot; - u_tot[2] /= Rho_tot; - - d_v[index].v[0] = u_tot[0]; - d_v[index].v[1] = u_tot[1]; - d_v[index].v[2] = u_tot[2]; -} - -/** lb_relax_modes, means collision update of the modes - * @param[in] index Node index / thread index - * @param[in,out] mode Local register values mode - * @param[in] node_f Local node force - * @param[in,out] d_v Local device values - */ -__device__ void relax_modes(Utils::Array &mode, unsigned int index, - LB_node_force_density_gpu node_f, - LB_rho_v_gpu *d_v) { - float u_tot[3] = {0.0f, 0.0f, 0.0f}; - - update_rho_v(mode, index, node_f, d_v); - - u_tot[0] = d_v[index].v[0]; - u_tot[1] = d_v[index].v[1]; - u_tot[2] = d_v[index].v[2]; - - float Rho; - float j[3]; - Utils::Array modes_from_pi_eq; - - Rho = mode[0] + para->rho; - j[0] = Rho * u_tot[0]; - j[1] = Rho * u_tot[1]; - j[2] = Rho * u_tot[2]; - - /* equilibrium part of the stress modes (eq13 schiller) */ - - modes_from_pi_eq[0] = ((j[0] * j[0]) + (j[1] * j[1]) + (j[2] * j[2])) / Rho; - modes_from_pi_eq[1] = ((j[0] * j[0]) - (j[1] * j[1])) / Rho; - modes_from_pi_eq[2] = - (((j[0] * j[0]) + (j[1] * j[1]) + (j[2] * j[2])) - 3.0f * (j[2] * j[2])) / - Rho; - modes_from_pi_eq[3] = j[0] * j[1] / Rho; - modes_from_pi_eq[4] = j[0] * j[2] / Rho; - modes_from_pi_eq[5] = j[1] * j[2] / Rho; - - /* relax the stress modes (eq14 schiller) */ - - mode[4] = - modes_from_pi_eq[0] + para->gamma_bulk * (mode[4] - modes_from_pi_eq[0]); - mode[5] = - modes_from_pi_eq[1] + para->gamma_shear * (mode[5] - modes_from_pi_eq[1]); - mode[6] = - modes_from_pi_eq[2] + para->gamma_shear * (mode[6] - modes_from_pi_eq[2]); - mode[7] = 
- modes_from_pi_eq[3] + para->gamma_shear * (mode[7] - modes_from_pi_eq[3]); - mode[8] = - modes_from_pi_eq[4] + para->gamma_shear * (mode[8] - modes_from_pi_eq[4]); - mode[9] = - modes_from_pi_eq[5] + para->gamma_shear * (mode[9] - modes_from_pi_eq[5]); - - /* relax the ghost modes (project them out) */ - /* ghost modes have no equilibrium part due to orthogonality */ - - mode[10] = para->gamma_odd * mode[10]; - mode[11] = para->gamma_odd * mode[11]; - mode[12] = para->gamma_odd * mode[12]; - mode[13] = para->gamma_odd * mode[13]; - mode[14] = para->gamma_odd * mode[14]; - mode[15] = para->gamma_odd * mode[15]; - mode[16] = para->gamma_even * mode[16]; - mode[17] = para->gamma_even * mode[17]; - mode[18] = para->gamma_even * mode[18]; -} - -/** Thermalization of the modes with Gaussian random numbers - * @param[in] index Node index / thread index - * @param[in,out] mode Local register values mode - * @param[in] philox_counter Philox counter - */ -__device__ void thermalize_modes(Utils::Array &mode, - unsigned int index, uint64_t philox_counter) { - float Rho; - float4 random_floats; - /* mass mode */ - Rho = mode[0] + para->rho; - - /* stress modes */ - random_floats = random_wrapper_philox(index, 4, philox_counter); - mode[4] += sqrtf(Rho * (para->mu * (2.0f / 3.0f) * - (1.0f - (para->gamma_bulk * para->gamma_bulk)))) * - (random_floats.w - 0.5f) * sqrt12; - mode[5] += sqrtf(Rho * (para->mu * (4.0f / 9.0f) * - (1.0f - (para->gamma_shear * para->gamma_shear)))) * - (random_floats.x - 0.5f) * sqrt12; - - mode[6] += sqrtf(Rho * (para->mu * (4.0f / 3.0f) * - (1.0f - (para->gamma_shear * para->gamma_shear)))) * - (random_floats.y - 0.5f) * sqrt12; - mode[7] += sqrtf(Rho * (para->mu * (1.0f / 9.0f) * - (1.0f - (para->gamma_shear * para->gamma_shear)))) * - (random_floats.z - 0.5f) * sqrt12; - - random_floats = random_wrapper_philox(index, 8, philox_counter); - mode[8] += sqrtf(Rho * (para->mu * (1.0f / 9.0f) * - (1.0f - (para->gamma_shear * para->gamma_shear)))) * - 
(random_floats.w - 0.5f) * sqrt12; - mode[9] += sqrtf(Rho * (para->mu * (1.0f / 9.0f) * - (1.0f - (para->gamma_shear * para->gamma_shear)))) * - (random_floats.x - 0.5f) * sqrt12; - - /* ghost modes */ - mode[10] += sqrtf(Rho * (para->mu * (2.0f / 3.0f) * - (1.0f - (para->gamma_odd * para->gamma_odd)))) * - (random_floats.y - 0.5f) * sqrt12; - mode[11] += sqrtf(Rho * (para->mu * (2.0f / 3.0f) * - (1.0f - (para->gamma_odd * para->gamma_odd)))) * - (random_floats.z - 0.5f) * sqrt12; - - random_floats = random_wrapper_philox(index, 12, philox_counter); - mode[12] += sqrtf(Rho * (para->mu * (2.0f / 3.0f) * - (1.0f - (para->gamma_odd * para->gamma_odd)))) * - (random_floats.w - 0.5f) * sqrt12; - mode[13] += sqrtf(Rho * (para->mu * (2.0f / 9.0f) * - (1.0f - (para->gamma_odd * para->gamma_odd)))) * - (random_floats.x - 0.5f) * sqrt12; - - mode[14] += sqrtf(Rho * (para->mu * (2.0f / 9.0f) * - (1.0f - (para->gamma_odd * para->gamma_odd)))) * - (random_floats.y - 0.5f) * sqrt12; - mode[15] += sqrtf(Rho * (para->mu * (2.0f / 9.0f) * - (1.0f - (para->gamma_odd * para->gamma_odd)))) * - (random_floats.z - 0.5f) * sqrt12; - - random_floats = random_wrapper_philox(index, 16, philox_counter); - mode[16] += sqrtf(Rho * (para->mu * (2.0f) * - (1.0f - (para->gamma_even * para->gamma_even)))) * - (random_floats.w - 0.5f) * sqrt12; - mode[17] += sqrtf(Rho * (para->mu * (4.0f / 9.0f) * - (1.0f - (para->gamma_even * para->gamma_even)))) * - (random_floats.x - 0.5f) * sqrt12; - - mode[18] += sqrtf(Rho * (para->mu * (4.0f / 3.0f) * - (1.0f - (para->gamma_even * para->gamma_even)))) * - (random_floats.y - 0.5f) * sqrt12; -} - -/** Normalization of the modes need before back-transformation into velocity - * space - * @param[in,out] mode Local register values mode - */ -__device__ void normalize_modes(Utils::Array &mode) { - /* normalization factors enter in the back transformation */ - mode[0] *= 1.0f; - mode[1] *= 3.0f; - mode[2] *= 3.0f; - mode[3] *= 3.0f; - mode[4] *= 3.0f / 2.0f; - 
mode[5] *= 9.0f / 4.0f; - mode[6] *= 3.0f / 4.0f; - mode[7] *= 9.0f; - mode[8] *= 9.0f; - mode[9] *= 9.0f; - mode[10] *= 3.0f / 2.0f; - mode[11] *= 3.0f / 2.0f; - mode[12] *= 3.0f / 2.0f; - mode[13] *= 9.0f / 2.0f; - mode[14] *= 9.0f / 2.0f; - mode[15] *= 9.0f / 2.0f; - mode[16] *= 1.0f / 2.0f; - mode[17] *= 9.0f / 4.0f; - mode[18] *= 3.0f / 4.0f; -} - -/** Back-transformation from modespace to densityspace and streaming with - * the push method using pbc - * @param[in] index Node index / thread index - * @param[in] mode Local register values mode - * @param[out] n_b Local node residing in array b - */ -__device__ void calc_n_from_modes_push(LB_nodes_gpu n_b, - Utils::Array const &mode, - unsigned int index) { - auto const xyz = index_to_xyz(index); - unsigned int x = xyz.x; - unsigned int y = xyz.y; - unsigned int z = xyz.z; - - n_b.populations[x + para->dim[0] * y + para->dim[0] * para->dim[1] * z][0] = - 1.0f / 3.0f * (mode[0] - mode[4] + mode[16]); - - n_b.populations[(x + 1) % para->dim[0] + para->dim[0] * y + - para->dim[0] * para->dim[1] * z][1] = - 1.0f / 18.0f * - (mode[0] + mode[1] + mode[5] + mode[6] - mode[17] - mode[18] - - 2.0f * (mode[10] + mode[16])); - - n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + para->dim[0] * y + - para->dim[0] * para->dim[1] * z][2] = - 1.0f / 18.0f * - (mode[0] - mode[1] + mode[5] + mode[6] - mode[17] - mode[18] + - 2.0f * (mode[10] - mode[16])); - - n_b.populations[x + para->dim[0] * ((y + 1) % para->dim[1]) + - para->dim[0] * para->dim[1] * z][3] = - 1.0f / 18.0f * - (mode[0] + mode[2] - mode[5] + mode[6] + mode[17] - mode[18] - - 2.0f * (mode[11] + mode[16])); - - n_b.populations[x + para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) + - para->dim[0] * para->dim[1] * z][4] = - 1.0f / 18.0f * - (mode[0] - mode[2] - mode[5] + mode[6] + mode[17] - mode[18] + - 2.0f * (mode[11] - mode[16])); - - n_b.populations[x + para->dim[0] * y + - para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][5] = - 1.0f / 18.0f * 
- (mode[0] + mode[3] - 2.0f * (mode[6] + mode[12] + mode[16] - mode[18])); - - n_b.populations[x + para->dim[0] * y + - para->dim[0] * para->dim[1] * - ((para->dim[2] + z - 1) % para->dim[2])][6] = - 1.0f / 18.0f * - (mode[0] - mode[3] - 2.0f * (mode[6] - mode[12] + mode[16] - mode[18])); - - n_b.populations[(x + 1) % para->dim[0] + - para->dim[0] * ((y + 1) % para->dim[1]) + - para->dim[0] * para->dim[1] * z][7] = - 1.0f / 36.0f * - (mode[0] + mode[1] + mode[2] + mode[4] + 2.0f * mode[6] + mode[7] + - mode[10] + mode[11] + mode[13] + mode[14] + mode[16] + 2.0f * mode[18]); - - n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + - para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) + - para->dim[0] * para->dim[1] * z][8] = - 1.0f / 36.0f * - (mode[0] - mode[1] - mode[2] + mode[4] + 2.0f * mode[6] + mode[7] - - mode[10] - mode[11] - mode[13] - mode[14] + mode[16] + 2.0f * mode[18]); - - n_b.populations[(x + 1) % para->dim[0] + - para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) + - para->dim[0] * para->dim[1] * z][9] = - 1.0f / 36.0f * - (mode[0] + mode[1] - mode[2] + mode[4] + 2.0f * mode[6] - mode[7] + - mode[10] - mode[11] + mode[13] - mode[14] + mode[16] + 2.0f * mode[18]); - - n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + - para->dim[0] * ((y + 1) % para->dim[1]) + - para->dim[0] * para->dim[1] * z][10] = - 1.0f / 36.0f * - (mode[0] - mode[1] + mode[2] + mode[4] + 2.0f * mode[6] - mode[7] - - mode[10] + mode[11] - mode[13] + mode[14] + mode[16] + 2.0f * mode[18]); - - n_b.populations[(x + 1) % para->dim[0] + para->dim[0] * y + - para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][11] = - 1.0f / 36.0f * - (mode[0] + mode[1] + mode[3] + mode[4] + mode[5] - mode[6] + mode[8] + - mode[10] + mode[12] - mode[13] + mode[15] + mode[16] + mode[17] - - mode[18]); - - n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + para->dim[0] * y + - para->dim[0] * para->dim[1] * - ((para->dim[2] + z - 1) % para->dim[2])][12] = - 1.0f / 36.0f * - 
(mode[0] - mode[1] - mode[3] + mode[4] + mode[5] - mode[6] + mode[8] - - mode[10] - mode[12] + mode[13] - mode[15] + mode[16] + mode[17] - - mode[18]); - - n_b.populations[(x + 1) % para->dim[0] + para->dim[0] * y + - para->dim[0] * para->dim[1] * - ((para->dim[2] + z - 1) % para->dim[2])][13] = - 1.0f / 36.0f * - (mode[0] + mode[1] - mode[3] + mode[4] + mode[5] - mode[6] - mode[8] + - mode[10] - mode[12] - mode[13] - mode[15] + mode[16] + mode[17] - - mode[18]); - - n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + para->dim[0] * y + - para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][14] = - 1.0f / 36.0f * - (mode[0] - mode[1] + mode[3] + mode[4] + mode[5] - mode[6] - mode[8] - - mode[10] + mode[12] + mode[13] + mode[15] + mode[16] + mode[17] - - mode[18]); - - n_b.populations[x + para->dim[0] * ((y + 1) % para->dim[1]) + - para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][15] = - 1.0f / 36.0f * - (mode[0] + mode[2] + mode[3] + mode[4] - mode[5] - mode[6] + mode[9] + - mode[11] + mode[12] - mode[14] - mode[15] + mode[16] - mode[17] - - mode[18]); - - n_b.populations[x + para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) + - para->dim[0] * para->dim[1] * - ((para->dim[2] + z - 1) % para->dim[2])][16] = - 1.0f / 36.0f * - (mode[0] - mode[2] - mode[3] + mode[4] - mode[5] - mode[6] + mode[9] - - mode[11] - mode[12] + mode[14] + mode[15] + mode[16] - mode[17] - - mode[18]); - - n_b.populations[x + para->dim[0] * ((y + 1) % para->dim[1]) + - para->dim[0] * para->dim[1] * - ((para->dim[2] + z - 1) % para->dim[2])][17] = - 1.0f / 36.0f * - (mode[0] + mode[2] - mode[3] + mode[4] - mode[5] - mode[6] - mode[9] + - mode[11] - mode[12] - mode[14] + mode[15] + mode[16] - mode[17] - - mode[18]); - - n_b.populations[x + para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) + - para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][18] = - 1.0f / 36.0f * - (mode[0] - mode[2] + mode[3] + mode[4] - mode[5] - mode[6] - mode[9] - - mode[11] + mode[12] + 
mode[14] - mode[15] + mode[16] - mode[17] - - mode[18]); -} - -/** Bounce back boundary conditions. - * - * The populations that have propagated into a boundary node - * are bounced back to the node they came from. This results - * in no slip boundary conditions, cf. @cite ladd01a. - * - * @param[in] index Node index / thread index - * @param[in] n_curr Local node receiving the current node field - * @param[in] boundaries Constant velocity at the boundary, set by the user - * @param[out] lb_boundary_force Force on the boundary nodes - */ -__device__ void bounce_back_boundaries(LB_nodes_gpu n_curr, - LB_boundaries_gpu boundaries, - unsigned int index, - float *lb_boundary_force) { - int c[3]; - float shift, weight, pop_to_bounce_back; - float boundary_force[3] = {0.0f, 0.0f, 0.0f}; - std::size_t to_index, to_index_x, to_index_y, to_index_z; - unsigned population, inverse; - - if (boundaries.index[index] != 0) { - auto const v = boundaries.velocity[index]; - - auto const xyz = index_to_xyz(index); - - unsigned int x = xyz.x; - unsigned int y = xyz.y; - unsigned int z = xyz.z; - - /* store populations temporary in second lattice to avoid race conditions */ - - // TODO : PUT IN EQUILIBRIUM CONTRIBUTION TO THE BOUNCE-BACK DENSITY FOR THE - // BOUNDARY FORCE - // TODO : INITIALIZE BOUNDARY FORCE PROPERLY, HAS NONZERO ELEMENTS IN FIRST - // STEP - // TODO : SET INTERNAL BOUNDARY NODE VALUES TO ZERO - -#define BOUNCEBACK() \ - shift = 2.0f / para->agrid * para->rho * 3.0f * weight * para->tau * \ - (v[0] * static_cast(c[0]) + v[1] * static_cast(c[1]) + \ - v[2] * static_cast(c[2])); \ - pop_to_bounce_back = n_curr.populations[index][population]; \ - to_index_x = \ - (x + static_cast(c[0]) + para->dim[0]) % para->dim[0]; \ - to_index_y = \ - (y + static_cast(c[1]) + para->dim[1]) % para->dim[1]; \ - to_index_z = \ - (z + static_cast(c[2]) + para->dim[2]) % para->dim[2]; \ - to_index = to_index_x + para->dim[0] * to_index_y + \ - para->dim[0] * para->dim[1] * to_index_z; \ - 
if (n_curr.boundary[to_index] == 0) { \ - boundary_force[0] += \ - (2.0f * pop_to_bounce_back + shift) * static_cast(c[0]); \ - boundary_force[1] += \ - (2.0f * pop_to_bounce_back + shift) * static_cast(c[1]); \ - boundary_force[2] += \ - (2.0f * pop_to_bounce_back + shift) * static_cast(c[2]); \ - n_curr.populations[to_index][inverse] = pop_to_bounce_back + shift; \ - } - - // the resting population does nothing, i.e., population 0. - c[0] = 1; - c[1] = 0; - c[2] = 0; - weight = 1.f / 18.f; - population = 2; - inverse = 1; - BOUNCEBACK(); - - c[0] = -1; - c[1] = 0; - c[2] = 0; - weight = 1.f / 18.f; - population = 1; - inverse = 2; - BOUNCEBACK(); - - c[0] = 0; - c[1] = 1; - c[2] = 0; - weight = 1.f / 18.f; - population = 4; - inverse = 3; - BOUNCEBACK(); - - c[0] = 0; - c[1] = -1; - c[2] = 0; - weight = 1.f / 18.f; - population = 3; - inverse = 4; - BOUNCEBACK(); - - c[0] = 0; - c[1] = 0; - c[2] = 1; - weight = 1.f / 18.f; - population = 6; - inverse = 5; - BOUNCEBACK(); - - c[0] = 0; - c[1] = 0; - c[2] = -1; - weight = 1.f / 18.f; - population = 5; - inverse = 6; - BOUNCEBACK(); - - c[0] = 1; - c[1] = 1; - c[2] = 0; - weight = 1.f / 36.f; - population = 8; - inverse = 7; - BOUNCEBACK(); - - c[0] = -1; - c[1] = -1; - c[2] = 0; - weight = 1.f / 36.f; - population = 7; - inverse = 8; - BOUNCEBACK(); - - c[0] = 1; - c[1] = -1; - c[2] = 0; - weight = 1.f / 36.f; - population = 10; - inverse = 9; - BOUNCEBACK(); - - c[0] = -1; - c[1] = 1; - c[2] = 0; - weight = 1.f / 36.f; - population = 9; - inverse = 10; - BOUNCEBACK(); - - c[0] = 1; - c[1] = 0; - c[2] = 1; - weight = 1.f / 36.f; - population = 12; - inverse = 11; - BOUNCEBACK(); - - c[0] = -1; - c[1] = 0; - c[2] = -1; - weight = 1.f / 36.f; - population = 11; - inverse = 12; - BOUNCEBACK(); - - c[0] = 1; - c[1] = 0; - c[2] = -1; - weight = 1.f / 36.f; - population = 14; - inverse = 13; - BOUNCEBACK(); - - c[0] = -1; - c[1] = 0; - c[2] = 1; - weight = 1.f / 36.f; - population = 13; - inverse = 14; - BOUNCEBACK(); - 
- c[0] = 0; - c[1] = 1; - c[2] = 1; - weight = 1.f / 36.f; - population = 16; - inverse = 15; - BOUNCEBACK(); - - c[0] = 0; - c[1] = -1; - c[2] = -1; - weight = 1.f / 36.f; - population = 15; - inverse = 16; - BOUNCEBACK(); - - c[0] = 0; - c[1] = 1; - c[2] = -1; - weight = 1.f / 36.f; - population = 18; - inverse = 17; - BOUNCEBACK(); - - c[0] = 0; - c[1] = -1; - c[2] = 1; - weight = 1.f / 36.f; - population = 17; - inverse = 18; - BOUNCEBACK(); - - atomicAdd(&lb_boundary_force[3 * (n_curr.boundary[index] - 1) + 0], - boundary_force[0]); - atomicAdd(&lb_boundary_force[3 * (n_curr.boundary[index] - 1) + 1], - boundary_force[1]); - atomicAdd(&lb_boundary_force[3 * (n_curr.boundary[index] - 1) + 2], - boundary_force[2]); - } -} - -/** Add external forces within the modespace, needed for particle-interaction - * @param[in] index Node index / thread index - * @param[in,out] mode Local register values mode - * @param[in,out] node_f Local node force - * @param[in] d_v Local device values - */ -__device__ void apply_forces(unsigned int index, Utils::Array &mode, - LB_node_force_density_gpu node_f, - LB_rho_v_gpu *d_v) { - float u[3] = {0.0f, 0.0f, 0.0f}, C[6] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; - /* Note: the values d_v were calculated in relax_modes() */ - - u[0] = d_v[index].v[0]; - u[1] = d_v[index].v[1]; - u[2] = d_v[index].v[2]; - - C[0] += (1.0f + para->gamma_shear) * u[0] * node_f.force_density[index][0] + - 1.0f / 3.0f * (para->gamma_bulk - para->gamma_shear) * - (u[0] * node_f.force_density[index][0] + - u[1] * node_f.force_density[index][1] + - u[2] * node_f.force_density[index][2]); - - C[2] += (1.0f + para->gamma_shear) * u[1] * node_f.force_density[index][1] + - 1.0f / 3.0f * (para->gamma_bulk - para->gamma_shear) * - (u[0] * node_f.force_density[index][0] + - u[1] * node_f.force_density[index][1] + - u[2] * node_f.force_density[index][2]); - - C[5] += (1.0f + para->gamma_shear) * u[2] * node_f.force_density[index][2] + - 1.0f / 3.0f * (para->gamma_bulk - 
para->gamma_shear) * - (u[0] * node_f.force_density[index][0] + - u[1] * node_f.force_density[index][1] + - u[2] * node_f.force_density[index][2]); - - C[1] += 1.0f / 2.0f * (1.0f + para->gamma_shear) * - (u[0] * node_f.force_density[index][1] + - u[1] * node_f.force_density[index][0]); - - C[3] += 1.0f / 2.0f * (1.0f + para->gamma_shear) * - (u[0] * node_f.force_density[index][2] + - u[2] * node_f.force_density[index][0]); - - C[4] += 1.0f / 2.0f * (1.0f + para->gamma_shear) * - (u[1] * node_f.force_density[index][2] + - u[2] * node_f.force_density[index][1]); - - /* update momentum modes */ - mode[1] += node_f.force_density[index][0]; - mode[2] += node_f.force_density[index][1]; - mode[3] += node_f.force_density[index][2]; - - /* update stress modes */ - mode[4] += C[0] + C[2] + C[5]; - mode[5] += C[0] - C[2]; - mode[6] += C[0] + C[2] - 2.0f * C[5]; - mode[7] += C[1]; - mode[8] += C[3]; - mode[9] += C[4]; - - reset_LB_force_densities(index, node_f); -} - -__device__ Utils::Array -stress_modes(LB_rho_v_gpu const &rho_v, const Utils::Array &modes) { - /* note that d_v[index].v[] already includes the 1/2 f term, accounting - * for the pre- and post-collisional average - */ - auto const density = rho_v.rho; - Utils::Array j{density * rho_v.v[0], density * rho_v.v[1], - density * rho_v.v[2]}; - // equilibrium part of the stress modes, which comes from - // the equality between modes and stress tensor components - - /* m4 = trace(pi) - rho - m5 = pi_xx - pi_yy - m6 = trace(pi) - 3 pi_zz - m7 = pi_xy - m8 = pi_xz - m9 = pi_yz */ - - // and plugging in the Euler stress for the equilibrium: - // pi_eq = rho_0*c_s^2*I3 + (j \otimes j)/rho - // with I3 the 3D identity matrix and - // rho = \trace(rho_0*c_s^2*I3), which yields - - /* m4_from_pi_eq = j.j - m5_from_pi_eq = j_x*j_x - j_y*j_y - m6_from_pi_eq = j.j - 3*j_z*j_z - m7_from_pi_eq = j_x*j_y - m8_from_pi_eq = j_x*j_z - m9_from_pi_eq = j_y*j_z */ - - // where the / density term has been dropped. 
We thus obtain: - /* Now we must predict the outcome of the next collision */ - /* We immediately average pre- and post-collision. */ - /* TODO: need a reference for this. */ - Utils::Array modes_from_pi_eq{ - (j[0] * j[0] + j[1] * j[1] + j[2] * j[2]) / density, - (j[0] * j[0] - j[1] * j[1]) / density, - (j[0] * j[0] + j[1] * j[1] + j[2] * j[2] - 3.0f * j[2] * j[2]) / density, - j[0] * j[1] / density, - j[0] * j[2] / density, - j[1] * j[2] / density}; - auto res = modes; - res[4] = modes_from_pi_eq[0] + - (0.5f + 0.5f * para->gamma_bulk) * (modes[4] - modes_from_pi_eq[0]); - res[5] = modes_from_pi_eq[1] + - (0.5f + 0.5f * para->gamma_shear) * (modes[5] - modes_from_pi_eq[1]); - res[6] = modes_from_pi_eq[2] + - (0.5f + 0.5f * para->gamma_shear) * (modes[6] - modes_from_pi_eq[2]); - res[7] = modes_from_pi_eq[3] + - (0.5f + 0.5f * para->gamma_shear) * (modes[7] - modes_from_pi_eq[3]); - res[8] = modes_from_pi_eq[4] + - (0.5f + 0.5f * para->gamma_shear) * (modes[8] - modes_from_pi_eq[4]); - res[9] = modes_from_pi_eq[5] + - (0.5f + 0.5f * para->gamma_shear) * (modes[9] - modes_from_pi_eq[5]); - return res; -} - -/** Calculate the stress tensor. - * Transform the stress tensor components according to the modes that - * correspond to those used by U. Schiller. In terms of populations this - * expression then corresponds exactly to those in eq. (116)-(121) in - * @cite dunweg07a, when these are written out in populations. - * But to ensure this, the expression in Schiller's modes has to be - * different! 
- * @param[in] modes Local register values modes - */ -__device__ Utils::Array -stress_from_stress_modes(Utils::Array const &modes) { - return {(2.0f * (modes[0] + modes[4]) + modes[6] + 3.0f * modes[5]) / 6.0f, - modes[7], - (2.0f * (modes[0] + modes[4]) + modes[6] - 3.0f * modes[5]) / 6.0f, - modes[8], - modes[9], - (modes[0] + modes[4] - modes[6]) / 3.0f}; -} - -/** Calculate hydrodynamic fields in LB units - * @param[in] n_a Local node residing in array a for boundary flag - * @param[in] modes Local register values modes - * @param[out] d_p_v Local print values - * @param[out] d_v Local device values - * @param[in] node_f Local node force - * @param[in] index Node index / thread index - * @param[in] print_index Node index / thread index - * TODO: code duplication with \ref calc_values_from_m - */ -__device__ void -calc_values_in_LB_units(LB_nodes_gpu n_a, Utils::Array const &modes, - LB_rho_v_pi_gpu *d_p_v, LB_rho_v_gpu *d_v, - LB_node_force_density_gpu node_f, unsigned int index, - unsigned int print_index) { - - if (n_a.boundary[index] == 0) { - /* Ensure we are working with the current values of d_v */ - update_rho_v(modes, index, node_f, d_v); - - d_p_v[print_index].rho = d_v[index].rho; - - d_p_v[print_index].v = d_v[index].v; - auto const modes_tmp = stress_modes(d_v[index], modes); - - d_p_v[print_index].pi = stress_from_stress_modes(modes_tmp); - - } else { - d_p_v[print_index].rho = 0.0f; - d_p_v[print_index].v = {}; - d_p_v[print_index].pi = {}; - } -} - -/** Calculate hydrodynamic fields in MD units - * @param[out] mode_single Local register values mode - * @param[in] d_v_single Local device values - * @param[out] rho_out Density - * @param[out] j_out Momentum - * @param[out] pi_out Pressure tensor - */ -__device__ void calc_values_from_m(Utils::Array const &mode_single, - LB_rho_v_gpu const &d_v_single, - float *rho_out, float *j_out, - Utils::Array &pi_out) { - *rho_out = d_v_single.rho; - float Rho = d_v_single.rho; - j_out[0] = Rho * 
d_v_single.v[0]; - j_out[1] = Rho * d_v_single.v[1]; - j_out[2] = Rho * d_v_single.v[2]; - - // Now we must predict the outcome of the next collision - // We immediately average pre- and post-collision. - // Transform the stress tensor components according to the mode_singles. - pi_out = stress_from_stress_modes(stress_modes(d_v_single, mode_single)); -} - -/** Interpolation kernel. - * See @cite dunweg09a - * @param u Distance to grid point in units of agrid - * @retval Value for the interpolation function. - */ -__device__ __inline__ float -three_point_polynomial_smallerequal_than_half(float u) { - return 1.f / 3.f * (1.f + sqrtf(1.f - 3.f * u * u)); -} - -/** Interpolation kernel. - * See @cite dunweg09a - * @param u Distance to grid point in units of agrid - * @retval Value for the interpolation function. - */ -__device__ __inline__ float three_point_polynomial_larger_than_half(float u) { - return 1.f / 6.f * - (5.f + -3 * fabsf(u) - sqrtf(-2.f + 6.f * fabsf(u) - 3.f * u * u)); -} - -/** - * @brief Get velocity of at index. 
- */ -__device__ __inline__ float3 node_velocity(float rho_eq, LB_nodes_gpu n_a, - unsigned index) { - auto const boundary_index = n_a.boundary[index]; - - if (boundary_index) { - auto const inv_lattice_speed = para->tau / para->agrid; - auto const &u = n_a.boundary_velocity[index]; - return make_float3(inv_lattice_speed * u[0], inv_lattice_speed * u[1], - inv_lattice_speed * u[2]); - } - - auto const rho = rho_eq + calc_mode_x_from_n(n_a.populations[index], 0); - return make_float3(calc_mode_x_from_n(n_a.populations[index], 1) / rho, - calc_mode_x_from_n(n_a.populations[index], 2) / rho, - calc_mode_x_from_n(n_a.populations[index], 3) / rho); -} - -__device__ __inline__ float3 -velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position, - Utils::Array &node_indices, - Utils::Array &delta) { - Utils::Array center_node_index{}; - Utils::Array temp_delta{}; - - for (unsigned i = 0; i < 3; ++i) { - // position of particle in units of agrid. - auto const scaled_pos = particle_position[i] / para->agrid - 0.5f; - center_node_index[i] = static_cast(rint(scaled_pos)); - // distance to center node in agrid - auto const dist = scaled_pos - static_cast(center_node_index[i]); - // distance to left node in agrid - auto const dist_m1 = - scaled_pos - static_cast(center_node_index[i] - 1); - // distance to right node in agrid - auto const dist_p1 = - scaled_pos - static_cast(center_node_index[i] + 1); - if (i == 0) { - temp_delta[0].x = three_point_polynomial_larger_than_half(dist_m1); - temp_delta[1].x = three_point_polynomial_smallerequal_than_half(dist); - temp_delta[2].x = three_point_polynomial_larger_than_half(dist_p1); - } else if (i == 1) { - temp_delta[0].y = three_point_polynomial_larger_than_half(dist_m1); - temp_delta[1].y = three_point_polynomial_smallerequal_than_half(dist); - temp_delta[2].y = three_point_polynomial_larger_than_half(dist_p1); - } else if (i == 2) { - temp_delta[0].z = three_point_polynomial_larger_than_half(dist_m1); - temp_delta[1].z 
= three_point_polynomial_smallerequal_than_half(dist); - temp_delta[2].z = three_point_polynomial_larger_than_half(dist_p1); - } - } - - auto fold_if_necessary = [](int ind, int dim) { - if (ind >= dim) { - return ind - dim; - } - if (ind < 0) { - return ind + dim; - } - return ind; - }; - - unsigned cnt = 0; - float3 interpolated_u{0.0f, 0.0f, 0.0f}; -#pragma unroll 1 - for (int i = 0; i < 3; ++i) { -#pragma unroll 1 - for (int j = 0; j < 3; ++j) { -#pragma unroll 1 - for (int k = 0; k < 3; ++k) { - auto const x = fold_if_necessary(center_node_index[0] - 1 + i, - static_cast(para->dim[0])); - auto const y = fold_if_necessary(center_node_index[1] - 1 + j, - static_cast(para->dim[1])); - auto const z = fold_if_necessary(center_node_index[2] - 1 + k, - static_cast(para->dim[2])); - delta[cnt] = temp_delta[i].x * temp_delta[j].y * temp_delta[k].z; - auto const index = static_cast(xyz_to_index(x, y, z)); - node_indices[cnt] = index; - - auto const node_u = node_velocity(para->rho, n_a, index); - interpolated_u.x += delta[cnt] * node_u.x; - interpolated_u.y += delta[cnt] * node_u.y; - interpolated_u.z += delta[cnt] * node_u.z; - - ++cnt; - } - } - } - return interpolated_u; -} - -/** Velocity interpolation. - * Eq. (12) @cite ahlrichs99a. - * @param[in] n_a Local node residing in array a - * @param[in] particle_position Particle position - * @param[out] node_index Node index around (8) particle - * @param[out] delta Weighting of particle position - * @retval Interpolated velocity - */ -__device__ __inline__ float3 -velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position, - Utils::Array &node_index, - Utils::Array &delta) { - Utils::Array left_node_index; - Utils::Array temp_delta; - // Eq. 
(10) and (11) in @cite ahlrichs99a page 8227 -#pragma unroll - for (unsigned i = 0; i < 3; ++i) { - auto const scaledpos = particle_position[i] / para->agrid - 0.5f; - left_node_index[i] = static_cast(floorf(scaledpos)); - temp_delta[3 + i] = scaledpos - static_cast(left_node_index[i]); - temp_delta[i] = 1.0f - temp_delta[3 + i]; - } - - delta[0] = temp_delta[0] * temp_delta[1] * temp_delta[2]; - delta[1] = temp_delta[3] * temp_delta[1] * temp_delta[2]; - delta[2] = temp_delta[0] * temp_delta[4] * temp_delta[2]; - delta[3] = temp_delta[3] * temp_delta[4] * temp_delta[2]; - delta[4] = temp_delta[0] * temp_delta[1] * temp_delta[5]; - delta[5] = temp_delta[3] * temp_delta[1] * temp_delta[5]; - delta[6] = temp_delta[0] * temp_delta[4] * temp_delta[5]; - delta[7] = temp_delta[3] * temp_delta[4] * temp_delta[5]; - - // modulo for negative numbers is strange at best, shift to make sure we are - // positive - int const x = (left_node_index[0] + static_cast(para->dim[0])) % - static_cast(para->dim[0]); - int const y = (left_node_index[1] + static_cast(para->dim[1])) % - static_cast(para->dim[1]); - int const z = (left_node_index[2] + static_cast(para->dim[2])) % - static_cast(para->dim[2]); - auto fold_if_necessary = [](int ind, int dim) { - return ind >= dim ? 
ind % dim : ind; - }; - auto const xp1 = fold_if_necessary(x + 1, static_cast(para->dim[0])); - auto const yp1 = fold_if_necessary(y + 1, static_cast(para->dim[1])); - auto const zp1 = fold_if_necessary(z + 1, static_cast(para->dim[2])); - node_index[0] = static_cast(xyz_to_index(x, y, z)); - node_index[1] = static_cast(xyz_to_index(xp1, y, z)); - node_index[2] = static_cast(xyz_to_index(x, yp1, z)); - node_index[3] = static_cast(xyz_to_index(xp1, yp1, z)); - node_index[4] = static_cast(xyz_to_index(x, y, zp1)); - node_index[5] = static_cast(xyz_to_index(xp1, y, zp1)); - node_index[6] = static_cast(xyz_to_index(x, yp1, zp1)); - node_index[7] = static_cast(xyz_to_index(xp1, yp1, zp1)); - - float3 interpolated_u{0.0f, 0.0f, 0.0f}; - for (unsigned i = 0; i < 8; ++i) { - auto const node_u = node_velocity(para->rho, n_a, node_index[i]); - interpolated_u.x += delta[i] * node_u.x; - interpolated_u.y += delta[i] * node_u.y; - interpolated_u.z += delta[i] * node_u.z; - } - return interpolated_u; -} - -/** Calculate viscous force. - * Eq. (12) @cite ahlrichs99a. 
- * @param[in] n_a Local node residing in array a - * @param[out] delta Weighting of particle position - * @param[out] delta_j Weighting of particle momentum - * @param[in,out] particle_data Particle position and velocity - * @param[in,out] particle_force Particle force - * @param[in] part_index Particle id / thread id - * @param[out] node_index Node index around (8) particle - * @param[in] flag_cs Determine if we are at the centre (0, - * typical) or at the source (1, swimmer only) - * @param[in] philox_counter Philox counter - * @param[in] friction Friction constant for the particle coupling - * @param[in] time_step MD time step - * @tparam no_of_neighbours The number of neighbours to consider for - * interpolation - */ -template -__device__ void calc_viscous_force( - LB_nodes_gpu n_a, Utils::Array &delta, - CUDA_particle_data *particle_data, float *particle_force, - unsigned int part_index, float *delta_j, - Utils::Array &node_index, bool flag_cs, - uint64_t philox_counter, float friction, float time_step) { - auto const flag_cs_float = static_cast(flag_cs); - // Zero out workspace -#pragma unroll - for (int jj = 0; jj < 3; ++jj) { - delta_j[jj] = 0.0f; - } - - // Zero out only if we are at the centre of the particle <=> flag_cs = 0 - particle_force[3 * part_index + 0] = - flag_cs_float * particle_force[3 * part_index + 0]; - particle_force[3 * part_index + 1] = - flag_cs_float * particle_force[3 * part_index + 1]; - particle_force[3 * part_index + 2] = - flag_cs_float * particle_force[3 * part_index + 2]; - - float position[3]; - position[0] = particle_data[part_index].p[0]; - position[1] = particle_data[part_index].p[1]; - position[2] = particle_data[part_index].p[2]; - - float velocity[3]; - velocity[0] = particle_data[part_index].v[0]; - velocity[1] = particle_data[part_index].v[1]; - velocity[2] = particle_data[part_index].v[2]; - -#ifdef ENGINE - // First calculate interpolated velocity for dipole source, - // such that we don't overwrite mode, etc. 
for the rest of the function - float direction = float(particle_data[part_index].swim.push_pull) * - particle_data[part_index].swim.dipole_length; - // Extrapolate position by dipole length if we are at the centre of the - // particle - position[0] += - flag_cs_float * direction * particle_data[part_index].swim.director[0]; - position[1] += - flag_cs_float * direction * particle_data[part_index].swim.director[1]; - position[2] += - flag_cs_float * direction * particle_data[part_index].swim.director[2]; -#endif - - float3 const interpolated_u = - velocity_interpolation(n_a, position, node_index, delta); - -#ifdef ENGINE - velocity[0] -= particle_data[part_index].swim.v_swim * - particle_data[part_index].swim.director[0]; - velocity[1] -= particle_data[part_index].swim.v_swim * - particle_data[part_index].swim.director[1]; - velocity[2] -= particle_data[part_index].swim.v_swim * - particle_data[part_index].swim.director[2]; - - // The first three components are v_center, the last three v_source - // Do not use within LB, because these have already been converted back to MD - // units - particle_data[part_index].swim.v_cs[0 + 3 * flag_cs] = - interpolated_u.x * para->agrid / para->tau; - particle_data[part_index].swim.v_cs[1 + 3 * flag_cs] = - interpolated_u.y * para->agrid / para->tau; - particle_data[part_index].swim.v_cs[2 + 3 * flag_cs] = - interpolated_u.z * para->agrid / para->tau; -#endif - - /* take care to rescale velocities with time_step and transform to MD units - * (eq. 
(9) @cite ahlrichs99a) */ - - /* Viscous force */ - float3 viscforce_density{0.0f, 0.0f, 0.0f}; - viscforce_density.x -= - friction * (velocity[0] - interpolated_u.x * para->agrid / para->tau); - viscforce_density.y -= - friction * (velocity[1] - interpolated_u.y * para->agrid / para->tau); - viscforce_density.z -= - friction * (velocity[2] - interpolated_u.z * para->agrid / para->tau); - -#ifdef LB_ELECTROHYDRODYNAMICS - viscforce_density.x += friction * particle_data[part_index].mu_E[0]; - viscforce_density.y += friction * particle_data[part_index].mu_E[1]; - viscforce_density.z += friction * particle_data[part_index].mu_E[2]; -#endif - - if (para->kT > 0.0) { - /* add stochastic force of zero mean (eq. (15) @cite ahlrichs99a) */ - float4 random_floats = random_wrapper_philox( - static_cast(particle_data[part_index].identity), LBQ * 32, - philox_counter); - /* lb_coupl_pref is stored in MD units (force). - * Eq. (16) @cite ahlrichs99a. - * The factor 12 comes from the fact that we use random numbers - * from -0.5 to 0.5 (equally distributed) which have variance 1/12. - * time_step comes from the discretization. - */ - float lb_coupl_pref = sqrtf(12.f * 2.f * friction * para->kT / time_step); - viscforce_density.x += lb_coupl_pref * (random_floats.w - 0.5f); - viscforce_density.y += lb_coupl_pref * (random_floats.x - 0.5f); - viscforce_density.z += lb_coupl_pref * (random_floats.y - 0.5f); - } - /* delta_j for transform momentum transfer to lattice units which is done - in calc_node_force (eq. 
(12) @cite ahlrichs99a) */ - - // only add to particle_force for particle centre <=> (1-flag_cs) = 1 - particle_force[3 * part_index + 0] += - (1 - flag_cs_float) * viscforce_density.x; - particle_force[3 * part_index + 1] += - (1 - flag_cs_float) * viscforce_density.y; - particle_force[3 * part_index + 2] += - (1 - flag_cs_float) * viscforce_density.z; - - // only add to particle_force for particle centre <=> (1-flag_cs) = 1 - delta_j[0] -= ((1 - flag_cs_float) * viscforce_density.x) * time_step * - para->tau / para->agrid; - delta_j[1] -= ((1 - flag_cs_float) * viscforce_density.y) * time_step * - para->tau / para->agrid; - delta_j[2] -= ((1 - flag_cs_float) * viscforce_density.z) * time_step * - para->tau / para->agrid; - -#ifdef ENGINE - // add swimming force to source position - delta_j[0] -= flag_cs_float * particle_data[part_index].swim.f_swim * - particle_data[part_index].swim.director[0] * time_step * - para->tau / para->agrid; - delta_j[1] -= flag_cs_float * particle_data[part_index].swim.f_swim * - particle_data[part_index].swim.director[1] * time_step * - para->tau / para->agrid; - delta_j[2] -= flag_cs_float * particle_data[part_index].swim.f_swim * - particle_data[part_index].swim.director[2] * time_step * - para->tau / para->agrid; -#endif -} - -/** Calculate the node force caused by the particles, with atomicAdd due to - * avoiding race conditions. - * Eq. (14) @cite ahlrichs99a. 
- * @param[in] delta Weighting of particle position - * @param[in] delta_j Weighting of particle momentum - * @param[in] node_index Node index around (8) particle - * @param[out] node_f Node force - * @tparam no_of_neighbours The number of neighbours to consider for - * interpolation - */ -template -__device__ void -calc_node_force(Utils::Array const &delta, - float const *delta_j, - Utils::Array const &node_index, - LB_node_force_density_gpu node_f) { - for (std::size_t node = 0; node < no_of_neighbours; ++node) { - for (unsigned i = 0; i < 3; ++i) { - atomicAdd(&(node_f.force_density[node_index[node]][i]), - delta[node] * delta_j[i]); - } - } -} - -/*********************************************************/ -/** \name System setup and Kernel functions */ -/*********************************************************/ - -/** Kernel to calculate local populations from hydrodynamic fields. - * The mapping is given in terms of the equilibrium distribution. - * - * Eq. (2.15) @cite ladd94a. - * Eq. (4) in @cite usta05a. - * - * @param[out] n_a %Lattice site - * @param[out] gpu_check Additional check if GPU kernel are executed - * @param[out] d_v Local device values - * @param[in] node_f Node forces - */ -__global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, - LB_node_force_density_gpu node_f, - bool *gpu_check) { - /* TODO: this can handle only a uniform density, something similar, but local, - has to be called every time the fields are set by the user ! 
*/ - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - if (index < para->number_of_nodes) { - Utils::Array mode; - - gpu_check[0] = true; - - /* default values for fields in lattice units */ - float Rho = para->rho; - Utils::Array v{}; - Utils::Array pi = {{Rho * D3Q19::c_sound_sq, 0.0f, - Rho * D3Q19::c_sound_sq, 0.0f, 0.0f, - Rho * D3Q19::c_sound_sq}}; - Utils::Array local_pi{}; - float rhoc_sq = Rho * D3Q19::c_sound_sq; - float avg_rho = para->rho; - float local_rho, trace; - Utils::Array local_j{}; - - local_rho = Rho; - - local_j[0] = Rho * v[0]; - local_j[1] = Rho * v[1]; - local_j[2] = Rho * v[2]; - - local_pi = pi; - - // reduce the pressure tensor to the part needed here. - - local_pi[0] -= rhoc_sq; - local_pi[2] -= rhoc_sq; - local_pi[5] -= rhoc_sq; - - trace = local_pi[0] + local_pi[2] + local_pi[5]; - - float rho_times_coeff; - float tmp1, tmp2; - - /* update the q=0 sublattice */ - n_a.populations[index][0] = - 1.0f / 3.0f * (local_rho - avg_rho) - 1.0f / 2.0f * trace; - - /* update the q=1 sublattice */ - rho_times_coeff = 1.0f / 18.0f * (local_rho - avg_rho); - - n_a.populations[index][1] = rho_times_coeff + 1.0f / 6.0f * local_j[0] + - 1.0f / 4.0f * local_pi[0] - - 1.0f / 12.0f * trace; - n_a.populations[index][2] = rho_times_coeff - 1.0f / 6.0f * local_j[0] + - 1.0f / 4.0f * local_pi[0] - - 1.0f / 12.0f * trace; - n_a.populations[index][3] = rho_times_coeff + 1.0f / 6.0f * local_j[1] + - 1.0f / 4.0f * local_pi[2] - - 1.0f / 12.0f * trace; - n_a.populations[index][4] = rho_times_coeff - 1.0f / 6.0f * local_j[1] + - 1.0f / 4.0f * local_pi[2] - - 1.0f / 12.0f * trace; - n_a.populations[index][5] = rho_times_coeff + 1.0f / 6.0f * local_j[2] + - 1.0f / 4.0f * local_pi[5] - - 1.0f / 12.0f * trace; - n_a.populations[index][6] = rho_times_coeff - 1.0f / 6.0f * local_j[2] + - 1.0f / 4.0f * local_pi[5] - - 1.0f / 12.0f * trace; - - /* update the q=2 sublattice */ - rho_times_coeff = 1.0f / 36.0f * 
(local_rho - avg_rho); - - tmp1 = local_pi[0] + local_pi[2]; - tmp2 = 2.0f * local_pi[1]; - n_a.populations[index][7] = - rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[1]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[index][8] = - rho_times_coeff - 1.0f / 12.0f * (local_j[0] + local_j[1]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[index][9] = - rho_times_coeff + 1.0f / 12.0f * (local_j[0] - local_j[1]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.populations[index][10] = - rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[1]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - - tmp1 = local_pi[0] + local_pi[5]; - tmp2 = 2.0f * local_pi[3]; - - n_a.populations[index][11] = - rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[2]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[index][12] = - rho_times_coeff - 1.0f / 12.0f * (local_j[0] + local_j[2]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[index][13] = - rho_times_coeff + 1.0f / 12.0f * (local_j[0] - local_j[2]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.populations[index][14] = - rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[2]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - - tmp1 = local_pi[2] + local_pi[5]; - tmp2 = 2.0f * local_pi[4]; - - n_a.populations[index][15] = - rho_times_coeff + 1.0f / 12.0f * (local_j[1] + local_j[2]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[index][16] = - rho_times_coeff - 1.0f / 12.0f * (local_j[1] + local_j[2]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[index][17] = - rho_times_coeff + 1.0f / 12.0f * (local_j[1] - local_j[2]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.populations[index][18] = - rho_times_coeff - 1.0f / 12.0f * (local_j[1] - local_j[2]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * 
trace; - - calc_m_from_n(n_a.populations[index], mode); - update_rho_v(mode, index, node_f, d_v); - } -} - -__global__ void set_force_density(unsigned single_nodeindex, - float const *force_density, - LB_node_force_density_gpu node_f) { - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - if (index == 0) { - node_f.force_density[single_nodeindex][0] = force_density[0]; - node_f.force_density[single_nodeindex][1] = force_density[1]; - node_f.force_density[single_nodeindex][2] = force_density[2]; - } -} - -/** Kernel to calculate local populations from hydrodynamic fields - * from given flow field velocities. The mapping is given in terms of - * the equilibrium distribution. - * - * Eq. (2.15) @cite ladd94a. - * Eq. (4) in @cite usta05a. - * - * @param[out] n_a Current nodes array (double buffering!) - * @param[in] single_nodeindex Single node index - * @param[in] velocity Velocity - * @param[out] d_v Local device values - * @param[in] node_f Node forces - */ -__global__ void set_u_from_rho_v_pi(LB_nodes_gpu n_a, unsigned single_nodeindex, - float const *velocity, LB_rho_v_gpu *d_v, - LB_node_force_density_gpu node_f) { - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - if (index == 0) { - float local_rho; - float local_j[3]; - float local_pi[6]; - float trace, avg_rho; - float rho_times_coeff; - float tmp1, tmp2; - - Utils::Array mode_for_pi; - float rho_from_m; - float j_from_m[3]; - Utils::Array pi_from_m; - - // Calculate the modes for this node - - calc_m_from_n(n_a.populations[single_nodeindex], mode_for_pi); - - // Reset the d_v - - update_rho_v(mode_for_pi, single_nodeindex, node_f, d_v); - - // Calculate the density, velocity, and pressure tensor - // in LB unit for this node - - calc_values_from_m(mode_for_pi, d_v[single_nodeindex], &rho_from_m, - j_from_m, pi_from_m); - - // Take LB component density and calculate the equilibrium part - local_rho = 
rho_from_m; - avg_rho = para->rho; - - // Take LB component velocity and make it a momentum - - local_j[0] = local_rho * velocity[0]; - local_j[1] = local_rho * velocity[1]; - local_j[2] = local_rho * velocity[2]; - // Take LB component pressure tensor and put in equilibrium - - local_pi[0] = pi_from_m[0]; - local_pi[1] = pi_from_m[1]; - local_pi[2] = pi_from_m[2]; - local_pi[3] = pi_from_m[3]; - local_pi[4] = pi_from_m[4]; - local_pi[5] = pi_from_m[5]; - - trace = local_pi[0] + local_pi[2] + local_pi[5]; - - // update the q=0 sublattice - - n_a.populations[single_nodeindex][0] = - 1.0f / 3.0f * (local_rho - avg_rho) - 1.0f / 2.0f * trace; - - // update the q=1 sublattice - - rho_times_coeff = 1.0f / 18.0f * (local_rho - avg_rho); - - n_a.populations[single_nodeindex][1] = - rho_times_coeff + 1.0f / 6.0f * local_j[0] + 1.0f / 4.0f * local_pi[0] - - 1.0f / 12.0f * trace; - n_a.populations[single_nodeindex][2] = - rho_times_coeff - 1.0f / 6.0f * local_j[0] + 1.0f / 4.0f * local_pi[0] - - 1.0f / 12.0f * trace; - n_a.populations[single_nodeindex][3] = - rho_times_coeff + 1.0f / 6.0f * local_j[1] + 1.0f / 4.0f * local_pi[2] - - 1.0f / 12.0f * trace; - n_a.populations[single_nodeindex][4] = - rho_times_coeff - 1.0f / 6.0f * local_j[1] + 1.0f / 4.0f * local_pi[2] - - 1.0f / 12.0f * trace; - n_a.populations[single_nodeindex][5] = - rho_times_coeff + 1.0f / 6.0f * local_j[2] + 1.0f / 4.0f * local_pi[5] - - 1.0f / 12.0f * trace; - n_a.populations[single_nodeindex][6] = - rho_times_coeff - 1.0f / 6.0f * local_j[2] + 1.0f / 4.0f * local_pi[5] - - 1.0f / 12.0f * trace; - - // update the q=2 sublattice - - rho_times_coeff = 1.0f / 36.0f * (local_rho - avg_rho); - - tmp1 = local_pi[0] + local_pi[2]; - tmp2 = 2.0f * local_pi[1]; - - n_a.populations[single_nodeindex][7] = - rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[1]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[single_nodeindex][8] = - rho_times_coeff - 1.0f / 12.0f * (local_j[0] + 
local_j[1]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[single_nodeindex][9] = - rho_times_coeff + 1.0f / 12.0f * (local_j[0] - local_j[1]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.populations[single_nodeindex][10] = - rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[1]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - - tmp1 = local_pi[0] + local_pi[5]; - tmp2 = 2.0f * local_pi[3]; - - n_a.populations[single_nodeindex][11] = - rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[2]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[single_nodeindex][12] = - rho_times_coeff - 1.0f / 12.0f * (local_j[0] + local_j[2]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[single_nodeindex][13] = - rho_times_coeff + 1.0f / 12.0f * (local_j[0] - local_j[2]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.populations[single_nodeindex][14] = - rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[2]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - - tmp1 = local_pi[2] + local_pi[5]; - tmp2 = 2.0f * local_pi[4]; - - n_a.populations[single_nodeindex][15] = - rho_times_coeff + 1.0f / 12.0f * (local_j[1] + local_j[2]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[single_nodeindex][16] = - rho_times_coeff - 1.0f / 12.0f * (local_j[1] + local_j[2]) + - 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.populations[single_nodeindex][17] = - rho_times_coeff + 1.0f / 12.0f * (local_j[1] - local_j[2]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.populations[single_nodeindex][18] = - rho_times_coeff - 1.0f / 12.0f * (local_j[1] - local_j[2]) + - 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - - // Calculate the modes for this node - - calc_m_from_n(n_a.populations[single_nodeindex], mode_for_pi); - - // Update the density and velocity field for this mode - - update_rho_v(mode_for_pi, 
single_nodeindex, node_f, d_v); - } -} - -/** Calculate the mass of the whole fluid kernel - * @param[out] sum Resulting mass - * @param[in] n_a Local node residing in array a - */ -__global__ void calc_mass(LB_nodes_gpu n_a, float *sum) { - - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - if (index < para->number_of_nodes) { - Utils::Array mode; - calc_mass_and_momentum_mode(mode, n_a, index); - float Rho = mode[0] + para->rho; - atomicAdd(&(sum[0]), Rho); - } -} - -/** (Re-)initialize the node force density / set the external force - * density in lb units - * @param[out] node_f Local node force density - */ -__global__ void reinit_node_force(LB_node_force_density_gpu node_f) { - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - if (index < para->number_of_nodes) { - node_f.force_density[index][0] = para->ext_force_density[0]; - node_f.force_density[index][1] = para->ext_force_density[1]; - node_f.force_density[index][2] = para->ext_force_density[2]; - } -} - -/** Kernel to set the local density - * - * @param[out] n_a Current nodes array (double buffering!) 
- * @param[in] single_nodeindex Node to set the velocity for - * @param[in] rho Density to set - * @param[in] d_v Local modes - */ -__global__ void set_rho(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, - unsigned single_nodeindex, float rho) { - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - /* Note: this sets the velocities to zero */ - if (index == 0) { - float local_rho; - - /* default values for fields in lattice units */ - local_rho = (rho - para->rho); - d_v[single_nodeindex].rho = rho; - - n_a.populations[single_nodeindex][0] = 1.0f / 3.0f * local_rho; - n_a.populations[single_nodeindex][1] = 1.0f / 18.0f * local_rho; - n_a.populations[single_nodeindex][2] = 1.0f / 18.0f * local_rho; - n_a.populations[single_nodeindex][3] = 1.0f / 18.0f * local_rho; - n_a.populations[single_nodeindex][4] = 1.0f / 18.0f * local_rho; - n_a.populations[single_nodeindex][5] = 1.0f / 18.0f * local_rho; - n_a.populations[single_nodeindex][6] = 1.0f / 18.0f * local_rho; - n_a.populations[single_nodeindex][7] = 1.0f / 36.0f * local_rho; - n_a.populations[single_nodeindex][8] = 1.0f / 36.0f * local_rho; - n_a.populations[single_nodeindex][9] = 1.0f / 36.0f * local_rho; - n_a.populations[single_nodeindex][10] = 1.0f / 36.0f * local_rho; - n_a.populations[single_nodeindex][11] = 1.0f / 36.0f * local_rho; - n_a.populations[single_nodeindex][12] = 1.0f / 36.0f * local_rho; - n_a.populations[single_nodeindex][13] = 1.0f / 36.0f * local_rho; - n_a.populations[single_nodeindex][14] = 1.0f / 36.0f * local_rho; - n_a.populations[single_nodeindex][15] = 1.0f / 36.0f * local_rho; - n_a.populations[single_nodeindex][16] = 1.0f / 36.0f * local_rho; - n_a.populations[single_nodeindex][17] = 1.0f / 36.0f * local_rho; - n_a.populations[single_nodeindex][18] = 1.0f / 36.0f * local_rho; - } -} - -/** Set the boundary flag for all boundary nodes - * @param[in] boundary_node_list Indices of the boundary nodes - * @param[in] boundary_index_list Flag for the 
corresponding boundary - * @param[in] boundary_velocities Boundary velocities - * @param[in] number_of_boundnodes Number of boundary nodes - * @param[in] boundaries Boundary information - */ -__global__ void init_boundaries(int const *boundary_node_list, - int const *boundary_index_list, - float const *boundary_velocities, - unsigned number_of_boundnodes, - LB_boundaries_gpu boundaries) { - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - if (index < number_of_boundnodes) { - auto const node_index = boundary_node_list[index]; - auto const boundary_index = boundary_index_list[index]; - - Utils::Array v = { - boundary_velocities[3 * (boundary_index - 1) + 0], - boundary_velocities[3 * (boundary_index - 1) + 1], - boundary_velocities[3 * (boundary_index - 1) + 2]}; - - boundaries.index[node_index] = static_cast(boundary_index); - boundaries.velocity[node_index] = v; - } -} - -/** Reset the boundary flag of every node */ -__global__ void reset_boundaries(LB_boundaries_gpu boundaries) { - std::size_t index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - if (index < para->number_of_nodes) { - boundaries.index[index] = 0; - } -} - -/** Integration step of the LB-fluid-solver - * @param[in] n_a Local node residing in array a - * @param[out] n_b Local node residing in array b - * @param[in,out] d_v Local device values - * @param[in,out] node_f Local node force density - * @param[in] philox_counter Philox counter - */ -__global__ void integrate(LB_nodes_gpu n_a, LB_nodes_gpu n_b, LB_rho_v_gpu *d_v, - LB_node_force_density_gpu node_f, - uint64_t philox_counter) { - /* every node is connected to a thread via the index */ - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - /* the 19 moments (modes) are only temporary register values */ - Utils::Array mode; - - if (index < para->number_of_nodes) { - calc_m_from_n(n_a.populations[index], 
mode); - relax_modes(mode, index, node_f, d_v); - thermalize_modes(mode, index, philox_counter); - apply_forces(index, mode, node_f, d_v); - normalize_modes(mode); - calc_n_from_modes_push(n_b, mode, index); - } -} - -/** Integration step of the LB-fluid-solver - * @param[in] n_a Local node residing in array a - * @param[out] n_b Local node residing in array b - * @param[in,out] d_v Local device values - * @param[in,out] node_f Local node force density - */ -__global__ void integrate(LB_nodes_gpu n_a, LB_nodes_gpu n_b, LB_rho_v_gpu *d_v, - LB_node_force_density_gpu node_f) { - /* every node is connected to a thread via the index */ - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - /* the 19 moments (modes) are only temporary register values */ - Utils::Array mode; - - if (index < para->number_of_nodes) { - calc_m_from_n(n_a.populations[index], mode); - relax_modes(mode, index, node_f, d_v); - apply_forces(index, mode, node_f, d_v); - normalize_modes(mode); - calc_n_from_modes_push(n_b, mode, index); - } -} - -/** Particle interaction kernel - * @param[in] n_a Local node residing in array a - * @param[in,out] particle_data Particle position and velocity - * @param[in,out] particle_force Particle force - * @param[out] node_f Local node force - * @param[in] couple_virtual If true, virtual particles are also coupled - * @param[in] philox_counter Philox counter - * @param[in] friction Friction constant for the particle coupling - * @param[in] time_step MD time step - * @tparam no_of_neighbours The number of neighbours to consider for - * interpolation - */ -template -__global__ void -calc_fluid_particle_ia(LB_nodes_gpu n_a, - Utils::Span particle_data, - float *particle_force, LB_node_force_density_gpu node_f, - bool couple_virtual, uint64_t philox_counter, - float friction, float time_step) { - - unsigned int part_index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - Utils::Array 
node_index; - Utils::Array delta; - float delta_j[3]; - if (part_index < particle_data.size()) { -#if defined(VIRTUAL_SITES) - if (!particle_data[part_index].is_virtual || couple_virtual) -#endif - { - /* force acting on the particle. delta_j will be used later to compute the - * force that acts back onto the fluid. */ - calc_viscous_force( - n_a, delta, particle_data.data(), particle_force, part_index, delta_j, - node_index, false, philox_counter, friction, time_step); - calc_node_force(delta, delta_j, node_index, node_f); - -#ifdef ENGINE - if (particle_data[part_index].swim.swimming) { - calc_viscous_force( - n_a, delta, particle_data.data(), particle_force, part_index, - delta_j, node_index, true, philox_counter, friction, time_step); - calc_node_force(delta, delta_j, node_index, node_f); - } -#endif - } - } -} - -#ifdef LB_BOUNDARIES_GPU -/** Bounce back boundary kernel - * @param[in] n_curr Pointer to local node receiving the current node field - * @param[in] boundaries Constant velocity at the boundary, set by the user - * @param[out] lb_boundary_force Force on the boundary nodes - */ -__global__ void apply_boundaries(LB_nodes_gpu n_curr, - LB_boundaries_gpu boundaries, - float *lb_boundary_force) { - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - if (index < para->number_of_nodes) - bounce_back_boundaries(n_curr, boundaries, index, lb_boundary_force); -} - -#endif - -/** Get physical values of the nodes (density, velocity, ...) 
- * @param[in] n_a Local node residing in array a - * @param[out] p_v Local print values - * @param[out] d_v Local device values - * @param[in] node_f Local node force - */ -__global__ void -get_mesoscopic_values_in_LB_units(LB_nodes_gpu n_a, LB_rho_v_pi_gpu *p_v, - LB_rho_v_gpu *d_v, - LB_node_force_density_gpu node_f) { - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - if (index < para->number_of_nodes) { - Utils::Array mode; - calc_m_from_n(n_a.populations[index], mode); - calc_values_in_LB_units(n_a, mode, p_v, d_v, node_f, index, index); - } -} - -/** Get boundary flags - * @param[in] n_a Local node residing in array a - * @param[out] device_bound_array Local device values - */ -__global__ void lb_get_boundaries(LB_nodes_gpu n_a, - unsigned int *device_bound_array) { - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - if (index < para->number_of_nodes) - device_bound_array[index] = n_a.boundary[index]; -} - -/** Print single node values kernel - * @param[in] single_nodeindex Node index - * @param[out] d_p_v Result - * @param[in] n_a Local node residing in array a - * @param[out] d_v Local device values - * @param[in] node_f Local node force - */ -__global__ void lb_print_node(unsigned int single_nodeindex, - LB_rho_v_pi_gpu *d_p_v, LB_nodes_gpu n_a, - LB_rho_v_gpu *d_v, - LB_node_force_density_gpu node_f) { - Utils::Array mode; - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - if (index == 0) { - calc_m_from_n(n_a.populations[single_nodeindex], mode); - - /* the following actually copies rho and v from d_v, and calculates pi */ - calc_values_in_LB_units(n_a, mode, d_p_v, d_v, node_f, single_nodeindex, 0); - } -} - -__global__ void momentum(LB_nodes_gpu n_a, LB_node_force_density_gpu node_f, - float *sum) { - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + 
threadIdx.x; - - if (index < para->number_of_nodes) { - float j[3] = {0.0f, 0.0f, 0.0f}; - Utils::Array mode{}; - - calc_mass_and_momentum_mode(mode, n_a, index); - - j[0] += mode[1] + 0.5f * node_f.force_density[index][0]; - j[1] += mode[2] + 0.5f * node_f.force_density[index][1]; - j[2] += mode[3] + 0.5f * node_f.force_density[index][2]; - -#ifdef LB_BOUNDARIES_GPU - if (n_a.boundary[index]) - j[0] = j[1] = j[2] = 0.0f; -#endif - - atomicAdd(&(sum[0]), j[0]); - atomicAdd(&(sum[1]), j[1]); - atomicAdd(&(sum[2]), j[2]); - } -} - -/** Print single node boundary flag - * @param[in] single_nodeindex Node index - * @param[out] device_flag Result - * @param[in] n_a Local node residing in array a - */ -__global__ void lb_get_boundary_flag(unsigned int single_nodeindex, - unsigned int *device_flag, - LB_nodes_gpu n_a) { - unsigned int index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - if (index == 0) - device_flag[0] = n_a.boundary[single_nodeindex]; -} - -/**********************************************************************/ -/* Host functions to setup and call kernels*/ -/**********************************************************************/ - -void lb_get_para_pointer(LB_parameters_gpu **pointer_address) { - auto const error = cudaGetSymbolAddress((void **)pointer_address, para); - if (error != cudaSuccess) { - fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(error)); - errexit(); - } -} - -void lb_get_boundary_force_pointer(float **pointer_address) { -#ifdef LB_BOUNDARIES_GPU - *pointer_address = lb_boundary_force; -#endif -} - -/** Initialization for the lb gpu fluid called from host - * @param lbpar_gpu Pointer to parameters to setup the lb field - */ -void lb_init_GPU(const LB_parameters_gpu &lbpar_gpu) { -#define free_realloc_and_clear(var, size) \ - { \ - if ((var) != nullptr) \ - cuda_safe_mem(cudaFree((var))); \ - cuda_safe_mem(cudaMalloc((void **)&(var), size)); \ - cudaMemset(var, 0, size); \ - } - - /* 
Allocate structs in device memory*/ - free_realloc_and_clear(device_rho_v, - lbpar_gpu.number_of_nodes * sizeof(LB_rho_v_gpu)); - - /* TODO: this is almost a copy of device_rho_v; think about eliminating - * it, and maybe pi can be added to device_rho_v in this case */ - free_realloc_and_clear(print_rho_v_pi, - lbpar_gpu.number_of_nodes * sizeof(LB_rho_v_pi_gpu)); - free_realloc_and_clear(nodes_a.populations, - lbpar_gpu.number_of_nodes * - sizeof(Utils::Array)); - free_realloc_and_clear(nodes_b.populations, - lbpar_gpu.number_of_nodes * - sizeof(Utils::Array)); - free_realloc_and_clear(node_f.force_density, - lbpar_gpu.number_of_nodes * - sizeof(Utils::Array)); -#if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) || defined(EK_DEBUG) - free_realloc_and_clear(node_f.force_density_buf, - lbpar_gpu.number_of_nodes * - sizeof(Utils::Array)); -#endif - free_realloc_and_clear(boundaries.index, - lbpar_gpu.number_of_nodes * sizeof(unsigned int)); - free_realloc_and_clear(boundaries.velocity, - lbpar_gpu.number_of_nodes * - sizeof(Utils::Array)); - - nodes_a.boundary = nodes_b.boundary = boundaries.index; - nodes_a.boundary_velocity = nodes_b.boundary_velocity = boundaries.velocity; - - /* write parameters in const memory */ - cuda_safe_mem( - cudaMemcpyToSymbol(para, &lbpar_gpu, sizeof(LB_parameters_gpu))); - - free_realloc_and_clear(device_gpu_lb_initialized, sizeof(bool)); - - dim3 dim_grid = - calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); - - KERNELCALL(reset_boundaries, dim_grid, threads_per_block, boundaries); - - /* calc of velocity densities from given parameters and initialize the - * Node_Force array with zero */ - KERNELCALL(reinit_node_force, dim_grid, threads_per_block, (node_f)); - KERNELCALL(calc_n_from_rho_j_pi, dim_grid, threads_per_block, nodes_a, - device_rho_v, node_f, device_gpu_lb_initialized); - - intflag = true; - current_nodes = &nodes_a; - bool host_gpu_lb_initialized = false; - cuda_safe_mem(cudaMemcpy(&host_gpu_lb_initialized, 
device_gpu_lb_initialized, - sizeof(bool), cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); - - if (!host_gpu_lb_initialized) { - fprintf(stderr, "initialization of LB GPU code failed!\n"); - errexit(); - } -} - -/** Reinitialization for the lb gpu fluid called from host - * @param lbpar_gpu Pointer to parameters to setup the lb field - */ -void lb_reinit_GPU(LB_parameters_gpu *lbpar_gpu) { - /* write parameters in const memory */ - cuda_safe_mem(cudaMemcpyToSymbol(para, lbpar_gpu, sizeof(LB_parameters_gpu))); - - dim3 dim_grid = - calculate_dim_grid(lbpar_gpu->number_of_nodes, 4, threads_per_block); - - /* calc of velocity densities from given parameters and initialize the - * Node_Force array with zero */ - KERNELCALL(calc_n_from_rho_j_pi, dim_grid, threads_per_block, nodes_a, - device_rho_v, node_f, device_gpu_lb_initialized); -} - -#ifdef LB_BOUNDARIES_GPU -/** Setup and call boundaries from the host - * @param host_n_lb_boundaries Number of LB boundaries - * @param number_of_boundnodes Number of boundnodes - * @param host_boundary_node_list The indices of the boundary nodes - * @param host_boundary_index_list The flag representing the corresponding - * boundary - * @param host_lb_boundary_velocity The constant velocity at the boundary, - * set by the user - */ -void lb_init_boundaries_GPU(std::size_t host_n_lb_boundaries, - unsigned number_of_boundnodes, - int *host_boundary_node_list, - int *host_boundary_index_list, - float *host_lb_boundary_velocity) { - - float *boundary_velocity = nullptr; - int *boundary_node_list = nullptr; - int *boundary_index_list = nullptr; - - auto const size_of_boundindex = number_of_boundnodes * sizeof(int); - cuda_safe_mem(cudaMalloc((void **)&boundary_node_list, size_of_boundindex)); - cuda_safe_mem(cudaMalloc((void **)&boundary_index_list, size_of_boundindex)); - cuda_safe_mem(cudaMemcpy(boundary_index_list, host_boundary_index_list, - size_of_boundindex, cudaMemcpyHostToDevice)); - 
cuda_safe_mem(cudaMemcpy(boundary_node_list, host_boundary_node_list, - size_of_boundindex, cudaMemcpyHostToDevice)); - cuda_safe_mem(cudaMalloc((void **)&lb_boundary_force, - 3 * host_n_lb_boundaries * sizeof(float))); - cuda_safe_mem(cudaMalloc((void **)&boundary_velocity, - 3 * host_n_lb_boundaries * sizeof(float))); - cuda_safe_mem( - cudaMemcpy(boundary_velocity, host_lb_boundary_velocity, - 3 * LBBoundaries::lbboundaries.size() * sizeof(float), - cudaMemcpyHostToDevice)); - - /* values for the kernel call */ - dim3 dim_grid = - calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); - - KERNELCALL(reset_boundaries, dim_grid, threads_per_block, boundaries); - - if (LBBoundaries::lbboundaries.empty()) { - cudaDeviceSynchronize(); - return; - } - - if (number_of_boundnodes == 0) { - fprintf(stderr, - "WARNING: boundary cmd executed but no boundary node found!\n"); - } else { - dim3 dim_grid_bound = - calculate_dim_grid(number_of_boundnodes, 4, threads_per_block); - - KERNELCALL(init_boundaries, dim_grid_bound, threads_per_block, - boundary_node_list, boundary_index_list, boundary_velocity, - number_of_boundnodes, boundaries); - } - - cudaFree(boundary_velocity); - cudaFree(boundary_node_list); - cudaFree(boundary_index_list); - - cudaDeviceSynchronize(); -} -#endif -/** Setup and call extern single node force initialization from the host - * @param lbpar_gpu Host parameter struct - */ -void lb_reinit_extern_nodeforce_GPU(LB_parameters_gpu *lbpar_gpu) { - cuda_safe_mem(cudaMemcpyToSymbol(para, lbpar_gpu, sizeof(LB_parameters_gpu))); - - dim3 dim_grid = - calculate_dim_grid(lbpar_gpu->number_of_nodes, 4, threads_per_block); - - KERNELCALL(reinit_node_force, dim_grid, threads_per_block, node_f); -} - -/** Setup and call particle kernel from the host - * @tparam no_of_neighbours The number of neighbours to consider for - * interpolation - */ -template -void lb_calc_particle_lattice_ia_gpu(bool couple_virtual, double friction, - double time_step) { - 
auto device_particles = gpu_get_particle_pointer(); - - if (device_particles.empty()) { - return; - } - - dim3 dim_grid = calculate_dim_grid( - static_cast(device_particles.size()), 4, threads_per_block); - if (lbpar_gpu.kT > 0.f) { - assert(rng_counter_coupling_gpu); - KERNELCALL(calc_fluid_particle_ia, dim_grid, - threads_per_block, *current_nodes, device_particles, - gpu_get_particle_force_pointer(), node_f, couple_virtual, - rng_counter_coupling_gpu->value(), static_cast(friction), - static_cast(time_step)); - } else { - // We use a dummy value for the RNG counter if no temperature is set. - KERNELCALL(calc_fluid_particle_ia, dim_grid, - threads_per_block, *current_nodes, device_particles, - gpu_get_particle_force_pointer(), node_f, couple_virtual, 0, - static_cast(friction), static_cast(time_step)); - } -} -template void lb_calc_particle_lattice_ia_gpu<8>(bool couple_virtual, - double friction, - double time_step); -template void lb_calc_particle_lattice_ia_gpu<27>(bool couple_virtual, - double friction, - double time_step); - -/** Setup and call kernel for getting macroscopic fluid values of all nodes - * @param host_values struct to save the gpu values - */ -void lb_get_values_GPU(LB_rho_v_pi_gpu *host_values) { - dim3 dim_grid = - calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); - - KERNELCALL(get_mesoscopic_values_in_LB_units, dim_grid, threads_per_block, - *current_nodes, print_rho_v_pi, device_rho_v, node_f); - cuda_safe_mem(cudaMemcpy(host_values, print_rho_v_pi, - lbpar_gpu.number_of_nodes * sizeof(LB_rho_v_pi_gpu), - cudaMemcpyDeviceToHost)); -} - -/** Get all the boundary flags for all nodes - * @param host_bound_array here go the values of the boundary flag - */ -void lb_get_boundary_flags_GPU(unsigned int *host_bound_array) { - unsigned int *device_bound_array; - cuda_safe_mem(cudaMalloc((void **)&device_bound_array, - lbpar_gpu.number_of_nodes * sizeof(unsigned int))); - - dim3 dim_grid = - 
calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); - - KERNELCALL(lb_get_boundaries, dim_grid, threads_per_block, *current_nodes, - device_bound_array); - - cuda_safe_mem(cudaMemcpy(host_bound_array, device_bound_array, - lbpar_gpu.number_of_nodes * sizeof(unsigned int), - cudaMemcpyDeviceToHost)); - - cudaFree(device_bound_array); -} - -/** Setup and call kernel for getting macroscopic fluid values of a single - * node - */ -void lb_print_node_GPU(unsigned single_nodeindex, - LB_rho_v_pi_gpu *host_print_values) { - LB_rho_v_pi_gpu *device_print_values; - cuda_safe_mem( - cudaMalloc((void **)&device_print_values, sizeof(LB_rho_v_pi_gpu))); - unsigned threads_per_block_print = 1; - unsigned blocks_per_grid_print_y = 1; - unsigned blocks_per_grid_print_x = 1; - dim3 dim_grid_print = - make_uint3(blocks_per_grid_print_x, blocks_per_grid_print_y, 1); - - KERNELCALL(lb_print_node, dim_grid_print, threads_per_block_print, - single_nodeindex, device_print_values, *current_nodes, - device_rho_v, node_f); - - cuda_safe_mem(cudaMemcpy(host_print_values, device_print_values, - sizeof(LB_rho_v_pi_gpu), cudaMemcpyDeviceToHost)); - cudaFree(device_print_values); -} - -/** Setup and call kernel to calculate the total momentum of the hole fluid - * @param mass value of the mass calculated on the GPU - */ -void lb_calc_fluid_mass_GPU(double *mass) { - float *tot_mass; - float cpu_mass = 0.0f; - cuda_safe_mem(cudaMalloc((void **)&tot_mass, sizeof(float))); - cuda_safe_mem( - cudaMemcpy(tot_mass, &cpu_mass, sizeof(float), cudaMemcpyHostToDevice)); - - dim3 dim_grid = - calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); - - KERNELCALL(calc_mass, dim_grid, threads_per_block, *current_nodes, tot_mass); - - cuda_safe_mem( - cudaMemcpy(&cpu_mass, tot_mass, sizeof(float), cudaMemcpyDeviceToHost)); - - cudaFree(tot_mass); - mass[0] = (double)(cpu_mass); -} - -/** Setup and call kernel to calculate the total momentum of the whole fluid - * @param host_mom 
value of the momentum calculated on the GPU - */ -void lb_calc_fluid_momentum_GPU(double *host_mom) { - float *tot_momentum; - float host_momentum[3] = {0.0f, 0.0f, 0.0f}; - cuda_safe_mem(cudaMalloc((void **)&tot_momentum, 3 * sizeof(float))); - cuda_safe_mem(cudaMemcpy(tot_momentum, host_momentum, 3 * sizeof(float), - cudaMemcpyHostToDevice)); - - dim3 dim_grid = - calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); - - KERNELCALL(momentum, dim_grid, threads_per_block, *current_nodes, node_f, - tot_momentum); - - cuda_safe_mem(cudaMemcpy(host_momentum, tot_momentum, 3 * sizeof(float), - cudaMemcpyDeviceToHost)); - - cudaFree(tot_momentum); - auto const lattice_speed = lbpar_gpu.agrid / lbpar_gpu.tau; - host_mom[0] = static_cast(host_momentum[0] * lattice_speed); - host_mom[1] = static_cast(host_momentum[1] * lattice_speed); - host_mom[2] = static_cast(host_momentum[2] * lattice_speed); -} - -/** Setup and call kernel for getting macroscopic fluid values of all nodes - * @param[out] host_checkpoint_vd LB populations - */ -void lb_save_checkpoint_GPU(float *const host_checkpoint_vd) { - cuda_safe_mem(cudaMemcpy(host_checkpoint_vd, current_nodes->populations, - lbpar_gpu.number_of_nodes * 19 * sizeof(float), - cudaMemcpyDeviceToHost)); -} - -/** Setup and call kernel for getting macroscopic fluid values of all nodes - * @param[in] host_checkpoint_vd LB populations - */ -void lb_load_checkpoint_GPU(float const *const host_checkpoint_vd) { - current_nodes = &nodes_a; - intflag = true; - - cuda_safe_mem( - cudaMemcpy(current_nodes->populations, host_checkpoint_vd, - lbpar_gpu.number_of_nodes * sizeof(Utils::Array), - cudaMemcpyHostToDevice)); -} - -/** Setup and call kernel to get the boundary flag of a single node - * @param single_nodeindex number of the node to get the flag for - * @param host_flag here goes the value of the boundary flag - */ -void lb_get_boundary_flag_GPU(unsigned int single_nodeindex, - unsigned int *host_flag) { - unsigned int 
*device_flag; - cuda_safe_mem(cudaMalloc((void **)&device_flag, sizeof(unsigned int))); - unsigned threads_per_block_flag = 1; - unsigned blocks_per_grid_flag_y = 1; - unsigned blocks_per_grid_flag_x = 1; - dim3 dim_grid_flag = - make_uint3(blocks_per_grid_flag_x, blocks_per_grid_flag_y, 1); - - KERNELCALL(lb_get_boundary_flag, dim_grid_flag, threads_per_block_flag, - single_nodeindex, device_flag, *current_nodes); - - cuda_safe_mem(cudaMemcpy(host_flag, device_flag, sizeof(unsigned int), - cudaMemcpyDeviceToHost)); - - cudaFree(device_flag); -} - -/** Set the density at a single node - * @param single_nodeindex the node to set the velocity for - * @param host_rho the density to set - */ -void lb_set_node_rho_GPU(unsigned single_nodeindex, float host_rho) { - unsigned threads_per_block_flag = 1; - unsigned blocks_per_grid_flag_y = 1; - unsigned blocks_per_grid_flag_x = 1; - dim3 dim_grid_flag = - make_uint3(blocks_per_grid_flag_x, blocks_per_grid_flag_y, 1); - KERNELCALL(set_rho, dim_grid_flag, threads_per_block_flag, *current_nodes, - device_rho_v, single_nodeindex, host_rho); -} - -/** Set the net velocity at a single node - * @param single_nodeindex the node to set the velocity for - * @param host_velocity the velocity to set - */ -void lb_set_node_velocity_GPU(unsigned single_nodeindex, float *host_velocity) { - float *device_velocity; - cuda_safe_mem(cudaMalloc((void **)&device_velocity, 3 * sizeof(float))); - cuda_safe_mem(cudaMemcpy(device_velocity, host_velocity, 3 * sizeof(float), - cudaMemcpyHostToDevice)); - unsigned threads_per_block_flag = 1; - unsigned blocks_per_grid_flag_y = 1; - unsigned blocks_per_grid_flag_x = 1; - dim3 dim_grid_flag = - make_uint3(blocks_per_grid_flag_x, blocks_per_grid_flag_y, 1); - - KERNELCALL(set_u_from_rho_v_pi, dim_grid_flag, threads_per_block_flag, - *current_nodes, single_nodeindex, device_velocity, device_rho_v, - node_f); - float force_density[3] = {0.0f, 0.0f, 0.0f}; - float *device_force_density; - 
cuda_safe_mem(cudaMalloc((void **)&device_force_density, 3 * sizeof(float))); - cuda_safe_mem(cudaMemcpy(device_force_density, force_density, - 3 * sizeof(float), cudaMemcpyHostToDevice)); - KERNELCALL(set_force_density, dim_grid_flag, threads_per_block_flag, - single_nodeindex, device_force_density, node_f); - cudaFree(device_velocity); - cudaFree(device_force_density); -} - -/** Reinitialize parameters - * @param lbpar_gpu struct containing the parameters of the fluid - */ -void reinit_parameters_GPU(LB_parameters_gpu *lbpar_gpu) { - /* write parameters in const memory */ - cuda_safe_mem(cudaMemcpyToSymbol(para, lbpar_gpu, sizeof(LB_parameters_gpu))); -} - -/** Integration kernel for the lb gpu fluid update called from host */ -void lb_integrate_GPU() { - dim3 dim_grid = - calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); -#ifdef LB_BOUNDARIES_GPU - if (!LBBoundaries::lbboundaries.empty()) { - cuda_safe_mem( - cudaMemset(lb_boundary_force, 0, - 3 * LBBoundaries::lbboundaries.size() * sizeof(float))); - } -#endif - - /* call of fluid step */ - if (intflag) { - if (lbpar_gpu.kT > 0.0) { - assert(rng_counter_fluid_gpu); - KERNELCALL(integrate, dim_grid, threads_per_block, nodes_a, nodes_b, - device_rho_v, node_f, rng_counter_fluid_gpu->value()); - } else { - KERNELCALL(integrate, dim_grid, threads_per_block, nodes_a, nodes_b, - device_rho_v, node_f); - } - current_nodes = &nodes_b; - intflag = false; - } else { - if (lbpar_gpu.kT > 0.0) { - assert(rng_counter_fluid_gpu); - KERNELCALL(integrate, dim_grid, threads_per_block, nodes_b, nodes_a, - device_rho_v, node_f, rng_counter_fluid_gpu->value()); - } else { - KERNELCALL(integrate, dim_grid, threads_per_block, nodes_b, nodes_a, - device_rho_v, node_f); - } - current_nodes = &nodes_a; - intflag = true; - } - -#ifdef LB_BOUNDARIES_GPU - if (!LBBoundaries::lbboundaries.empty()) { - KERNELCALL(apply_boundaries, dim_grid, threads_per_block, *current_nodes, - boundaries, lb_boundary_force); - } -#endif 
-} - -void lb_gpu_get_boundary_forces(std::vector &forces) { -#ifdef LB_BOUNDARIES_GPU - std::vector temp(3 * LBBoundaries::lbboundaries.size()); - cuda_safe_mem(cudaMemcpy(temp.data(), lb_boundary_force, - temp.size() * sizeof(float), - cudaMemcpyDeviceToHost)); - std::transform(temp.begin(), temp.end(), forces.begin(), - [](float val) { return -static_cast(val); }); -#endif -} - -struct lb_lbfluid_mass_of_particle { - __host__ __device__ float operator()(CUDA_particle_data particle) const { -#ifdef MASS - return particle.mass; -#else - return 1.f; -#endif - } -}; - -/** Set the populations of a specific node on the GPU - * @param[out] n_a Local node residing in array a - * @param[in] population New population - * @param[in] x x-coordinate of node - * @param[in] y y-coordinate of node - * @param[in] z z-coordinate of node - */ -__global__ void lb_lbfluid_set_population_kernel(LB_nodes_gpu n_a, - float const population[LBQ], - int x, int y, int z) { - auto const index = static_cast(xyz_to_index(x, y, z)); - - for (unsigned i = 0; i < LBQ; ++i) { - n_a.populations[index][i] = population[i]; - } -} - -/** Interface to set the populations of a specific node for the GPU - * @param[in] xyz Node coordinates - * @param[in] population_host Population - */ -void lb_lbfluid_set_population(const Utils::Vector3i &xyz, - float population_host[LBQ]) { - float *population_device; - cuda_safe_mem(cudaMalloc((void **)&population_device, LBQ * sizeof(float))); - cuda_safe_mem(cudaMemcpy(population_device, population_host, - LBQ * sizeof(float), cudaMemcpyHostToDevice)); - - dim3 dim_grid = make_uint3(1, 1, 1); - KERNELCALL(lb_lbfluid_set_population_kernel, dim_grid, 1, *current_nodes, - population_device, xyz[0], xyz[1], xyz[2]); - - cuda_safe_mem(cudaFree(population_device)); -} - -/** Get the populations of a specific node on the GPU - * @param[in] n_a Local node residing in array a - * @param[out] population Population - * @param[in] x x-coordinate of node - * @param[in] y 
y-coordinate of node - * @param[in] z z-coordinate of node - */ -__global__ void lb_lbfluid_get_population_kernel(LB_nodes_gpu n_a, - float population[LBQ], int x, - int y, int z) { - auto const index = static_cast(xyz_to_index(x, y, z)); - - for (unsigned i = 0; i < LBQ; ++i) { - population[i] = n_a.populations[index][i]; - } -} - -/** Interface to get the populations of a specific node for the GPU - * @param[in] xyz Node coordinates - * @param[out] population_host Population - */ -void lb_lbfluid_get_population(const Utils::Vector3i &xyz, - float population_host[LBQ]) { - float *population_device; - cuda_safe_mem(cudaMalloc((void **)&population_device, LBQ * sizeof(float))); - - dim3 dim_grid = make_uint3(1, 1, 1); - KERNELCALL(lb_lbfluid_get_population_kernel, dim_grid, 1, *current_nodes, - population_device, xyz[0], xyz[1], xyz[2]); - - cuda_safe_mem(cudaMemcpy(population_host, population_device, - LBQ * sizeof(float), cudaMemcpyDeviceToHost)); - - cuda_safe_mem(cudaFree(population_device)); -} - -/** - * @brief Velocity interpolation functor - * @tparam no_of_neighbours The number of neighbours to consider for - * interpolation - */ -template struct interpolation { - LB_nodes_gpu current_nodes_gpu; - LB_rho_v_gpu *d_v_gpu; - interpolation(LB_nodes_gpu _current_nodes_gpu, LB_rho_v_gpu *_d_v_gpu) - : current_nodes_gpu(_current_nodes_gpu), d_v_gpu(_d_v_gpu) {} - __device__ float3 operator()(const float3 &position) const { - float _position[3] = {position.x, position.y, position.z}; - Utils::Array node_indices; - Utils::Array delta; - return velocity_interpolation(current_nodes_gpu, _position, node_indices, - delta); - } -}; - -struct Plus : public thrust::binary_function, - Utils::Array, - Utils::Array> { - - __device__ Utils::Array - operator()(Utils::Array const &a, Utils::Array const &b) { - return {a[0] + b[0], a[1] + b[1], a[2] + b[2], - a[3] + b[3], a[4] + b[4], a[5] + b[5]}; - } -}; - -struct Stress { - template - __device__ Utils::Array operator()(T const 
&t) const { - Utils::Array modes; - calc_m_from_n(thrust::get<0>(t), modes); // NOLINT - return stress_from_stress_modes(stress_modes(thrust::get<1>(t), modes)); - } -}; - -Utils::Array stress_tensor_GPU() { - if (not current_nodes->populations or not device_rho_v) - throw std::runtime_error("LB not initialized"); - - auto pop_begin = thrust::device_pointer_cast(current_nodes->populations); - auto rho_v_begin = thrust::device_pointer_cast(device_rho_v); - auto begin = - thrust::make_zip_iterator(thrust::make_tuple(pop_begin, rho_v_begin)); - - auto pop_end = - thrust::device_pointer_cast(pop_begin + lbpar_gpu.number_of_nodes); - auto rho_v_end = - thrust::device_pointer_cast(rho_v_begin + lbpar_gpu.number_of_nodes); - auto end = thrust::make_zip_iterator(thrust::make_tuple(pop_end, rho_v_end)); - - return thrust::transform_reduce(begin, end, Stress(), - Utils::Array{}, Plus()); -}; - -template -void lb_get_interpolated_velocity_gpu(double const *positions, - double *velocities, int length) { - auto const size = static_cast(length); - thrust::host_vector positions_host(size); - for (unsigned p = 0; p < 3 * size; p += 3) { - // Cast double coming from python to float. 
- positions_host[p / 3].x = static_cast(positions[p]); - positions_host[p / 3].y = static_cast(positions[p + 1]); - positions_host[p / 3].z = static_cast(positions[p + 2]); - } - thrust::device_vector positions_device = positions_host; - thrust::device_vector velocities_device(size); - thrust::transform( - positions_device.begin(), positions_device.end(), - velocities_device.begin(), - interpolation(*current_nodes, device_rho_v)); - thrust::host_vector velocities_host = velocities_device; - unsigned index = 0; - for (auto v : velocities_host) { - velocities[index] = static_cast(v.x); - velocities[index + 1] = static_cast(v.y); - velocities[index + 2] = static_cast(v.z); - index += 3; - } -} -template void lb_get_interpolated_velocity_gpu<8>(double const *positions, - double *velocities, - int length); -template void lb_get_interpolated_velocity_gpu<27>(double const *positions, - double *velocities, - int length); - -void linear_velocity_interpolation(double const *positions, double *velocities, - int length) { - return lb_get_interpolated_velocity_gpu<8>(positions, velocities, length); -} - -void quadratic_velocity_interpolation(double const *positions, - double *velocities, int length) { - return lb_get_interpolated_velocity_gpu<27>(positions, velocities, length); -} - -void lb_coupling_set_rng_state_gpu(uint64_t counter) { - rng_counter_coupling_gpu = Utils::Counter(counter); -} - -void lb_fluid_set_rng_state_gpu(uint64_t counter) { - rng_counter_fluid_gpu = Utils::Counter(counter); -} - -uint64_t lb_coupling_get_rng_state_gpu() { - assert(rng_counter_coupling_gpu); - return rng_counter_coupling_gpu->value(); -} -uint64_t lb_fluid_get_rng_state_gpu() { - assert(rng_counter_fluid_gpu); - return rng_counter_fluid_gpu->value(); -} - -#endif /* CUDA */ diff --git a/src/core/integrate.cpp b/src/core/integrate.cpp index a229ccecde8..8bb284e44bb 100644 --- a/src/core/integrate.cpp +++ b/src/core/integrate.cpp @@ -44,6 +44,7 @@ #include "event.hpp" #include "forces.hpp" 
#include "grid.hpp" +#include "grid_based_algorithms/ek_container.hpp" #include "grid_based_algorithms/lb_interface.hpp" #include "grid_based_algorithms/lb_particle_coupling.hpp" #include "interactions.hpp" @@ -73,6 +74,12 @@ #include #endif +#ifdef WALBERLA +#ifdef WALBERLA_STATIC_ASSERT +#error "waLberla headers should not be visible to the ESPResSo core" +#endif +#endif + int integ_switch = INTEG_METHOD_NVT; /** Time step for the integration. */ @@ -92,6 +99,7 @@ bool recalc_forces = true; static double verlet_reuse = 0.0; static int fluid_step = 0; +static int ek_step = 0; namespace { volatile std::sig_atomic_t ctrl_C = 0; @@ -101,6 +109,8 @@ namespace LeesEdwards { /** @brief Currently active Lees-Edwards protocol. */ static std::shared_ptr protocol = nullptr; +std::weak_ptr get_protocol() { return protocol; } + /** * @brief Update the Lees-Edwards parameters of the box geometry * for the current simulation time. @@ -348,7 +358,7 @@ int integrate(int n_steps, int reuse_forces) { force_calc(cell_structure, time_step, temperature); #ifdef VIRTUAL_SITES - virtual_sites()->after_force_calc(); + virtual_sites()->after_force_calc(time_step); #endif integrator_step_2(particles, temperature); LeesEdwards::run_kernel(); @@ -361,16 +371,48 @@ int integrate(int n_steps, int reuse_forces) { // propagate one-step functionalities if (integ_switch != INTEG_METHOD_STEEPEST_DESCENT) { - if (lb_lbfluid_get_lattice_switch() != ActiveLB::NONE) { - auto const tau = lb_lbfluid_get_tau(); - auto const lb_steps_per_md_step = - static_cast(std::round(tau / time_step)); + auto const lb_active = LB::get_lattice_switch() != ActiveLB::NONE; +#ifdef WALBERLA + auto const ek_active = not EK::ek_container.empty(); +#else + auto constexpr ek_active = false; +#endif + + if (lb_active and ek_active) { + // assume that they are coupled, which is not necessarily true + auto const lb_steps_per_md_step = LB::get_steps_per_md_step(time_step); + auto const ek_steps_per_md_step = 
EK::get_steps_per_md_step(time_step); + + if (lb_steps_per_md_step != ek_steps_per_md_step) { + runtimeErrorMsg() + << "LB and EK are active but with different time steps."; + } + + // only use fluid_step in this case + assert(fluid_step == ek_step); + + fluid_step += 1; + if (fluid_step >= lb_steps_per_md_step) { + fluid_step = 0; + LB::propagate(); + EK::propagate(); + } + lb_lbcoupling_propagate(); + } else if (lb_active) { + auto const lb_steps_per_md_step = LB::get_steps_per_md_step(time_step); fluid_step += 1; if (fluid_step >= lb_steps_per_md_step) { fluid_step = 0; - lb_lbfluid_propagate(); + LB::propagate(); } lb_lbcoupling_propagate(); + } else if (ek_active) { + auto const ek_steps_per_md_step = EK::get_steps_per_md_step(time_step); + ek_step += 1; + if (ek_step >= ek_steps_per_md_step) { + ek_step = 0; + EK::propagate(); + } } #ifdef VIRTUAL_SITES @@ -507,8 +549,9 @@ void increment_sim_time(double amount) { sim_time += amount; } void set_time_step(double value) { if (value <= 0.) 
throw std::domain_error("time_step must be > 0."); - if (lb_lbfluid_get_lattice_switch() != ActiveLB::NONE) - check_tau_time_step_consistency(lb_lbfluid_get_tau(), value); + if (LB::get_lattice_switch() != ActiveLB::NONE) { + LB::check_tau_time_step_consistency(LB::get_tau(), value); + } ::time_step = value; on_timestep_change(); } diff --git a/src/core/lees_edwards/lees_edwards.hpp b/src/core/lees_edwards/lees_edwards.hpp index abadb0966d5..629fd4cfed9 100644 --- a/src/core/lees_edwards/lees_edwards.hpp +++ b/src/core/lees_edwards/lees_edwards.hpp @@ -26,6 +26,7 @@ #include #include +#include namespace LeesEdwards { class UpdateOffset { protected: @@ -36,9 +37,6 @@ class UpdateOffset { void operator()(Particle &p, double pos_prefactor = 1.0) const { // Disabled as long as we do not use a two step LE update - // p.lees_edwards_offset() -= pos_prefactor * - // static_cast(p.lees_edwards_flag()) - // * m_le.pos_offset / 2; } }; @@ -92,6 +90,9 @@ inline Utils::Vector3d verlet_list_offset(BoxGeometry const &box, return {}; } +/** @brief Get currently active Lees-Edwards protocol. */ +std::weak_ptr get_protocol(); + /** @brief Set a new Lees-Edwards protocol. 
*/ void set_protocol(std::shared_ptr new_protocol); diff --git a/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp b/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp index a49718719d9..0d8cea95aaa 100644 --- a/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp +++ b/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp @@ -39,9 +39,8 @@ CylindricalLBFluxDensityProfileAtParticlePositions::evaluate( for (auto p : particles) { auto const pos = folded_position(traits.position(p), box_geo); - auto const v = lb_lbfluid_get_interpolated_velocity(pos) * - lb_lbfluid_get_lattice_speed(); - auto const flux_dens = lb_lbfluid_get_interpolated_density(pos) * v; + auto const v = LB::get_interpolated_velocity(pos) * LB::get_lattice_speed(); + auto const flux_dens = LB::get_interpolated_density(pos) * v; histogram.update(Utils::transform_coordinate_cartesian_to_cylinder( pos - transform_params->center(), diff --git a/src/core/observables/CylindricalLBVelocityProfile.cpp b/src/core/observables/CylindricalLBVelocityProfile.cpp index 068184b16c4..b0b98bc7be3 100644 --- a/src/core/observables/CylindricalLBVelocityProfile.cpp +++ b/src/core/observables/CylindricalLBVelocityProfile.cpp @@ -33,8 +33,8 @@ namespace Observables { std::vector CylindricalLBVelocityProfile::operator()() const { Utils::CylindricalHistogram histogram(n_bins(), limits()); for (auto const &p : sampling_positions) { - auto const velocity = lb_lbfluid_get_interpolated_velocity(p) * - lb_lbfluid_get_lattice_speed(); + auto const velocity = + LB::get_interpolated_velocity(p) * LB::get_lattice_speed(); auto const pos_shifted = p - transform_params->center(); auto const pos_cyl = Utils::transform_coordinate_cartesian_to_cylinder( pos_shifted, transform_params->axis(), transform_params->orientation()); diff --git a/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp 
b/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp index 35bbb4f8bb0..d7ec93b4c0c 100644 --- a/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp +++ b/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp @@ -37,8 +37,7 @@ std::vector CylindricalLBVelocityProfileAtParticlePositions::evaluate( for (auto const &p : particles) { auto const pos = folded_position(traits.position(p), box_geo); - auto const v = lb_lbfluid_get_interpolated_velocity(pos) * - lb_lbfluid_get_lattice_speed(); + auto const v = LB::get_interpolated_velocity(pos) * LB::get_lattice_speed(); histogram.update( Utils::transform_coordinate_cartesian_to_cylinder( diff --git a/src/core/observables/LBFluidPressureTensor.hpp b/src/core/observables/LBFluidPressureTensor.hpp index 758278239f5..933d4c17c11 100644 --- a/src/core/observables/LBFluidPressureTensor.hpp +++ b/src/core/observables/LBFluidPressureTensor.hpp @@ -33,12 +33,9 @@ class LBFluidPressureTensor : public Observable { std::vector shape() const override { return {3, 3}; } std::vector operator()() const override { auto const unit_conversion = - 1. / (lb_lbfluid_get_agrid() * Utils::sqr(lb_lbfluid_get_tau())); - auto const lower_triangle = - lb_lbfluid_get_pressure_tensor() * unit_conversion; - return {lower_triangle[0], lower_triangle[1], lower_triangle[3], - lower_triangle[1], lower_triangle[2], lower_triangle[4], - lower_triangle[3], lower_triangle[4], lower_triangle[5]}; + 1. 
/ (LB::get_agrid() * Utils::sqr(LB::get_tau())); + auto const tensor = LB::get_pressure_tensor() * unit_conversion; + return tensor.as_vector(); } }; diff --git a/src/core/observables/LBVelocityProfile.cpp b/src/core/observables/LBVelocityProfile.cpp index 2471dfb9ebc..4c8b2740d37 100644 --- a/src/core/observables/LBVelocityProfile.cpp +++ b/src/core/observables/LBVelocityProfile.cpp @@ -32,8 +32,7 @@ namespace Observables { std::vector LBVelocityProfile::operator()() const { Utils::Histogram histogram(n_bins(), limits()); for (auto const &p : sampling_positions) { - const auto v = lb_lbfluid_get_interpolated_velocity(p) * - lb_lbfluid_get_lattice_speed(); + const auto v = LB::get_interpolated_velocity(p) * LB::get_lattice_speed(); histogram.update(p, v); } auto hist_tmp = histogram.get_histogram(); diff --git a/src/core/thermostat.cpp b/src/core/thermostat.cpp index bc56e228838..e0780821818 100644 --- a/src/core/thermostat.cpp +++ b/src/core/thermostat.cpp @@ -28,6 +28,7 @@ #include "bonded_interactions/thermalized_bond_utils.hpp" #include "communication.hpp" #include "dpd.hpp" +#include "errorhandling.hpp" #include "event.hpp" #include "integrate.hpp" #include "npt.hpp" @@ -196,7 +197,11 @@ void mpi_set_thermo_virtual(bool thermo_virtual) { void mpi_set_temperature_local(double temperature) { ::temperature = temperature; - on_temperature_change(); + try { + on_temperature_change(); + } catch (std::exception const &err) { + runtimeErrorMsg() << err.what(); + } on_thermostat_param_change(); } @@ -228,4 +233,4 @@ REGISTER_CALLBACK(mpi_set_nptiso_gammas_local) void mpi_set_nptiso_gammas(double gamma0, double gammav) { mpi_call_all(mpi_set_nptiso_gammas_local, gamma0, gammav); } -#endif \ No newline at end of file +#endif diff --git a/src/core/unit_tests/CMakeLists.txt b/src/core/unit_tests/CMakeLists.txt index 2ece0659ee7..f623143c189 100644 --- a/src/core/unit_tests/CMakeLists.txt +++ b/src/core/unit_tests/CMakeLists.txt @@ -59,8 +59,6 @@ unit_test(NAME 
lees_edwards_test SRC lees_edwards_test.cpp DEPENDS espresso::core) unit_test(NAME BoxGeometry_test SRC BoxGeometry_test.cpp DEPENDS espresso::core) unit_test(NAME LocalBox_test SRC LocalBox_test.cpp DEPENDS espresso::core) -unit_test(NAME Lattice_test SRC Lattice_test.cpp DEPENDS espresso::core) -unit_test(NAME lb_exceptions SRC lb_exceptions.cpp DEPENDS espresso::core) unit_test(NAME Verlet_list_test SRC Verlet_list_test.cpp DEPENDS espresso::core NUM_PROC 4) unit_test(NAME VerletCriterion_test SRC VerletCriterion_test.cpp DEPENDS @@ -79,3 +77,14 @@ if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") unit_test(NAME specfunc_test SRC specfunc_test.cpp DEPENDS espresso::utils espresso::core) endif() +unit_test(NAME lb_particle_coupling_test SRC lb_particle_coupling_test.cpp + DEPENDS espresso::core Boost::mpi MPI::MPI_CXX NUM_PROC 2) +unit_test(NAME ek_interface_test SRC ek_interface_test.cpp DEPENDS + espresso::core Boost::mpi MPI::MPI_CXX NUM_PROC 2) +if(ESPRESSO_BUILD_WITH_WALBERLA) + target_link_libraries( + lb_particle_coupling_test PRIVATE espresso::walberla + espresso::walberla::cpp_flags) + target_link_libraries(ek_interface_test PRIVATE espresso::walberla + espresso::walberla::cpp_flags) +endif() diff --git a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp index f1e156175ab..f0ef4040df5 100644 --- a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp +++ b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp @@ -25,6 +25,7 @@ namespace utf = boost::unit_test; #include "ParticleFactory.hpp" +#include "particle_management.hpp" #include "EspressoSystemStandAlone.hpp" #include "Particle.hpp" @@ -72,25 +73,6 @@ static void remove_translational_motion() { Galilei{}.kill_particle_motion(false); } -static auto copy_particle_to_head_node(boost::mpi::communicator const &comm, - int p_id) { - boost::optional result{}; - auto p = ::cell_structure.get_local_particle(p_id); - if (p and not p->is_ghost()) { 
- if (comm.rank() == 0) { - result = *p; - } else { - comm.send(0, p_id, *p); - } - } - if (comm.rank() == 0 and not result) { - Particle p{}; - comm.recv(boost::mpi::any_source, p_id, p); - result = p; - } - return result; -} - BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) { auto constexpr tol = 8. * 100. * std::numeric_limits::epsilon(); auto const comm = boost::mpi::communicator(); diff --git a/src/core/unit_tests/Lattice_test.cpp b/src/core/unit_tests/Lattice_test.cpp deleted file mode 100644 index f44ebe114e0..00000000000 --- a/src/core/unit_tests/Lattice_test.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (C) 2021-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#define BOOST_TEST_MODULE Lattice class tests -#define BOOST_TEST_DYN_LINK -#include - -#include "grid_based_algorithms/lattice.hpp" - -#include - -#include -#include -#include - -BOOST_AUTO_TEST_CASE(test_basic_lattice) { - // create a lattice for the second domain of a 2x1x1 partition of the box - auto const halo_size = Lattice::index_t{1}; - auto const agrid = 0.5; - auto const offset = 0.5; - auto const box_length = Utils::Vector3d{{6., 6., 6.}}; - auto const local_box = Utils::Vector3d{{3., 6., 6.}}; - auto const node_pos = Utils::Vector3i{{1, 0, 0}}; - auto const node_grid = Utils::Vector3i{{2, 1, 1}}; - Lattice lattice(agrid, offset, halo_size, local_box, box_length, box_length, - node_pos, node_grid); - - // check struct members - BOOST_CHECK_EQUAL(lattice.halo_size, halo_size); - BOOST_CHECK_EQUAL(lattice.agrid, agrid); - BOOST_CHECK_EQUAL(lattice.offset, offset); - BOOST_CHECK_EQUAL(lattice.halo_grid_volume, (6 + 2) * (12 + 2) * (12 + 2)); - auto const elementwise = boost::test_tools::per_element(); - auto const ref_grid = Utils::Vector3i{{6, 12, 12}}; - auto const ref_global_grid = Utils::hadamard_product(node_grid, ref_grid); - auto const local_index_offset = Utils::hadamard_product(node_pos, ref_grid); - BOOST_TEST(lattice.local_box == local_box, elementwise); - BOOST_TEST(lattice.node_grid == node_grid, elementwise); - BOOST_TEST(lattice.grid == ref_grid, elementwise); - BOOST_TEST(lattice.global_grid == ref_global_grid, elementwise); - BOOST_TEST(lattice.local_index_offset == local_index_offset, elementwise); - - // check methods - BOOST_CHECK(lattice.is_local({11, 11, 11})); - BOOST_CHECK(lattice.is_local({6, 11, 11})); - BOOST_CHECK(!lattice.is_local({5, 11, 11})); - BOOST_CHECK(!lattice.is_local({12, 12, 12})); - BOOST_CHECK(!lattice.is_local({0, 0, 0})); - auto const global_index = Utils::Vector3i{{11, 11, 11}}; - auto const local_index = Utils::Vector3i{{6, 12, 12}}; - BOOST_TEST(lattice.local_index(global_index) == local_index, 
elementwise); -} - -BOOST_AUTO_TEST_CASE(test_map_position_to_lattice) { - using boost::test_tools::per_element; - auto const halo_size = Lattice::index_t{1}; - auto const agrid = 1.0; - auto const offset = 0.5; - auto const box_l = Utils::Vector3d{{6., 6., 6.}}; - auto const local_box = Utils::Vector3d{{6., 6., 6.}}; - auto const node_pos = Utils::Vector3i{{0, 0, 0}}; - auto const node_grid = Utils::Vector3i{{1, 1, 1}}; - Lattice lattice(agrid, offset, halo_size, local_box, box_l, box_l, node_pos, - node_grid); - - // check methods - auto const slice_x = 6u + 2u; - auto const slice_xy = slice_x * slice_x; - auto const slice_xyz = 2u * 6u * 6u; - Utils::Vector const origin_index = { - 0u, 1u, - slice_x, slice_x + 1u, - slice_xy, slice_xy + 1u, - slice_xyz, slice_xyz + 1u}; - auto const delta1_ref = Utils::Vector6d{{.5, .5, .5, .5, .5, .5}}; - auto const delta2_ref = Utils::Vector6d{{1., 1., 1., 0., 0., 0.}}; - Utils::Vector node_index1; - Utils::Vector node_index2; - Utils::Vector idx; - Utils::Vector6d delta1; - Utils::Vector6d delta2; - Utils::Vector6d dx; - - // check inside local domain (edge cases) - auto const my_origin = Utils::Vector3d::broadcast(0.); - auto const my_lb_left = Utils::Vector3d::broadcast(-offset); - auto const my_lb_right = Utils::Vector3d::broadcast(offset - 1e-12) + box_l; - lattice.map_position_to_lattice(my_origin, node_index1, delta1); - lattice.map_position_to_lattice(my_lb_left, node_index2, delta2); - lattice.map_position_to_lattice(my_lb_right, idx, dx); - BOOST_TEST(node_index1 == origin_index, per_element()); - BOOST_TEST(node_index2 == origin_index, per_element()); - BOOST_TEST(delta1 == delta1_ref, per_element()); - BOOST_TEST(delta2 == delta2_ref, per_element()); - - // check almost inside local domain - auto constexpr epsilon = std::numeric_limits::epsilon(); - if (epsilon != epsilon / 2.) 
{ // check for machine precision - auto const outside = Utils::Vector3d::broadcast(-offset - epsilon / 2.); - lattice.map_position_to_lattice(outside, node_index2, delta2); - BOOST_TEST(node_index2 == origin_index, per_element()); - } - - // check outside local domain - BOOST_CHECK_THROW(lattice.map_position_to_lattice({-2., -2., -2.}, idx, dx), - std::runtime_error); - BOOST_CHECK_THROW(lattice.map_position_to_lattice({6.5, 6.5, 6.5}, idx, dx), - std::runtime_error); -} diff --git a/src/core/unit_tests/Verlet_list_test.cpp b/src/core/unit_tests/Verlet_list_test.cpp index 2e8533f9ee0..57b341df586 100644 --- a/src/core/unit_tests/Verlet_list_test.cpp +++ b/src/core/unit_tests/Verlet_list_test.cpp @@ -33,6 +33,7 @@ namespace utf = boost::unit_test; namespace bdata = boost::unit_test::data; #include "ParticleFactory.hpp" +#include "particle_management.hpp" #include "EspressoSystemStandAlone.hpp" #include "Particle.hpp" @@ -61,25 +62,6 @@ namespace espresso { static std::unique_ptr system; } // namespace espresso -static auto copy_particle_to_head_node(boost::mpi::communicator const &comm, - int p_id) { - boost::optional result{}; - auto p = ::cell_structure.get_local_particle(p_id); - if (p and not p->is_ghost()) { - if (comm.rank() == 0) { - result = *p; - } else { - comm.send(0, p_id, *p); - } - } - if (comm.rank() == 0 and not result) { - Particle p{}; - comm.recv(boost::mpi::any_source, p_id, p); - result = p; - } - return result; -} - namespace Testing { /** * Helper class to setup an integrator and particle properties such that the diff --git a/src/core/unit_tests/ek_interface_test.cpp b/src/core/unit_tests/ek_interface_test.cpp new file mode 100644 index 00000000000..1d01ef76b59 --- /dev/null +++ b/src/core/unit_tests/ek_interface_test.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (C) 2023 The ESPResSo project + * + * This file is part of ESPResSo. 
+ * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define BOOST_TEST_MODULE LB particle coupling test +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_NO_MAIN +#include + +#include "EspressoSystemStandAlone.hpp" +#include "config/config.hpp" +#include "errorhandling.hpp" +#include "grid_based_algorithms/ek_container.hpp" +#include "grid_based_algorithms/ek_reactions.hpp" + +#include + +#include + +#include +#include +#include + +static struct { + double kT = 1.3E-4; + double density = 1.4; + double diffusion = 3e-3; + double valency = 1.; + bool advection = true; + bool friction_coupling = true; + double tau = 0.01; + double time_step = 0.01; + double agrid = 1.; + double skin = 0.51; + Utils::Vector3d ext_efield = Utils::Vector3d{{0.01, 0.02, 0.03}}; + Utils::Vector3d box_dimensions = Utils::Vector3d::broadcast(8.); + Utils::Vector3i grid_dimensions = Utils::Vector3i::broadcast(8); +} params; + +namespace espresso { +// ESPResSo system instance +static std::unique_ptr system; +} // namespace espresso + +static auto get_n_runtime_errors() { return check_runtime_errors_local(); } + +#ifdef WALBERLA + +#include "grid.hpp" + +#include +#include +#include +#include +#include + +BOOST_AUTO_TEST_CASE(ek_interface_walberla) { + { + // tau setters and getters + BOOST_CHECK_EQUAL(EK::ek_container.get_tau(), 0.); + BOOST_CHECK_EQUAL(EK::get_tau(), 0.); + BOOST_CHECK_EQUAL(EK::get_steps_per_md_step(1.), 0); 
+ EK::ek_container.set_tau(2.); + BOOST_CHECK_EQUAL(EK::ek_container.get_tau(), 2.); + BOOST_CHECK_EQUAL(EK::get_tau(), 2.); + BOOST_CHECK_EQUAL(EK::get_steps_per_md_step(1.), 2); + BOOST_CHECK_EQUAL(EK::get_steps_per_md_step(2.), 1); + BOOST_CHECK_EQUAL(EK::get_steps_per_md_step(5.), 0); + } + + { + // setup a minimal EK model without coupling to LB + auto constexpr n_ghost_layers = 1u; + auto constexpr single_precision = true; + auto ek_lattice = std::make_shared( + params.grid_dimensions, ::node_grid, n_ghost_layers); + auto ek_species = new_ek_walberla( + ek_lattice, params.diffusion, params.kT, params.valency, + params.ext_efield, params.density, false, false, single_precision); + auto ek_solver_none = new_ek_poisson_none(ek_lattice, single_precision); + + BOOST_REQUIRE(EK::ek_reactions.empty()); + BOOST_REQUIRE(EK::ek_container.empty()); + BOOST_REQUIRE(not EK::ek_container.is_poisson_solver_set()); + EK::propagate(); // no-op + BOOST_REQUIRE_EQUAL(get_n_runtime_errors(), 0); + EK::ek_container.set_poisson_solver(ek_solver_none); + BOOST_REQUIRE(EK::ek_container.is_poisson_solver_set()); + BOOST_REQUIRE(EK::ek_container.empty()); + EK::ek_container.set_tau(0.); + BOOST_CHECK_THROW(EK::ek_container.add(ek_species), std::runtime_error); + EK::ek_container.set_tau(2.); + EK::ek_container.add(ek_species); + BOOST_REQUIRE(not EK::ek_container.empty()); + EK::propagate(); // no-op + BOOST_REQUIRE_EQUAL(get_n_runtime_errors(), 0); + EK::ek_container.remove(ek_species); + BOOST_REQUIRE(EK::ek_container.empty()); + EK::propagate(); // no-op + BOOST_REQUIRE_EQUAL(get_n_runtime_errors(), 0); + } +} + +#else // WALBERLA + +BOOST_AUTO_TEST_CASE(ek_interface) { + { + EK::propagate(); // no-op + BOOST_CHECK_THROW(EK::get_tau(), NoEKActive); + BOOST_CHECK_THROW(EK::get_tau(), std::exception); + BOOST_CHECK_THROW(EK::get_steps_per_md_step(1.), std::exception); + auto const err_msg = std::string(NoEKActive().what()); + auto const ref_msg = std::string("EK not activated"); + 
BOOST_CHECK_EQUAL(err_msg, ref_msg); + } +} + +#endif // WALBERLA + +int main(int argc, char **argv) { + espresso::system = std::make_unique(argc, argv); + espresso::system->set_box_l(params.box_dimensions); + espresso::system->set_time_step(params.time_step); + espresso::system->set_skin(params.skin); + + boost::mpi::communicator world; + assert(world.size() <= 2); + + return boost::unit_test::unit_test_main(init_unit_test, argc, argv); +} diff --git a/src/core/unit_tests/lb_exceptions.cpp b/src/core/unit_tests/lb_exceptions.cpp deleted file mode 100644 index 6e1b4232644..00000000000 --- a/src/core/unit_tests/lb_exceptions.cpp +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2021-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#define BOOST_TEST_MODULE LB exception mechanism -#define BOOST_TEST_DYN_LINK -#include - -#include "grid_based_algorithms/lb.hpp" -#include "grid_based_algorithms/lb_interface.hpp" -#include "grid_based_algorithms/lb_interpolation.hpp" -#include "grid_based_algorithms/lb_particle_coupling.hpp" - -#include - -BOOST_AUTO_TEST_CASE(exceptions) { - // getters and setters - BOOST_CHECK_THROW(lb_lbfluid_get_rng_state(), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_set_rng_state(0u), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_set_density(-1.), std::invalid_argument); - BOOST_CHECK_THROW(lb_lbfluid_set_density(1.), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_density(), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_set_viscosity(-1.), std::invalid_argument); - BOOST_CHECK_THROW(lb_lbfluid_set_viscosity(1.), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_viscosity(), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_set_bulk_viscosity(-1.), std::invalid_argument); - BOOST_CHECK_THROW(lb_lbfluid_set_bulk_viscosity(1.), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_bulk_viscosity(), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_set_gamma_odd(2.), std::invalid_argument); - BOOST_CHECK_THROW(lb_lbfluid_set_gamma_odd({}), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_gamma_odd(), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_set_gamma_even(2.), std::invalid_argument); - BOOST_CHECK_THROW(lb_lbfluid_set_gamma_even({}), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_gamma_even(), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_set_agrid(-1.), std::invalid_argument); - BOOST_CHECK_THROW(lb_lbfluid_set_agrid(1.), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_agrid(), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_set_ext_force_density({}), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_ext_force_density(), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_set_tau(-1.), std::invalid_argument); - 
BOOST_CHECK_THROW(lb_lbfluid_set_tau(1.), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_tau(), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_set_kT({}), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_kT(), std::exception); - BOOST_CHECK_THROW(lb_lbnode_get_boundary({}), std::exception); - BOOST_CHECK_THROW(lb_lbnode_set_density({}, {}), std::exception); - BOOST_CHECK_THROW(lb_lbnode_get_density({}), std::exception); - BOOST_CHECK_THROW(lb_lbnode_set_velocity({}, {}), std::exception); - BOOST_CHECK_THROW(lb_lbnode_get_velocity({}), std::exception); - BOOST_CHECK_THROW(lb_lbnode_set_pop({}, {}), std::exception); - BOOST_CHECK_THROW(lb_lbnode_get_pop({}), std::exception); - BOOST_CHECK_THROW(lb_lbnode_get_pressure_tensor({}), std::exception); - BOOST_CHECK_THROW(lb_lbnode_get_pressure_tensor_neq({}), std::exception); - // particle coupling and interpolation - BOOST_CHECK_EQUAL(lb_lbcoupling_get_rng_state(), 0u); - BOOST_CHECK_THROW(lb_lbfluid_get_interpolated_velocity({}), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_interpolated_density({}), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_shape(), std::exception); - BOOST_CHECK_EQUAL(lb_lbfluid_calc_fluid_momentum(), Utils::Vector3d{}); - BOOST_CHECK_THROW(lb_lbfluid_set_lattice_switch(static_cast(100)), - std::invalid_argument); - ::lattice_switch = ActiveLB::CPU; - mpi_set_interpolation_order_local(InterpolationOrder::quadratic); - BOOST_CHECK_THROW(lb_lbfluid_get_interpolated_density({}), std::exception); - BOOST_CHECK_THROW(lb_lbfluid_get_interpolated_velocity({}), - std::runtime_error); - BOOST_CHECK_THROW(lb_lbinterpolation_add_force_density({}, {}), - std::runtime_error); - ::lattice_switch = ActiveLB::GPU; - BOOST_CHECK_THROW(lb_lbfluid_get_interpolated_density({}), std::exception); - ::lattice_switch = ActiveLB::NONE; - mpi_set_interpolation_order_local(InterpolationOrder::linear); -#ifdef ADDITIONAL_CHECKS - { - std::stringstream stream_xy{}; - log_buffer_diff(stream_xy, 0, 1, 2, 3, 
-1); - BOOST_CHECK_EQUAL(stream_xy.str(), - "buffers differ in dir=0 at node index=1 x=2 y=3\n"); - std::stringstream stream_xyz{}; - log_buffer_diff(stream_xyz, 0, 1, 2, 3, 4); - BOOST_CHECK_EQUAL(stream_xyz.str(), - "buffers differ in dir=0 at node index=1 x=2 y=3 z=4\n"); - } -#endif // ADDITIONAL_CHECKS -} diff --git a/src/core/unit_tests/lb_particle_coupling_test.cpp b/src/core/unit_tests/lb_particle_coupling_test.cpp new file mode 100644 index 00000000000..4e308d089d5 --- /dev/null +++ b/src/core/unit_tests/lb_particle_coupling_test.cpp @@ -0,0 +1,612 @@ +/* + * Copyright (C) 2019-2023 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#define BOOST_TEST_MODULE LB particle coupling test +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_NO_MAIN +#include +#include +#include +namespace bdata = boost::unit_test::data; +namespace utf = boost::unit_test; + +#include "config/config.hpp" + +#ifdef WALBERLA + +#include "ParticleFactory.hpp" +#include "particle_management.hpp" + +#include "EspressoSystemStandAlone.hpp" +#include "Particle.hpp" +#include "cells.hpp" +#include "errorhandling.hpp" +#include "event.hpp" +#include "grid.hpp" +#include "grid_based_algorithms/lb_interface.hpp" +#include "grid_based_algorithms/lb_interpolation.hpp" +#include "grid_based_algorithms/lb_particle_coupling.hpp" +#include "grid_based_algorithms/lb_walberla_instance.hpp" +#include "particle_node.hpp" +#include "random.hpp" +#include "thermostat.hpp" + +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// multiply by 6 to account for error accumulation +auto constexpr eps = 6. 
* std::numeric_limits::epsilon(); + +static struct { + unsigned int seed = 23u; + double kT = 0.; + double viscosity = 1e-3; + double density = 0.5; + double tau = 0.01; + double time_step = 0.01; + double agrid = 1.; + double skin = 0.5; + Utils::Vector3d box_dimensions = Utils::Vector3d::broadcast(8.); + Utils::Vector3i grid_dimensions = Utils::Vector3i::broadcast(8); + auto force_md_to_lb(Utils::Vector3d const &md_force) const { + return (-this->time_step * this->tau / this->agrid) * md_force; + } +} params; + +/** Boost unit test dataset */ +std::vector const kTs{0., 1E-4}; + +namespace espresso { +// ESPResSo system instance +static std::unique_ptr system; +// ESPResSo actors +static std::shared_ptr lb_params; +static std::shared_ptr lb_lattice; +static std::shared_ptr lb_fluid; + +static auto make_lb_actor() { + auto constexpr n_ghost_layers = 1u; + auto constexpr single_precision = false; + lb_params = std::make_shared(params.agrid, params.tau); + lb_lattice = std::make_shared(params.grid_dimensions, + ::node_grid, n_ghost_layers); + lb_fluid = new_lb_walberla(lb_lattice, params.viscosity, params.density, + single_precision); + lb_fluid->set_collision_model(params.kT, params.seed); + lb_fluid->ghost_communication(); +} + +static void add_lb_actor() { activate_lb_walberla(lb_fluid, lb_params); } + +static void remove_lb_actor() { deactivate_lb_walberla(); } + +static void set_lb_kT(double kT) { + lb_fluid->set_collision_model(kT, params.seed); +} +} // namespace espresso + +namespace LB { +static auto get_force_to_be_applied(Utils::Vector3d const &pos) { + auto const agrid = espresso::lb_params->get_agrid(); + auto const ind = Utils::Vector3i{static_cast(pos[0] / agrid), + static_cast(pos[1] / agrid), + static_cast(pos[2] / agrid)}; + auto const res = espresso::lb_fluid->get_node_force_to_be_applied(ind); + if (!res) { + auto const comm = boost::mpi::communicator(); + std::stringstream what; + what << "Force to be applied could not be obtained from Walberla " 
+ << "on MPI rank " << comm.rank() << ": position = [" << pos << "]"; + throw std::runtime_error(what.str()); + } + return *res; +} +} // namespace LB + +/** Fixture to manage the lifetime of the LB actor. */ +struct CleanupActorLB : public ParticleFactory { + CleanupActorLB() : ParticleFactory() { + params.kT = 0.; + espresso::make_lb_actor(); + espresso::add_lb_actor(); + } + + // NOLINTNEXTLINE(bugprone-exception-escape) + ~CleanupActorLB() { espresso::remove_lb_actor(); } +}; + +BOOST_FIXTURE_TEST_SUITE(suite, CleanupActorLB) + +static void lb_lbcoupling_broadcast() { + boost::mpi::communicator world; + boost::mpi::broadcast(world, lb_particle_coupling, 0); +} + +BOOST_AUTO_TEST_CASE(activate) { + lb_lbcoupling_deactivate(); + lb_lbcoupling_broadcast(); + lb_lbcoupling_activate(); + lb_lbcoupling_broadcast(); + BOOST_CHECK(lb_particle_coupling.couple_to_md); +} + +BOOST_AUTO_TEST_CASE(de_activate) { + lb_lbcoupling_activate(); + lb_lbcoupling_broadcast(); + lb_lbcoupling_deactivate(); + lb_lbcoupling_broadcast(); + BOOST_CHECK(not lb_particle_coupling.couple_to_md); +} + +BOOST_AUTO_TEST_CASE(rng_initial_state) { + BOOST_CHECK(lb_lbcoupling_is_seed_required()); + BOOST_CHECK(!lb_particle_coupling.rng_counter_coupling); +} + +BOOST_AUTO_TEST_CASE(rng) { + lb_lbcoupling_set_rng_state(17); + BOOST_REQUIRE(lb_particle_coupling.rng_counter_coupling); + BOOST_CHECK_EQUAL(lb_lbcoupling_get_rng_state(), 17); + BOOST_CHECK(not lb_lbcoupling_is_seed_required()); + auto const step1_random1 = lb_particle_coupling_noise( + true, 1, lb_particle_coupling.rng_counter_coupling); + auto const step1_random2 = lb_particle_coupling_noise( + true, 4, lb_particle_coupling.rng_counter_coupling); + auto const step1_random2_try2 = lb_particle_coupling_noise( + true, 4, lb_particle_coupling.rng_counter_coupling); + BOOST_CHECK(step1_random1 != step1_random2); + BOOST_CHECK(step1_random2 == step1_random2_try2); + + // Propagation queries kT from LB, so LB needs to be initialized + 
espresso::set_lb_kT(1E-4); + lb_lbcoupling_propagate(); + + BOOST_REQUIRE(lb_particle_coupling.rng_counter_coupling); + BOOST_CHECK_EQUAL(lb_lbcoupling_get_rng_state(), 18); + auto const step2_random1 = lb_particle_coupling_noise( + true, 1, lb_particle_coupling.rng_counter_coupling); + auto const step2_random2 = lb_particle_coupling_noise( + true, 4, lb_particle_coupling.rng_counter_coupling); + BOOST_CHECK(step1_random1 != step2_random1); + BOOST_CHECK(step1_random1 != step2_random2); + + auto const step3_norandom = lb_particle_coupling_noise( + false, 4, lb_particle_coupling.rng_counter_coupling); + BOOST_CHECK((step3_norandom == Utils::Vector3d{0., 0., 0.})); +} + +BOOST_AUTO_TEST_CASE(access_outside_domain) { + auto const invalid_pos = 2 * params.box_dimensions; + BOOST_CHECK_THROW(lb_lbinterpolation_get_interpolated_velocity(invalid_pos), + std::runtime_error); + BOOST_CHECK_THROW(lb_lbinterpolation_add_force_density(invalid_pos, {}), + std::runtime_error); +} + +BOOST_AUTO_TEST_CASE(drift_vel_offset) { + Particle p{}; + BOOST_CHECK_EQUAL(lb_particle_coupling_drift_vel_offset(p).norm(), 0); + Utils::Vector3d expected{}; +#ifdef ENGINE + p.swimming().swimming = true; + p.swimming().v_swim = 2.; + expected += p.swimming().v_swim * p.calc_director(); +#endif +#ifdef LB_ELECTROHYDRODYNAMICS + p.mu_E() = Utils::Vector3d{-2., 1.5, 1.}; + expected += p.mu_E(); +#endif + BOOST_CHECK_SMALL( + (lb_particle_coupling_drift_vel_offset(p) - expected).norm(), eps); +} + +BOOST_DATA_TEST_CASE(drag_force, bdata::make(kTs), kT) { + espresso::set_lb_kT(kT); + Particle p{}; + p.v() = {-2.5, 1.5, 2.}; + p.pos() = lb_walberla()->get_lattice().get_local_domain().first; + lb_lbcoupling_set_gamma(0.2); + Utils::Vector3d drift_offset{-1., 1., 1.}; + + // Drag force in quiescent fluid + { + auto const observed = lb_drag_force(p, p.pos(), drift_offset); + const Utils::Vector3d expected{0.3, -0.1, -.2}; + BOOST_CHECK_SMALL((observed - expected).norm(), eps); + } +} + +#ifdef ENGINE 
+BOOST_DATA_TEST_CASE(swimmer_force, bdata::make(kTs), kT) { + espresso::set_lb_kT(kT); + auto const first_lb_node = + lb_walberla()->get_lattice().get_local_domain().first; + Particle p{}; + p.swimming().swimming = true; + p.swimming().f_swim = 2.; + p.swimming().dipole_length = 3.; + p.swimming().push_pull = 1; + p.pos() = first_lb_node + Utils::Vector3d::broadcast(0.5); + + auto const coupling_pos = + p.pos() + + Utils::Vector3d{0., 0., p.swimming().dipole_length / params.agrid}; + + // swimmer coupling + { + if (in_local_halo(p.pos())) { + add_swimmer_force(p, params.time_step); + } + if (in_local_halo(coupling_pos)) { + auto const interpolated = LB::get_force_to_be_applied(coupling_pos); + auto const expected = + params.force_md_to_lb(Utils::Vector3d{0., 0., p.swimming().f_swim}); + + // interpolation happened on the expected LB cell + BOOST_CHECK_SMALL((interpolated - expected).norm(), eps); + } + + // all other LB cells have no force + for (int i = 0; i < params.grid_dimensions[0]; ++i) { + for (int j = 0; j < params.grid_dimensions[1]; ++j) { + for (int k = 0; k < params.grid_dimensions[2]; ++k) { + auto const pos = Utils::Vector3d{ + 0.5 + static_cast(i) * params.agrid, + 0.5 + static_cast(j) * params.agrid, + 0.5 + static_cast(k) * params.agrid, + }; + if ((pos - coupling_pos).norm() < 1e-6) + continue; + if (in_local_halo(pos)) { + auto const interpolated = LB::get_force_to_be_applied(pos); + BOOST_CHECK_SMALL(interpolated.norm(), eps); + } + } + } + } + } + + // remove force of the particle from the fluid + { + if (in_local_halo(coupling_pos)) { + add_md_force(coupling_pos, -Utils::Vector3d{0., 0., p.swimming().f_swim}, + params.time_step); + auto const reset = LB::get_force_to_be_applied(coupling_pos); + BOOST_REQUIRE_SMALL(reset.norm(), eps); + } + } +} +#endif // ENGINE + +BOOST_DATA_TEST_CASE(particle_coupling, bdata::make(kTs), kT) { + espresso::set_lb_kT(kT); + lb_lbcoupling_set_rng_state(17); + auto const first_lb_node = + 
lb_walberla()->get_lattice().get_local_domain().first; + auto const gamma = 0.2; + auto const noise = + (kT > 0.) ? std::sqrt(24. * gamma * kT / params.time_step) : 0.0; + auto &rng = lb_particle_coupling.rng_counter_coupling; + Particle p{}; + Utils::Vector3d expected = noise * Random::noise_uniform( + rng->value(), 0, p.id()); +#ifdef ENGINE + p.swimming().swimming = true; + p.swimming().v_swim = 2.; + p.swimming().push_pull = 1; + expected += gamma * p.swimming().v_swim * p.calc_director(); +#endif +#ifdef LB_ELECTROHYDRODYNAMICS + p.mu_E() = Utils::Vector3d{-2., 1.5, 1.}; + expected += gamma * p.mu_E(); +#endif + p.pos() = first_lb_node + Utils::Vector3d::broadcast(0.5); + lb_lbcoupling_set_gamma(gamma); + + // coupling + { + if (in_local_halo(p.pos())) { + couple_particle(p, false, noise, rng, params.time_step); + BOOST_CHECK_SMALL((p.force() - expected).norm(), eps); + + auto const interpolated = LB::get_force_to_be_applied(p.pos()); + BOOST_CHECK_SMALL((interpolated - params.force_md_to_lb(expected)).norm(), + eps); + } + } + + // remove force of the particle from the fluid + { + if (in_local_halo(p.pos())) { + add_md_force(p.pos(), -expected, params.time_step); + } + } +} + +BOOST_DATA_TEST_CASE_F(CleanupActorLB, coupling_particle_lattice_ia, + bdata::make(kTs), kT) { + auto const comm = boost::mpi::communicator(); + auto const rank = comm.rank(); + espresso::set_lb_kT(kT); + lb_lbcoupling_set_rng_state(17); + auto const first_lb_node = + lb_walberla()->get_lattice().get_local_domain().first; + auto const gamma = 0.2; + auto const noise = std::sqrt(24. * gamma * kT / params.time_step * + Utils::sqr(params.agrid / params.tau)); + auto &rng = lb_particle_coupling.rng_counter_coupling; + + auto const pid = 0; + auto const skin = params.skin; + auto const &box_l = params.box_dimensions; + create_particle({box_l[0] / 2. - skin * 2., skin * 2., skin * 2.}, 0, 0); + + // sanity checks + BOOST_REQUIRE_EQUAL(get_particle_node_parallel(pid), rank ? 
-1 : 0); + BOOST_REQUIRE_EQUAL( + ErrorHandling::mpi_gather_runtime_errors_all(rank == 0).size(), 0); + +#ifdef ENGINE + set_particle_property(pid, &Particle::swimming, + ParticleParametersSwimming{true, 0., 2., 1, 3.}); +#endif +#ifdef LB_ELECTROHYDRODYNAMICS + set_particle_property(pid, &Particle::mu_E, Utils::Vector3d{-2., 1.5, 1.}); +#endif + + auto expected = + noise * Random::noise_uniform(rng->value(), 0, pid); + auto const p_opt = copy_particle_to_head_node(comm, pid); + if (rank == 0) { + auto const &p = *p_opt; +#ifdef ENGINE + expected += gamma * p.swimming().v_swim * p.calc_director(); +#endif +#ifdef LB_ELECTROHYDRODYNAMICS + expected += gamma * p.mu_E(); +#endif + } + boost::mpi::broadcast(comm, expected, 0); + auto const p_pos = first_lb_node + Utils::Vector3d::broadcast(0.5); + set_particle_pos(pid, p_pos); + lb_lbcoupling_set_gamma(gamma); + + for (bool with_ghosts : {false, true}) { + { + if (with_ghosts) { + cells_update_ghosts(global_ghost_flags()); + } + if (rank == 0) { + auto const particles = ::cell_structure.local_particles(); + auto const ghost_particles = ::cell_structure.ghost_particles(); + BOOST_REQUIRE_GE(particles.size(), 1); + BOOST_REQUIRE_GE(ghost_particles.size(), static_cast(with_ghosts)); + } + } + + // check box shifts + if (rank == 0) { + auto constexpr reference_shifts = + std::array{{{{0, 0, 0}}, + {{0, 0, 8}}, + {{0, 8, 0}}, + {{0, 8, 8}}, + {{8, 0, 0}}, + {{8, 0, 8}}, + {{8, 8, 0}}, + {{8, 8, 8}}}}; + boost::mpi::communicator world; + assert(world.size() <= 4); + auto const cutoff = 8 / world.size(); + { + auto const shifts = positions_in_halo({0., 0., 0.}, box_geo); + BOOST_REQUIRE_EQUAL(shifts.size(), cutoff); + for (std::size_t i = 0; i < shifts.size(); ++i) { + BOOST_REQUIRE_EQUAL(shifts[i], reference_shifts[i]); + } + } + { + auto const reference_shift = Utils::Vector3d{1., 1., 1.}; + auto const shifts = positions_in_halo({1., 1., 1.}, box_geo); + BOOST_REQUIRE_EQUAL(shifts.size(), 1); + 
BOOST_REQUIRE_EQUAL(shifts[0], reference_shift); + } + { + auto const reference_origin = Utils::Vector3d{1., 2., 0.}; + auto const reference_shift = Utils::Vector3d{1., 2., 8.}; + auto const shifts = positions_in_halo({1., 2., 0.}, box_geo); + BOOST_REQUIRE_EQUAL(shifts.size(), 2); + BOOST_REQUIRE_EQUAL(shifts[0], reference_origin); + BOOST_REQUIRE_EQUAL(shifts[1], reference_shift); + } + } + + // check without LB coupling + { + lb_lbcoupling_deactivate(); + lb_lbcoupling_broadcast(); + auto const particles = ::cell_structure.local_particles(); + auto const ghost_particles = ::cell_structure.ghost_particles(); + lb_lbcoupling_calc_particle_lattice_ia(thermo_virtual, particles, + ghost_particles, params.time_step); + auto const p_opt = copy_particle_to_head_node(comm, pid); + if (rank == 0) { + auto const &p = *p_opt; + BOOST_CHECK_EQUAL(p.force().norm(), 0.); + } + } + + // check with LB coupling + { + lb_lbcoupling_activate(); + lb_lbcoupling_broadcast(); + auto const particles = ::cell_structure.local_particles(); + auto const ghost_particles = ::cell_structure.ghost_particles(); + Utils::Vector3d lb_before{}; + { + auto const p_opt = copy_particle_to_head_node(comm, pid); + if (rank == 0) { + auto const &p = *p_opt; + // get original LB force + lb_before = LB::get_force_to_be_applied(p.pos()); + } + } + // couple particle to LB + lb_lbcoupling_calc_particle_lattice_ia(thermo_virtual, particles, + ghost_particles, params.time_step); + { + auto const p_opt = copy_particle_to_head_node(comm, pid); + if (rank == 0) { + auto const &p = *p_opt; + // check particle force + BOOST_CHECK_SMALL((p.force() - expected).norm(), eps); + // check LB force + auto const lb_after = LB::get_force_to_be_applied(p.pos()); + auto const lb_expected = params.force_md_to_lb(expected) + lb_before; + BOOST_CHECK_SMALL((lb_after - lb_expected).norm(), eps); + } + } + // remove force of the particle from the fluid + set_particle_property(pid, &Particle::force, Utils::Vector3d{}); + 
add_md_force(p_pos, -expected, params.time_step); + } + } + + // clean-up and sanity checks + { + boost::mpi::communicator world; + auto const error_message_ref = std::string( + "Recalculating forces, so the LB coupling forces are not included in " + "the particle force the first time step. This only matters if it " + "happens frequently during sampling."); + auto const error_messages = + ErrorHandling::mpi_gather_runtime_errors_all(world.rank() == 0); + for (auto const &error_message : error_messages) { + BOOST_CHECK_EQUAL(error_message.what(), error_message_ref); + } + } +} + +BOOST_AUTO_TEST_SUITE_END() + +bool test_lb_domain_mismatch_local() { + boost::mpi::communicator world; + auto const node_grid_original = ::node_grid; + auto const node_grid_reversed = + Utils::Vector3i{{::node_grid[2], ::node_grid[1], ::node_grid[0]}}; + auto const n_ghost_layers = 1u; + auto const params = LBWalberlaParams(0.5, 0.01); + ::node_grid = node_grid_reversed; + auto const lattice = std::make_shared( + Utils::Vector3i{12, 12, 12}, node_grid_original, n_ghost_layers); + auto const ptr = new_lb_walberla(lattice, 1.0, 1.0, false); + ptr->set_collision_model(0.0, 0); + ::node_grid = node_grid_original; + if (world.rank() == 0) { + try { + lb_sanity_checks(*ptr, params, params.get_tau()); + } catch (std::runtime_error const &err) { + auto const what_ref = std::string("waLBerla and ESPResSo disagree " + "about domain decomposition."); + return err.what() == what_ref; + } + } + return false; +} + +BOOST_AUTO_TEST_CASE(exceptions) { + { + using std::exception; + // accessing uninitialized pointers is not allowed + BOOST_CHECK_THROW(lb_walberla(), std::runtime_error); + BOOST_CHECK_THROW(lb_walberla_params(), std::runtime_error); + // getters and setters + BOOST_CHECK_THROW(LB::get_agrid(), exception); + BOOST_CHECK_THROW(LB::get_tau(), exception); + BOOST_CHECK_THROW(LB::get_kT(), exception); + BOOST_CHECK_THROW(LB::get_pressure_tensor(), exception); + 
BOOST_CHECK_THROW(LB::get_force_to_be_applied({-10., -10., -10.}), + std::runtime_error); + // coupling, interpolation, boundaries + BOOST_CHECK_THROW(lb_lbcoupling_get_rng_state(), std::runtime_error); + BOOST_CHECK_THROW(lb_lbcoupling_set_rng_state(0ul), std::runtime_error); + BOOST_CHECK_THROW(lb_particle_coupling_noise(true, 0, OptionalCounter{}), + std::runtime_error); + BOOST_CHECK_THROW(lb_lbinterpolation_get_interpolated_velocity({}), + std::runtime_error); + BOOST_CHECK_THROW(lb_lbinterpolation_add_force_density({}, {}), + std::runtime_error); + BOOST_CHECK_THROW(LB::get_interpolated_velocity({}), exception); + BOOST_CHECK_THROW(LB::get_interpolated_density({}), exception); + BOOST_CHECK_THROW(LB::calc_fluid_momentum(), exception); + } + + // waLBerla and ESPResSo must agree on domain decomposition + { + boost::mpi::communicator world; + auto const has_thrown_correct_exception = test_lb_domain_mismatch_local(); + auto const n_errors = check_runtime_errors_local(); + auto const error_queue = + ErrorHandling::mpi_gather_runtime_errors_all(world.rank() == 0); + if (world.rank() == 0) { + BOOST_TEST_REQUIRE(has_thrown_correct_exception); + BOOST_REQUIRE_EQUAL(n_errors, 1); + BOOST_REQUIRE_EQUAL(error_queue.size(), 1); + auto const what_ref = std::string("MPI rank 0: left ESPResSo: [0, 0, 0], " + "left waLBerla: [0, 0, 0]"); + for (auto const &error : error_queue) { + auto const error_what = error.what().substr(1, what_ref.size()); + BOOST_CHECK_EQUAL(error_what, what_ref); + } + } + } +} + +int main(int argc, char **argv) { + espresso::system = std::make_unique(argc, argv); + espresso::system->set_box_l(params.box_dimensions); + espresso::system->set_time_step(params.time_step); + espresso::system->set_skin(params.skin); + + boost::mpi::communicator world; + assert(world.size() <= 2); + + return boost::unit_test::unit_test_main(init_unit_test, argc, argv); +} + +#else // WALBERLA +int main(int argc, char **argv) {} +#endif diff --git 
a/src/core/unit_tests/particle_management.hpp b/src/core/unit_tests/particle_management.hpp new file mode 100644 index 00000000000..faf3d9565db --- /dev/null +++ b/src/core/unit_tests/particle_management.hpp @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2021-2023 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef ESPRESSO_SRC_CORE_UNIT_TESTS_PARTICLE_MANAGEMENT_HPP +#define ESPRESSO_SRC_CORE_UNIT_TESTS_PARTICLE_MANAGEMENT_HPP + +#include "Particle.hpp" +#include "cells.hpp" + +#include +#include + +inline auto copy_particle_to_head_node(boost::mpi::communicator const &comm, + int p_id) { + boost::optional result{}; + auto p = ::cell_structure.get_local_particle(p_id); + if (p and not p->is_ghost()) { + if (comm.rank() == 0) { + result = *p; + } else { + comm.send(0, p_id, *p); + } + } + if (comm.rank() == 0 and not result) { + Particle p{}; + comm.recv(boost::mpi::any_source, p_id, p); + result = p; + } + return result; +} + +#endif // ESPRESSO_SRC_CORE_UNIT_TESTS_PARTICLE_MANAGEMENT_HPP diff --git a/src/core/virtual_sites/CMakeLists.txt b/src/core/virtual_sites/CMakeLists.txt index 81a2cc09fcd..13401606e58 100644 --- a/src/core/virtual_sites/CMakeLists.txt +++ b/src/core/virtual_sites/CMakeLists.txt @@ -19,7 +19,5 @@ target_sources( espresso_core - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/lb_inertialess_tracers.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/lb_inertialess_tracers_cuda_interface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/VirtualSitesInertialessTracers.cpp + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/VirtualSitesInertialessTracers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/VirtualSitesRelative.cpp) diff --git a/src/core/virtual_sites/VirtualSites.hpp b/src/core/virtual_sites/VirtualSites.hpp index 8a80741e201..0d172059d22 100644 --- a/src/core/virtual_sites/VirtualSites.hpp +++ b/src/core/virtual_sites/VirtualSites.hpp @@ -51,7 +51,7 @@ class VirtualSites { /** Back-transfer forces (and torques) to non-virtual particles. */ virtual void back_transfer_forces_and_torques() const {} /** @brief Called after force calculation (and before rattle/shake) */ - virtual void after_force_calc() {} + virtual void after_force_calc(double) {} virtual void after_lb_propagation(double) {} /** @brief Pressure contribution. */ virtual Utils::Matrix pressure_tensor() const { return {}; } diff --git a/src/core/virtual_sites/VirtualSitesInertialessTracers.cpp b/src/core/virtual_sites/VirtualSitesInertialessTracers.cpp index f22415d1499..6c06d9d6657 100644 --- a/src/core/virtual_sites/VirtualSitesInertialessTracers.cpp +++ b/src/core/virtual_sites/VirtualSitesInertialessTracers.cpp @@ -23,43 +23,85 @@ #include "VirtualSitesInertialessTracers.hpp" #include "cells.hpp" -#include "communication.hpp" #include "errorhandling.hpp" +#include "forces.hpp" #include "grid_based_algorithms/lb_interface.hpp" -#include "virtual_sites/lb_inertialess_tracers.hpp" +#include "grid_based_algorithms/lb_interpolation.hpp" +#include "grid_based_algorithms/lb_particle_coupling.hpp" +#include "integrate.hpp" -#include +#include -static void check_no_vs_exist(char const *const message) { - if (std::any_of(cell_structure.local_particles().begin(), - cell_structure.local_particles().end(), - [](Particle const &p) { return p.is_virtual(); })) { - runtimeErrorMsg() << "Inertialess Tracers: " << message; +static bool lb_active_check() { + if 
(lattice_switch == ActiveLB::NONE) { + runtimeErrorMsg() << "LB needs to be active for inertialess tracers."; + return false; } + return true; } -void VirtualSitesInertialessTracers::after_force_calc() { - // Now the forces are computed and need to go into the LB fluid - if (lattice_switch == ActiveLB::CPU) { - IBM_ForcesIntoFluid_CPU(); - return; +void VirtualSitesInertialessTracers::after_force_calc(double time_step) { + auto const to_lb_units = + (lattice_switch == ActiveLB::NONE) ? 0. : 1. / LB::get_agrid(); + + // Distribute summed-up forces from physical particles to ghosts + init_forces_ghosts(cell_structure.ghost_particles()); + cells_update_ghosts(Cells::DATA_PART_FORCE); + + // Set to store ghost particles (ids) that have already been coupled + std::unordered_set coupled_ghost_particles; + // Apply particle forces to the LB fluid at particle positions + // For physical particles, also set particle velocity = fluid velocity + for (auto &p : cell_structure.local_particles()) { + if (!p.is_virtual()) + continue; + if (!lb_active_check()) { + return; + } + if (should_be_coupled(p, coupled_ghost_particles)) { + for (auto pos : positions_in_halo(p.pos(), box_geo)) { + add_md_force(pos * to_lb_units, -p.force(), time_step); + } + } } -#ifdef CUDA - if (lattice_switch == ActiveLB::GPU) { - IBM_ForcesIntoFluid_GPU(cell_structure.local_particles(), this_node); - if (comm_cart.size() != 1 and this_node != 0) { - check_no_vs_exist("The LB GPU method cannot integrate virtual sites when " - "more than 1 MPI ranks are used. 
The particles on MPI " - "rank >= 2 are now in an undeterminate state."); + for (auto const &p : cell_structure.ghost_particles()) { + if (!p.is_virtual()) + continue; + if (!lb_active_check()) { + return; + } + if (should_be_coupled(p, coupled_ghost_particles)) { + for (auto pos : positions_in_halo(p.pos(), box_geo)) { + add_md_force(pos * to_lb_units, -p.force(), time_step); + } } - return; } -#endif - check_no_vs_exist("No LB method was active but virtual sites present."); + + // Clear ghost forces to avoid double counting later + init_forces_ghosts(cell_structure.ghost_particles()); } void VirtualSitesInertialessTracers::after_lb_propagation(double time_step) { - IBM_UpdateParticlePositions(cell_structure.local_particles(), time_step, - this_node); + auto const to_md_units = + (lattice_switch == ActiveLB::NONE) ? 0. : LB::get_lattice_speed(); + + // Advect particles + for (auto &p : cell_structure.local_particles()) { + if (!p.is_virtual()) + continue; + if (!lb_active_check()) { + return; + } + p.v() = lb_lbinterpolation_get_interpolated_velocity(p.pos()) * to_md_units; + for (unsigned int i = 0; i < 3; i++) { + if (!p.is_fixed_along(i)) { + p.pos()[i] += p.v()[i] * time_step; + } + } + // Verlet list update check + if ((p.pos() - p.pos_at_last_verlet_update()).norm2() > skin * skin) { + cell_structure.set_resort_particles(Cells::RESORT_LOCAL); + } + } } #endif // VIRTUAL_SITES_INERTIALESS_TRACERS diff --git a/src/core/virtual_sites/VirtualSitesInertialessTracers.hpp b/src/core/virtual_sites/VirtualSitesInertialessTracers.hpp index 92abc09d7b7..0fbc25bb1be 100644 --- a/src/core/virtual_sites/VirtualSitesInertialessTracers.hpp +++ b/src/core/virtual_sites/VirtualSitesInertialessTracers.hpp @@ -29,7 +29,7 @@ * instantaneously transferred to the fluid */ class VirtualSitesInertialessTracers : public VirtualSites { - void after_force_calc() override; + void after_force_calc(double time_step) override; void after_lb_propagation(double time_step) override; }; diff 
--git a/src/core/virtual_sites/lb_inertialess_tracers.cpp b/src/core/virtual_sites/lb_inertialess_tracers.cpp deleted file mode 100644 index 2fd0015a73c..00000000000 --- a/src/core/virtual_sites/lb_inertialess_tracers.cpp +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -/// \file -/// \brief Main of the Bayreuth Immersed-Boundary implementation - -#include "config/config.hpp" - -#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS - -#include "virtual_sites/lb_inertialess_tracers.hpp" - -#include "Particle.hpp" -#include "cells.hpp" -#include "grid.hpp" -#include "grid_based_algorithms/lb.hpp" -#include "grid_based_algorithms/lb_boundaries.hpp" -#include "grid_based_algorithms/lb_interface.hpp" -#include "grid_based_algorithms/lb_particle_coupling.hpp" -#include "integrate.hpp" -#include "lb_inertialess_tracers_cuda_interface.hpp" - -#include -#include -#include - -#include -#include - -void CoupleIBMParticleToFluid(Particle const &p, Utils::Vector3d const &pos); -void ParticleVelocitiesFromLB_CPU(); -bool IsHalo(std::size_t indexCheck); - -static bool *isHaloCache = nullptr; - -/** Put the calculated force stored on the ibm particles into the fluid by - * updating the @ref lbfields structure. - * Called from the integration loop right after the forces have been - * calculated. 
- */ -void IBM_ForcesIntoFluid_CPU() { - // Update the forces on the ghost particles - cell_structure.ghosts_update(Cells::DATA_PART_FORCE); - - // Loop over local cells - for (auto &p : cell_structure.local_particles()) { - if (p.is_virtual()) { - CoupleIBMParticleToFluid(p, p.pos()); - } - } - - for (auto &p : cell_structure.ghost_particles()) { - // for ghost particles we have to check if they lie - // in the range of the local lattice nodes - if (in_local_halo(p.pos())) { - if (p.is_virtual()) { - CoupleIBMParticleToFluid(p, p.pos()); - } - } - } -} - -/** Interpolate LB velocity at the particle positions and propagate the - * particles. - * Called from the integration loop right after the LB update. - */ -void IBM_UpdateParticlePositions(ParticleRange const &particles, - double time_step, int this_node) { - // Get velocities - if (lattice_switch == ActiveLB::CPU) - ParticleVelocitiesFromLB_CPU(); -#ifdef CUDA - if (lattice_switch == ActiveLB::GPU) - ParticleVelocitiesFromLB_GPU(particles, this_node); -#endif - - // Euler integrator - for (auto &p : particles) { - if (p.is_virtual()) { - for (unsigned int axis = 0; axis < 3; axis++) { -#ifdef EXTERNAL_FORCES - if (not p.is_fixed_along(axis)) -#endif - p.pos()[axis] += p.v()[axis] * time_step; - } - } - } - - if (cell_structure.check_resort_required(particles, skin)) { - cell_structure.set_resort_particles(Cells::RESORT_LOCAL); - } -} - -/** Put the momentum of a given particle into the LB fluid. 
*/ -void CoupleIBMParticleToFluid(Particle const &p, Utils::Vector3d const &pos) { - // Convert units from MD to LB - auto const delta_j = p.force() * Utils::int_pow<4>(lbpar.tau) / lbpar.agrid; - - // Get indices and weights of affected nodes using discrete delta function - Utils::Vector node_index{}; - Utils::Vector6d delta{}; - lblattice.map_position_to_lattice(pos, node_index, delta); - - // Loop over all affected nodes - for (int z = 0; z < 2; z++) { - for (int y = 0; y < 2; y++) { - for (int x = 0; x < 2; x++) { - // Do not put force into a halo node - if (!IsHalo(static_cast(node_index[(z * 2 + y) * 2 + x]))) { - // Add force into the lbfields structure - auto &local_f = - lbfields[node_index[(z * 2 + y) * 2 + x]].force_density; - - local_f += - delta[3 * x + 0] * delta[3 * y + 1] * delta[3 * z + 2] * delta_j; - } - } - } - } -} - -/** Calculate the LB fluid velocity at a particle position. - * Very similar to the velocity interpolation done in standard ESPResSo, - * except that we add the f/2 contribution, cf. @cite guo02a. - * The fluid velocity is obtained by linear interpolation, - * cf. eq. (11) in @cite ahlrichs99a. 
- */ -template -Utils::Vector3d GetIBMInterpolatedVelocity(Utils::Vector3d const &pos) { - auto const f_ext = - lbpar.ext_force_density * Utils::sqr(lbpar.agrid * lbpar.tau); - - /* determine elementary lattice cell surrounding the particle - and the relative position of the particle in this cell */ - Utils::Vector node_index{}; - Utils::Vector6d delta{}; - lblattice.map_position_to_lattice(pos, node_index, delta); - - // This for the f/2 contribution to the velocity - Utils::Vector3d force_added = {}; - Utils::Vector3d interpolated_u = {}; - - for (int z = 0; z < 2; z++) { - for (int y = 0; y < 2; y++) { - for (int x = 0; x < 2; x++) { - auto const index = node_index[(z * 2 + y) * 2 + x]; - auto const local_delta = - delta[3 * x + 0] * delta[3 * y + 1] * delta[3 * z + 2]; - const auto &f = lbfields[index].force_density_buf; - - double local_density; - Utils::Vector3d local_j; - - // This can be done more easily without copying the code twice. - // We probably can even set the boundary velocity directly. -#ifdef LB_BOUNDARIES - if (lbfields[index].boundary) { - if (ReturnVelocity) { - local_density = lbpar.density; - auto const i = lbfields[index].boundary - 1; - local_j = lbpar.density * LBBoundaries::lbboundaries[i]->velocity(); - } - } else -#endif - { - auto const modes = lb_calc_modes(static_cast(index), lbfluid); - local_density = lbpar.density + modes[0]; - - if (ReturnVelocity) { - // Add the +f/2 contribution!! - local_j[0] = modes[1] + f[0] / 2.; - local_j[1] = modes[2] + f[1] / 2.; - local_j[2] = modes[3] + f[2] / 2.; - } else { - // Keep track of the forces that we added to the fluid. - // This is necessary for communication because this part is executed - // for real and ghost particles. - // Later on we sum the real and ghost contributions. - force_added += local_delta * (f - f_ext) / (2. 
* local_density); - } - } - - // Interpolate velocity - if (ReturnVelocity) { - interpolated_u += local_j * (local_delta / local_density); - } - } - } - } - - auto const unit_conversion = lbpar.agrid / lbpar.tau; - if (ReturnVelocity) { - return interpolated_u * unit_conversion; - } - return force_added * unit_conversion; -} - -/** Build a cache structure which contains a flag for each LB node whether - * that node is a halo node or not. - */ -bool IsHalo(std::size_t indexCheck) { - // First call --> build cache - if (isHaloCache == nullptr) { - isHaloCache = new bool[lblattice.halo_grid_volume]; - // Assume everything is a halo and correct in the next step - for (int i = 0; i < lblattice.halo_grid_volume; i++) - isHaloCache[i] = true; - // Loop through and check where indexCheck occurs - auto index = lblattice.halo_offset; - for (int z = 1; z <= lblattice.grid[2]; z++) { - for (int y = 1; y <= lblattice.grid[1]; y++) { - for (int x = 1; x <= lblattice.grid[0]; x++) { - isHaloCache[index] = false; - ++index; - } - index += 2; /* skip halo region */ - } - index += 2 * lblattice.halo_grid[0]; /* skip halo region */ - } - } - - // Return - return isHaloCache[indexCheck]; -} - -/** - * @brief Check if a position is within the local box + halo. - * - * @param pos Position to check - * @param halo Halo - * - * @return True iff the point is inside of the box up to halo. - */ -inline bool in_local_domain(Utils::Vector3d const &pos, double halo = 0.) { - auto const halo_vec = Utils::Vector3d::broadcast(halo); - - return in_box( - pos, {local_geo.my_left() - halo_vec, local_geo.my_right() + halo_vec}); -} - -/** Get particle velocities from LB and set the velocity field in the - * particles data structure. - */ -void ParticleVelocitiesFromLB_CPU() { - std::unordered_set coupled_ghost_particles; - - // Loop over particles in local cells. - // Here all contributions are included: velocity, external force and - // particle force. 
- for (auto &p : cell_structure.local_particles()) { - if (p.is_virtual() and should_be_coupled(p, coupled_ghost_particles)) { - for (auto pos : positions_in_halo(p.pos(), box_geo)) { - if (in_local_domain(pos)) { - p.force() = GetIBMInterpolatedVelocity(pos); - break; - } - } - } - } - // Loop over particles in ghost cells - // Here we only add the particle forces stemming from the ghosts - for (auto &p : cell_structure.ghost_particles()) { - if (p.is_virtual() and should_be_coupled(p, coupled_ghost_particles)) { - for (auto pos : positions_in_halo(p.pos(), box_geo)) { - if (in_local_domain(pos)) { - p.force() = GetIBMInterpolatedVelocity(pos); - break; - } - } - } else { - p.force() = {}; - } - } - - // Now the local particles contain a velocity (stored in the force field) - // and the ghosts contain the rest of the velocity in their respective force - // fields. - // We need to add these. Since we have stored them in the force, not the - // velocity fields, we can use the standard force communicator and then - // transfer to the velocity afterwards. - // Note that this overwrites the actual force which would be a problem for - // real particles. - // This could be solved by keeping a backup of the local forces before this - // operation is attempted. - cell_structure.ghosts_reduce_forces(); - - // Transfer to velocity field - for (auto &p : cell_structure.local_particles()) { - if (p.is_virtual()) { - p.v() = p.force(); - } - } -} -#endif // VIRTUAL_SITES_INERTIALESS_TRACERS diff --git a/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu b/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu deleted file mode 100644 index a1385036be2..00000000000 --- a/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu +++ /dev/null @@ -1,408 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. 
- * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -// This is an internal file of the IMMERSED BOUNDARY implementation -// It should not be included by any main ESPResSo routines -// Functions to be exported for ESPResSo are in ibm_main.hpp - -#include "config/config.hpp" - -#if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) && defined(CUDA) - -#include "virtual_sites/lb_inertialess_tracers.hpp" -#include "virtual_sites/lb_inertialess_tracers_cuda_interface.hpp" - -#include "Particle.hpp" -#include "cuda_interface.hpp" -#include "cuda_utils.cuh" -#include "grid_based_algorithms/lb_boundaries.hpp" -#include "grid_based_algorithms/lbgpu.cuh" -#include "grid_based_algorithms/lbgpu.hpp" - -#include - -#include - -// Other functions for internal use -void InitCUDA_IBM(std::size_t numParticles); - -// Our own global variables -IBM_CUDA_ParticleDataInput *IBM_ParticleDataInput_device = nullptr; -IBM_CUDA_ParticleDataOutput *IBM_ParticleDataOutput_device = nullptr; -bool IBM_initialized = false; -std::size_t IBM_numParticlesCache = 0; // To detect a change in particle number - // which requires reallocation of memory - -// These variables are defined in lbgpu_cuda.cu, but we also want them here -extern LB_node_force_density_gpu node_f; -extern LB_nodes_gpu *current_nodes; - -// These variables are static in lbgpu_cuda.cu, so we need to duplicate them -// here. They are initialized in ForcesIntoFluid. 
The pointers are on the host, -// but point into device memory. -LB_parameters_gpu *para_gpu = nullptr; -float *lb_boundary_velocity_IBM = nullptr; - -static constexpr unsigned int threads_per_block = 64; - -__global__ void -ForcesIntoFluid_Kernel(const IBM_CUDA_ParticleDataInput *const particle_input, - std::size_t number_of_particles, - LB_node_force_density_gpu node_f, - const LB_parameters_gpu *const paraP) { - const unsigned int particleIndex = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - const LB_parameters_gpu &para = *paraP; - - if (particleIndex < number_of_particles && - particle_input[particleIndex].is_virtual) { - // MD to LB units: mass is not affected, length are scaled by agrid, times - // by para.tau - const float factor = 1 / para.agrid * para.tau * para.tau; - const float particleForce[3] = {particle_input[particleIndex].f[0] * factor, - particle_input[particleIndex].f[1] * factor, - particle_input[particleIndex].f[2] * - factor}; - const float pos[3] = {particle_input[particleIndex].pos[0], - particle_input[particleIndex].pos[1], - particle_input[particleIndex].pos[2]}; - - // First part is the same as for interpolation --> merge into a single - // function - float temp_delta[6]; - float delta[8]; - int my_left[3]; - unsigned int node_index[8]; - for (int i = 0; i < 3; ++i) { - const float scaledpos = pos[i] / para.agrid - 0.5f; - my_left[i] = static_cast(floorf(scaledpos)); - temp_delta[3 + i] = scaledpos - static_cast(my_left[i]); - temp_delta[i] = 1.f - temp_delta[3 + i]; - } - - delta[0] = temp_delta[0] * temp_delta[1] * temp_delta[2]; - delta[1] = temp_delta[3] * temp_delta[1] * temp_delta[2]; - delta[2] = temp_delta[0] * temp_delta[4] * temp_delta[2]; - delta[3] = temp_delta[3] * temp_delta[4] * temp_delta[2]; - delta[4] = temp_delta[0] * temp_delta[1] * temp_delta[5]; - delta[5] = temp_delta[3] * temp_delta[1] * temp_delta[5]; - delta[6] = temp_delta[0] * temp_delta[4] * temp_delta[5]; - delta[7] = 
temp_delta[3] * temp_delta[4] * temp_delta[5]; - - // modulo for negative numbers is strange at best, shift to make sure we are - // positive - auto const x = static_cast(my_left[0] + para.dim[0]); - auto const y = static_cast(my_left[1] + para.dim[1]); - auto const z = static_cast(my_left[2] + para.dim[2]); - - node_index[0] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) + - para.dim[0] * para.dim[1] * (z % para.dim[2]); - node_index[1] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) + - para.dim[0] * para.dim[1] * (z % para.dim[2]); - node_index[2] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) + - para.dim[0] * para.dim[1] * (z % para.dim[2]); - node_index[3] = (x + 1) % para.dim[0] + - para.dim[0] * ((y + 1) % para.dim[1]) + - para.dim[0] * para.dim[1] * (z % para.dim[2]); - node_index[4] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) + - para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); - node_index[5] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) + - para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); - node_index[6] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) + - para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); - node_index[7] = (x + 1) % para.dim[0] + - para.dim[0] * ((y + 1) % para.dim[1]) + - para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); - - for (int i = 0; i < 8; ++i) { - // Atomic add is essential because this runs in parallel! 
- atomicAdd(&(node_f.force_density[node_index[i]][0]), - (particleForce[0] * delta[i])); - atomicAdd(&(node_f.force_density[node_index[i]][1]), - (particleForce[1] * delta[i])); - atomicAdd(&(node_f.force_density[node_index[i]][2]), - (particleForce[2] * delta[i])); - } - } -} - -__global__ void ParticleVelocitiesFromLB_Kernel( - LB_nodes_gpu n_curr, - const IBM_CUDA_ParticleDataInput *const particles_input, - std::size_t number_of_particles, - IBM_CUDA_ParticleDataOutput *const particles_output, - LB_node_force_density_gpu node_f, const float *const lb_boundary_velocity, - const LB_parameters_gpu *const paraP) { - - const unsigned int particleIndex = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - - const LB_parameters_gpu &para = *paraP; - - if (particleIndex < number_of_particles && - particles_input[particleIndex].is_virtual) { - - // Get position - float pos[3] = {particles_input[particleIndex].pos[0], - particles_input[particleIndex].pos[1], - particles_input[particleIndex].pos[2]}; - float v[3] = {0}; - - // This part is copied from get_interpolated_velocity - // + we add the force + we consider boundaries - - float temp_delta[6]; - float delta[8]; - int my_left[3]; - unsigned int node_index[8]; - Utils::Array mode; -#pragma unroll - for (int i = 0; i < 3; ++i) { - const float scaledpos = pos[i] / para.agrid - 0.5f; - my_left[i] = static_cast(floorf(scaledpos)); - temp_delta[3 + i] = scaledpos - static_cast(my_left[i]); - temp_delta[i] = 1.f - temp_delta[3 + i]; - } - - delta[0] = temp_delta[0] * temp_delta[1] * temp_delta[2]; - delta[1] = temp_delta[3] * temp_delta[1] * temp_delta[2]; - delta[2] = temp_delta[0] * temp_delta[4] * temp_delta[2]; - delta[3] = temp_delta[3] * temp_delta[4] * temp_delta[2]; - delta[4] = temp_delta[0] * temp_delta[1] * temp_delta[5]; - delta[5] = temp_delta[3] * temp_delta[1] * temp_delta[5]; - delta[6] = temp_delta[0] * temp_delta[4] * temp_delta[5]; - delta[7] = temp_delta[3] * temp_delta[4] * 
temp_delta[5]; - - // modulo for negative numbers is strange at best, shift to make sure we are - // positive - auto const x = static_cast(my_left[0] + para.dim[0]); - auto const y = static_cast(my_left[1] + para.dim[1]); - auto const z = static_cast(my_left[2] + para.dim[2]); - - node_index[0] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) + - para.dim[0] * para.dim[1] * (z % para.dim[2]); - node_index[1] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) + - para.dim[0] * para.dim[1] * (z % para.dim[2]); - node_index[2] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) + - para.dim[0] * para.dim[1] * (z % para.dim[2]); - node_index[3] = (x + 1) % para.dim[0] + - para.dim[0] * ((y + 1) % para.dim[1]) + - para.dim[0] * para.dim[1] * (z % para.dim[2]); - node_index[4] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) + - para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); - node_index[5] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) + - para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); - node_index[6] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) + - para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); - node_index[7] = (x + 1) % para.dim[0] + - para.dim[0] * ((y + 1) % para.dim[1]) + - para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); - - for (int i = 0; i < 8; ++i) { - double local_rho; - double local_j[3]; -#ifdef LB_BOUNDARIES_GPU - if (n_curr.boundary[node_index[i]]) { - // Boundary node - auto const boundary_index = - static_cast(n_curr.boundary[node_index[i]]); - - // lb_boundary_velocity is given in MD units --> convert to LB and - // reconvert back at the end of this function - local_rho = para.rho; - local_j[0] = - para.rho * lb_boundary_velocity[3 * (boundary_index - 1) + 0]; - local_j[1] = - para.rho * lb_boundary_velocity[3 * (boundary_index - 1) + 1]; - local_j[2] = - para.rho * lb_boundary_velocity[3 * (boundary_index - 1) + 2]; - - } else -#endif - { - calc_mass_and_momentum_mode(mode, n_curr, 
node_index[i]); - local_rho = para.rho + mode[0]; - - // Add the +f/2 contribution!! - local_j[0] = mode[1] + node_f.force_density_buf[node_index[i]][0] / 2.f; - local_j[1] = mode[2] + node_f.force_density_buf[node_index[i]][1] / 2.f; - local_j[2] = mode[3] + node_f.force_density_buf[node_index[i]][2] / 2.f; - } - - // Interpolate velocity - v[0] += static_cast(delta[i] * local_j[0] / local_rho); - v[1] += static_cast(delta[i] * local_j[1] / local_rho); - v[2] += static_cast(delta[i] * local_j[2] / local_rho); - } - - // Rescale and store output - particles_output[particleIndex].v[0] = v[0] * para.agrid / para.tau; - particles_output[particleIndex].v[1] = v[1] * para.agrid / para.tau; - particles_output[particleIndex].v[2] = v[2] * para.agrid / para.tau; - } -} - -__global__ void ResetLBForces_Kernel(LB_node_force_density_gpu node_f, - const LB_parameters_gpu *const paraP) { - - const std::size_t index = blockIdx.y * gridDim.x * blockDim.x + - blockDim.x * blockIdx.x + threadIdx.x; - const LB_parameters_gpu &para = *paraP; - - if (index < para.number_of_nodes) { - const float force_factor = powf(para.agrid, 2) * para.tau * para.tau; - if (para.external_force_density) { - node_f.force_density[index][0] = para.ext_force_density[0] * force_factor; - node_f.force_density[index][1] = para.ext_force_density[1] * force_factor; - node_f.force_density[index][2] = para.ext_force_density[2] * force_factor; - } else { - node_f.force_density[index] = {}; - } - } -} - -/** Transfer particle forces into the LB fluid. - * Called from @ref integrate. - * This must be the first CUDA-IBM function to be called because it also does - * some initialization. 
- */ -void IBM_ForcesIntoFluid_GPU(ParticleRange const &particles, int this_node) { - // This function does - // (1) Gather forces from all particles via MPI - // (2) Copy forces to the GPU - // (3) interpolate on the LBM grid and spread forces - - auto const numParticles = gpu_get_particle_pointer().size(); - - // Storage only needed on head node - if (this_node == 0 && - (IBM_ParticleDataInput_host.empty() || !IBM_initialized || - numParticles != IBM_numParticlesCache)) - InitCUDA_IBM(numParticles); - - // We gather particle positions and forces from all nodes - IBM_cuda_mpi_get_particles(particles); - - // GPU only on head node - if (this_node == 0 && numParticles > 0) { - - // Copy data to device - cuda_safe_mem(cudaMemcpy(IBM_ParticleDataInput_device, - IBM_ParticleDataInput_host.data(), - numParticles * sizeof(IBM_CUDA_ParticleDataInput), - cudaMemcpyHostToDevice)); - - // Kernel call for spreading the forces on the LB grid - dim3 dim_grid = calculate_dim_grid(static_cast(numParticles), 4, - threads_per_block); - KERNELCALL(ForcesIntoFluid_Kernel, dim_grid, threads_per_block, - IBM_ParticleDataInput_device, numParticles, node_f, para_gpu); - } -} - -void InitCUDA_IBM(std::size_t const numParticles) { - - // Check if we have to delete - if (!IBM_ParticleDataInput_host.empty()) { - IBM_ParticleDataInput_host.clear(); - IBM_ParticleDataOutput_host.clear(); - cuda_safe_mem(cudaFree(IBM_ParticleDataInput_device)); - cuda_safe_mem(cudaFree(IBM_ParticleDataOutput_device)); - cuda_safe_mem(cudaFree(lb_boundary_velocity_IBM)); - } - - // Back and forth communication of positions and velocities - IBM_ParticleDataInput_host.resize(numParticles); - IBM_ParticleDataOutput_host.resize(numParticles); - cuda_safe_mem(cudaMalloc((void **)&IBM_ParticleDataInput_device, - numParticles * sizeof(IBM_CUDA_ParticleDataInput))); - cuda_safe_mem(cudaMalloc((void **)&IBM_ParticleDataOutput_device, - numParticles * sizeof(IBM_CUDA_ParticleDataOutput))); - - // Use LB parameters - 
lb_get_para_pointer(&para_gpu); - - // Copy boundary velocities to the GPU - // First put them into correct format -#ifdef LB_BOUNDARIES_GPU - auto *host_lb_boundary_velocity = - new float[3 * (LBBoundaries::lbboundaries.size() + 1)]; - - for (int n = 0; n < LBBoundaries::lbboundaries.size(); n++) { - host_lb_boundary_velocity[3 * n + 0] = - static_cast(LBBoundaries::lbboundaries[n]->velocity()[0]); - host_lb_boundary_velocity[3 * n + 1] = - static_cast(LBBoundaries::lbboundaries[n]->velocity()[1]); - host_lb_boundary_velocity[3 * n + 2] = - static_cast(LBBoundaries::lbboundaries[n]->velocity()[2]); - } - - host_lb_boundary_velocity[3 * LBBoundaries::lbboundaries.size() + 0] = 0.0f; - host_lb_boundary_velocity[3 * LBBoundaries::lbboundaries.size() + 1] = 0.0f; - host_lb_boundary_velocity[3 * LBBoundaries::lbboundaries.size() + 2] = 0.0f; - - cuda_safe_mem( - cudaMalloc((void **)&lb_boundary_velocity_IBM, - 3 * LBBoundaries::lbboundaries.size() * sizeof(float))); - cuda_safe_mem( - cudaMemcpy(lb_boundary_velocity_IBM, host_lb_boundary_velocity, - 3 * LBBoundaries::lbboundaries.size() * sizeof(float), - cudaMemcpyHostToDevice)); - - delete[] host_lb_boundary_velocity; -#endif - - IBM_numParticlesCache = numParticles; - IBM_initialized = true; -} - -/** Call a kernel function to interpolate the velocity at each IBM particle's - * position. Store velocity in the particle data structure. 
- */ -void ParticleVelocitiesFromLB_GPU(ParticleRange const &particles, - int this_node) { - // This function performs three steps: - // (1) interpolate velocities on GPU - // (2) transfer velocities back to CPU - // (3) spread velocities to local cells via MPI - - auto const numParticles = gpu_get_particle_pointer().size(); - - // GPU only on head node - if (this_node == 0 && numParticles > 0) { - // Kernel call - dim3 dim_grid = calculate_dim_grid(static_cast(numParticles), 4, - threads_per_block); - KERNELCALL(ParticleVelocitiesFromLB_Kernel, dim_grid, threads_per_block, - *current_nodes, IBM_ParticleDataInput_device, numParticles, - IBM_ParticleDataOutput_device, node_f, lb_boundary_velocity_IBM, - para_gpu); - - // Copy velocities from device to host - cuda_safe_mem(cudaMemcpy(IBM_ParticleDataOutput_host.data(), - IBM_ParticleDataOutput_device, - numParticles * sizeof(IBM_CUDA_ParticleDataOutput), - cudaMemcpyDeviceToHost)); - } - - // Scatter to all nodes - IBM_cuda_mpi_send_velocities(particles); -} - -#endif diff --git a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.cpp b/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.cpp deleted file mode 100644 index 53ab47eb9eb..00000000000 --- a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -// This is an internal file of the IMMERSED BOUNDARY implementation -// It should not be included by any main ESPResSo routines -// Functions to be exported for ESPResSo are in ibm_main.hpp - -#include "config/config.hpp" - -#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS - -#include "Particle.hpp" -#include "communication.hpp" -#include "grid.hpp" -#include "integrate.hpp" -#include "serialization/ibm_cuda_particle_velocities_input.hpp" -#include "virtual_sites/lb_inertialess_tracers_cuda_interface.hpp" - -#include -#include - -#include - -// Variables for communication -std::vector IBM_ParticleDataInput_host = {}; -std::vector IBM_ParticleDataOutput_host = {}; - -static void pack_particles(ParticleRange const &particles, - std::vector &buffer) { - - int i = 0; - for (auto const &part : particles) { - auto const pos = folded_position(part.pos(), box_geo); - - buffer[i].pos[0] = static_cast(pos[0]); - buffer[i].pos[1] = static_cast(pos[1]); - buffer[i].pos[2] = static_cast(pos[2]); - - buffer[i].f[0] = static_cast(part.force()[0]); - buffer[i].f[1] = static_cast(part.force()[1]); - buffer[i].f[2] = static_cast(part.force()[2]); - - buffer[i].is_virtual = part.is_virtual(); - - i++; - } -} - -/** Gather particle positions on the head node in order to communicate them - * to GPU. We transfer all particles (real and virtual), but actually we would - * only need the virtual ones. Room for improvement... - * Analogous to @ref cuda_mpi_get_particles. 
- */ -void IBM_cuda_mpi_get_particles(ParticleRange const &particles) { - auto const n_part = particles.size(); - - if (this_node > 0) { - static std::vector buffer; - buffer.resize(n_part); - /* pack local parts into buffer */ - pack_particles(particles, buffer); - - Utils::Mpi::gather_buffer(buffer, comm_cart); - } else { - /* Pack own particles */ - pack_particles(particles, IBM_ParticleDataInput_host); - - Utils::Mpi::gather_buffer(IBM_ParticleDataInput_host, comm_cart); - } -} - -static void set_velocities(ParticleRange const &particles, - std::vector &buffer) { - int i = 0; - for (auto &part : particles) { - if (part.is_virtual()) { - for (int j = 0; j < 3; j++) - part.v()[j] = static_cast(buffer[i].v[j]); - } - i++; - } -} - -/** Particle velocities have been communicated from GPU, now transmit to all - * nodes. Analogous to @ref cuda_mpi_send_forces. - */ -void IBM_cuda_mpi_send_velocities(ParticleRange const &particles) { - auto const n_part = static_cast(particles.size()); - - if (this_node > 0) { - static std::vector buffer; - /* Alloc buffer */ - buffer.resize(n_part); - - Utils::Mpi::scatter_buffer(buffer.data(), n_part, comm_cart); - - set_velocities(particles, buffer); - } else { - /* Scatter forces */ - Utils::Mpi::scatter_buffer(IBM_ParticleDataOutput_host.data(), n_part, - comm_cart); - - set_velocities(particles, IBM_ParticleDataOutput_host); - } -} - -#endif diff --git a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.hpp b/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.hpp deleted file mode 100644 index ddac4997d20..00000000000 --- a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. 
- * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -// ******* -// This is an internal file of the IMMERSED BOUNDARY implementation -// It should not be included by any main ESPResSo routines -// Functions to be exported for ESPResSo are in ibm_main.hpp - -#ifndef IBM_CUDA_INTERFACE_HPP -#define IBM_CUDA_INTERFACE_HPP - -#include "config/config.hpp" - -#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS - -#include "ParticleRange.hpp" - -#include - -// *********** Communication functions ******** -// Implemented in real C++, but called from the ibm_cuda.cu -void IBM_cuda_mpi_send_velocities(ParticleRange const &particles); -void IBM_cuda_mpi_get_particles(ParticleRange const &particles); - -void ParticleVelocitiesFromLB_GPU(ParticleRange const &particles, - int this_node); - -// ******** data types for CUDA and MPI communication ****** -struct IBM_CUDA_ParticleDataInput { - float pos[3]; - float f[3]; - bool is_virtual; -}; - -struct IBM_CUDA_ParticleDataOutput { - float v[3]; -}; - -// ******** global variables for CUDA and MPI communication ****** -extern std::vector IBM_ParticleDataInput_host; -extern std::vector IBM_ParticleDataOutput_host; - -#endif - -#endif diff --git a/src/python/espressomd/CMakeLists.txt b/src/python/espressomd/CMakeLists.txt index 9cde375d117..14b3031c224 100644 --- a/src/python/espressomd/CMakeLists.txt +++ b/src/python/espressomd/CMakeLists.txt @@ -47,6 +47,7 @@ file(GLOB 
cython_AUX *.py) set(cython_AUX "${cython_AUX}" CACHE INTERNAL "cython_AUX") add_subdirectory(io) +add_subdirectory(detail) list(REMOVE_DUPLICATES cython_SRC) @@ -61,6 +62,7 @@ target_compile_options( $<$:-Wno-cpp> $<$:-Wno-strict-aliasing> $<$:-Wno-maybe-uninitialized> + $<$:-Wno-volatile> $<$:-Wno-sometimes-uninitialized> $<$:-Wno-\#warnings> -Wno-unused-variable) diff --git a/src/script_interface/lbboundaries/CMakeLists.txt b/src/python/espressomd/detail/CMakeLists.txt similarity index 81% rename from src/script_interface/lbboundaries/CMakeLists.txt rename to src/python/espressomd/detail/CMakeLists.txt index c98d7a553e5..926a01b5f2e 100644 --- a/src/script_interface/lbboundaries/CMakeLists.txt +++ b/src/python/espressomd/detail/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2020-2022 The ESPResSo project +# Copyright (C) 2023 The ESPResSo project # # This file is part of ESPResSo. # @@ -17,5 +17,5 @@ # along with this program. If not, see . # -target_sources(espresso_script_interface - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/initialize.cpp) +configure_file(__init__.py __init__.py COPYONLY) +configure_file(walberla.py walberla.py COPYONLY) diff --git a/src/python/espressomd/detail/__init__.py b/src/python/espressomd/detail/__init__.py new file mode 100644 index 00000000000..0d4274ef1c8 --- /dev/null +++ b/src/python/espressomd/detail/__init__.py @@ -0,0 +1,18 @@ +# +# Copyright (C) 2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# diff --git a/src/python/espressomd/detail/walberla.py b/src/python/espressomd/detail/walberla.py new file mode 100644 index 00000000000..6ec64dc94a4 --- /dev/null +++ b/src/python/espressomd/detail/walberla.py @@ -0,0 +1,171 @@ +# +# Copyright (C) 2020-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import os +import itertools +import numpy as np + +import espressomd.shapes +import espressomd.code_features +from espressomd.script_interface import ScriptInterfaceHelper, script_interface_register + + +@script_interface_register +class LatticeWalberla(ScriptInterfaceHelper): + """ + Interface to a waLBerla lattice. 
+ """ + _so_name = "walberla::LatticeWalberla" + _so_creation_policy = "GLOBAL" + + def __init__(self, *args, **kwargs): + if not espressomd.code_features.has_features("WALBERLA"): + raise NotImplementedError("Feature WALBERLA not compiled in") + + if "sip" not in kwargs: + params = self.default_params() + params.update(kwargs) + super().__init__(*args, **params) + self._params = {k: getattr(self, k) for k in self.valid_keys()} + else: + super().__init__(**kwargs) + + def valid_keys(self): + return {"agrid", "n_ghost_layers"} + + def required_keys(self): + return self.valid_keys() + + def default_params(self): + return {} + + def get_node_indices_inside_shape(self, shape): + if not isinstance(shape, espressomd.shapes.Shape): + raise ValueError( + "Parameter 'shape' must be derived from espressomd.shapes.Shape") + agrid = self.agrid + idxs = itertools.product(*map(range, self.shape)) + for idx in idxs: + pos = (np.asarray(idx) + 0.5) * agrid + if shape.is_inside(position=pos): + yield idx + + def get_shape_bitmask(self, shape): + """Create a bitmask for the given shape.""" + if not isinstance(shape, espressomd.shapes.Shape): + raise ValueError( + "Parameter 'shape' must be derived from espressomd.shapes.Shape") + mask_flat = shape.call_method("rasterize", grid_size=self.shape, + grid_spacing=self.agrid, grid_offset=0.5) + return np.reshape(mask_flat, self.shape).astype(bool) + + +class LatticeModel: + + def save_checkpoint(self, path, binary): + tmp_path = path + ".__tmp__" + self.call_method("save_checkpoint", path=tmp_path, mode=int(binary)) + os.rename(tmp_path, path) + + def load_checkpoint(self, path, binary): + return self.call_method("load_checkpoint", path=path, mode=int(binary)) + + def get_nodes_inside_shape(self, shape=None): + """ + Provide a generator for iterating over all nodes inside the given shape. + + Parameters + ---------- + shape : :class:`espressomd.shapes.Shape` + Shape to use as filter. 
+ + """ + for idx in self.lattice.get_node_indices_inside_shape(shape): + yield self[idx] + + def get_shape_bitmask(self, shape=None): + """ + Create a bitmask for the given shape. + + Parameters + ---------- + shape : :class:`espressomd.shapes.Shape` + Shape to rasterize. + + """ + return self.lattice.get_shape_bitmask(shape=shape) + + +def get_slice_bounding_box(slices, grid_size): + shape = [] + slice_lower_corner = [] + slice_upper_corner = [] + for i in range(3): + indices = np.arange(grid_size[i]) + if isinstance(slices[i], slice): + if slices[i].step not in [None, 1]: + raise NotImplementedError( + "Slices with step != 1 are not supported") + indices = indices[slices[i]] + else: + if isinstance(slices[i], (int, np.integer)): + indices = [indices[slices[i]]] + else: + raise NotImplementedError( + "Tuple-based indexing is not supported") + if len(indices) == 0: + slice_lower_corner.append(0) + slice_upper_corner.append(0) + shape.append(0) + elif isinstance(slices[i], (int, np.integer)): + slice_lower_corner.append(indices[0]) + slice_upper_corner.append(indices[0] + 1) + else: + slice_lower_corner.append(indices[0]) + slice_upper_corner.append(indices[-1] + 1) + shape.append(len(indices)) + return {"slice_lower_corner": slice_lower_corner, + "slice_upper_corner": slice_upper_corner, + "shape": shape} + + +class VTKOutputBase(ScriptInterfaceHelper): + + def __init__(self, *args, **kwargs): + if not espressomd.code_features.has_features("WALBERLA"): + raise NotImplementedError("Feature WALBERLA not compiled in") + if "sip" not in kwargs: + params = self.default_params() + params.update(kwargs) + if isinstance(params["observables"], str): + params["observables"] = [params["observables"]] + super().__init__(*args, **params) + else: + super().__init__(**kwargs) + + def valid_observables(self): + return set(self.call_method("get_valid_observable_names")) + + def valid_keys(self): + return {"delta_N", "execution_count", "observables", "identifier", + "base_folder", 
"prefix", "enabled"} + + def default_params(self): + return {"delta_N": 0, "enabled": True, "execution_count": 0, + "base_folder": "vtk_out", "prefix": "simulation_step"} diff --git a/src/python/espressomd/electrokinetics.pxd b/src/python/espressomd/electrokinetics.pxd deleted file mode 100644 index ab268adab32..00000000000 --- a/src/python/espressomd/electrokinetics.pxd +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (C) 2010-2022 The ESPResSo project -# -# This file is part of ESPResSo. -# -# ESPResSo is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# ESPResSo is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . 
-include "myconfig.pxi" -from libcpp cimport bool - -IF ELECTROKINETICS and CUDA: - cdef extern from "grid_based_algorithms/electrokinetics.hpp": - - DEF MAX_NUMBER_OF_SPECIES = 10 - - # EK data struct - IF EK_DEBUG: - ctypedef struct EKParameters: - float agrid - float time_step - float lb_density - unsigned int dim_x - unsigned int dim_y - unsigned int dim_z - unsigned int number_of_nodes - float viscosity - float bulk_viscosity - float gamma_odd - float gamma_even - float friction - float T - float prefactor - float lb_ext_force_density[3] - unsigned int number_of_species - int reaction_species[3] - float rho_reactant_reservoir - float rho_product0_reservoir - float rho_product1_reservoir - float reaction_ct_rate - float reaction_fraction_0 - float reaction_fraction_1 - float mass_reactant - float mass_product0 - float mass_product1 - int stencil - int number_of_boundary_nodes - float fluctuation_amplitude - bool fluctuations - bool advection - bool fluidcoupling_ideal_contribution - float * charge_potential - float * j - float * lb_force_density_previous - float * j_fluc - float * rho[MAX_NUMBER_OF_SPECIES] - int species_index[MAX_NUMBER_OF_SPECIES] - float density[MAX_NUMBER_OF_SPECIES] - float D[MAX_NUMBER_OF_SPECIES] - float d[MAX_NUMBER_OF_SPECIES] - float valency[MAX_NUMBER_OF_SPECIES] - float ext_force_density[3][MAX_NUMBER_OF_SPECIES] - char * node_is_catalyst - bool es_coupling - float * charge_potential_buffer - float * electric_field - ELSE: - ctypedef struct EKParameters: - float agrid - float time_step - float lb_density - unsigned int dim_x - unsigned int dim_y - unsigned int dim_z - unsigned int number_of_nodes - float viscosity - float bulk_viscosity - float gamma_odd - float gamma_even - float friction - float T - float prefactor - float lb_ext_force_density[3] - unsigned int number_of_species - int reaction_species[3] - float rho_reactant_reservoir - float rho_product0_reservoir - float rho_product1_reservoir - float reaction_ct_rate - float 
reaction_fraction_0 - float reaction_fraction_1 - float mass_reactant - float mass_product0 - float mass_product1 - int stencil - int number_of_boundary_nodes - float fluctuation_amplitude - bool fluctuations - bool advection - bool fluidcoupling_ideal_contribution - float * charge_potential - float * j - float * lb_force_density_previous - float * rho[MAX_NUMBER_OF_SPECIES] - int species_index[MAX_NUMBER_OF_SPECIES] - float density[MAX_NUMBER_OF_SPECIES] - float D[MAX_NUMBER_OF_SPECIES] - float d[MAX_NUMBER_OF_SPECIES] - float valency[MAX_NUMBER_OF_SPECIES] - float ext_force_density[3][MAX_NUMBER_OF_SPECIES] - char * node_is_catalyst - bool es_coupling - float * charge_potential_buffer - float * electric_field - - cdef extern EKParameters ek_parameters - - # EK functions - void ek_print_parameters() - void ek_print_lbpar() - unsigned int ek_calculate_boundary_mass() - int ek_print_vtk_density(int species, char * filename) - int ek_print_vtk_flux(int species, char * filename) - int ek_print_vtk_flux_fluc(int species, char * filename) - int ek_print_vtk_flux_link(int species, char * filename) - int ek_print_vtk_potential(char * filename) - int ek_print_vtk_lbforce_density(char * filename) - int ek_lb_print_vtk_density(char * filename) - int ek_lb_print_vtk_velocity(char * filename) - int ek_init() - void ek_set_agrid(float agrid) except + - void ek_set_lb_density(float lb_density) except + - void ek_set_viscosity(float viscosity) except + - void ek_set_friction(float friction) except + - void ek_set_lb_ext_force_density(float lb_ext_force_dens_x, float lb_ext_force_dens_y, float lb_ext_force_dens_z) except + - void ek_set_T(float T) except + - void ek_set_prefactor(float prefactor) except + - void ek_set_bulk_viscosity(float bulk_viscosity) except + - void ek_set_gamma_odd(float gamma_odd) except + - void ek_set_gamma_even(float gamma_even) except + - void ek_set_density(int species, float density) - void ek_set_D(int species, float D) - void ek_set_valency(int 
species, float valency) - void ek_set_ext_force_density(int species, float ext_force_density_x, float ext_force_density_y, float ext_force_density_z) - void ek_set_stencil(int stencil) except + - void ek_set_advection(bool advection) except + - void ek_set_fluctuations(bool fluctuations) except + - void ek_set_fluctuation_amplitude(float fluctuation_amplitude) except + - void ek_set_fluidcoupling(bool ideal_contribution) except + - void ek_set_electrostatics_coupling(bool electrostatics_coupling) except + - int ek_node_get_density(int species, int x, int y, int z, double * density) - int ek_node_get_flux(int species, int x, int y, int z, double * flux) - int ek_node_get_potential(int x, int y, int z, double * potential) - int ek_node_set_density(int species, int x, int y, int z, double density) - float ek_calculate_net_charge() - int ek_neutralize_system(int species) - - int ek_print_vtk_particle_potential(char * filename) diff --git a/src/python/espressomd/electrokinetics.py b/src/python/espressomd/electrokinetics.py new file mode 100644 index 00000000000..48532405f8f --- /dev/null +++ b/src/python/espressomd/electrokinetics.py @@ -0,0 +1,700 @@ +# +# Copyright (C) 2021-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import itertools +import numpy as np + +from . 
import utils +from .detail.walberla import VTKOutputBase, LatticeWalberla # pylint: disable=unused-import +from .script_interface import ScriptInterfaceHelper, script_interface_register, ScriptObjectList, array_variant +import espressomd.detail.walberla +import espressomd.shapes +import espressomd.code_features + + +@script_interface_register +class EKFFT(ScriptInterfaceHelper): + """ + An FFT-based Poisson solver. + + """ + + _so_name = "walberla::EKFFT" + _so_creation_policy = "GLOBAL" + + def __init__(self, *args, **kwargs): + if not espressomd.code_features.has_features("WALBERLA_FFT"): + raise NotImplementedError("Feature WALBERLA_FFT not compiled in") + + super().__init__(*args, **kwargs) + + +@script_interface_register +class EKNone(ScriptInterfaceHelper): + """ + The default Poisson solver. + Imposes a null electrostatic potential everywhere. + + """ + _so_name = "walberla::EKNone" + _so_creation_policy = "GLOBAL" + + def __init__(self, *args, **kwargs): + if not espressomd.code_features.has_features("WALBERLA"): + raise NotImplementedError("Feature WALBERLA not compiled in") + + super().__init__(*args, **kwargs) + + +@script_interface_register +class EKContainer(ScriptObjectList): + _so_name = "walberla::EKContainer" + + def __init__(self, *args, **kwargs): + if not espressomd.code_features.has_features("WALBERLA"): + raise NotImplementedError("Feature WALBERLA not compiled in") + + super().__init__(*args, **kwargs) + + def add(self, ekspecies): + self.call_method("add", object=ekspecies) + + def remove(self, ekspecies): + self.call_method("remove", object=ekspecies) + + def clear(self): + self.call_method("clear") + + +@script_interface_register +class EKSpecies(ScriptInterfaceHelper, + espressomd.detail.walberla.LatticeModel): + """ + The advection-diffusion-reaction method for chemical species using waLBerla. + + Parameters + ---------- + lattice : :obj:`espressomd.electrokinetics.LatticeWalberla <espressomd.detail.walberla.LatticeWalberla>` + Lattice object. 
+ tau : :obj:`float` + EK time step, must be an integer multiple of the MD time step. + density : :obj:`float` + Species density. + diffusion : :obj:`float` + Species diffusion coefficient. + valency : :obj:`float` + Species valency. + advection : :obj:`bool` + Whether to enable advection. + friction_coupling : :obj:`bool` + Whether to enable friction coupling. + ext_efield : (3,) array_like of :obj:`float`, optional + External electrical field. + kT : :obj:`float`, optional + Thermal energy of the simulated heat bath (for thermalized species). + Set it to 0 for an unthermalized species. + single_precision : :obj:`bool`, optional + Use single-precision floating-point arithmetic. + + Methods + ------- + clear_density_boundaries() + Remove density boundary conditions. + + clear_flux_boundaries() + Remove flux boundary conditions. + + clear_boundaries() + Remove all boundary conditions. + + save_checkpoint() + Write EK densities and boundary conditions to a file. + + Parameters + ---------- + path : :obj:`str` + Destination file path. + binary : :obj:`bool` + Whether to write in binary or ASCII mode. + + load_checkpoint() + Load EK densities and boundary conditions from a file. + + Parameters + ---------- + path : :obj:`str` + File path to read from. + binary : :obj:`bool` + Whether to read in binary or ASCII mode. + + add_vtk_writer() + Attach a VTK writer. + + Parameters + ---------- + vtk : :class:`espressomd.electrokinetics.VTKOutput` + VTK writer. + + remove_vtk_writer() + Detach a VTK writer. + + Parameters + ---------- + vtk : :class:`espressomd.electrokinetics.VTKOutput` + VTK writer. + + clear_vtk_writers() + Detach all VTK writers. 
+ + """ + + _so_name = "walberla::EKSpecies" + _so_creation_policy = "GLOBAL" + _so_bind_methods = ( + "clear_density_boundaries", + "clear_flux_boundaries", + "clear_boundaries", + "add_vtk_writer", + "remove_vtk_writer", + "clear_vtk_writers", + ) + + def __init__(self, *args, **kwargs): + if not espressomd.code_features.has_features("WALBERLA"): + raise NotImplementedError("Feature WALBERLA not compiled in") + + if "sip" not in kwargs: + params = self.default_params() + params.update(kwargs) + super().__init__(*args, **params) + else: + super().__init__(**kwargs) + + def default_params(self): + return {"single_precision": False, + "kT": 0., "ext_efield": [0., 0., 0.]} + + def __getitem__(self, key): + if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3: + if any(isinstance(item, slice) for item in key): + return EKSpeciesSlice( + parent_sip=self, slice_range=key, node_grid=self.shape) + else: + return EKSpeciesNode(parent_sip=self, index=np.array(key)) + + raise TypeError( + f"{key} is not a valid index. Should be a point on the " + "nodegrid e.g. ek[0,0,0], or a slice, e.g. ek[:,0,0]") + + def add_boundary_from_shape(self, shape, value, boundary_type): + """ + Set boundary conditions from a shape. + + Parameters + ---------- + shape : :obj:`espressomd.shapes.Shape` + Shape to rasterize. + value : (O,) or (L, M, N, O) array_like of :obj:`float` + Boundary numerical value. If a single value of shape ``(O,)`` + is given, it will be broadcast to all nodes inside the shape, + otherwise ``L, M, N`` must be equal to the EK grid dimensions. + boundary_type : Union[:class:`~espressomd.electrokinetics.DensityBoundary`, + :class:`~espressomd.electrokinetics.FluxBoundary`] + Type of the boundary condition. 
+ + """ + if not issubclass(boundary_type, (FluxBoundary, DensityBoundary)): + raise TypeError( + "Parameter 'boundary_type' must be a subclass of FluxBoundary or DensityBoundary") + + if not hasattr(value, "__iter__"): + value = (value, ) + + value = np.array(value, dtype=float) + utils.check_type_or_throw_except( + shape, 1, espressomd.shapes.Shape, "expected an espressomd.shapes.Shape") + if issubclass(boundary_type, FluxBoundary): + if np.shape(value) not in [(3,), tuple(self.shape) + (3,)]: + raise ValueError( + f"Cannot process flux value grid of shape {np.shape(value)}") + if issubclass(boundary_type, DensityBoundary): + if np.shape(value) not in [(1,), tuple(self.shape) + (1,)]: + raise ValueError( + f"Cannot process density value grid of shape {np.shape(value)}") + + mask = self.get_shape_bitmask(shape=shape).astype(int) + if issubclass(boundary_type, FluxBoundary): + boundaries_update_method = "update_flux_boundary_from_shape" + else: + boundaries_update_method = "update_density_boundary_from_shape" + self.call_method( + boundaries_update_method, + raster=array_variant(mask.flatten()), + values=array_variant(value.flatten())) + + +class FluxBoundary: + """ + Hold flux information for the flux boundary + condition at a single node. + + """ + + def __init__(self, flux): + utils.check_type_or_throw_except( + flux, 3, float, "FluxBoundary flux must be three floats") + self.flux = flux + + +class DensityBoundary: + """ + Hold density information for the density boundary + condition at a single node. 
+ + """ + + def __init__(self, density): + utils.check_type_or_throw_except( + density, 1, float, "DensityBoundary density must be one float") + self.density = density + + +@script_interface_register +class EKSpeciesNode(ScriptInterfaceHelper): + _so_name = "walberla::EKSpeciesNode" + _so_creation_policy = "GLOBAL" + + def required_keys(self): + return {"parent_sip", "index"} + + def validate_params(self, params): + utils.check_required_keys(self.required_keys(), params.keys()) + utils.check_type_or_throw_except( + params["index"], 3, int, "The index of an EK species node consists of three integers.") + + def __init__(self, *args, **kwargs): + if "sip" not in kwargs: + self.validate_params(kwargs) + super().__init__(*args, **kwargs) + utils.handle_errors("EKSpeciesNode instantiation failed") + else: + super().__init__(**kwargs) + + def __reduce__(self): + raise NotImplementedError("Cannot serialize EK species node objects") + + def __eq__(self, obj): + return isinstance(obj, EKSpeciesNode) and self.index == obj.index + + def __hash__(self): + return hash(self.index) + + @property + def index(self): + return tuple(self._index) + + @index.setter + def index(self, value): + raise RuntimeError("Parameter 'index' is read-only.") + + @property + def density(self): + return self.call_method("get_density") + + @density.setter + def density(self, value): + self.call_method("set_density", value=value) + + @property + def is_boundary(self): + return self.call_method("get_is_boundary") + + @is_boundary.setter + def is_boundary(self, value): + raise RuntimeError("Property 'is_boundary' is read-only.") + + @property + def density_boundary(self): + """ + Returns + ------- + :class:`~espressomd.electrokinetics.DensityBoundary` + If the node is a boundary node + ``None`` + If the node is not a boundary node + """ + density = self.call_method("get_node_density_at_boundary") + if density is not None: + return DensityBoundary(density) + return None + + @density_boundary.setter + def 
density_boundary(self, value): + """ + Parameters + ---------- + value : :class:`~espressomd.electrokinetics.DensityBoundary` or ``None`` + If value is :class:`~espressomd.electrokinetics.DensityBoundary`, + set the node to be a boundary node with the specified density. + If value is ``None``, the node will become a domain node. + + """ + + if isinstance(value, DensityBoundary): + value = value.density + elif value is not None: + raise TypeError( + "Parameter 'value' must be an instance of DensityBoundary or None") + self.call_method("set_node_density_at_boundary", value=value) + + @property + def flux_boundary(self): + """ + Returns + ------- + :class:`~espressomd.electrokinetics.FluxBoundary` + If the node is a boundary node + ``None`` + If the node is not a boundary node + + """ + flux = self.call_method("get_node_flux_at_boundary") + if flux is not None: + return FluxBoundary(flux) + return None + + @flux_boundary.setter + def flux_boundary(self, value): + """ + Parameters + ---------- + value : :class:`~espressomd.electrokinetics.FluxBoundary` or ``None`` + If value is :class:`~espressomd.electrokinetics.FluxBoundary`, + set the node to be a boundary node with the specified flux. + If value is ``None``, the node will become a domain node. 
+ + """ + + if isinstance(value, FluxBoundary): + value = value.flux + elif value is not None: + raise TypeError( + "Parameter 'value' must be an instance of FluxBoundary or None") + self.call_method("set_node_flux_at_boundary", value=value) + + +@script_interface_register +class EKSpeciesSlice(ScriptInterfaceHelper): + _so_name = "walberla::EKSpeciesSlice" + _so_creation_policy = "GLOBAL" + + def required_keys(self): + return {"parent_sip", "slice_range"} + + def validate_params(self, params): + utils.check_required_keys(self.required_keys(), params.keys()) + + def __init__(self, *args, **kwargs): + if "sip" in kwargs: + super().__init__(**kwargs) + else: + self.validate_params(kwargs) + slice_range = kwargs.pop("slice_range") + grid_size = kwargs["parent_sip"].shape + extra_kwargs = espressomd.detail.walberla.get_slice_bounding_box( + slice_range, grid_size) + node = EKSpeciesNode(index=np.array([0, 0, 0]), **kwargs) + super().__init__(*args, node_sip=node, **kwargs, **extra_kwargs) + utils.handle_errors("EKSpeciesSlice instantiation failed") + + def __iter__(self): + lower, upper = self.call_method("get_slice_ranges") + indices = [list(range(lower[i], upper[i])) for i in range(3)] + lb_sip = self.call_method("get_ek_sip") + for index in itertools.product(*indices): + yield EKSpeciesNode(parent_sip=lb_sip, index=np.array(index)) + + def __reduce__(self): + raise NotImplementedError("Cannot serialize EK species slice objects") + + def _getter(self, attr): + value_grid, shape = self.call_method(f"get_{attr}") + if attr == "flux_at_boundary": + value_grid = [ + None if x is None else FluxBoundary(x) for x in value_grid] + elif attr == "density_at_boundary": + value_grid = [ + None if x is None else DensityBoundary(x) for x in value_grid] + return utils.array_locked(np.reshape(value_grid, shape)) + + def _setter(self, attr, values): + dimensions = self.call_method("get_slice_size") + if 0 in dimensions: + raise AttributeError( + f"Cannot set properties of an empty 
'{self.__class__.__name__}' object") + + values = np.copy(values) + value_shape = tuple(self.call_method("get_value_shape", name=attr)) + target_shape = (*dimensions, *value_shape) + + # broadcast if only one element was provided + if values.shape == value_shape or values.shape == () and value_shape == (1,): + values = np.full(target_shape, values) + + def shape_squeeze(shape): + return tuple(x for x in shape if x != 1) + + if shape_squeeze(values.shape) != shape_squeeze(target_shape): + raise ValueError( + f"Input-dimensions of '{attr}' array {values.shape} does not match slice dimensions {target_shape}") + + self.call_method(f"set_{attr}", values=values.flatten()) + + @property + def density(self): + return self._getter("density",) + + @density.setter + def density(self, value): + self._setter("density", value) + + @property + def is_boundary(self): + return self._getter("is_boundary") + + @is_boundary.setter + def is_boundary(self, value): + raise RuntimeError("Property 'is_boundary' is read-only.") + + @property + def density_boundary(self): + """ + Returns + ------- + (N, M, L) array_like of :class:`~espressomd.electrokinetics.DensityBoundary` + If the nodes are boundary nodes + (N, M, L) array_like of ``None`` + If the nodes are not boundary nodes + + """ + + return self._getter("density_at_boundary") + + @density_boundary.setter + def density_boundary(self, values): + """ + Parameters + ---------- + values : (N, M, L) array_like of :class:`~espressomd.electrokinetics.DensityBoundary` or :obj:`None` + If values are :class:`~espressomd.electrokinetics.DensityBoundary`, + set the nodes to be boundary nodes with the specified density. + If values are :obj:`None`, the nodes will become domain nodes. 
+ + """ + + type_error_msg = "Parameter 'values' must be an array_like of DensityBoundary or None" + values = np.copy(values) + if values.dtype != np.dtype("O"): + raise TypeError(type_error_msg) + for index in np.ndindex(*values.shape): + if values[index] is not None: + if not isinstance(values[index], DensityBoundary): + raise TypeError(type_error_msg) + values[index] = np.array(values[index].density) + self._setter("density_at_boundary", values=values) + + @property + def flux_boundary(self): + """ + Returns + ------- + (N, M, L) array_like of :class:`~espressomd.electrokinetics.FluxBoundary` + If the nodes are boundary nodes + (N, M, L) array_like of ``None`` + If the nodes are not boundary nodes + + """ + + return self._getter("flux_at_boundary") + + @flux_boundary.setter + def flux_boundary(self, values): + """ + Parameters + ---------- + values : (N, M, L) array_like of :class:`~espressomd.electrokinetics.FluxBoundary` or :obj:`None` + If values are :class:`~espressomd.electrokinetics.FluxBoundary`, + set the nodes to be boundary nodes with the specified flux. + If values are :obj:`None`, the nodes will become domain nodes. + + """ + + type_error_msg = "Parameter 'values' must be an array_like of FluxBoundary or None" + values = np.copy(values) + if values.dtype != np.dtype("O"): + raise TypeError(type_error_msg) + for index in np.ndindex(*values.shape): + if values[index] is not None: + if not isinstance(values[index], FluxBoundary): + raise TypeError(type_error_msg) + values[index] = np.array(values[index].flux) + self._setter("flux_at_boundary", values=values) + + +@script_interface_register +class VTKOutput(VTKOutputBase): + """ + Create a VTK writer. + + Files are written to ``<base_folder>/<identifier>/<prefix>_*.vtu``. + Summary is written to ``<base_folder>/<identifier>.pvd``. + + Manual VTK callbacks can be called at any time to take a snapshot + of the current state of the EK species. + + Automatic VTK callbacks can be disabled at any time and re-enabled later. 
+ Please note that the internal VTK counter is no longer incremented when + an automatic callback is disabled, which means the number of EK steps + between two frames will not always be an integer multiple of ``delta_N``. + + Parameters + ---------- + identifier : :obj:`str` + Name of the VTK writer. + observables : :obj:`list`, {'density',} + List of observables to write to the VTK files. + delta_N : :obj:`int` + Write frequency. If this value is 0 (default), the object is a + manual VTK callback that must be triggered manually. Otherwise, + it is an automatic callback that is added to the time loop and + writes every ``delta_N`` EK steps. + base_folder : :obj:`str` (optional), default is 'vtk_out' + Path to the output VTK folder. + prefix : :obj:`str` (optional), default is 'simulation_step' + Prefix for VTK files. + + """ + _so_name = "walberla::EKVTKHandle" + _so_creation_policy = "GLOBAL" + _so_bind_methods = ("enable", "disable", "write") + + def required_keys(self): + return self.valid_keys() - self.default_params().keys() + + def __repr__(self): + class_id = f"{self.__class__.__module__}.{self.__class__.__name__}" + if self.delta_N: + write_when = f"every {self.delta_N} EK steps" + if not self.enabled: + write_when += " (disabled)" + else: + write_when = "on demand" + return f"<{class_id}: write to '{self.vtk_uid}' {write_when}>" + + +@script_interface_register +class EKReactant(ScriptInterfaceHelper): + _so_name = "walberla::EKReactant" + _so_creation_policy = "GLOBAL" + + +class EKBulkReaction(ScriptInterfaceHelper): + _so_name = "walberla::EKBulkReaction" + _so_creation_policy = "GLOBAL" + + +class EKIndexedReaction(ScriptInterfaceHelper): + _so_name = "walberla::EKIndexedReaction" + _so_creation_policy = "GLOBAL" + + def add_node_to_index(self, node): + self.call_method("set_node_is_boundary", node=node, is_boundary=True) + + def remove_node_from_index(self, node): + self.call_method("set_node_is_boundary", node=node, is_boundary=False) + + def 
__getitem__(self, key): + if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3: + if any(isinstance(typ, slice) for typ in key): + shape = self.shape + + indices = [np.atleast_1d(np.arange(shape[i])[key[i]]) + for i in range(3)] + dimensions = [ind.size for ind in indices] + + value_grid = np.zeros((*dimensions,), dtype=bool) + indices = itertools.product(*map(enumerate, indices)) + for (i, x), (j, y), (k, z) in indices: + value_grid[i, j, k] = self.call_method( + "get_node_is_boundary", node=(x, y, z)) + + return utils.array_locked(value_grid) + else: + return self.call_method("get_node_is_boundary", node=key) + raise TypeError( + f"{key} is not a valid index. Should be a point on the nodegrid or a slice") + + def __setitem__(self, key, values): + if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3: + if any(isinstance(typ, slice) for typ in key): + shape = self.shape + + indices = [np.atleast_1d(np.arange(shape[i])[key[i]]) + for i in range(3)] + dimensions = tuple(ind.size for ind in indices) + + values = np.copy(values) + + # broadcast if only one element was provided + if values.shape == (): + values = np.full(dimensions, values) + if values.shape != dimensions: + raise ValueError( + f"Input-dimensions of array {values.shape} does not match slice dimensions {dimensions}.") + + indices = itertools.product(*map(enumerate, indices)) + for (i, x), (j, y), (k, z) in indices: + self.call_method("set_node_is_boundary", node=( + x, y, z), is_boundary=bool(values[i, j, k])) + else: + return self.call_method( + "set_node_is_boundary", node=key, is_boundary=values) + else: + raise TypeError( + f"{key} is not a valid index. 
Should be a point on the nodegrid or a slice") + + +@script_interface_register +class EKReactions(ScriptObjectList): + _so_name = "walberla::EKReactions" + _so_creation_policy = "GLOBAL" + + def add(self, reaction): + if not isinstance(reaction, (EKBulkReaction, EKIndexedReaction)): + raise TypeError("reaction object is not of correct type.") + + self.call_method("add", object=reaction) + + return reaction + + def remove(self, reaction): + self.call_method("remove", object=reaction) + + def clear(self): + self.call_method("clear") diff --git a/src/python/espressomd/electrokinetics.pyx b/src/python/espressomd/electrokinetics.pyx deleted file mode 100644 index 0f5dea2c841..00000000000 --- a/src/python/espressomd/electrokinetics.pyx +++ /dev/null @@ -1,511 +0,0 @@ -# Copyright (C) 2010-2022 The ESPResSo project -# -# This file is part of ESPResSo. -# -# ESPResSo is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# ESPResSo is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -include "myconfig.pxi" -IF CUDA: - from .lb cimport HydrodynamicInteraction - from .lb cimport LBFluidRoutines - from .lb cimport lb_lbfluid_print_vtk_boundary - from .lb cimport lb_lbnode_is_index_valid - from .lb cimport lb_lbfluid_set_lattice_switch - from .lb cimport GPU -from . import utils -from .utils cimport Vector3i -import numpy as np - -IF ELECTROKINETICS: - cdef class Electrokinetics(HydrodynamicInteraction): - """ - Creates the electrokinetic method using the GPU unit. 
- - """ - - def __getitem__(self, key): - if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3: - return ElectrokineticsRoutines(np.array(key)) - raise ValueError( - f"{key} is not a valid key. Should be a point on the nodegrid e.g. ek[0,0,0].") - - def validate_params(self): - """ - Checks if the parameters for "stencil" and "fluid_coupling" are valid. - - """ - default_params = self.default_params() - - if self._params["stencil"] not in ["linkcentered", "nodecentered"]: - raise ValueError( - "stencil has to be 'linkcentered' or 'nodecentered'.") - - if self._params["fluid_coupling"] not in ["friction", "estatics"]: - raise ValueError( - "fluid_coupling has to be 'friction' or 'estatics'.") - - def valid_keys(self): - """ - Returns the valid options used for the electrokinetic method. - """ - - return ["agrid", "lb_density", "viscosity", "friction", - "bulk_viscosity", "gamma_even", "gamma_odd", "T", "ext_force_density", - "prefactor", "stencil", "advection", "fluid_coupling", - "fluctuations", "fluctuation_amplitude", "es_coupling", - "species"] - - def required_keys(self): - """ - Returns the necessary options to initialize the electrokinetic method. - - """ - return ["agrid", "lb_density", "viscosity", - "friction", "T", "prefactor"] - - def default_params(self): - """ - Returns the default parameters. 
- - """ - return {"agrid": -1, - "lb_density": -1, - "viscosity": -1, - "bulk_viscosity": -1, - "gamma_odd": 0.0, - "gamma_even": 0.0, - "ext_force_density": [0., 0., 0.], - "friction": 0.0, - "T": -1, - "prefactor": -1, - "stencil": "linkcentered", - "advection": True, - "fluid_coupling": "friction", - "fluctuations": False, - "fluctuation_amplitude": 0.0, - "es_coupling": False, - "species": []} - - def _get_params_from_es_core(self): - if ek_parameters.stencil == 0: - stencil = "linkcentered" - elif ek_parameters.stencil == 1: - stencil = "nodecentered" - else: - raise RuntimeError("Value of stencil could not be identified.") - - if ek_parameters.fluidcoupling_ideal_contribution: - fluid_coupling = "friction" - else: - fluid_coupling = "estatics" - - return {"agrid": ek_parameters.agrid, - "lb_density": ek_parameters.lb_density, - "viscosity": ek_parameters.viscosity, - "bulk_viscosity": ek_parameters.bulk_viscosity, - "gamma_odd": ek_parameters.gamma_odd, - "gamma_even": ek_parameters.gamma_even, - "ext_force_density": ek_parameters.lb_ext_force_density, - "friction": ek_parameters.friction, - "T": ek_parameters.T, - "prefactor": ek_parameters.prefactor, - "stencil": stencil, - "advection": ek_parameters.advection, - "fluid_coupling": fluid_coupling, - "fluctuations": ek_parameters.fluctuations, - "fluctuation_amplitude": - ek_parameters.fluctuation_amplitude, - "es_coupling": ek_parameters.es_coupling} - - def _set_params_in_es_core(self): - if self._params["stencil"] == "linkcentered": - ek_set_stencil(0) - elif self._params["stencil"] == "nodecentered": - ek_set_stencil(1) - - if self._params["fluid_coupling"] == "friction": - ek_set_fluidcoupling(True) - elif self._params["fluid_coupling"] == "estatics": - ek_set_fluidcoupling(False) - - ek_set_agrid(self._params["agrid"]) - ek_set_lb_density(self._params["lb_density"]) - ek_set_viscosity(self._params["viscosity"]) - ek_set_friction(self._params["friction"]) - 
ek_set_lb_ext_force_density(self._params["ext_force_density"][0], - self._params["ext_force_density"][1], - self._params["ext_force_density"][2]) - ek_set_T(self._params["T"]) - ek_set_prefactor(self._params["prefactor"]) - ek_set_bulk_viscosity(self._params["bulk_viscosity"]) - ek_set_gamma_odd(self._params["gamma_odd"]) - ek_set_gamma_even(self._params["gamma_even"]) - ek_set_advection(self._params["advection"]) - ek_set_fluctuations(self._params["fluctuations"]) - ek_set_fluctuation_amplitude(self._params["fluctuation_amplitude"]) - ek_set_electrostatics_coupling(self._params["es_coupling"]) - - def set_density(self, species=None, density=None, node=None): - """ - Sets the density of a species at a specific node. - If no node is given the density will be set global for the species. - - Parameters - ---------- - species : :obj:`int` - species for which the density will apply. - density : :obj:`float` - The value to which the density will be set to. - node : numpy-array of type :obj:`int` of length (3) - If set the density will be only applied on this specific node. - - """ - - if species is None or density is None: - raise ValueError("species and density have to be set.") - utils.check_type_or_throw_except( - species, 1, float, "species needs to be an integer.") - if node is None: - ek_set_density(species, density) - else: - utils.check_type_or_throw_except( - species, node, int, "node has to be an array of 3 integers") - ek_node_set_density( - species, node[0], node[1], node[2], density) - - def _activate_method(self): - self._set_params_in_es_core() - for species in self._params["species"]: - species._activate_method() - lb_lbfluid_set_lattice_switch(GPU) - self.ek_init() - - def neutralize_system(self, species): - """ - Sets the global density of a species to a specific value - for which the whole system will have no net charge. - - .. 
note :: The previous density of the species will be ignored and - it will be homogeneous distributed over the whole system - The species must be charged to begin with. If the - neutralization would lead to a negative species density - an exception will be raised. - - Parameters - ---------- - species : :obj:`int` - The species which will be changed to neutralize the system. - - """ - err = ek_neutralize_system(species.id) - - if err == 1: - raise RuntimeError( - 'Species used for neutralization must be added to electrokinetics') - elif err == 2: - raise RuntimeError( - 'Species used for neutralization must be charged') - elif err == 3: - raise RuntimeError( - 'Neutralization with specified species would result in negative density') - elif err != 0: - raise RuntimeError('Unknown error') - - self.ek_init() - - def ek_init(self): - """ - Initializes the electrokinetic system. - This automatically initializes the lattice-Boltzmann method on the GPU. - - """ - err = ek_init() - if err: - raise RuntimeError('EK init failed') - - def add_species(self, species): - """ - Initializes a new species for the electrokinetic method. - - Parameters - ---------- - species : :obj:`int` - Species to be initialized. - - """ - self._params["species"].append(species) - - def get_params(self): - """ - Prints out the parameters of the electrokinetic system. - - """ - self._params.update(self._get_params_from_es_core()) - return self._params - - def write_vtk_boundary(self, path): - """ - Writes the boundary information into a vtk-file. - - Parameters - ---------- - path : :obj:`str` - Path of the .vtk file the boundary is written to. - - """ - lb_lbfluid_print_vtk_boundary(utils.to_char_pointer(path)) - - def write_vtk_velocity(self, path): - """ - Writes the lattice-Boltzmann velocity information into a vtk-file. - - Parameters - ---------- - path : :obj:`str` - Path of the .vtk file the velocity is written to. 
- - """ - ek_lb_print_vtk_velocity(utils.to_char_pointer(path)) - - def write_vtk_density(self, path): - """ - Writes the LB density information into a vtk-file. - - Parameters - ---------- - path : :obj:`str` - Path of the .vtk file the LB density is written to. - - """ - ek_lb_print_vtk_density(utils.to_char_pointer(path)) - - def write_vtk_potential(self, path): - """ - Writes the electrostatic potential into a vtk-file. - - Parameters - ---------- - path : :obj:`str` - Path of the .vtk file the electrostatic potential is written to. - - """ - ek_print_vtk_potential(utils.to_char_pointer(path)) - - def write_vtk_lbforce(self, path): - """ - Writes the LB force information into a vtk-file. - - Parameters - ---------- - path : :obj:`str` - Path of the .vtk file the LB force is written to. - - """ - ek_print_vtk_lbforce_density(utils.to_char_pointer(path)) - - def write_vtk_particle_potential(self, path): - """ - Writes the electrostatic particle potential into a vtk-file. - - .. note :: This only works if 'es_coupling' is active. - - Parameters - ---------- - path : :obj:`str` - Path of the .vtk file the electrostatic potential is written to. 
- - """ - - if self._params["es_coupling"]: - ek_print_vtk_particle_potential(utils.to_char_pointer(path)) - else: - raise RuntimeError("'es_coupling' is not active.") - - def save_checkpoint(self, path): - raise RuntimeError("EK does not support checkpointing") - - def load_checkpoint(self, path): - raise RuntimeError("EK does not support checkpointing") - - def add_reaction(self, shape): - raise NotImplementedError("This method is not implemented yet.") - - def add_boundary(self, shape): - raise NotImplementedError("This method is not implemented yet.") - - cdef class ElectrokineticsRoutines(LBFluidRoutines): - - property potential: - def __get__(self): - cdef double potential - ek_node_get_potential(self.node[0], self.node[1], self.node[2], & potential) - return potential - - def __set__(self, value): - raise Exception("Potential can not be set.") - - class Species: - - """ - Creates a species object that is passed to the ek instance. - - """ - - py_number_of_species = 0 - id = -1 - _params = {} - - # __getstate__ and __setstate__ define the pickle interaction - def __getstate__(self): - raise RuntimeError("EK does not support checkpointing") - - def __setstate__(self, params): - raise RuntimeError("EK does not support checkpointing") - - def __str__(self): - return f"{self.__class__.__name__}({self.get_params()})" - - def __getitem__(self, key): - if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3: - return SpecieRoutines(np.array(key), self.id) - raise ValueError( - f"{key} is not a valid key. Should be a point on the nodegrid e.g. species[0,0,0].") - - def __init__(self, **kwargs): - Species.py_number_of_species += 1 - self.id = Species.py_number_of_species - utils.check_required_keys(self.required_keys(), kwargs.keys()) - utils.check_valid_keys(self.valid_keys(), kwargs.keys()) - self._params = self.default_params() - self._params.update(kwargs) - - def valid_keys(self): - """ - Returns the valid keys for the species. 
- - """ - return {"density", "D", "valency", "ext_force_density"} - - def required_keys(self): - """ - Returns the required keys for the species. - - """ - return {"density", "D", "valency"} - - def default_params(self): - """ - Returns the default parameters for the species. - - """ - return {"ext_force_density": [0, 0, 0]} - - def _get_params_from_es_core(self): - return { - "density": ek_parameters.density[ - ek_parameters.species_index[self.id]], - "D": ek_parameters.D[ek_parameters.species_index[self.id]], - "valency": ek_parameters.valency[ - ek_parameters.species_index[self.id]], - "ext_force_density": - [ek_parameters.ext_force_density[0][ek_parameters.species_index[self.id]], - ek_parameters.ext_force_density[1][ek_parameters.species_index[self.id]], - ek_parameters.ext_force_density[2][ek_parameters.species_index[self.id]]]} - - def _set_params_in_es_core(self): - ek_set_D(self.id, self._params["D"]) - ek_set_valency(self.id, self._params["valency"]) - ek_set_density(self.id, self._params["density"]) - ek_set_ext_force_density(self.id, - self._params["ext_force_density"][0], - self._params["ext_force_density"][1], - self._params["ext_force_density"][2]) - - def _activate_method(self): - self._set_params_in_es_core() - - def get_params(self): - """ - Returns the parameters of the species. - - """ - self._params.update(self._get_params_from_es_core()) - return self._params - - def write_vtk_density(self, path): - """ - Writes the species density into a vtk-file. - - Parameters - ---------- - path : :obj:`str` - Path of the .vtk file the species density is written to. - - """ - ek_print_vtk_density(self.id, utils.to_char_pointer(path)) - - def write_vtk_flux(self, path): - """ - Writes the species flux into a vtk-file. - - Parameters - ---------- - path : :obj:`str` - Path of the .vtk file the species flux is written to. 
- - """ - ek_print_vtk_flux(self.id, utils.to_char_pointer(path)) - - def write_vtk_flux_fluc(self, path): - ek_print_vtk_flux_fluc(self.id, utils.to_char_pointer(path)) - - def write_vtk_flux_link(self, path): - ek_print_vtk_flux_link(self.id, utils.to_char_pointer(path)) - - cdef class SpecieRoutines: - cdef Vector3i node - cdef int id - - def __init__(self, key, id): - self.node[0] = key[0] - self.node[1] = key[1] - self.node[2] = key[2] - self.id = id - if not lb_lbnode_is_index_valid(self.node): - raise IndexError("LB node index out of bounds") - - property density: - def __set__(self, value): - utils.check_type_or_throw_except( - value, 1, float, "Property 'density' has to be a float") - if ek_node_set_density( - self.id, self.node[0], self.node[1], self.node[2], value) != 0: - raise RuntimeError("Species has not been added to EK.") - - def __get__(self): - cdef double density - if ek_node_get_density(self.id, self.node[0], self.node[1], self.node[2], & density) != 0: - raise RuntimeError("Species has not been added to EK.") - return density - - property flux: - def __set__(self, value): - raise ValueError("Node flux is not settable.") - - def __get__(self): - cdef double flux[3] - if ek_node_get_flux( - self.id, self.node[0], self.node[1], self.node[2], flux) != 0: - raise RuntimeError("Species has not been added to EK.") - - return np.array([flux[0], flux[1], flux[2]]) diff --git a/src/python/espressomd/interactions.py b/src/python/espressomd/interactions.py index f6e9578cf80..c765f4bfea6 100644 --- a/src/python/espressomd/interactions.py +++ b/src/python/espressomd/interactions.py @@ -1546,8 +1546,6 @@ def add(self, *args): return bond_id def __getitem__(self, bond_id): - self._assert_key_type(bond_id) - if self.call_method('has_bond', bond_id=bond_id): bond_obj = self.call_method('get_bond', bond_id=bond_id) bond_obj._bond_id = bond_id @@ -1590,7 +1588,6 @@ def _insert_bond(self, bond_id, bond_obj): bond_id = self.call_method("insert", object=bond_obj) 
else: # Throw error if attempting to overwrite a bond of different type - self._assert_key_type(bond_id) if self.call_method("contains", key=bond_id): old_type = self._bond_classes[ self.call_method("get_zero_based_type", bond_id=bond_id)] @@ -1625,3 +1622,14 @@ def __getstate__(self): def __setstate__(self, params): for bond_id, (type_number, bond_params) in params.items(): self[bond_id] = self._bond_classes[type_number](**bond_params) + + def __reduce__(self): + so_callback, (so_name, so_bytestring) = super().__reduce__() + return (BondedInteractions._restore_object, + (so_callback, (so_name, so_bytestring), self.__getstate__())) + + @classmethod + def _restore_object(cls, so_callback, so_callback_args, state): + so = so_callback(*so_callback_args) + so.__setstate__(state) + return so diff --git a/src/python/espressomd/io/CMakeLists.txt b/src/python/espressomd/io/CMakeLists.txt index 4023895074b..f6d2f170963 100644 --- a/src/python/espressomd/io/CMakeLists.txt +++ b/src/python/espressomd/io/CMakeLists.txt @@ -18,6 +18,7 @@ # configure_file(mpiio.py mpiio.py COPYONLY) +configure_file(vtk.py vtk.py COPYONLY) add_subdirectory(writer) set(cython_AUX ${cython_AUX} "${CMAKE_SOURCE_DIR}/src/python/espressomd/io/__init__.py" diff --git a/src/python/espressomd/io/vtk.py b/src/python/espressomd/io/vtk.py new file mode 100644 index 00000000000..2572aaa9430 --- /dev/null +++ b/src/python/espressomd/io/vtk.py @@ -0,0 +1,135 @@ +# +# Copyright (C) 2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import numpy as np +import vtk +import vtk.util.numpy_support + + +class VTKReader: + """ + Reader for VTK multi-piece uniform grids written in XML format. + """ + error_tolerance = 1e-5 # VTK data is written with 1e-7 precision + + @classmethod + def get_array_names(cls, reader): + array_names = set() + n_ghost_layers = reader.GetUpdateGhostLevel() + n_pieces = reader.GetNumberOfPieces() + for piece_index in range(n_pieces): + reader.UpdatePiece(piece_index, n_pieces, n_ghost_layers) + piece = reader.GetOutput() + cell = piece.GetCellData() + for i in range(cell.GetNumberOfArrays()): + array_names.add(cell.GetArrayName(i)) + return array_names + + @classmethod + def get_piece_topology( + cls, piece, array, bounding_box_lower, bounding_box_upper): + bounds = np.array(piece.GetBounds()) + box_l = bounds[1::2] - bounds[0:-1:2] + n_grid_points = array.GetNumberOfTuples() + shape_float = box_l / np.min(box_l) + shape_float *= np.cbrt(n_grid_points / np.prod(shape_float)) + shape_int = np.around(shape_float).astype(int) + assert np.linalg.norm(shape_int - shape_float) < cls.error_tolerance and np.prod( + shape_int) == n_grid_points, "only cubic grids are supported" + agrid = np.mean(box_l / shape_float) + shape = tuple(shape_int.tolist()) + lower_corner = [] + for i in range(3): + start = int(np.around(bounds[i * 2])) + stop = start + shape[i] + bounding_box_lower[i] = min(bounding_box_lower[i], start) + bounding_box_upper[i] = max(bounding_box_upper[i], stop) + lower_corner.append(start) + return agrid, shape, lower_corner + + @classmethod + def reconstruct_array(cls, reader, array_name): + n_pieces = reader.GetNumberOfPieces() + n_ghost_layers = reader.GetUpdateGhostLevel() + # get bounding box + info = [] + agrids = [] + bounding_box_lower = 3 * [float("inf")] + bounding_box_upper = 3 * 
[-float("inf")] + for piece_index in range(n_pieces): + reader.UpdatePiece(piece_index, n_pieces, n_ghost_layers) + piece = reader.GetOutput() + cell = piece.GetCellData() + array = cell.GetArray(array_name) + if array is not None: + agrid, shape, lower_corner = cls.get_piece_topology( + piece, array, bounding_box_lower, bounding_box_upper) + agrids.append(agrid) + info.append([piece_index, shape, lower_corner]) + + if not info: + return None + + # get array type and size + assert float("inf") not in bounding_box_lower + assert -float("inf") not in bounding_box_upper + if np.std(agrids) / np.mean(agrids) > cls.error_tolerance: + raise NotImplementedError( + f"VTK non-uniform grids are not supported (got agrid = {agrids} when parsing array '{array_name}')") + data_dims = np.array(bounding_box_upper) - np.array(bounding_box_lower) + piece_index = info[0][0] + reader.UpdatePiece(piece_index, n_pieces, n_ghost_layers) + array = reader.GetOutput().GetCellData().GetArray(array_name) + vector_length = array.GetNumberOfComponents() + val_dims = [] if vector_length == 1 else [vector_length] + data_type = array.GetDataTypeAsString() + if data_type == "float": + dtype = float + elif data_type == "int": + dtype = int + else: + raise NotImplementedError( + f"Unknown VTK data type '{data_type}' (when parsing array '{array_name}')") + + # get data + data = np.empty(data_dims.tolist() + val_dims, dtype=dtype) + for piece_index, shape, lower_corner in info: + reader.UpdatePiece(piece_index, n_pieces, n_ghost_layers) + array = reader.GetOutput().GetCellData().GetArray(array_name) + subset = [] + for i in range(3): + start = lower_corner[i] - bounding_box_lower[i] + stop = start + shape[i] + subset.append(slice(start, stop)) + data[tuple(subset)] = vtk.util.numpy_support.vtk_to_numpy( + array).reshape(list(shape) + val_dims, order='F') + + return data + + def parse(self, filepath): + reader = vtk.vtkXMLUnstructuredGridReader() + reader.SetFileName(str(filepath)) + reader.Update() + + 
arrays = {} + array_names = self.get_array_names(reader) + for array_name in sorted(array_names): + arrays[array_name] = self.reconstruct_array(reader, array_name) + + return arrays diff --git a/src/python/espressomd/lb.pxd b/src/python/espressomd/lb.pxd deleted file mode 100644 index 3ac79be7c11..00000000000 --- a/src/python/espressomd/lb.pxd +++ /dev/null @@ -1,217 +0,0 @@ -# -# Copyright (C) 2013-2022 The ESPResSo project -# -# This file is part of ESPResSo. -# -# ESPResSo is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# ESPResSo is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . 
-# -include "myconfig.pxi" - -from libcpp cimport bool -from libcpp.vector cimport vector -from libcpp.string cimport string -from libc cimport stdint - -from .utils cimport Vector3d -from .utils cimport Vector3i -from .utils cimport Vector6d -from .utils cimport Vector19d -from .utils cimport make_array_locked - -cdef class FluidActor: - cdef public _isactive - cdef public _params - cdef public system - -cdef class HydrodynamicInteraction(FluidActor): - pass - -cdef class LBFluidRoutines: - cdef Vector3i node - -############################################## -# -# extern functions and structs -# -############################################## - -cdef extern from "grid_based_algorithms/lb_interface.hpp" namespace "ActiveLB": - cdef ActiveLB NONE - cdef ActiveLB CPU - cdef ActiveLB GPU - -cdef extern from "grid_based_algorithms/lb_interface.hpp": - - cdef enum ActiveLB: - pass - void lb_lbfluid_set_tau(double c_tau) except + - double lb_lbfluid_get_tau() except + - void lb_lbfluid_set_density(double c_dens) except + - double lb_lbfluid_get_density() except + - void lb_lbfluid_set_viscosity(double c_visc) except + - double lb_lbfluid_get_viscosity() except + - void lb_lbfluid_set_agrid(double c_agrid) except + - double lb_lbfluid_get_agrid() except + - void lb_lbfluid_set_gamma_odd(double c_gamma_odd) except + - double lb_lbfluid_get_gamma_odd() except + - void lb_lbfluid_set_gamma_even(double c_gamma_even) except + - double lb_lbfluid_get_gamma_even() except + - void lb_lbfluid_set_ext_force_density(const Vector3d forcedensity) except + - const Vector3d lb_lbfluid_get_ext_force_density() except + - void lb_lbfluid_set_bulk_viscosity(double c_bulk_visc) except + - double lb_lbfluid_get_bulk_viscosity() except + - void lb_lbfluid_print_vtk_velocity(string filename) except + - void lb_lbfluid_print_vtk_velocity(string filename, vector[int] bb1, vector[int] bb2) except + - void lb_lbfluid_print_vtk_boundary(string filename) except + - void 
lb_lbfluid_print_velocity(string filename) except + - void lb_lbfluid_print_boundary(string filename) except + - void lb_lbfluid_save_checkpoint(string filename, bool binary) except + - void lb_lbfluid_load_checkpoint(string filename, bool binary) except + - void lb_lbfluid_set_lattice_switch(ActiveLB local_lattice_switch) except + - Vector6d lb_lbfluid_get_pressure_tensor() except + - bool lb_lbnode_is_index_valid(const Vector3i & ind) except + - Vector3i lb_lbfluid_get_shape() except + - const Vector3d lb_lbnode_get_velocity(const Vector3i & ind) except + - void lb_lbnode_set_velocity(const Vector3i & ind, const Vector3d & u) except + - double lb_lbnode_get_density(const Vector3i & ind) except + - void lb_lbnode_set_density(const Vector3i & ind, double density) except + - const Vector6d lb_lbnode_get_pressure_tensor(const Vector3i & ind) except + - const Vector6d lb_lbnode_get_pressure_tensor_neq(const Vector3i & ind) except + - const Vector19d lb_lbnode_get_pop(const Vector3i & ind) except + - void lb_lbnode_set_pop(const Vector3i & ind, const Vector19d & populations) except + - int lb_lbnode_get_boundary(const Vector3i & ind) except + - stdint.uint64_t lb_lbfluid_get_rng_state() except + - void lb_lbfluid_set_rng_state(stdint.uint64_t) except + - void lb_lbfluid_set_kT(double) except + - double lb_lbfluid_get_kT() except + - double lb_lbfluid_get_lattice_speed() except + - void check_tau_time_step_consistency(double tau, double time_s) except + - const Vector3d lb_lbfluid_get_interpolated_velocity(const Vector3d & p) except + - -cdef extern from "grid_based_algorithms/lb_particle_coupling.hpp": - void lb_lbcoupling_set_rng_state(stdint.uint64_t) - stdint.uint64_t lb_lbcoupling_get_rng_state() except + - void lb_lbcoupling_set_gamma(double) - double lb_lbcoupling_get_gamma() except + - bool lb_lbcoupling_is_seed_required() - -cdef extern from "grid_based_algorithms/lbgpu.hpp": - void linear_velocity_interpolation(double * positions, double * velocities, int 
length) - void quadratic_velocity_interpolation(double * positions, double * velocities, int length) - -cdef extern from "grid_based_algorithms/lb_interpolation.hpp": - cdef cppclass InterpolationOrder: - pass - void lb_lbinterpolation_set_interpolation_order(InterpolationOrder & order) - -cdef extern from "grid_based_algorithms/lb_interpolation.hpp" namespace "InterpolationOrder": - cdef InterpolationOrder linear - cdef InterpolationOrder quadratic - -cdef extern from "integrate.hpp": - double get_time_step() - -############################################## -# -# Wrapper-functions to handle unit conversions -# -############################################## - -cdef inline python_lbfluid_set_density(double dens, double agrid) except +: - lb_lbfluid_set_density(dens * agrid**3) - -cdef inline python_lbfluid_set_viscosity(double visc, double agrid, double tau) except +: - lb_lbfluid_set_viscosity(visc * tau / agrid**2) - -cdef inline python_lbfluid_set_agrid(double agrid) except +: - lb_lbfluid_set_agrid(agrid) - -cdef inline python_lbfluid_set_bulk_viscosity(double bvisc, double agrid, double tau) except +: - lb_lbfluid_set_bulk_viscosity(bvisc * tau / agrid**2) - -cdef inline python_lbfluid_set_gamma(double gamma) except +: - lb_lbcoupling_set_gamma(gamma) - -cdef inline python_lbfluid_set_gamma_odd(double gamma_odd) except +: - lb_lbfluid_set_gamma_odd(gamma_odd) - -cdef inline python_lbfluid_set_gamma_even(double gamma_even) except +: - lb_lbfluid_set_gamma_even(gamma_even) - -cdef inline python_lbfluid_set_ext_force_density(Vector3d ext_force_density, double agrid, double tau) except +: - lb_lbfluid_set_ext_force_density(ext_force_density * agrid**2 * tau**2) - -cdef inline python_lbfluid_get_density(double agrid) except +: - return lb_lbfluid_get_density() / agrid**3 - -cdef inline python_lbfluid_get_viscosity(double agrid, double tau) except +: - return lb_lbfluid_get_viscosity() / tau * agrid**2 - -cdef inline python_lbfluid_get_bulk_viscosity(double agrid, 
double tau) except +: - return lb_lbfluid_get_bulk_viscosity() / tau * agrid**2 - -cdef inline python_lbfluid_get_gamma() except +: - return lb_lbcoupling_get_gamma() - -cdef inline python_lbfluid_get_ext_force_density(double agrid, double tau) except +: - cdef Vector3d ext_force_density = lb_lbfluid_get_ext_force_density() - return make_array_locked(ext_force_density / (agrid**2 * tau**2)) - -cdef inline python_lbfluid_get_pressure_tensor(double agrid, double tau) except +: - cdef Vector6d c_tensor = lb_lbfluid_get_pressure_tensor() - cdef double unit_conversion = 1.0 / (agrid * tau**2) - cdef Vector6d p_tensor = c_tensor * unit_conversion - return [[p_tensor[0], p_tensor[1], p_tensor[3]], - [p_tensor[1], p_tensor[2], p_tensor[4]], - [p_tensor[3], p_tensor[4], p_tensor[5]]] - -cdef inline python_lbnode_set_velocity(Vector3i node, Vector3d velocity) except +: - lb_lbnode_set_velocity(node, velocity / lb_lbfluid_get_lattice_speed()) - -cdef inline python_lbnode_get_velocity(Vector3i node) except +: - cdef Vector3d c_velocity = lb_lbnode_get_velocity(node) - return make_array_locked(c_velocity * lb_lbfluid_get_lattice_speed()) - -cdef inline python_lbnode_get_interpolated_velocity(Vector3d pos) except +: - cdef Vector3d c_velocity = lb_lbfluid_get_interpolated_velocity(pos) - return make_array_locked(c_velocity * lb_lbfluid_get_lattice_speed()) - -cdef inline python_lbnode_set_density(Vector3i node, double density) except +: - cdef double agrid = lb_lbfluid_get_agrid() - lb_lbnode_set_density(node, density * agrid**3) - -cdef inline python_lbnode_get_density(Vector3i node) except +: - cdef double c_density = lb_lbnode_get_density(node) - cdef double agrid = lb_lbfluid_get_agrid() - return c_density / agrid**3 - -cdef inline python_lbnode_get_pressure_tensor(Vector3i node) except +: - cdef Vector6d c_tensor = lb_lbnode_get_pressure_tensor(node) - cdef double tau = lb_lbfluid_get_tau() - cdef double agrid = lb_lbfluid_get_agrid() - cdef double unit_conversion = 1.0 / 
(tau**2 * agrid) - cdef Vector6d p_tensor = c_tensor * unit_conversion - return [[p_tensor[0], p_tensor[1], p_tensor[3]], - [p_tensor[1], p_tensor[2], p_tensor[4]], - [p_tensor[3], p_tensor[4], p_tensor[5]]] - -cdef inline python_lbnode_get_pressure_tensor_neq(Vector3i node) except +: - cdef Vector6d c_tensor = lb_lbnode_get_pressure_tensor_neq(node) - cdef double tau = lb_lbfluid_get_tau() - cdef double agrid = lb_lbfluid_get_agrid() - cdef double unit_conversion = 1.0 / (tau**2 * agrid) - cdef Vector6d p_tensor = c_tensor * unit_conversion - return [[p_tensor[0], p_tensor[1], p_tensor[3]], - [p_tensor[1], p_tensor[2], p_tensor[4]], - [p_tensor[3], p_tensor[4], p_tensor[5]]] diff --git a/src/python/espressomd/lb.py b/src/python/espressomd/lb.py new file mode 100644 index 00000000000..2d155e01902 --- /dev/null +++ b/src/python/espressomd/lb.py @@ -0,0 +1,765 @@ +# +# Copyright (C) 2013-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import itertools +import numpy as np + +from . 
import utils +from .detail.walberla import VTKOutputBase, LatticeWalberla +from .script_interface import ScriptInterfaceHelper, script_interface_register, array_variant +import espressomd.detail.walberla +import espressomd.shapes +import espressomd.code_features + + +class VelocityBounceBack: + """ + Hold velocity information for the velocity bounce back boundary + condition at a single node. + + """ + + def __init__(self, velocity): + utils.check_type_or_throw_except( + velocity, 3, float, "VelocityBounceBack velocity must be three floats") + self.velocity = velocity + + +class HydrodynamicInteraction(ScriptInterfaceHelper): + """ + Base class for LB implementations. + + """ + + def __getitem__(self, key): + raise NotImplementedError("Derived classes must implement this method") + + def __str__(self): + return f"{self.__class__.__name__}({self.get_params()})" + + def _activate(self): + self._activate_method() + + def _deactivate(self): + self._deactivate_method() + + def _activate_method(self): + self.call_method("activate") + utils.handle_errors("HydrodynamicInteraction activation failed") + + def _deactivate_method(self): + self.call_method("deactivate") + utils.handle_errors("HydrodynamicInteraction deactivation failed") + + def validate_params(self, params): + pass + + def valid_keys(self): + return {"agrid", "tau", "density", "ext_force_density", + "kinematic_viscosity", "lattice", "kT", "seed"} + + def required_keys(self): + return {"lattice", "density", "kinematic_viscosity", "tau"} + + def default_params(self): + return {"lattice": None, "seed": 0, "kT": 0., + "ext_force_density": [0.0, 0.0, 0.0]} + + def mach_limit(self): + """ + The fluid velocity is limited to :math:`v_{\\mathrm{max}} = 0.20` + (see *quasi-incompressible limit* in :cite:`kruger17a`, + chapter 7, page 272), which corresponds to Mach 0.35. 
+ + The relative error in the fluid density between a compressible fluid + and an incompressible fluid at Mach 0.30 is less than 5% (see + *constant density assumption* in :cite:`kundu01a` chapter 16, page + 663). Since the speed of sound is :math:`c_s = 1 / \\sqrt{3}` in LB + velocity units in a D3Q19 lattice, the velocity limit at Mach 0.30 + is :math:`v_{\\mathrm{max}} = 0.30 / \\sqrt{3} \\approx 0.17`. + At Mach 0.35 the relative error is around 6% and + :math:`v_{\\mathrm{max}} = 0.35 / \\sqrt{3} \\approx 0.20`. + + Returns + ------- + v_max : :obj:`float` + The Mach limit expressed in LB velocity units. + + """ + return 0.20 + + @classmethod + def _check_mach_limit(cls, velocities): + vel_max = cls.mach_limit(cls) + velocities = np.reshape(velocities, (-1, 3)) + if np.any(np.linalg.norm(velocities, axis=1) > vel_max): + speed_of_sound = 1. / np.sqrt(3.) + mach_number = vel_max / speed_of_sound + raise ValueError(f"Slip velocity exceeds Mach {mach_number:.2f}") + + @property + def pressure_tensor(self): + tensor = self.call_method("get_pressure_tensor") + return utils.array_locked(tensor) + + @pressure_tensor.setter + def pressure_tensor(self, value): + raise RuntimeError(f"Property 'pressure_tensor' is read-only") + + +@script_interface_register +class LBFluidWalberla(HydrodynamicInteraction, + espressomd.detail.walberla.LatticeModel): + """ + The lattice-Boltzmann method for hydrodynamics using waLBerla. + If argument ``lattice`` is not provided, one will be default + constructed if an argument ``agrid`` is provided. + + Parameters + ---------- + lattice : :obj:`espressomd.lb.LatticeWalberla ` + Lattice object. If not provided, a default one will be constructed + using the ``agrid`` parameter. + agrid : :obj:`float` + Lattice constant. The box size in every direction must be an integer + multiple of ``agrid``. Cannot be provided together with ``lattice``. + tau : :obj:`float` + LB time step, must be an integer multiple of the MD time step. 
+ density : :obj:`float` + Fluid density. + kinematic_viscosity : :obj:`float` + Fluid kinematic viscosity. + ext_force_density : (3,) array_like of :obj:`float`, optional + Force density applied on the fluid. + kT : :obj:`float`, optional + Thermal energy of the simulated heat bath (for thermalized fluids). + Set it to 0 for an unthermalized fluid. + seed : :obj:`int`, optional + Initial counter value (or seed) of the philox RNG. + Required for a thermalized fluid. Must be positive. + single_precision : :obj:`bool`, optional + Use single-precision floating-point arithmetic. + + Methods + ------- + get_interpolated_velocity() + Get LB fluid velocity at specified position. + + Parameters + ---------- + pos : (3,) array_like of :obj:`float` + The position at which velocity is requested. + + Returns + ------- + v : (3,) array_like :obj:`float` + The LB fluid velocity at ``pos``. + + add_force_at_pos(): + Adds a force to the fluid at given position. + + Parameters + ---------- + pos : (3,) array_like of :obj:`float` + The position at which the force will be added. + force : (3,) array_like of :obj:`float` + The force vector which will be distributed at the position. + + clear_boundaries() + Remove velocity bounce-back boundary conditions. + + save_checkpoint() + Write LB node populations and boundary conditions to a file. + + Parameters + ---------- + path : :obj:`str` + Destination file path. + binary : :obj:`bool` + Whether to write in binary or ASCII mode. + + load_checkpoint() + Load LB node populations and boundary conditions from a file. + + Parameters + ---------- + path : :obj:`str` + File path to read from. + binary : :obj:`bool` + Whether to read in binary or ASCII mode. + + add_vtk_writer() + Attach a VTK writer. + + Parameters + ---------- + vtk : :class:`espressomd.lb.VTKOutput` + VTK writer. + + remove_vtk_writer() + Detach a VTK writer. + + Parameters + ---------- + vtk : :class:`espressomd.lb.VTKOutput` + VTK writer. 
+ + clear_vtk_writers() + Detach all VTK writers. + + """ + + _so_name = "walberla::LBFluid" + _so_creation_policy = "GLOBAL" + _so_bind_methods = ( + "add_force_at_pos", + "clear_boundaries", + "get_interpolated_velocity", + "add_vtk_writer", + "remove_vtk_writer", + "clear_vtk_writers", + ) + + def __init__(self, *args, **kwargs): + if not espressomd.code_features.has_features("WALBERLA"): + raise NotImplementedError("Feature WALBERLA not compiled in") + + if "sip" not in kwargs: + params = self.default_params() + params.update(kwargs) + self.validate_params(params) + super().__init__(*args, **params) + else: + super().__init__(**kwargs) + + def validate_params(self, params): + super().validate_params(params) + + # construct default lattice if necessary + if params.get("lattice") is None: + if "agrid" not in params: + raise ValueError("missing argument 'lattice' or 'agrid'") + params["lattice"] = LatticeWalberla( + agrid=params.pop("agrid"), n_ghost_layers=1) + elif "agrid" in params: + raise ValueError("cannot provide both 'lattice' and 'agrid'") + + utils.check_required_keys(self.required_keys(), params.keys()) + utils.check_valid_keys(self.valid_keys(), params.keys()) + + def default_params(self): + return {"single_precision": False, **super().default_params()} + + def valid_keys(self): + return {"single_precision", *super().valid_keys()} + + def __getitem__(self, key): + if isinstance(key, (tuple, list, np.ndarray)) and len(key) == 3: + if any(isinstance(item, slice) for item in key): + return LBFluidSliceWalberla(parent_sip=self, slice_range=key) + else: + return LBFluidNodeWalberla( + parent_sip=self, index=np.array(key)) + + raise TypeError( + f"{key} is not a valid index. Should be a point on the " + "nodegrid e.g. lbf[0,0,0], or a slice e.g. lbf[:,0,0]") + + def add_boundary_from_shape(self, shape, + velocity=np.zeros(3, dtype=float), + boundary_type=VelocityBounceBack): + """ + Set velocity bounce-back boundary conditions from a shape. 
+ + Parameters + ---------- + shape : :obj:`espressomd.shapes.Shape` + Shape to rasterize. + velocity : (3,) or (L, M, N, 3) array_like of :obj:`float`, optional + Slip velocity. By default no-slip boundary conditions are used. + If a vector of 3 values, a uniform slip velocity is used, + otherwise ``L, M, N`` must be equal to the LB grid dimensions. + boundary_type : Union[:class:`~espressomd.lb.VelocityBounceBack`] (optional) + Type of the boundary condition. + + """ + if not issubclass(boundary_type, VelocityBounceBack): + raise TypeError( + "Parameter 'boundary_type' must be a subclass of VelocityBounceBack") + + utils.check_type_or_throw_except( + shape, 1, espressomd.shapes.Shape, "expected an espressomd.shapes.Shape") + if np.shape(velocity) not in [(3,), tuple(self.shape) + (3,)]: + raise ValueError( + f'Cannot process velocity value grid of shape {np.shape(velocity)}') + + # range checks + lattice_speed = self.call_method("get_lattice_speed") + velocity = np.array(velocity, dtype=float).reshape((-1, 3)) + velocity *= 1. / lattice_speed + self._check_mach_limit(velocity) + + mask = self.get_shape_bitmask(shape=shape).astype(int) + self.call_method( + "add_boundary_from_shape", + raster=array_variant(mask.flatten()), + values=array_variant(velocity.flatten())) + + +class LBFluidWalberlaGPU(HydrodynamicInteraction): + """ + Initialize the lattice-Boltzmann method for hydrodynamic flow using + waLBerla for the GPU. See :class:`HydrodynamicInteraction` for the + list of parameters. 
+ + """ + + # pylint: disable=unused-argument + def __init__(self, *args, **kwargs): + if not espressomd.code_features.has_features("CUDA"): + raise NotImplementedError("Feature CUDA not compiled in") + if not espressomd.code_features.has_features("WALBERLA"): + raise NotImplementedError("Feature WALBERLA not compiled in") + raise NotImplementedError("Not implemented yet") + + +@script_interface_register +class LBFluidNodeWalberla(ScriptInterfaceHelper): + _so_name = "walberla::LBFluidNode" + _so_creation_policy = "GLOBAL" + + def required_keys(self): + return {"parent_sip", "index"} + + def __init__(self, *args, **kwargs): + if "sip" not in kwargs: + super().__init__(*args, **kwargs) + utils.handle_errors("LBFluidNode instantiation failed") + else: + super().__init__(**kwargs) + + def __reduce__(self): + raise NotImplementedError("Cannot serialize LB fluid node objects") + + def __eq__(self, obj): + return isinstance(obj, LBFluidNodeWalberla) and self.index == obj.index + + def __hash__(self): + return hash(self.index) + + @property + def index(self): + return tuple(self._index) + + @index.setter + def index(self, value): + raise RuntimeError("Parameter 'index' is read-only.") + + @property + def density(self): + return self.call_method("get_density") + + @density.setter + def density(self, value): + self.call_method("set_density", value=value) + + @property + def population(self): + return utils.array_locked(self.call_method("get_population")) + + @population.setter + def population(self, value): + self.call_method("set_population", value=value) + + @property + def pressure_tensor(self): + tensor = self.call_method("get_pressure_tensor") + return utils.array_locked(tensor) + + @pressure_tensor.setter + def pressure_tensor(self, value): + raise RuntimeError("Property 'pressure_tensor' is read-only.") + + @property + def is_boundary(self): + return self.call_method("get_is_boundary") + + @is_boundary.setter + def is_boundary(self, value): + raise 
RuntimeError("Property 'is_boundary' is read-only.") + + @property + def boundary(self): + """ + Returns + ------- + :class:`~espressomd.lb.VelocityBounceBack` + If the node is a boundary node + None + If the node is not a boundary node + + """ + + velocity = self.call_method("get_velocity_at_boundary") + if velocity is not None: + return VelocityBounceBack(velocity) + return None + + @boundary.setter + def boundary(self, value): + """ + Parameters + ---------- + value : :class:`~espressomd.lb.VelocityBounceBack` or ``None`` + If value is :class:`~espressomd.lb.VelocityBounceBack`, + set the node to be a boundary node with the specified velocity. + If value is ``None``, the node will become a fluid node. + + """ + + if isinstance(value, VelocityBounceBack): + value = value.velocity + lattice_speed = self.call_method("get_lattice_speed") + HydrodynamicInteraction._check_mach_limit( + np.array(value) / lattice_speed) + elif value is not None: + raise TypeError( + "Parameter 'value' must be an instance of VelocityBounceBack or None") + self.call_method("set_velocity_at_boundary", value=value) + + @property + def boundary_force(self): + return self.call_method("get_boundary_force") + + @boundary_force.setter + def boundary_force(self, value): + raise RuntimeError("Property 'boundary_force' is read-only.") + + @property + def velocity(self): + return self.call_method("get_velocity") + + @velocity.setter + def velocity(self, value): + self.call_method("set_velocity", value=value) + + @property + def last_applied_force(self): + return self.call_method("get_last_applied_force") + + @last_applied_force.setter + def last_applied_force(self, value): + self.call_method("set_last_applied_force", value=value) + + +@script_interface_register +class LBFluidSliceWalberla(ScriptInterfaceHelper): + _so_name = "walberla::LBFluidSlice" + _so_creation_policy = "GLOBAL" + + def required_keys(self): + return {"parent_sip", "slice_range"} + + def validate_params(self, params): + 
utils.check_required_keys(self.required_keys(), params.keys()) + + def __init__(self, *args, **kwargs): + if "sip" in kwargs: + super().__init__(**kwargs) + else: + self.validate_params(kwargs) + slice_range = kwargs.pop("slice_range") + grid_size = kwargs["parent_sip"].shape + extra_kwargs = espressomd.detail.walberla.get_slice_bounding_box( + slice_range, grid_size) + node = LBFluidNodeWalberla(index=np.array([0, 0, 0]), **kwargs) + super().__init__(*args, node_sip=node, **kwargs, **extra_kwargs) + utils.handle_errors("LBFluidSliceWalberla instantiation failed") + + def __iter__(self): + lower, upper = self.call_method("get_slice_ranges") + indices = [list(range(lower[i], upper[i])) for i in range(3)] + lb_sip = self.call_method("get_lb_sip") + for index in itertools.product(*indices): + yield LBFluidNodeWalberla(parent_sip=lb_sip, index=np.array(index)) + + def __reduce__(self): + raise NotImplementedError("Cannot serialize LB fluid slice objects") + + def _getter(self, attr): + value_grid, shape = self.call_method(f"get_{attr}") + if attr == "velocity_at_boundary": + value_grid = [ + None if x is None else VelocityBounceBack(x) for x in value_grid] + return utils.array_locked(np.reshape(value_grid, shape)) + + def _setter(self, attr, values): + dimensions = self.call_method("get_slice_size") + if 0 in dimensions: + raise AttributeError( + f"Cannot set properties of an empty '{self.__class__.__name__}' object") + + values = np.copy(values) + value_shape = tuple(self.call_method("get_value_shape", name=attr)) + target_shape = (*dimensions, *value_shape) + + # broadcast if only one element was provided + if values.shape == value_shape or values.shape == () and value_shape == (1,): + values = np.full(target_shape, values) + + def shape_squeeze(shape): + return tuple(x for x in shape if x != 1) + + if shape_squeeze(values.shape) != shape_squeeze(target_shape): + raise ValueError( + f"Input-dimensions of '{attr}' array {values.shape} does not match slice dimensions 
{target_shape}") + + self.call_method(f"set_{attr}", values=values.flatten()) + + @property + def density(self): + return self._getter("density",) + + @density.setter + def density(self, value): + self._setter("density", value) + + @property + def population(self): + return self._getter("population") + + @population.setter + def population(self, value): + self._setter("population", value) + + @property + def pressure_tensor(self): + return self._getter("pressure_tensor") + + @pressure_tensor.setter + def pressure_tensor(self, value): + raise RuntimeError("Property 'pressure_tensor' is read-only.") + + @property + def is_boundary(self): + return self._getter("is_boundary") + + @is_boundary.setter + def is_boundary(self, value): + raise RuntimeError("Property 'is_boundary' is read-only.") + + @property + def boundary(self): + """ + Returns + ------- + (N, M, L) array_like of :class:`~espressomd.lb.VelocityBounceBack` + If the nodes are boundary nodes + (N, M, L) array_like of ``None`` + If the nodes are not boundary nodes + + """ + + return self._getter("velocity_at_boundary") + + @boundary.setter + def boundary(self, values): + """ + Parameters + ---------- + values : (N, M, L) array_like of :class:`~espressomd.lb.VelocityBounceBack` or ``None`` + If values are :class:`~espressomd.lb.VelocityBounceBack`, + set the nodes to be boundary nodes with the specified velocity. + If values are ``None``, the nodes will become fluid nodes. 
+ + """ + + type_error_msg = "Parameter 'values' must be an array_like of VelocityBounceBack or None" + values = np.copy(values) + lattice_speed = self.call_method("get_lattice_speed") + if values.dtype != np.dtype("O"): + raise TypeError(type_error_msg) + for index in np.ndindex(*values.shape): + if values[index] is not None: + if not isinstance(values[index], VelocityBounceBack): + raise TypeError(type_error_msg) + HydrodynamicInteraction._check_mach_limit( + np.array(values[index].velocity) / lattice_speed) + values[index] = np.array(values[index].velocity) + self._setter("velocity_at_boundary", values=values) + + @property + def boundary_force(self): + return self._getter("boundary_force") + + @boundary_force.setter + def boundary_force(self, value): + raise RuntimeError("Property 'boundary_force' is read-only.") + + @property + def velocity(self): + return self._getter("velocity") + + @velocity.setter + def velocity(self, value): + self._setter("velocity", value) + + @property + def last_applied_force(self): + return self._getter("last_applied_force") + + @last_applied_force.setter + def last_applied_force(self, value): + self._setter("last_applied_force", value) + + +@script_interface_register +class VTKOutput(VTKOutputBase): + """ + Create a VTK writer. + + Files are written to ``//_*.vtu``. + Summary is written to ``/.pvd``. + + Manual VTK callbacks can be called at any time to take a snapshot + of the current state of the LB fluid. + + Automatic VTK callbacks can be disabled at any time and re-enabled later. + Please note that the internal VTK counter is no longer incremented when + an automatic callback is disabled, which means the number of LB steps + between two frames will not always be an integer multiple of ``delta_N``. + + Parameters + ---------- + identifier : :obj:`str` + Name of the VTK writer. + observables : :obj:`list`, {'density', 'velocity_vector', 'pressure_tensor'} + List of observables to write to the VTK files. 
+ delta_N : :obj:`int` + Write frequency. If this value is 0 (default), the object is a + manual VTK callback that must be triggered manually. Otherwise, + it is an automatic callback that is added to the time loop and + writes every ``delta_N`` LB steps. + base_folder : :obj:`str` (optional), default is 'vtk_out' + Path to the output VTK folder. + prefix : :obj:`str` (optional), default is 'simulation_step' + Prefix for VTK files. + + """ + _so_name = "walberla::LBVTKHandle" + _so_creation_policy = "GLOBAL" + _so_bind_methods = ("enable", "disable", "write") + + def required_keys(self): + return self.valid_keys() - self.default_params().keys() + + def __repr__(self): + class_id = f"{self.__class__.__module__}.{self.__class__.__name__}" + if self.delta_N: + write_when = f"every {self.delta_N} LB steps" + if not self.enabled: + write_when += " (disabled)" + else: + write_when = "on demand" + return f"<{class_id}: write to '{self.vtk_uid}' {write_when}>" + + +def edge_detection(boundary_mask, periodicity): + """ + Find boundary nodes in contact with the fluid. Relies on a convolution + kernel constructed from the D3Q19 stencil. + + Parameters + ---------- + boundary_mask : (N, M, L) array_like of :obj:`bool` + Bitmask for the rasterized boundary geometry. + periodicity : (3,) array_like of :obj:`bool` + Bitmask for the box periodicity. + + Returns + ------- + (N, 3) array_like of :obj:`int` + The indices of the boundary nodes at the interface with the fluid. 
+ + """ + import scipy.signal + import itertools + + fluid_mask = np.logical_not(boundary_mask) + + # edge kernel + edge = -np.ones((3, 3, 3)) + for i, j, k in itertools.product((0, 2), (0, 2), (0, 2)): + edge[i, j, k] = 0 + edge[1, 1, 1] = -np.sum(edge) + + # periodic convolution + wrapped_mask = np.pad(fluid_mask.astype(int), 3 * [(2, 2)], mode="wrap") + if not periodicity[0]: + wrapped_mask[:2, :, :] = 0 + wrapped_mask[-2:, :, :] = 0 + if not periodicity[1]: + wrapped_mask[:, :2, :] = 0 + wrapped_mask[:, -2:, :] = 0 + if not periodicity[2]: + wrapped_mask[:, :, :2] = 0 + wrapped_mask[:, :, -2:] = 0 + convolution = scipy.signal.convolve( + wrapped_mask, edge, mode="same", method="direct")[2:-2, 2:-2, 2:-2] + convolution = np.multiply(convolution, boundary_mask) + + return np.array(np.nonzero(convolution < 0)).T + + +def calc_cylinder_tangential_vectors(center, agrid, offset, node_indices): + """ + Utility function to calculate a constant slip velocity tangential to the + surface of a cylinder. + + Parameters + ---------- + center : (3,) array_like of :obj:`float` + Center of the cylinder. + agrid : :obj:`float` + LB agrid. + offset : :obj:`float` + LB offset. + node_indices : (N, 3) array_like of :obj:`int` + Indices of the boundary surface nodes. + + Returns + ------- + (N, 3) array_like of :obj:`float` + The unit vectors tangential to the surface of a cylinder. 
+ + """ + velocities = [] + for ijk in node_indices: + p = (ijk + offset) * agrid + r = center - p + norm = np.linalg.norm(r[:2]) + if norm < 1e-10: + velocities.append(np.zeros(3)) + continue + angle_r = np.arccos(np.dot(r[:2] / norm, [1, 0])) + angle_v = angle_r - np.pi / 2 + flip = np.sign(r[1]) + slip_velocity = np.array([flip * np.cos(angle_v), np.sin(angle_v), 0.]) + velocities.append(slip_velocity) + return np.array(velocities) diff --git a/src/python/espressomd/lb.pyx b/src/python/espressomd/lb.pyx deleted file mode 100644 index e2875002e33..00000000000 --- a/src/python/espressomd/lb.pyx +++ /dev/null @@ -1,809 +0,0 @@ -# -# Copyright (C) 2013-2022 The ESPResSo project -# -# This file is part of ESPResSo. -# -# ESPResSo is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# ESPResSo is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# -include "myconfig.pxi" -import os -import cython -import itertools -import functools -import numpy as np -cimport numpy as np -from libc cimport stdint -from . import highlander -from . import utils -from . cimport utils -from .utils cimport Vector3i, Vector3d, Vector6d, Vector19d -from .lb cimport get_time_step - - -cdef class FluidActor: - - """ - Abstract base class for interactions affecting particles in the system, - such as LB fluids. Derived classes must implement the interface to the - relevant core objects and global variables. - """ - - # Keys in active_list have to match the method name. 
- active_list = dict(HydrodynamicInteraction=False) - - # __getstate__ and __setstate__ define the pickle interaction - def __getstate__(self): - odict = self._params.copy() - return odict - - def __setstate__(self, params): - self._params = params - self._set_params_in_es_core() - - def __init__(self, *args, **kwargs): - self._isactive = False - utils.check_valid_keys(self.valid_keys(), kwargs.keys()) - utils.check_required_keys(self.required_keys(), kwargs.keys()) - self._params = self.default_params() - self._params.update(kwargs) - - def _activate(self): - inter = self._get_interaction_type() - if inter in FluidActor.active_list: - if FluidActor.active_list[inter]: - raise highlander.ThereCanOnlyBeOne(self.__class__.__bases__[0]) - FluidActor.active_list[inter] = True - - self.validate_params() - self._activate_method() - utils.handle_errors("Activation of an actor") - self._isactive = True - - def _deactivate(self): - self._deactivate_method() - utils.handle_errors("Deactivation of an actor") - self._isactive = False - inter = self._get_interaction_type() - if inter in FluidActor.active_list: - if not FluidActor.active_list[inter]: - raise Exception( - f"Class not registered in Actor.active_list: {self.__class__.__bases__[0].__name__}") - FluidActor.active_list[inter] = False - - def is_valid(self): - """ - Check if the data stored in this instance still matches the - corresponding data in the core. 
- """ - return self._params == self._get_params_from_es_core() - - def get_params(self): - """Get interaction parameters""" - # If this instance refers to an actual interaction defined in the es - # core, load current parameters from there - if self.is_active(): - update = self._get_params_from_es_core() - self._params.update(update) - return self._params - - def set_params(self, **p): - """Update the given parameters.""" - # Check if keys are valid - utils.check_valid_keys(self.valid_keys(), p.keys()) - - # When an interaction is newly activated, all required keys must be - # given - if not self.is_active(): - utils.check_required_keys(self.required_keys(), p.keys()) - - self._params.update(p) - # validate updated parameters - self.validate_params() - # Put in values given by the user - if self.is_active(): - self._set_params_in_es_core() - - def __str__(self): - return f"{self.__class__.__name__}({self.get_params()})" - - def _get_interaction_type(self): - bases = self.class_lookup(self.__class__) - for i in range(len(bases)): - if bases[i].__name__ in FluidActor.active_list: - return bases[i].__name__ - - def class_lookup(self, cls): - c = list(cls.__bases__) - for base in c: - c.extend(self.class_lookup(base)) - return c - - def is_active(self): - return self._isactive - - def valid_keys(self): - """Virtual method.""" - raise Exception( - f"Subclasses of {self._get_interaction_type()} must define the valid_keys() method.") - - def required_keys(self): - """Virtual method.""" - raise Exception( - "Subclasses of {self._get_interaction_type()} must define the required_keys() method.") - - def validate_params(self): - """Virtual method.""" - raise Exception( - "Subclasses of {self._get_interaction_type()} must define the validate_params() method.") - - def _get_params_from_es_core(self): - """Virtual method.""" - raise Exception( - "Subclasses of {self._get_interaction_type()} must define the _get_params_from_es_core() method.") - - def 
_set_params_in_es_core(self): - """Virtual method.""" - raise Exception( - "Subclasses of {self._get_interaction_type()} must define the _set_params_in_es_core() method.") - - def default_params(self): - """Virtual method.""" - raise Exception( - "Subclasses of {self._get_interaction_type()} must define the default_params() method.") - - def _activate_method(self): - """Virtual method.""" - raise Exception( - "Subclasses of {self._get_interaction_type()} must define the _activate_method() method.") - - def _deactivate_method(self): - """Virtual method.""" - raise Exception( - "Subclasses of {self._get_interaction_type()} must define the _deactivate_method() method.") - - -cdef class HydrodynamicInteraction(FluidActor): - """ - Base class for LB implementations. - - Parameters - ---------- - agrid : :obj:`float` - Lattice constant. The box size in every direction must be an integer - multiple of ``agrid``. - tau : :obj:`float` - LB time step, must be an integer multiple of the MD time step. - dens : :obj:`float` - Fluid density. - visc : :obj:`float` - Fluid kinematic viscosity. - bulk_visc : :obj:`float`, optional - Fluid bulk viscosity. - gamma_odd : :obj:`int`, optional - Relaxation parameter :math:`\\gamma_{\\textrm{odd}}` for kinetic modes. - gamma_even : :obj:`int`, optional - Relaxation parameter :math:`\\gamma_{\\textrm{even}}` for kinetic modes. - ext_force_density : (3,) array_like of :obj:`float`, optional - Force density applied on the fluid. - kT : :obj:`float`, optional - Thermal energy of the simulated heat bath (for thermalized fluids). - Set it to 0 for an unthermalized fluid. - seed : :obj:`int`, optional - Initial counter value (or seed) of the philox RNG. - Required for a thermalized fluid. Must be positive. - """ - - def _assert_agrid_tau_set(self): - unset = self.default_params() - assert self.agrid != unset['agrid'] and self.tau != unset['tau'], \ - "tau and agrid have to be set first!" 
- - def _lb_init(self): - raise Exception( - "Subclasses of HydrodynamicInteraction must define the _lb_init() method.") - - @classmethod - def _restore_object(cls, derived_cls, params): - obj = derived_cls(**params) - obj._params = params - return obj - - def __reduce__(self): - return (HydrodynamicInteraction._restore_object, - (self.__class__, self._params)) - - def __getitem__(self, key): - cdef Vector3i shape - if isinstance(key, (tuple, list, np.ndarray)): - if len(key) == 3: - if any(isinstance(typ, slice) for typ in key): - shape = lb_lbfluid_get_shape() - return LBSlice(key, (shape[0], shape[1], shape[2])) - else: - return LBFluidRoutines(np.array(key)) - else: - raise Exception( - "%s is not a valid key. Should be a point on the nodegrid e.g. lbf[0,0,0], or a slice" % key) - # validate the given parameters on actor initialization - #################################################### - - def validate_params(self): - default_params = self.default_params() - - utils.check_type_or_throw_except( - self._params["kT"], 1, float, "kT must be a number") - if self._params["kT"] > 0. 
and not self._params["seed"]: - raise ValueError( - "seed has to be given if temperature is not 0.") - - if self._params["dens"] == default_params["dens"]: - raise Exception("LB_FLUID density not set") - elif not (self._params["dens"] > 0.0 and (utils.is_valid_type(self._params["dens"], float) or utils.is_valid_type(self._params["dens"], int))): - raise ValueError("Density must be a positive double") - - if self._params["tau"] <= 0.: - raise ValueError("tau has to be a positive double") - - def valid_keys(self): - return {"agrid", "dens", "ext_force_density", "visc", "tau", - "bulk_visc", "gamma_odd", "gamma_even", "kT", "seed"} - - def required_keys(self): - return {"dens", "agrid", "visc", "tau"} - - def default_params(self): - return {"agrid": -1.0, - "dens": -1.0, - "ext_force_density": [0.0, 0.0, 0.0], - "visc": -1.0, - "bulk_visc": -1.0, - "tau": -1.0, - "seed": None, - "kT": 0.} - - def _set_lattice_switch(self): - raise Exception( - "Subclasses of HydrodynamicInteraction must define the _set_lattice_switch() method.") - - def _set_params_in_es_core(self): - default_params = self.default_params() - self.agrid = self._params['agrid'] - self.tau = self._params['tau'] - self.density = self._params['dens'] - - if self._params['kT'] > 0.: - self.seed = self._params['seed'] - self.kT = self._params['kT'] - - self.viscosity = self._params['visc'] - if self._params['bulk_visc'] != default_params['bulk_visc']: - self.bulk_viscosity = self._params['bulk_visc'] - - self.ext_force_density = self._params["ext_force_density"] - - if "gamma_odd" in self._params: - python_lbfluid_set_gamma_odd(self._params["gamma_odd"]) - - if "gamma_even" in self._params: - python_lbfluid_set_gamma_even(self._params["gamma_even"]) - - utils.handle_errors("LB fluid activation") - - def _get_params_from_es_core(self): - default_params = self.default_params() - self._params['agrid'] = self.agrid - self._params["tau"] = self.tau - self._params['dens'] = self.density - self._params["kT"] = 
self.kT - if self._params['kT'] > 0.0: - self._params['seed'] = self.seed - self._params['visc'] = self.viscosity - if not self._params["bulk_visc"] == default_params["bulk_visc"]: - self._params['bulk_visc'] = self.bulk_viscosity - self._params['ext_force_density'] = self.ext_force_density - if 'gamma_odd' in self._params: - self._params['gamma_odd'] = lb_lbfluid_get_gamma_odd() - if 'gamma_even' in self._params: - self._params['gamma_even'] = lb_lbfluid_get_gamma_even() - - return self._params - - def set_interpolation_order(self, interpolation_order): - """ Set the order for the fluid interpolation scheme. - - Parameters - ---------- - interpolation_order : :obj:`str`, {"linear", "quadratic"} - ``"linear"`` for trilinear interpolation, ``"quadratic"`` for - quadratic interpolation. For the CPU implementation of LB, only - ``"linear"`` is available. - - """ - if interpolation_order == "linear": - lb_lbinterpolation_set_interpolation_order(linear) - elif interpolation_order == "quadratic": - lb_lbinterpolation_set_interpolation_order(quadratic) - else: - raise ValueError("Invalid parameter") - - def get_interpolated_velocity(self, pos): - """Get LB fluid velocity at specified position. - - Parameters - ---------- - pos : (3,) array_like of :obj:`float` - The position at which velocity is requested. - - Returns - ------- - v : (3,) array_like :obj:`float` - The LB fluid velocity at ``pos``. - - """ - return python_lbnode_get_interpolated_velocity( - utils.make_Vector3d(pos)) - - def write_vtk_velocity(self, path, bb1=None, bb2=None): - """Write the LB fluid velocity to a VTK file. - If both ``bb1`` and ``bb2`` are specified, return a subset of the grid. - - Parameters - ---------- - path : :obj:`str` - Path to the output ASCII file. - bb1 : (3,) array_like of :obj:`int`, optional - Node indices of the lower corner of the bounding box. - bb2 : (3,) array_like of :obj:`int`, optional - Node indices of the upper corner of the bounding box. 
- - """ - cdef vector[int] bb1_vec - cdef vector[int] bb2_vec - if bb1 is None and bb2 is None: - lb_lbfluid_print_vtk_velocity(utils.to_char_pointer(path)) - elif bb1 is None or bb2 is None: - raise ValueError( - "Invalid parameter: must provide either both bb1 and bb2, or none of them") - else: - utils.check_type_or_throw_except( - bb1, 3, int, "bb1 has to be an integer list of length 3") - utils.check_type_or_throw_except( - bb2, 3, int, "bb2 has to be an integer list of length 3") - bb1_vec = bb1 - bb2_vec = bb2 - lb_lbfluid_print_vtk_velocity( - utils.to_char_pointer(path), bb1_vec, bb2_vec) - - def write_vtk_boundary(self, path): - """Write the LB boundaries to a VTK file. - - Parameters - ---------- - path : :obj:`str` - Path to the output ASCII file. - - """ - lb_lbfluid_print_vtk_boundary(utils.to_char_pointer(path)) - - def write_velocity(self, path): - """Write the LB fluid velocity to a data file that can be loaded by - numpy, with format "x y z vx vy vz". - - Parameters - ---------- - path : :obj:`str` - Path to the output data file. - - """ - lb_lbfluid_print_velocity(utils.to_char_pointer(path)) - - def write_boundary(self, path): - """Write the LB boundaries to a data file that can be loaded by numpy, - with format "x y z u". - - Parameters - ---------- - path : :obj:`str` - Path to the output data file. - - """ - lb_lbfluid_print_boundary(utils.to_char_pointer(path)) - - def save_checkpoint(self, path, binary): - ''' - Write LB node populations to a file. - :class:`~espressomd.lbboundaries.LBBoundaries` - information is not written to the file. - ''' - tmp_path = path + ".__tmp__" - lb_lbfluid_save_checkpoint(utils.to_char_pointer(tmp_path), binary) - os.rename(tmp_path, path) - - def load_checkpoint(self, path, binary): - ''' - Load LB node populations from a file. - :class:`~espressomd.lbboundaries.LBBoundaries` - information is not available in the file. 
The boundary - information of the grid will be set to zero, - even if :class:`~espressomd.lbboundaries.LBBoundaries` - contains :class:`~espressomd.lbboundaries.LBBoundary` - objects (they are ignored). - ''' - lb_lbfluid_load_checkpoint(utils.to_char_pointer(path), binary) - - def _activate_method(self): - raise Exception( - "Subclasses of HydrodynamicInteraction have to implement _activate_method.") - - def _deactivate_method(self): - lb_lbfluid_set_lattice_switch(NONE) - - property shape: - def __get__(self): - cdef Vector3i shape = lb_lbfluid_get_shape() - return (shape[0], shape[1], shape[2]) - - property kT: - def __get__(self): - return lb_lbfluid_get_kT() - - def __set__(self, kT): - cdef double _kT = kT - lb_lbfluid_set_kT(_kT) - - property seed: - def __get__(self): - return lb_lbfluid_get_rng_state() - - def __set__(self, seed): - cdef stdint.uint64_t _seed = seed - lb_lbfluid_set_rng_state(seed) - - property pressure_tensor: - def __get__(self): - tensor = python_lbfluid_get_pressure_tensor(self.agrid, self.tau) - return utils.array_locked(tensor) - - def __set__(self, value): - raise NotImplementedError - - property ext_force_density: - def __get__(self): - self._assert_agrid_tau_set() - return python_lbfluid_get_ext_force_density(self.agrid, self.tau) - - def __set__(self, ext_force_density): - self._assert_agrid_tau_set() - python_lbfluid_set_ext_force_density( - utils.make_Vector3d(ext_force_density), self.agrid, self.tau) - - property density: - def __get__(self): - self._assert_agrid_tau_set() - return python_lbfluid_get_density(self.agrid) - - def __set__(self, density): - self._assert_agrid_tau_set() - python_lbfluid_set_density(density, self.agrid) - - property viscosity: - def __get__(self): - self._assert_agrid_tau_set() - return python_lbfluid_get_viscosity(self.agrid, self.tau) - - def __set__(self, viscosity): - self._assert_agrid_tau_set() - python_lbfluid_set_viscosity(viscosity, self.agrid, self.tau) - - property bulk_viscosity: - def 
__get__(self): - self._assert_agrid_tau_set() - return python_lbfluid_get_bulk_viscosity(self.agrid, self.tau) - - def __set__(self, viscosity): - self._assert_agrid_tau_set() - python_lbfluid_set_bulk_viscosity(viscosity, self.agrid, self.tau) - - property tau: - def __get__(self): - return lb_lbfluid_get_tau() - - def __set__(self, tau): - lb_lbfluid_set_tau(tau) - if get_time_step() > 0.0: - check_tau_time_step_consistency(tau, get_time_step()) - - property agrid: - def __get__(self): - return lb_lbfluid_get_agrid() - - def __set__(self, agrid): - lb_lbfluid_set_agrid(agrid) - - def nodes(self): - """Provides a generator for iterating over all lb nodes""" - - shape = self.shape - for i, j, k in itertools.product( - range(shape[0]), range(shape[1]), range(shape[2])): - yield self[i, j, k] - - -cdef class LBFluid(HydrodynamicInteraction): - """ - Initialize the lattice-Boltzmann method for hydrodynamic flow using the CPU. - See :class:`HydrodynamicInteraction` for the list of parameters. - - """ - - def _set_lattice_switch(self): - lb_lbfluid_set_lattice_switch(CPU) - - def _activate_method(self): - self.validate_params() - self._set_lattice_switch() - self._set_params_in_es_core() - -IF CUDA: - cdef class LBFluidGPU(HydrodynamicInteraction): - """ - Initialize the lattice-Boltzmann method for hydrodynamic flow using the GPU. - See :class:`HydrodynamicInteraction` for the list of parameters. - - """ - - def _set_lattice_switch(self): - lb_lbfluid_set_lattice_switch(GPU) - - def _activate_method(self): - self.validate_params() - self._set_lattice_switch() - self._set_params_in_es_core() - - @cython.boundscheck(False) - @cython.wraparound(False) - def get_interpolated_fluid_velocity_at_positions(self, np.ndarray[double, ndim=2, mode="c"] positions not None, three_point=False): - """Calculate the fluid velocity at given positions. - - Parameters - ---------- - positions : (N,3) numpy-array of type :obj:`float` - The 3-dimensional positions. 
- - Returns - ------- - velocities : (N,3) numpy-array of type :obj:`float` - The 3-dimensional LB fluid velocities. - - Raises - ------ - AssertionError - If shape of ``positions`` not (N,3). - - """ - assert positions.shape[1] == 3, \ - "The input array must have shape (N,3)" - cdef int length - length = positions.shape[0] - velocities = np.empty_like(positions) - if three_point: - quadratic_velocity_interpolation(< double * >np.PyArray_GETPTR2(positions, 0, 0), < double * >np.PyArray_GETPTR2(velocities, 0, 0), length) - else: - linear_velocity_interpolation(< double * >np.PyArray_GETPTR2(positions, 0, 0), < double * >np.PyArray_GETPTR2(velocities, 0, 0), length) - return velocities * lb_lbfluid_get_lattice_speed() - -ELSE: - cdef class LBFluidGPU(HydrodynamicInteraction): - def __init__(self, *args, **kwargs): - raise Exception("LBFluidGPU not compiled in.") - - -cdef class LBFluidRoutines: - - def __init__(self, key): - utils.check_type_or_throw_except( - key, 3, int, "The index of an lb fluid node consists of three integers.") - self.node = utils.make_Vector3i(key) - if not lb_lbnode_is_index_valid(self.node): - raise ValueError("LB node index out of bounds") - - property index: - def __get__(self): - return (self.node[0], self.node[1], self.node[2]) - - property velocity: - def __get__(self): - return python_lbnode_get_velocity(self.node) - - def __set__(self, value): - utils.check_type_or_throw_except( - value, 3, float, "velocity has to be 3 floats") - python_lbnode_set_velocity(self.node, utils.make_Vector3d(value)) - - property density: - def __get__(self): - return python_lbnode_get_density(self.node) - - def __set__(self, value): - python_lbnode_set_density(self.node, value) - - property pressure_tensor: - def __get__(self): - tensor = python_lbnode_get_pressure_tensor(self.node) - return utils.array_locked(tensor) - - def __set__(self, value): - raise NotImplementedError - - property pressure_tensor_neq: - def __get__(self): - tensor = 
python_lbnode_get_pressure_tensor_neq(self.node) - return utils.array_locked(tensor) - - def __set__(self, value): - raise NotImplementedError - - property population: - def __get__(self): - cdef Vector19d double_return - double_return = lb_lbnode_get_pop(self.node) - return utils.array_locked(np.array([double_return[0], - double_return[1], - double_return[2], - double_return[3], - double_return[4], - double_return[5], - double_return[6], - double_return[7], - double_return[8], - double_return[9], - double_return[10], - double_return[11], - double_return[12], - double_return[13], - double_return[14], - double_return[15], - double_return[16], - double_return[17], - double_return[18]] - )) - - def __set__(self, population): - cdef Vector19d _population - for i in range(19): - _population[i] = population[i] - lb_lbnode_set_pop(self.node, _population) - - property boundary: - def __get__(self): - return lb_lbnode_get_boundary(self.node) - - def __set__(self, value): - raise NotImplementedError - - def __eq__(self, obj1): - index_1 = np.array(self.index) - index_2 = np.array(obj1.index) - return all(index_1 == index_2) - - def __hash__(self): - return hash(self.index) - - -class LBSlice: - - def __init__(self, key, shape): - self.x_indices, self.y_indices, self.z_indices = self.get_indices( - key, shape[0], shape[1], shape[2]) - - def get_indices(self, key, shape_x, shape_y, shape_z): - x_indices = np.atleast_1d(np.arange(shape_x)[key[0]]) - y_indices = np.atleast_1d(np.arange(shape_y)[key[1]]) - z_indices = np.atleast_1d(np.arange(shape_z)[key[2]]) - return x_indices, y_indices, z_indices - - def get_values(self, x_indices, y_indices, z_indices, prop_name): - shape_res = np.shape( - getattr(LBFluidRoutines(np.array([0, 0, 0])), prop_name)) - res = np.zeros( - (x_indices.size, - y_indices.size, - z_indices.size, - *shape_res)) - for i, x in enumerate(x_indices): - for j, y in enumerate(y_indices): - for k, z in enumerate(z_indices): - res[i, j, k] = 
getattr(LBFluidRoutines( - np.array([x, y, z])), prop_name) - if shape_res == (1,): - res = np.squeeze(res, axis=-1) - return utils.array_locked(res) - - def set_values(self, x_indices, y_indices, z_indices, prop_name, value): - for i, x in enumerate(x_indices): - for j, y in enumerate(y_indices): - for k, z in enumerate(z_indices): - setattr(LBFluidRoutines( - np.array([x, y, z])), prop_name, value[i, j, k]) - - def __iter__(self): - indices = [(x, y, z) for (x, y, z) in itertools.product( - self.x_indices, self.y_indices, self.z_indices)] - return (LBFluidRoutines(np.array(index)) for index in indices) - - -def _add_lb_slice_properties(): - """ - Automatically add all of LBFluidRoutines's properties to LBSlice. - - """ - - def set_attribute(lb_slice, value, attribute): - """ - Setter function that sets attribute on every member of lb_slice. - If values contains only one element, all members are set to it. - - """ - - indices = [lb_slice.x_indices, lb_slice.y_indices, lb_slice.z_indices] - N = [len(x) for x in indices] - - if N[0] * N[1] * N[2] == 0: - raise AttributeError("Cannot set properties of an empty LBSlice") - - value = np.copy(value) - attribute_shape = lb_slice.get_values( - *np.zeros((3, 1), dtype=int), attribute).shape[3:] - target_shape = (*N, *attribute_shape) - - # broadcast if only one element was provided - if value.shape == attribute_shape: - value = np.ones(target_shape) * value - - if value.shape != target_shape: - raise ValueError( - f"Input-dimensions of {attribute} array {value.shape} does not match slice dimensions {target_shape}.") - - lb_slice.set_values(*indices, attribute, value) - - def get_attribute(lb_slice, attribute): - """ - Getter function that copies attribute from every member of - lb_slice into an array (if possible). 
- - """ - - indices = [lb_slice.x_indices, lb_slice.y_indices, lb_slice.z_indices] - N = [len(x) for x in indices] - - if N[0] * N[1] * N[2] == 0: - return np.empty(0, dtype=type(None)) - - return lb_slice.get_values(*indices, attribute) - - for attribute_name in dir(LBFluidRoutines): - if attribute_name in dir(LBSlice) or not isinstance( - getattr(LBFluidRoutines, attribute_name), type(LBFluidRoutines.density)): - continue - - # synthesize a new property - new_property = property( - functools.partial(get_attribute, attribute=attribute_name), - functools.partial(set_attribute, attribute=attribute_name), - doc=getattr(LBFluidRoutines, attribute_name).__doc__ or f'{attribute_name} for a slice') - # attach the property to LBSlice - setattr(LBSlice, attribute_name, new_property) - - -_add_lb_slice_properties() diff --git a/src/python/espressomd/lbboundaries.py b/src/python/espressomd/lbboundaries.py deleted file mode 100644 index 5ddea5dcc0a..00000000000 --- a/src/python/espressomd/lbboundaries.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (C) 2010-2022 The ESPResSo project -# -# This file is part of ESPResSo. -# -# ESPResSo is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# ESPResSo is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>.
-from .script_interface import ScriptObjectList, ScriptInterfaceHelper, script_interface_register -from .code_features import has_features - - -if any(has_features(i) for i in ["LB_BOUNDARIES", "LB_BOUNDARIES_GPU"]): - @script_interface_register - class LBBoundaries(ScriptObjectList): - - """ - Creates a set of lattice-Boltzmann boundaries. - - Methods - ------- - size() - Get the number of active boundaries. - empty() - Return ``True`` if there are not active boundaries. - clear() - Clear the list of boundaries. - - """ - - _so_name = "LBBoundaries::LBBoundaries" - _so_bind_methods = ("size", "empty", "clear") - - def add(self, *args, **kwargs): - """ - Adds a boundary to the set of boundaries. - Either pass a valid boundary as argument, - or a valid set of parameters to create a boundary. - - """ - - if len(args) == 1: - if isinstance(args[0], LBBoundary): - lbboundary = args[0] - else: - raise TypeError( - "Either a LBBoundary object or key-value pairs for the parameters of a LBBoundary object need to be passed.") - else: - lbboundary = LBBoundary(**kwargs) - self.call_method("add", object=lbboundary) - return lbboundary - - def remove(self, lbboundary): - """ - Removes a boundary from the set. - - Parameters - ---------- - lbboundary : :obj:`LBBoundary` - The boundary to be removed from the set. - - """ - - self.call_method("remove", object=lbboundary) - - @script_interface_register - class LBBoundary(ScriptInterfaceHelper): - - """ - Creates a LB boundary from a shape. - - The fluid velocity is limited to :math:`v_{\\mathrm{max}} = 0.20` - (see *quasi-incompressible limit* in :cite:`kruger17a`, - chapter 7, page 272), which corresponds to Mach 0.35. - - The relative error in the fluid density between a compressible fluid - and an incompressible fluid at Mach 0.30 is less than 5% (see - *constant density assumption* in :cite:`kundu01a` chapter 16, page - 663). 
Since the speed of sound is :math:`c_s = 1 / \\sqrt{3}` in LB - velocity units in a D3Q19 lattice, the velocity limit at Mach 0.30 - is :math:`v_{\\mathrm{max}} = 0.30 / \\sqrt{3} \\approx 0.17`. - At Mach 0.35 the relative error is around 6% and - :math:`v_{\\mathrm{max}} = 0.35 / \\sqrt{3} \\approx 0.20`. - - Parameters - ---------- - shape : :obj:`espressomd.shapes.Shape` - The shape from which to build the boundary. - velocity : (3,) array_like of :obj:`float`, optional - The boundary slip velocity. By default, a velocity of zero is used - (no-slip boundary condition). - - """ - - _so_name = "LBBoundaries::LBBoundary" - _so_bind_methods = ("get_force",) diff --git a/src/python/espressomd/script_interface.pyx b/src/python/espressomd/script_interface.pyx index 940c1f3e2cb..5a81316d83b 100644 --- a/src/python/espressomd/script_interface.pyx +++ b/src/python/espressomd/script_interface.pyx @@ -207,6 +207,19 @@ cdef class PScriptInterface: return odict + +class array_variant(np.ndarray): + + """ + Returns a numpy.ndarray that will be serialized as a ``std::vector``. + + """ + + def __new__(cls, input_array): + obj = np.asarray(input_array).view(cls) + return obj + + cdef Variant python_object_to_variant(value) except *: """Convert Python objects to C++ Variant objects.""" @@ -216,10 +229,17 @@ cdef Variant python_object_to_variant(value) except *: cdef unordered_map[int, Variant] map_int2var cdef unordered_map[string, Variant] map_str2var cdef PObjectRef oref + cdef int[::1] view_int + cdef int * data_int + cdef double[::1] view_double + cdef double * data_double if value is None: return Variant() + if isinstance(value, np.ndarray) and value.ndim == 0: + value = value.item() + # The order is important, the object character should # be preserved even if the PScriptInterface derived class # is iterable. 
@@ -245,6 +265,16 @@ cdef Variant python_object_to_variant(value) except *: f" to 'Variant[std::unordered_map]'") elif type(value) in (str, np.str_): return make_variant[string](utils.to_char_pointer(str(value))) + elif isinstance(value, array_variant) and np.issubdtype(value.dtype, np.signedinteger): + view_int = np.ascontiguousarray(value, dtype=np.int32) + data_int = &view_int[0] + vec_int.assign(data_int, data_int + len(view_int)) + return make_variant[vector[int]](vec_int) + elif isinstance(value, array_variant) and np.issubdtype(value.dtype, np.floating): + view_double = np.ascontiguousarray(value, dtype=np.float64) + data_double = &view_double[0] + vec_double.assign(data_double, data_double + len(view_double)) + return make_variant[vector[double]](vec_double) elif hasattr(value, '__iter__'): if len(value) == 0: return make_variant[vector[Variant]](vec_variant) @@ -448,8 +478,7 @@ class ScriptInterfaceHelper(PScriptInterface): class ScriptObjectList(ScriptInterfaceHelper): """ Base class for container-like classes such as - :class:`~espressomd.constraints.Constraints` and - :class:`~espressomd.lbboundaries.LBBoundaries`. Derived classes must + :class:`~espressomd.constraints.Constraints`. Derived classes must implement an ``add()`` method which adds a single item to the container. 
The core objects must be managed by a container derived from @@ -457,15 +486,6 @@ class ScriptObjectList(ScriptInterfaceHelper): """ - def __init__(self, *args, **kwargs): - if args: - params, (_unpickle_so_class, (_so_name, bytestring)) = args - assert _so_name == self._so_name - self = _unpickle_so_class(_so_name, bytestring) - self.__setstate__(params) - else: - super().__init__(**kwargs) - def __getitem__(self, key): return self.call_method("get_elements")[key] @@ -477,24 +497,6 @@ class ScriptObjectList(ScriptInterfaceHelper): def __len__(self): return self.call_method("size") - @classmethod - def _restore_object(cls, so_callback, so_callback_args, state): - so = so_callback(*so_callback_args) - so.__setstate__(state) - return so - - def __reduce__(self): - so_callback, (so_name, so_bytestring) = super().__reduce__() - return (ScriptObjectList._restore_object, - (so_callback, (so_name, so_bytestring), self.__getstate__())) - - def __getstate__(self): - return self.call_method("get_elements") - - def __setstate__(self, object_list): - for item in object_list: - self.add(item) - class ScriptObjectMap(ScriptInterfaceHelper): """ @@ -507,17 +509,6 @@ class ScriptObjectMap(ScriptInterfaceHelper): """ - _key_type = int - - def __init__(self, *args, **kwargs): - if args: - params, (_unpickle_so_class, (_so_name, bytestring)) = args - assert _so_name == self._so_name - self = _unpickle_so_class(_so_name, bytestring) - self.__setstate__(params) - else: - super().__init__(**kwargs) - def remove(self, key): """ Remove the element with the given key. 
@@ -536,15 +527,12 @@ class ScriptObjectMap(ScriptInterfaceHelper): return self.call_method("size") def __getitem__(self, key): - self._assert_key_type(key) return self.call_method("get", key=key) def __setitem__(self, key, value): - self._assert_key_type(key) self.call_method("insert", key=key, object=value) def __delitem__(self, key): - self._assert_key_type(key) self.call_method("erase", key=key) def keys(self): @@ -556,28 +544,6 @@ class ScriptObjectMap(ScriptInterfaceHelper): def items(self): for k in self.keys(): yield k, self[k] - def _assert_key_type(self, key): - if not utils.is_valid_type(key, self._key_type): - raise TypeError(f"Key has to be of type {self._key_type.__name__}") - - @classmethod - def _restore_object(cls, so_callback, so_callback_args, state): - so = so_callback(*so_callback_args) - so.__setstate__(state) - return so - - def __reduce__(self): - so_callback, (so_name, so_bytestring) = super().__reduce__() - return (ScriptObjectMap._restore_object, - (so_callback, (so_name, so_bytestring), self.__getstate__())) - - def __getstate__(self): - return dict(self.items()) - - def __setstate__(self, params): - for key, val in params.items(): - self[key] = val - # Map from script object names to their corresponding python classes _python_class_by_so_name = {} diff --git a/src/python/espressomd/shapes.py b/src/python/espressomd/shapes.py index 241e90a303a..e92dc3fc0df 100644 --- a/src/python/espressomd/shapes.py +++ b/src/python/espressomd/shapes.py @@ -21,7 +21,7 @@ class Shape: - _so_bind_methods = ("calc_distance",) + _so_bind_methods = ("calc_distance", "is_inside") @script_interface_register diff --git a/src/python/espressomd/system.py b/src/python/espressomd/system.py index 0d39573cc6d..2964f81df03 100644 --- a/src/python/espressomd/system.py +++ b/src/python/espressomd/system.py @@ -29,11 +29,10 @@ from . import collision_detection from . import comfixed from . import constraints -from . import ekboundaries from . import galilei from . 
import interactions from . import integrate -from . import lbboundaries +from . import electrokinetics from . import lees_edwards from . import particle_data from . import thermostat @@ -91,11 +90,11 @@ class System(ScriptInterfaceHelper): collision_detection: :class:`espressomd.collision_detection.CollisionDetection` comfixed: :class:`espressomd.comfixed.ComFixed` constraints: :class:`espressomd.constraints.Constraints` + ekcontainer: :class:`espressomd.electrokinetics.EKContainer` + ekreactions: :class:`espressomd.electrokinetics.EKReactions` cuda_init_handle: :class:`espressomd.cuda_init.CudaInitHandle` - ekboundaries: :class:`espressomd.ekboundaries.EKBoundaries` galilei: :class:`espressomd.galilei.GalileiTransform` integrator: :class:`espressomd.integrate.IntegratorHandle` - lbboundaries: :class:`espressomd.lbboundaries.LBBoundaries` lees_edwards: :class:`espressomd.lees_edwards.LeesEdwards` non_bonded_inter: :class:`espressomd.interactions.NonBondedInteractions` part: :class:`espressomd.particle_data.ParticleList` @@ -210,10 +209,10 @@ def __init__(self, **kwargs): self.constraints = constraints.Constraints() if has_features("CUDA"): self.cuda_init_handle = cuda_init.CudaInitHandle() + if has_features("WALBERLA"): + self.ekcontainer = electrokinetics.EKContainer() + self.ekreactions = electrokinetics.EKReactions() self.galilei = galilei.GalileiTransform() - if has_features("LB_BOUNDARIES") or has_features("LB_BOUNDARIES_GPU"): - self.lbboundaries = lbboundaries.LBBoundaries() - self.ekboundaries = ekboundaries.EKBoundaries() self.lees_edwards = lees_edwards.LeesEdwards() self.non_bonded_inter = interactions.NonBondedInteractions() self.part = particle_data.ParticleList() @@ -243,14 +242,14 @@ def __getstate__(self): checkpointable_properties.append("_active_virtual_sites_handle") checkpointable_properties += [ "non_bonded_inter", "bonded_inter", "cell_system", "lees_edwards", - "part", "actors", "analysis", "auto_update_accumulators", - "comfixed", 
"constraints", "galilei", "thermostat", - "bond_breakage" + "part", "analysis", "auto_update_accumulators", + "comfixed", "constraints", "galilei", "bond_breakage" ] - if has_features("LB_BOUNDARIES") or has_features("LB_BOUNDARIES_GPU"): - checkpointable_properties.append("lbboundaries") if has_features("COLLISION_DETECTION"): checkpointable_properties.append("collision_detection") + checkpointable_properties += ["actors", "thermostat"] + if has_features("WALBERLA"): + checkpointable_properties += ["ekcontainer", "ekreactions"] odict = collections.OrderedDict() for property_name in checkpointable_properties: diff --git a/src/python/espressomd/thermostat.pxd b/src/python/espressomd/thermostat.pxd index 7ad36753057..4b0394d12a1 100644 --- a/src/python/espressomd/thermostat.pxd +++ b/src/python/espressomd/thermostat.pxd @@ -114,3 +114,14 @@ cdef extern from "stokesian_dynamics/sd_interface.hpp": IF STOKESIAN_DYNAMICS: void set_sd_kT(double kT) except + double get_sd_kT() + +cdef extern from "grid_based_algorithms/lb_interface.hpp": + double lb_lbfluid_get_kT "LB::get_kT"() except + + +cdef extern from "grid_based_algorithms/lb_particle_coupling.hpp": + void lb_lbcoupling_set_rng_state(stdint.uint64_t) except + + stdint.uint64_t lb_lbcoupling_get_rng_state() except + + void lb_lbcoupling_set_gamma(double) except + + double lb_lbcoupling_get_gamma() except + + cbool lb_lbcoupling_is_seed_required() except + + void mpi_bcast_lb_particle_coupling() diff --git a/src/python/espressomd/thermostat.pyx b/src/python/espressomd/thermostat.pyx index 1b8e63c1d68..f9429703875 100644 --- a/src/python/espressomd/thermostat.pyx +++ b/src/python/espressomd/thermostat.pyx @@ -20,12 +20,6 @@ import functools include "myconfig.pxi" from . 
cimport utils from .lb import HydrodynamicInteraction -from .lb cimport lb_lbcoupling_set_gamma -from .lb cimport lb_lbcoupling_get_gamma -from .lb cimport lb_lbcoupling_set_rng_state -from .lb cimport lb_lbcoupling_get_rng_state -from .lb cimport lb_lbcoupling_is_seed_required -from .lb cimport lb_lbfluid_get_kT def AssertThermostatType(*allowedthermostats): @@ -234,13 +228,17 @@ cdef class Thermostat: thermo_list.append(sd_dict) return thermo_list + def _set_temperature(self, kT): + mpi_set_temperature(kT) + utils.handle_errors("Temperature change failed") + def turn_off(self): """ Turns off all the thermostat and sets all the thermostat variables to zero. """ - mpi_set_temperature(0.) + self._set_temperature(0.) mpi_set_thermo_virtual(True) IF PARTICLE_ANISOTROPY: mpi_set_langevin_gamma(utils.make_Vector3d((0., 0., 0.))) @@ -257,6 +255,7 @@ cdef class Thermostat: mpi_set_thermo_switch(THERMO_OFF) lb_lbcoupling_set_gamma(0.0) + mpi_bcast_lb_particle_coupling() @AssertThermostatType(THERMO_LANGEVIN, THERMO_DPD) def set_langevin(self, kT, gamma, gamma_rotation=None, @@ -348,7 +347,7 @@ cdef class Thermostat: raise ValueError("seed must be a positive integer") langevin_set_rng_seed(seed) - mpi_set_temperature(kT) + self._set_temperature(kT) IF PARTICLE_ANISOTROPY: cdef utils.Vector3d gamma_vec if scalar_gamma_def: @@ -475,7 +474,7 @@ cdef class Thermostat: raise ValueError("seed must be a positive integer") brownian_set_rng_seed(seed) - mpi_set_temperature(kT) + self._set_temperature(kT) IF PARTICLE_ANISOTROPY: cdef utils.Vector3d gamma_vec if scalar_gamma_def: @@ -524,11 +523,11 @@ cdef class Thermostat: """ Sets the LB thermostat. - This thermostat requires the feature ``LBFluid`` or ``LBFluidGPU``. + This thermostat requires the feature ``WALBERLA``. 
Parameters ---------- - LB_fluid : :class:`~espressomd.lb.LBFluid` or :class:`~espressomd.lb.LBFluidGPU` + LB_fluid : :class:`~espressomd.lb.LBFluidWalberla` seed : :obj:`int` Seed for the random number generator, required if kT > 0. Must be positive. @@ -553,14 +552,17 @@ cdef class Thermostat: if seed < 0: raise ValueError("seed must be a positive integer") lb_lbcoupling_set_rng_state(seed) + mpi_bcast_lb_particle_coupling() else: lb_lbcoupling_set_rng_state(0) + mpi_bcast_lb_particle_coupling() global thermo_switch mpi_set_thermo_switch(thermo_switch | THERMO_LB) mpi_set_thermo_virtual(act_on_virtual) lb_lbcoupling_set_gamma(gamma) + mpi_bcast_lb_particle_coupling() IF NPT: @AssertThermostatType(THERMO_NPT_ISO) @@ -598,7 +600,7 @@ cdef class Thermostat: raise ValueError("seed must be a positive integer") npt_iso_set_rng_seed(seed) - mpi_set_temperature(kT) + self._set_temperature(kT) global thermo_switch mpi_set_thermo_switch(thermo_switch | THERMO_NPT_ISO) mpi_set_nptiso_gammas(gamma0, gammav) @@ -635,7 +637,7 @@ cdef class Thermostat: raise ValueError("seed must be a positive integer") dpd_set_rng_seed(seed) - mpi_set_temperature(kT) + self._set_temperature(kT) global thermo_switch mpi_set_thermo_switch(thermo_switch | THERMO_DPD) diff --git a/src/python/espressomd/visualization.py b/src/python/espressomd/visualization.py index f06e30601c0..1379f9fe81a 100644 --- a/src/python/espressomd/visualization.py +++ b/src/python/espressomd/visualization.py @@ -193,8 +193,10 @@ class openGLLive(): Rescale LB node velocity arrow length. LB_vel_radius_scale : :obj:`float`, optional Rescale LB node velocity arrow radii. - LB_arrow_color : (3,) array_like of :obj:`float`, optional - RGB of the LB velocity arrows. + LB_arrow_color_fluid : (3,) array_like of :obj:`float`, optional + RGB of the LB velocity arrows inside the fluid. + LB_arrow_color_boundary : (3,) array_like of :obj:`float`, optional + RGB of the LB velocity arrows inside boundaries. 
LB_arrow_material : :obj:`str`, optional Material of LB arrows. quality_constraints : :obj:`int`, optional @@ -338,7 +340,8 @@ def __init__(self, system, **kwargs): 'LB_plane_ngrid': 5, 'LB_vel_scale': 1.0, 'LB_vel_radius_scale': 0.005, - 'LB_arrow_color': [1.0, 1.0, 1.0], + 'LB_arrow_color_fluid': [1.0, 1.0, 1.0], + 'LB_arrow_color_boundary': [1.0, 0.25, 0.25], 'LB_arrow_material': 'transparent1', 'LB_arrow_quality': 16, @@ -374,11 +377,6 @@ def __init__(self, system, **kwargs): if not espressomd.has_features('ROTATION'): self.specs['director_arrows'] = False - if not espressomd.has_features('LB_BOUNDARIES') and \ - not espressomd.has_features('LB_BOUNDARIES_GPU'): - self.specs['LB_draw_boundaries'] = False - self.specs['LB_draw_node_boundaries'] = False - # ESPResSo-related inits that are known only when running the # integration loop are called once in the update loop # (constraints, node boxes, cell boxes, charge range, bonds) @@ -763,7 +761,8 @@ def _update_lb_velocity_plane_cpu(self): xj * 1.0 / ng * self.lb_plane_b2) % self.system.box_l) i, j, k = (int(ppp / agrid) for ppp in pp) lb_vel = np.copy(self.lb[i, j, k].velocity) - self.lb_plane_vel.append([pp, lb_vel]) + lb_boundary = self.lb[i, j, k].is_boundary + self.lb_plane_vel.append([pp, lb_vel, lb_boundary]) def _update_lb_velocity_plane_gpu(self): ng = self.specs['LB_plane_ngrid'] @@ -777,8 +776,9 @@ def _update_lb_velocity_plane_gpu(self): lb_vels = self.lb.get_interpolated_fluid_velocity_at_positions( np.array(col_pos)) self.lb_plane_vel = [] + lb_boundary = False # TODO WALBERLA for p, v in zip(col_pos, lb_vels): - self.lb_plane_vel.append([p, v]) + self.lb_plane_vel.append([p, v, lb_boundary]) def _update_cells(self): self.cell_box_origins = [] @@ -870,21 +870,6 @@ def shape_arguments(shape, part_type): except KeyError: self.shapes.append(Shape(*arguments)) - if self.specs['LB_draw_boundaries']: - ni = 0 - for constraint in self.system.lbboundaries: - if isinstance(constraint, 
espressomd.lbboundaries.LBBoundary): - part_type = ni - ni += 1 - shape = constraint.get_parameter('shape') - for sub_shape in unpack_shapes(shape): - arguments = shape_arguments(sub_shape, part_type) - try: - self.shapes.append( - shape_mapping[sub_shape.name()](*arguments)) - except KeyError: - self.shapes.append(Shape(*arguments)) - def _update_bonds(self, particle_data): """Update bond data used for drawing bonds. Do not call directly but use @@ -947,6 +932,8 @@ def _draw_system(self): self._draw_cells() if self.specs['LB_draw_nodes'] or self.specs['LB_draw_node_boundaries']: self._draw_lb_grid() + if self.specs['LB_draw_boundaries']: + self._draw_lb_boundaries() def _draw_system_box(self): draw_box([0, 0, 0], self.system.box_l, self.inverse_bg_color, @@ -973,14 +960,29 @@ def _draw_lb_grid(self): for k in range(int(dims[2])): n = np.array([i, j, k]) * cell_size if self.specs['LB_draw_node_boundaries'] \ - and self.lb[i, j, k].boundary: + and self.lb[i, j, k].is_boundary: draw_box(n, cell_size, self.lb_box_color_boundary, self.materials['transparent2'], 5.0) if self.specs['LB_draw_nodes'] \ - and not self.lb[i, j, k].boundary: + and not self.lb[i, j, k].is_boundary: draw_box(n, cell_size, self.lb_box_color, self.materials['transparent2'], 1.5) + def _draw_lb_boundaries(self): + a = self.lb_params['agrid'] + dims = np.rint(np.array(self.system.box_l) / a) + + set_solid_material(self.inverse_bg_color) + OpenGL.GL.glPointSize(self.specs['rasterize_pointsize']) + OpenGL.GL.glBegin(OpenGL.GL.GL_POINTS) + for i in range(int(dims[0])): + for j in range(int(dims[1])): + for k in range(int(dims[2])): + if self.lb[i, j, k].is_boundary: + OpenGL.GL.glVertex3f( + i * a + 0.5, j * a + 0.5, k * a + 0.5) + OpenGL.GL.glEnd() + def _draw_constraints(self): # clip borders of simulation box @@ -1242,12 +1244,12 @@ def _cut_bond(self, x_a, dx): # arrows in a plane for LB velocities def _draw_lb_vel(self): - for lb_pos, lb_vel in self.lb_plane_vel: + for lb_pos, lb_vel, lb_boundary 
in self.lb_plane_vel: draw_arrow( lb_pos, lb_vel * self.specs['LB_vel_scale'], self.lb_arrow_radius, - self.specs['LB_arrow_color'], + self.specs['LB_arrow_color_boundary'] if lb_boundary else self.specs['LB_arrow_color_fluid'], self.materials[self.specs['LB_arrow_material']], self.specs['LB_arrow_quality']) @@ -1592,14 +1594,12 @@ def _init_espresso_visualization(self): self.depth = 0 # LOOK FOR LB ACTOR - lb_types = [espressomd.lb.LBFluid] - if espressomd.has_features('CUDA'): - lb_types.append(espressomd.lb.LBFluidGPU) + lb_types = [espressomd.lb.LBFluidWalberla] for actor in self.system.actors: if isinstance(actor, tuple(lb_types)): self.lb_params = actor.get_params() self.lb = actor - self.lb_is_cpu = isinstance(actor, espressomd.lb.LBFluid) + self.lb_is_cpu = True break if self.specs['LB_draw_velocity_plane']: diff --git a/src/script_interface/CMakeLists.txt b/src/script_interface/CMakeLists.txt index f53e6c85e88..430cd7a8662 100644 --- a/src/script_interface/CMakeLists.txt +++ b/src/script_interface/CMakeLists.txt @@ -19,8 +19,8 @@ add_library( espresso_script_interface SHARED - initialize.cpp ObjectHandle.cpp object_container_mpi_guard.cpp - GlobalContext.cpp ContextManager.cpp ParallelExceptionHandler.cpp) + initialize.cpp ObjectHandle.cpp GlobalContext.cpp ContextManager.cpp + ParallelExceptionHandler.cpp) add_library(espresso::script_interface ALIAS espresso_script_interface) set_target_properties(espresso_script_interface PROPERTIES CXX_CLANG_TIDY "${ESPRESSO_CXX_CLANG_TIDY}") @@ -38,7 +38,6 @@ add_subdirectory(galilei) add_subdirectory(h5md) add_subdirectory(integrators) add_subdirectory(interactions) -add_subdirectory(lbboundaries) add_subdirectory(lees_edwards) add_subdirectory(magnetostatics) add_subdirectory(math) @@ -51,6 +50,7 @@ add_subdirectory(scafacos) add_subdirectory(shapes) add_subdirectory(system) add_subdirectory(virtual_sites) +add_subdirectory(walberla) install(TARGETS espresso_script_interface LIBRARY DESTINATION 
${ESPRESSO_INSTALL_PYTHON}/espressomd) diff --git a/src/script_interface/ObjectContainer.hpp b/src/script_interface/ObjectContainer.hpp new file mode 100644 index 00000000000..f5a8e89e2da --- /dev/null +++ b/src/script_interface/ObjectContainer.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2023 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef SCRIPT_INTERFACE_OBJECT_CONTAINER_HPP +#define SCRIPT_INTERFACE_OBJECT_CONTAINER_HPP + +#include "script_interface/auto_parameters/AutoParameters.hpp" + +#include + +namespace ScriptInterface { + +/** + * @brief Base class for containers whose @c BaseType might be a full + * specialization of @ref AutoParameters. + */ +template