From fb6e28494d31232304a11a175d309cd07af29aae Mon Sep 17 00:00:00 2001 From: "kodiakhq[bot]" <49736102+kodiakhq[bot]@users.noreply.github.com> Date: Fri, 24 Feb 2023 18:28:08 +0000 Subject: [PATCH] Make trajectories more reproducible during checkpointing (#4677) Fixes #4657 Description of changes: - explain which factors affect reproducibility in checkpointed simulations in the user guide - re-purpose the save/load samples to help measuring force jumps during checkpointing - make the P3M family of algorithms more deterministic by avoiding re-tuning during checkpointing - improve docstrings of the MMM1D family of algorithms --- doc/sphinx/io.rst | 19 +++++++++++++++++++ samples/load_checkpoint.py | 11 +++++++---- samples/save_checkpoint.py | 7 +++++++ src/python/espressomd/electrostatics.py | 17 +++++++++++------ .../electrostatics/CoulombP3M.hpp | 2 +- .../electrostatics/CoulombP3MGPU.hpp | 2 +- .../magnetostatics/DipolarP3M.hpp | 2 +- .../scripts/samples/test_load_checkpoint.py | 5 +++++ .../scripts/samples/test_save_checkpoint.py | 3 ++- 9 files changed, 54 insertions(+), 14 deletions(-) diff --git a/doc/sphinx/io.rst b/doc/sphinx/io.rst index 02f7f3e3e2f..47f30093605 100644 --- a/doc/sphinx/io.rst +++ b/doc/sphinx/io.rst @@ -141,6 +141,25 @@ Be aware of the following limitations: versions. It may therefore not be possible to load a checkpoint in a different environment than where it was written. +* To be fully deterministic when loading from a checkpoint with an active + thermostat, the first step of the integration should be called with the flag + ``reuse_forces=True``, e.g. ``system.integrator.run(2, reuse_forces=True)``. + This is because loading a checkpoint reinitializes the system and enforces + a recalculation of the forces. However, this computes the forces from the + velocities at the current time step and not at the previous half time step. + Please note that long-range actors can make trajectories non-reproducible. + For example, lattice-Boltzmann introduces errors of the order of 1e-15 with + binary checkpoint files, or 1e-7 with ASCII checkpoint files. In addition, + several electrostatic and magnetostatic actors automatically introduce + a deviation of the order of 1e-7, either due to floating-point rounding + errors (:class:`~espressomd.electrostatics.P3MGPU`), or due to re-tuning + using the most recent system state (:class:`~espressomd.electrostatics.MMM1D`, + :class:`~espressomd.electrostatics.MMM1DGPU`). + When in doubt, you can easily verify the absence of a "force jump" when + loading from a checkpoint by replacing the electrostatics actor with your + combination of features in files :file:`samples/save_checkpoint.py` and + :file:`samples/load_checkpoint.py` and running them sequentially. + For additional methods of the checkpointing class, see :class:`espressomd.checkpointing.Checkpoint`. diff --git a/samples/load_checkpoint.py b/samples/load_checkpoint.py index 1a30dbb4096..de96e0fc530 100644 --- a/samples/load_checkpoint.py +++ b/samples/load_checkpoint.py @@ -30,6 +30,7 @@ import espressomd import espressomd.electrostatics import espressomd.checkpointing +import numpy as np required_features = ["P3M", "WCA"] espressomd.assert_features(required_features) @@ -69,7 +70,6 @@ print("\n### p3m test ###") print(f"p3m.get_params() = {p3m.get_params()}") - # test registered objects # all objects that are registered when writing a checkpoint are # automatically registered after loading this checkpoint @@ -77,8 +77,11 @@ print( f"checkpoint.get_registered_objects() = {checkpoint.get_registered_objects()}") - -# integrate system +# integrate system while re-using forces (and velocities at half time step) print("Integrating...") +system.integrator.run(2, reuse_forces=True) -system.integrator.run(1000) +# measure deviation from reference forces (trajectory must be deterministic) +forces_ref = np.loadtxt("mycheckpoint/forces.npy") +forces_diff = np.abs(system.part.all().f - forces_ref) +print(f"max deviation from reference forces = {np.max(forces_diff):.2e}") diff --git a/samples/save_checkpoint.py b/samples/save_checkpoint.py index a7743d5eaa5..052a3c0f3fb 100644 --- a/samples/save_checkpoint.py +++ b/samples/save_checkpoint.py @@ -79,4 +79,11 @@ # let's also register the p3m reference for easy access later checkpoint.register("p3m") +# get velocities at half time step (for thermostat reproducibility) +system.integrator.run(1) + checkpoint.save() + +# write reference forces to file +system.integrator.run(2) +np.savetxt("mycheckpoint/forces.npy", np.copy(system.part.all().f)) diff --git a/src/python/espressomd/electrostatics.py b/src/python/espressomd/electrostatics.py index 3b795f58ad2..d634e8817d8 100644 --- a/src/python/espressomd/electrostatics.py +++ b/src/python/espressomd/electrostatics.py @@ -396,11 +396,13 @@ class MMM1D(ElectrostaticInteraction): Maximal pairwise error. far_switch_radius : :obj:`float`, optional Radius where near-field and far-field calculation are switched. - bessel_cutoff : :obj:`int`, optional - tune : :obj:`bool`, optional - Specify whether to automatically tune or not. Defaults to ``True``. - timings : :obj:`int` + verbose : :obj:`bool`, optional + If ``False``, disable log output during tuning. + timings : :obj:`int`, optional Number of force calculations during tuning. + check_neutrality : :obj:`bool`, optional + Raise a warning if the system is not electrically neutral when + set to ``True`` (default). """ _so_name = "Coulomb::CoulombMMM1D" @@ -445,8 +447,11 @@ class MMM1DGPU(ElectrostaticInteraction): far_switch_radius : :obj:`float`, optional Radius where near-field and far-field calculation are switched bessel_cutoff : :obj:`int`, optional - tune : :obj:`bool`, optional - Specify whether to automatically tune or not. Defaults to ``True``. + timings : :obj:`int`, optional + Number of force calculations during tuning. + check_neutrality : :obj:`bool`, optional + Raise a warning if the system is not electrically neutral when + set to ``True`` (default). """ _so_name = "Coulomb::CoulombMMM1DGpu" _so_creation_policy = "GLOBAL" diff --git a/src/script_interface/electrostatics/CoulombP3M.hpp b/src/script_interface/electrostatics/CoulombP3M.hpp index 14e0b3e7643..c3e4576d7b9 100644 --- a/src/script_interface/electrostatics/CoulombP3M.hpp +++ b/src/script_interface/electrostatics/CoulombP3M.hpp @@ -75,7 +75,7 @@ class CoulombP3M : public Actor { void do_construct(VariantMap const ¶ms) override { m_tune = get_value(params, "tune"); context()->parallel_try_catch([&]() { - auto p3m = P3MParameters{get_value(params, "tune"), + auto p3m = P3MParameters{!get_value_or(params, "is_tuned", !m_tune), get_value(params, "epsilon"), get_value(params, "r_cut"), get_value(params, "mesh"), diff --git a/src/script_interface/electrostatics/CoulombP3MGPU.hpp b/src/script_interface/electrostatics/CoulombP3MGPU.hpp index fec2219523c..c786a37edae 100644 --- a/src/script_interface/electrostatics/CoulombP3MGPU.hpp +++ b/src/script_interface/electrostatics/CoulombP3MGPU.hpp @@ -76,7 +76,7 @@ class CoulombP3MGPU : public Actor { void do_construct(VariantMap const ¶ms) override { m_tune = get_value(params, "tune"); context()->parallel_try_catch([&]() { - auto p3m = P3MParameters{get_value(params, "tune"), + auto p3m = P3MParameters{!get_value_or(params, "is_tuned", !m_tune), get_value(params, "epsilon"), get_value(params, "r_cut"), get_value(params, "mesh"), diff --git a/src/script_interface/magnetostatics/DipolarP3M.hpp b/src/script_interface/magnetostatics/DipolarP3M.hpp index 6923ded75b6..9565db44dec 100644 --- a/src/script_interface/magnetostatics/DipolarP3M.hpp +++ b/src/script_interface/magnetostatics/DipolarP3M.hpp @@ -74,7 +74,7 @@ class DipolarP3M : public Actor { void do_construct(VariantMap const ¶ms) override { m_tune = get_value(params, "tune"); context()->parallel_try_catch([&]() { - auto p3m = P3MParameters{get_value(params, "tune"), + auto p3m = P3MParameters{!get_value_or(params, "is_tuned", !m_tune), get_value(params, "epsilon"), get_value(params, "r_cut"), get_value(params, "mesh"), diff --git a/testsuite/scripts/samples/test_load_checkpoint.py b/testsuite/scripts/samples/test_load_checkpoint.py index 1d31cfb5f82..c3e6d9bb9ce 100644 --- a/testsuite/scripts/samples/test_load_checkpoint.py +++ b/testsuite/scripts/samples/test_load_checkpoint.py @@ -16,6 +16,7 @@ # along with this program. If not, see . import unittest as ut +import numpy as np import importlib_wrapper @@ -32,6 +33,10 @@ def test_file_generation(self): {'myvar', 'system', 'p3m'}) self.assertEqual(sample.myvar, "some script variable (updated value)") + def test_trajectory_reproducibility(self): + self.assertTrue(sample.p3m.is_tuned) + np.testing.assert_array_less(sample.forces_diff, 1e-16) + if __name__ == "__main__": ut.main() diff --git a/testsuite/scripts/samples/test_save_checkpoint.py b/testsuite/scripts/samples/test_save_checkpoint.py index 1edc132d64e..e4b09ab33ec 100644 --- a/testsuite/scripts/samples/test_save_checkpoint.py +++ b/testsuite/scripts/samples/test_save_checkpoint.py @@ -28,9 +28,10 @@ class Sample(ut.TestCase): system = sample.system def test_file_generation(self): - # test .checkpoint files exist filepath = pathlib.Path("mycheckpoint") / "0.checkpoint" self.assertTrue(filepath.exists(), f"File {filepath} not created") + filepath = pathlib.Path("mycheckpoint") / "forces.npy" + self.assertTrue(filepath.exists(), f"File {filepath} not created") if __name__ == "__main__":