Skip to content

Commit

Permalink
Make trajectories more reproducible during checkpointing (#4677)
Browse files Browse the repository at this point in the history
Fixes #4657 

Description of changes:
- explain which factors affect reproducibility in checkpointed simulations in the user guide
- re-purpose the save/load samples to help measuring force jumps during checkpointing
- make the P3M family of algorithms more deterministic by avoiding re-tuning during checkpointing
- improve docstrings of the MMM1D family of algorithms
  • Loading branch information
kodiakhq[bot] authored Feb 24, 2023
2 parents dc9b2ac + 89d3725 commit 1945d5b
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 14 deletions.
19 changes: 19 additions & 0 deletions doc/sphinx/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,25 @@ Be aware of the following limitations:

system = setup_system()

* To be fully deterministic when loading from a checkpoint with an active
thermostat, the first step of the integration should be called with the flag
``reuse_forces=True``, e.g. ``system.integrator.run(2, reuse_forces=True)``.
This is because loading a checkpoint reinitializes the system and enforces
a recalculation of the forces. However, this computes the forces from the
velocities at the current time step and not at the previous half time step.
Please note that long-range actors can make trajectories non-reproducible.
For example, lattice-Boltzmann introduces errors of the order of 1e-15 with
binary checkpoint files, or 1e-7 with ASCII checkpoint files. In addition,
several electrostatic and magnetostatic actors automatically introduce
a deviation of the order of 1e-7, either due to floating-point rounding
errors (:class:`~espressomd.electrostatics.P3MGPU`), or due to re-tuning
using the most recent system state (:class:`~espressomd.electrostatics.MMM1D`,
:class:`~espressomd.electrostatics.MMM1DGPU`).
When in doubt, you can easily verify the absence of a "force jump" when
loading from a checkpoint by replacing the electrostatics actor with your
combination of features in files :file:`samples/save_checkpoint.py` and
:file:`samples/load_checkpoint.py` and running them sequentially.

For additional methods of the checkpointing class, see
:class:`espressomd.checkpointing.Checkpoint`.

Expand Down
11 changes: 7 additions & 4 deletions samples/load_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import espressomd
import espressomd.electrostatics
import espressomd.checkpointing
import numpy as np

required_features = ["P3M", "WCA"]
espressomd.assert_features(required_features)
Expand Down Expand Up @@ -69,16 +70,18 @@
print("\n### p3m test ###")
print(f"p3m.get_params() = {p3m.get_params()}")


# test registered objects
# all objects that are registered when writing a checkpoint are
# automatically registered after loading this checkpoint
print("\n### checkpoint register test ###")
print(
f"checkpoint.get_registered_objects() = {checkpoint.get_registered_objects()}")


# integrate system
# integrate system while re-using forces (and velocities at half time step)
print("Integrating...")
system.integrator.run(2, reuse_forces=True)

system.integrator.run(1000)
# measure deviation from reference forces (trajectory must be deterministic)
forces_ref = np.loadtxt("mycheckpoint/forces.npy")
forces_diff = np.abs(system.part.all().f - forces_ref)
print(f"max deviation from reference forces = {np.max(forces_diff):.2e}")
7 changes: 7 additions & 0 deletions samples/save_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,11 @@
# let's also register the p3m reference for easy access later
checkpoint.register("p3m")

# get velocities at half time step (for thermostat reproducibility)
system.integrator.run(1)

checkpoint.save()

# write reference forces to file
system.integrator.run(2)
np.savetxt("mycheckpoint/forces.npy", np.copy(system.part.all().f))
17 changes: 11 additions & 6 deletions src/python/espressomd/electrostatics.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,11 +390,13 @@ class MMM1D(ElectrostaticInteraction):
Maximal pairwise error.
far_switch_radius : :obj:`float`, optional
Radius where near-field and far-field calculation are switched.
bessel_cutoff : :obj:`int`, optional
tune : :obj:`bool`, optional
Specify whether to automatically tune or not. Defaults to ``True``.
timings : :obj:`int`
verbose : :obj:`bool`, optional
If ``False``, disable log output during tuning.
timings : :obj:`int`, optional
Number of force calculations during tuning.
check_neutrality : :obj:`bool`, optional
Raise a warning if the system is not electrically neutral when
set to ``True`` (default).
"""
_so_name = "Coulomb::CoulombMMM1D"
Expand Down Expand Up @@ -425,8 +427,11 @@ class MMM1DGPU(ElectrostaticInteraction):
far_switch_radius : :obj:`float`, optional
Radius where near-field and far-field calculation are switched
bessel_cutoff : :obj:`int`, optional
tune : :obj:`bool`, optional
Specify whether to automatically tune or not. Defaults to ``True``.
timings : :obj:`int`, optional
Number of force calculations during tuning.
check_neutrality : :obj:`bool`, optional
Raise a warning if the system is not electrically neutral when
set to ``True`` (default).
"""
_so_name = "Coulomb::CoulombMMM1DGpu"
_so_creation_policy = "GLOBAL"
Expand Down
2 changes: 1 addition & 1 deletion src/script_interface/electrostatics/CoulombP3M.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class CoulombP3M : public Actor<CoulombP3M, ::CoulombP3M> {
void do_construct(VariantMap const &params) override {
m_tune = get_value<bool>(params, "tune");
context()->parallel_try_catch([&]() {
auto p3m = P3MParameters{get_value<bool>(params, "tune"),
auto p3m = P3MParameters{!get_value_or<bool>(params, "is_tuned", !m_tune),
get_value<double>(params, "epsilon"),
get_value<double>(params, "r_cut"),
get_value<Utils::Vector3i>(params, "mesh"),
Expand Down
2 changes: 1 addition & 1 deletion src/script_interface/electrostatics/CoulombP3MGPU.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class CoulombP3MGPU : public Actor<CoulombP3MGPU, ::CoulombP3MGPU> {
void do_construct(VariantMap const &params) override {
m_tune = get_value<bool>(params, "tune");
context()->parallel_try_catch([&]() {
auto p3m = P3MParameters{get_value<bool>(params, "tune"),
auto p3m = P3MParameters{!get_value_or<bool>(params, "is_tuned", !m_tune),
get_value<double>(params, "epsilon"),
get_value<double>(params, "r_cut"),
get_value<Utils::Vector3i>(params, "mesh"),
Expand Down
2 changes: 1 addition & 1 deletion src/script_interface/magnetostatics/DipolarP3M.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class DipolarP3M : public Actor<DipolarP3M, ::DipolarP3M> {
void do_construct(VariantMap const &params) override {
m_tune = get_value<bool>(params, "tune");
context()->parallel_try_catch([&]() {
auto p3m = P3MParameters{get_value<bool>(params, "tune"),
auto p3m = P3MParameters{!get_value_or<bool>(params, "is_tuned", !m_tune),
get_value<double>(params, "epsilon"),
get_value<double>(params, "r_cut"),
get_value<Utils::Vector3i>(params, "mesh"),
Expand Down
5 changes: 5 additions & 0 deletions testsuite/scripts/samples/test_load_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import unittest as ut
import numpy as np
import importlib_wrapper


Expand All @@ -32,6 +33,10 @@ def test_file_generation(self):
{'myvar', 'system', 'p3m'})
self.assertEqual(sample.myvar, "some script variable (updated value)")

def test_trajectory_reproducibility(self):
self.assertTrue(sample.p3m.is_tuned)
np.testing.assert_array_less(sample.forces_diff, 1e-16)


if __name__ == "__main__":
ut.main()
3 changes: 2 additions & 1 deletion testsuite/scripts/samples/test_save_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ class Sample(ut.TestCase):
system = sample.system

def test_file_generation(self):
# test .checkpoint files exist
filepath = pathlib.Path("mycheckpoint") / "0.checkpoint"
self.assertTrue(filepath.exists(), f"File {filepath} not created")
filepath = pathlib.Path("mycheckpoint") / "forces.npy"
self.assertTrue(filepath.exists(), f"File {filepath} not created")


if __name__ == "__main__":
Expand Down

0 comments on commit 1945d5b

Please sign in to comment.