Commit

Merge branch 'python' into system

kodiakhq[bot] authored Nov 20, 2023
2 parents 0bb7c49 + cb3c23c commit 7c3efae
Showing 21 changed files with 1,750 additions and 54 deletions.
10 changes: 10 additions & 0 deletions doc/bibliography.bib
Original file line number Diff line number Diff line change
@@ -981,6 +981,16 @@ @Article{reed92a
publisher={AIP Publishing}
}

@InProceedings{rocklin15a,
author = {Rocklin, Matthew},
title = {{D}ask: Parallel Computation with Blocked algorithms and Task Scheduling},
booktitle = {Proceedings of the 14\textsuperscript{th} {P}ython in {S}cience {C}onference},
year = {2015},
editor = {Huff, Kathryn and Bergstra, James},
pages = {126--132},
doi = {10.25080/Majora-7b98e3ed-013},
}

@Book{rubinstein03a,
title = {Polymer Physics},
publisher = {Oxford University Press},
1 change: 1 addition & 0 deletions doc/sphinx/samples.py
@@ -36,6 +36,7 @@ def get_docstring(filenames):
# extract docstrings
samples = [x for x in os.listdir(samples_dir) if x.endswith('.py')]
samples += ['immersed_boundary/sampleImmersedBoundary.py',
            'high_throughput_with_dask/run_pv.py',
            'object_in_fluid/motivation.py']
docstrings = get_docstring(samples)

1 change: 1 addition & 0 deletions doc/tutorials/CMakeLists.txt
@@ -115,6 +115,7 @@ add_subdirectory(ferrofluid)
add_subdirectory(constant_pH)
add_subdirectory(widom_insertion)
add_subdirectory(electrodes)
add_subdirectory(grand_canonical_monte_carlo)

configure_file(Readme.md ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(convert.py ${CMAKE_CURRENT_BINARY_DIR})
4 changes: 3 additions & 1 deletion doc/tutorials/Readme.md
@@ -69,7 +69,9 @@ physical systems.
* **Widom particle insertion method**
  Measuring the excess chemical potential of a salt solution using the Widom particle insertion method.
  [Guide](widom_insertion/widom_insertion.ipynb)

* **Grand-Canonical Monte Carlo**
  Simulating a polyelectrolyte solution coupled to a reservoir of salt.
  [Guide](grand_canonical_monte_carlo/grand_canonical_monte_carlo.ipynb)

[comment]: # (End of tutorials landing page)

4 changes: 2 additions & 2 deletions doc/tutorials/constant_pH/constant_pH.ipynb
@@ -1119,7 +1119,7 @@
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
},
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -1133,7 +1133,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.10.6"
}
},
"nbformat": 4,
26 changes: 26 additions & 0 deletions doc/tutorials/grand_canonical_monte_carlo/CMakeLists.txt
@@ -0,0 +1,26 @@
#
# Copyright (C) 2023 The ESPResSo project
#
# This file is part of ESPResSo.
#
# ESPResSo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ESPResSo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

configure_tutorial_target(
  TARGET tutorial_grand_canonical_monte_carlo DEPENDS
  grand_canonical_monte_carlo.ipynb figures/schematic.svg)

nb_export(TARGET tutorial_grand_canonical_monte_carlo SUFFIX "" FILE
          "grand_canonical_monte_carlo.ipynb" HTML_RUN VAR_SUBST
          "\"p3m_params={'mesh':10,'cao':6,'r_cut':8.22}\"")
14 changes: 14 additions & 0 deletions doc/tutorials/grand_canonical_monte_carlo/NotesForTutor.md
@@ -0,0 +1,14 @@
# Notes for Tutors: Grand-Canonical Monte Carlo

## Physics learning goals

After the tutorial, students should be able to explain:

* what the grand-canonical ensemble is and where it is applicable
* how to simulate in the grand-canonical ensemble using GCMC

## ESPResSo learning goals

In the course of this tutorial, students should learn to:

* set up a GCMC simulation in ESPResSo
257 changes: 257 additions & 0 deletions doc/tutorials/grand_canonical_monte_carlo/figures/schematic.svg
79 changes: 79 additions & 0 deletions samples/high_throughput_with_dask/Readme.md
@@ -0,0 +1,79 @@
# Introduction

This sample illustrates how to run a large number of short ESPResSo simulations
with Dask. Dask is a parallel computing library for Python that enables efficient
handling of large datasets and computation tasks.
Note that this sample is not meant to produce meaningful physics results.
The sample consists of the following parts:

- `dask_espresso.py`: helper functions for running ESPResSo within Dask
  and communicating data between Dask and ESPResSo
- `lj_pressure.py`: simulation script which obtains the average pressure
  for a Lennard-Jones liquid at a given volume fraction
- `run_pv.py`: uses Dask to run the simulation script at various volume
  fractions and obtain a pressure vs. volume fraction curve
- `test_dask_espresso.py`: corresponding unit tests, to be run with `pytest`
- `echo.py`: mocks an ESPResSo simulation for the unit tests

## How to Use

Note: It is not possible to use ESPResSo with `dask.distributed.LocalCluster`.
Instead, follow the procedure described below:

1. Move to the sample directory:
   ```bash
   cd samples/high_throughput_with_dask
   ```
1. Open `run_pv.py` in an editor and adapt the `PYPRESSO` variable
   to the correct path to `pypresso`.
1. Set the `PYTHONPATH` environment variable such that it includes
   the directory in which `dask_espresso.py` resides:
   ```bash
   export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$(realpath .)"
   ```
1. Start the Dask scheduler:
   ```bash
   dask scheduler &
   ```
1. Note the address of the scheduler (e.g., `tcp://127.0.0.1:8786`).
1. Launch a few workers using the correct scheduler address:
   ```bash
   for i in {1..5}; do dask worker SCHEDULER_ADDRESS & done
   ```
1. Run `python3 run_pv.py SCHEDULER_ADDRESS`, again inserting the scheduler address from above.
1. Use `fg` and Ctrl-C to shut down the Dask workers and scheduler,
   or use `pkill "dask"` if you don't have any other Dask scheduler
   running in the background.

Note that Dask can also be used on compute clusters with HTCondor and Slurm.

## Technical Notes

- Since currently only one ESPResSo instance can be used in a Python script,
ESPResSo is run as a separate process. This is accomplished by the
`dask_espresso_task` function in `dask_espresso.py`.
- Also, the data transfer between Dask and ESPResSo has to be handled such that
it is safe for inter-process communication. This is achieved via the `pickle`
and `base64` Python modules. Encoding and decoding functions can be found in
`dask_espresso.py`.
- The communication happens via the standard input and output of the simulation
script. Therefore, it is essential not to use simple `print()` calls in the
simulation script. Instead, use the `logging` module for status messages.
These will go to the standard error stream.
- To use this sample for your own simulations:
- Use `dask_espresso.py` as is.
- Adapt `run_pv.py` to run simulations with the parameters you need.
The keyword arguments passed to `dask_espresso_task()` will be passed
as a dictionary to the simulation.
- Use `data = dask_espresso.get_data_from_stdin()` to get the parameters
at the beginning of the simulation script.
- Use `print(dask_espresso.encode_transport_data(result))` at the end
of your simulation to pass the result to Dask.
- The simulation parameters and results can be any Python object that
can be safely pickled and do not require additional context. Basic data
types (int, float, string, list, dict) as well as numpy arrays work,
whereas objects that require additional context to be valid do not
(e.g. file objects and ESPResSo particles).
- To test your simulation script, including the transfer of parameters
and results outside Dask, you can also use
the `dask_espresso.dask_espresso_task()` function.
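The pickle/base64 transport described above needs only the Python standard library; here is a minimal sketch of the round trip (function names mirror those in `dask_espresso.py`, but this is an illustrative stand-in, not the module itself):

```python
import base64
import pickle


def encode_transport_data(data):
    # pickle the object, then base64-encode the bytes so the result is
    # plain text that can pass safely through stdin/stdout pipes
    return base64.b64encode(pickle.dumps(data)).decode("utf-8")


def decode_transport_data(encoded_data):
    # reverse the steps: base64-decode to bytes, then unpickle
    return pickle.loads(base64.b64decode(encoded_data))


params = {"volume_fraction": 0.2, "n_particles": 500}
encoded = encode_transport_data(params)
assert "\n" not in encoded  # single-line string, safe for text transport
assert decode_transport_data(encoded) == params
```

Because the encoded form is a single ASCII string, it survives any channel that preserves text, which is exactly what the stdin/stdout link between Dask and the simulation script requires.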
77 changes: 77 additions & 0 deletions samples/high_throughput_with_dask/dask_espresso.py
@@ -0,0 +1,77 @@
#
# Copyright (C) 2023 The ESPResSo project
#
# This file is part of ESPResSo.
#
# ESPResSo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ESPResSo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

"""Helper functions to use ESPResSo with Dask."""

import pickle
import base64
import sys
import subprocess
import logging
import dask


def encode_transport_data(data):
    """
    Use ``pickle`` and ``base64`` to convert the provided data to a string
    which can be passed safely between the Dask scheduler, worker and ESPResSo.
    """
    return base64.b64encode(pickle.dumps(data)).decode("utf-8")


def decode_transport_data(encoded_data):
    """
    Convert the transport data back to a Python object via ``base64``
    and ``pickle``.
    """
    pickle_data = base64.b64decode(encoded_data)
    return pickle.loads(pickle_data)


def get_data_from_stdin():
    """Read encoded transport data from the standard input and decode it."""
    return decode_transport_data(sys.stdin.read())


@dask.delayed
def dask_espresso_task(pypresso, script, **kwargs):
    """
    Run ESPResSo asynchronously as a Dask task.

    pypresso: :obj:`str`
        Path to pypresso
    script: :obj:`str`
        Simulation script to run with pypresso
    kwargs:
        The keyword arguments are encoded and sent to the standard input
        of the simulation script.
        Use ``data = get_data_from_stdin()`` to obtain them.
    """
    logger = logging.getLogger(__name__)
    encoded_data = encode_transport_data(kwargs)
    espresso = subprocess.Popen([pypresso, script],
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                text=True)
    espresso.stdin.write(encoded_data)
    out, err = espresso.communicate()
    if err != "":
        logger.warning("STDERR output from ESPResSo:\n%s", err)
    return decode_transport_data(out)
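The subprocess round trip performed by `dask_espresso_task()` can be exercised without Dask or ESPResSo installed. The following sketch substitutes an inline child script for `pypresso` (a hypothetical stand-in, mirroring the contract of `echo.py`): parameters go in encoded on stdin, results come back encoded on stdout.

```python
import base64
import pickle
import subprocess
import sys


def encode(data):
    return base64.b64encode(pickle.dumps(data)).decode("utf-8")


def decode(encoded):
    return pickle.loads(base64.b64decode(encoded))


# Child script standing in for an ESPResSo simulation: it reads encoded
# parameters from stdin, marks them as processed, and writes the encoded
# result to stdout -- the same contract echo.py implements for the tests.
CHILD = r"""
import base64, pickle, sys
data = pickle.loads(base64.b64decode(sys.stdin.read()))
data["processed"] = True
sys.stdout.write(base64.b64encode(pickle.dumps(data)).decode("utf-8"))
"""

proc = subprocess.Popen([sys.executable, "-c", CHILD],
                        stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                        text=True)
out, _ = proc.communicate(encode({"volume_fraction": 0.2}))
result = decode(out)
assert result == {"volume_fraction": 0.2, "processed": True}
```

Swapping `sys.executable -c CHILD` for `pypresso script.py` gives the pattern used in the real helper; only the child process changes, not the transport.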
29 changes: 29 additions & 0 deletions samples/high_throughput_with_dask/echo.py
@@ -0,0 +1,29 @@
#
# Copyright (C) 2023 The ESPResSo project
#
# This file is part of ESPResSo.
#
# ESPResSo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ESPResSo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

"""
This is part of the unit tests. It reads encoded simulation data from stdin,
decodes it, adds ``processed=True`` and outputs the encoded result to stdout.
"""

import dask_espresso as de
data = de.get_data_from_stdin()
data.update(processed=True)

print(de.encode_transport_data(data))
