From 4331aae829c09d25eff6f35a8ad11d916d732bf0 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Fri, 25 Oct 2024 18:22:59 +0200 Subject: [PATCH] MLIP tutorial MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Alexander Reinauer Co-authored-by: Samuel Tovey Co-authored-by: Jean-Noël Grad --- doc/tutorials/CMakeLists.txt | 1 + doc/tutorials/Readme.md | 3 + doc/tutorials/mlip/CMakeLists.txt | 22 + doc/tutorials/mlip/NotesForTutor.md | 22 + doc/tutorials/mlip/mlip.ipynb | 720 ++++++++++++++++++++++++++++ 5 files changed, 768 insertions(+) create mode 100644 doc/tutorials/mlip/CMakeLists.txt create mode 100644 doc/tutorials/mlip/NotesForTutor.md create mode 100644 doc/tutorials/mlip/mlip.ipynb diff --git a/doc/tutorials/CMakeLists.txt b/doc/tutorials/CMakeLists.txt index 2f23dcfad8..fe94a23785 100644 --- a/doc/tutorials/CMakeLists.txt +++ b/doc/tutorials/CMakeLists.txt @@ -108,6 +108,7 @@ add_subdirectory(constant_pH) add_subdirectory(widom_insertion) add_subdirectory(electrodes) add_subdirectory(grand_canonical_monte_carlo) +add_subdirectory(mlip) configure_file(Readme.md ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(convert.py ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/doc/tutorials/Readme.md b/doc/tutorials/Readme.md index d3f8d7bb73..b9d14e9ec6 100644 --- a/doc/tutorials/Readme.md +++ b/doc/tutorials/Readme.md @@ -50,6 +50,9 @@ physical systems. * **Raspberry electrophoresis** Extended objects in a Lattice-Boltzmann fluid, raspberry particles. [Guide](raspberry_electrophoresis/raspberry_electrophoresis.ipynb) +* **Machine-learned interatomic potential** + Atomistic simulations using the MACE-MP-0 model. + [Guide](mlip/mlip.ipynb) ### Advanced tutorials diff --git a/doc/tutorials/mlip/CMakeLists.txt b/doc/tutorials/mlip/CMakeLists.txt new file mode 100644 index 0000000000..a98cbf654a --- /dev/null +++ b/doc/tutorials/mlip/CMakeLists.txt @@ -0,0 +1,22 @@ +# +# Copyright (C) 2016-2024 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +configure_tutorial_target(TARGET tutorial_mlip DEPENDS mlip.ipynb) + +nb_export(TARGET tutorial_mlip SUFFIX "" FILE "mlip.ipynb") diff --git a/doc/tutorials/mlip/NotesForTutor.md b/doc/tutorials/mlip/NotesForTutor.md new file mode 100644 index 0000000000..ddf4c79cf1 --- /dev/null +++ b/doc/tutorials/mlip/NotesForTutor.md @@ -0,0 +1,22 @@ +# Machine-learned interatomic potential + +## Physics learning goals + +* Short introduction to how machine-learned inter-atomic potentials (MLPs) work. +* Visualisation of dissociation of sulfuric acid in water. +* Ground state calculations of small molecules MLPs +* Learn about the shortcomings of the lack of interpretability of MLPs. + +After the tutorial, students should be able to: + +* Explain how MLP derived results can be hard to explain, e.g., is the dissociation real or an artifact. +* Understand how to use the ESPResSo engine with MLPs. +* Perform ground state calculations and small quantum mechanically accurate simulations. + +## ESPResSo learning goals + +* Interfacing ESPResSo with an MLP. +* Interfacing ESPResSo with ASE. +* Performing geometry minimisation. +* Creating and filling simulation boxes. +* Setting thermostats and running MD. diff --git a/doc/tutorials/mlip/mlip.ipynb b/doc/tutorials/mlip/mlip.ipynb new file mode 100644 index 0000000000..465f4d64c0 --- /dev/null +++ b/doc/tutorials/mlip/mlip.ipynb @@ -0,0 +1,720 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Machine-learned interatomic potential\n", + "\n", + "Quantum mechanics gives us the most complete description of the world we can achieve.\n", + "While the methods involve some degree of approximation, there are numerous approaches to performing quantum mechanical simulations.\n", + "The most prominent one is probably density functional theory (DFT).\n", + "Simulations at this level of accuracy are some of the most precise we can perform,\n", + "allowing us to study complex chemical interactions and even bond-dynamics studies.\n", + "Unfortunately, they are computationally demanding, and thus limited to small numbers of atoms and short time scales.\n", + "\n", + "Over the past decade, machine learning has been used extensively to address this problem.\n", + "Beginning in 2008, scientists began using machine-learning models trained on small quantum mechanical simulations to perform large-scale atomistic studies with no loss in accuracy.\n", + "The models work by computing energies and forces on individual atoms based on the local atomic environment, encoded in a chemical descriptor.\n", + "Nowadays, this technique enables large-scale simulations with quantum mechanical accuracy ([Kozinsky et al.](https://doi.org/10.1145/3581784.3627041)).\n", + "\n", + "There are many different machine-learning models and dedicated open-source Python packages:\n", + "\n", + "- [MACE](https://github.com/ACEsuit/mace)\n", + "- [NequIP / allegro](https://github.com/mir-group/nequip)\n", + "- [Apax](https://github.com/apax-hub/apax)\n", + "- [DeepMD](https://github.com/deepmodeling/deepmd-kit)\n", + "- [TorchANI](https://github.com/aiqm/torchani)\n", + "\n", + "Each of these has its own specialization and applications.\n", + "In their infancy, using a trained machine-learned potential was challenging:\n", + "it typically required extensive re-training and, therefore, quantum mechanical simulations or complex active-learning routines where one iteratively improves model accuracy during a simulation.\n", + "However, as of 2024, foundation models now exist.\n", + "These models are trained on an extensive chemical space and have demonstrated to be stable on many material simulations.\n", + "\n", + "In this tutorial, we will focus on the [MACE-MP-0](https://doi.org/10.48550/arXiv.2401.00096) model. This model has been trained on the [materials project dataset](https://doi.org/10.1063/1.4812323) and can accurately predict energies and forces for many organic and inorganic systems.\n", + "The MACE-MP-0 model uses the state-of-the-art MACE equivariant message passing model architecture, more information for which is available in the lecture slides." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ASE Primer\n", + "Most machine-learned potential framework operations through the Atomic Simulation Environment (ASE) Python package.\n", + "ASE aims to simplify small atomistic computations and finds a lot of use in quantum mechanical and single-point calculations.\n", + "The majority of ASE takes place within the `ase.Atoms` objects, which in turn are built from the `ase.Atom` objects (note the missing final 's').\n", + "An `Atom` object contains information about a single atom in a configuration (an `Atoms` object),\n", + "including its position, forces, species, and how interactions should be computed between its neighbours.\n", + "These interactions are defined using `Calculators`, which take a whole `Atoms` object and return energies and forces on each atom.\n", + "More information can be found in the ASE documentation: https://wiki.fysik.dtu.dk/ase/ase/atoms.html.\n", + "\n", + "As a part of this tutorial, we have extended ESPResSo to interface with ASE in such a way that it allows calculators to apply forces to atoms in a simulation box.\n", + "In this case, we will focus solely on the machine-learned potentials.\n", + "However, by the end of the tutorial, the way other calculators could be used should be clear." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Libraries\n", + "\n", + "The first step in the tutorial is the imports.\n", + "We will need a few additional libraries during this tutorial, so let's discuss each one and why we will use it.\n", + "\n", + "They can be installed with the following command:\n", + "\n", + "```sh\n", + "python3 -m pip install -c requirements \"mace-torch==0.3.6\" \"rdkit2ase==0.1.4\" \"plotly>=5.15.0\" tqdm pint rdkit\n", + "```\n", + "\n", + "### ESPResSo\n", + "* ``espressomd`` to create our simulation box and run the MD simulations\n", + "* ``ASEInterface`` to allow ESPResSo to communicate with ASE\n", + "* ``ZnDraw`` visualizer for interactive visualization\n", + "\n", + "### MACE\n", + "We import the foundation model mentioned earlier, mace.\n", + "Specifically, the ASE calculator that is shipped with the model.\n", + "When you perform this import, you automatically download the trained model parameters.\n", + "\n", + "### RDKit\n", + "The next import is `rdkit2ase`, a helper package developed at the Institute for Computational Physics. It allows users to interface the popular atomistic analysis tool RDKit with ASE.\n", + "We will use it to generate molecular structures later on.\n", + "\n", + "### Helpers\n", + "* Numpy for linear algebra\n", + "* Pandas for additional data management\n", + "* pint to help us with units\n", + "* plotly for visualization\n", + "* tqdm to make our loading bars look better than nothing but worse than rich." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ESPResSo imports\n", + "import espressomd\n", + "from espressomd.plugins.ase import ASEInterface\n", + "from espressomd.zn import Visualizer\n", + "\n", + "# MACE-MP-0\n", + "from mace.calculators import mace_mp\n", + "\n", + "# Simulation box\n", + "import rdkit2ase\n", + "\n", + "# Plotting tools\n", + "import rdkit.Chem\n", + "import rdkit.Chem.Draw\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "plt.rcParams.update({'font.size': 18})\n", + "\n", + "# Miscellaneous\n", + "import numpy as np\n", + "import pint\n", + "import tqdm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Units\n", + "\n", + "In the majority of machine-learned inter-atomic potential studies the unit system is eV for energy, Angstrom for distances, and mass in atomic units.\n", + "This is not due to any limitations on the models, simply what has now become the default units for the models.\n", + "The machine-learning algorithms themselves don't care what they are trained on, but they can only be trained on one set of units, so it is important to know what units they are.\n", + "Additionally, times are often on the femtosecond scale.\n", + "Perhaps not surprisingly, this is also the standard unit system within ASE.\n", + "We will use pint to manage our units." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Exercise:**\n", + "- Create an instance of the [`pint.UnitRegistry`](https://pint.readthedocs.io/en/stable/api/base.html#pint.UnitRegistry) for the unit conversions throughout the tutorial and name the variable `ureg`.\n", + "\n", + "**Hint:**\n", + "- It might be helpful to look at the [Tutorial section of pint](https://pint.readthedocs.io/en/stable/getting/tutorial.html#initializing-a-registry)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# SOLUTION CELL\n", + "ureg = pint.UnitRegistry()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Geometry Optimization of Water" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Representing Molecular Structures\n", + "\n", + "In this tutorial, we will use [Simplified Molecular Input Line Entry System](https://doi.org/10.1021/ci00057a005) (SMILES) representations and [RDKit](https://github.com/rdkit/rdkit) to generate starting structures.\n", + "SMILES strings are text representations of molecules without the hydrogen atoms.\n", + "For example, the SMILES strings for methane, ethane, and propane are `C`, `CC`, and `CCC` respectively.\n", + "Other elements are represented by their atomic symbol.\n", + "Ammonia for example would be `N`.\n", + "SMILES can also represent cyclic structures, double bonds, and other chemically relevant properties, but we won't get into this today.\n", + "RDKit, the tool mentioned earlier, provides powerful utilities for creating 3D structures from these string representations of molecules.\n", + "\n", + "In the first part of the tutorial, we will perform a geometry optimization for water.\n", + "The chemical formula of water is H2O." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Exercise:**\n", + "- Write down the SMILES string for water in the variable `smiles`, we will use it to create a molecular structure.\n", + " Remember, you do not include hydrogens in a SMILES representation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# SOLUTION CELL\n", + "smiles = \"O\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the SMILES string, we can create a 2D sketch of the molecule using rdkit:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m = rdkit.Chem.MolFromSmiles(smiles)\n", + "rdkit.Chem.Draw.MolToImage(rdkit.Chem.AddHs(m))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also convert it into an `ase.Atoms` object. For that we can use the function `rdkit2ase.smiles2atoms()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "atoms = rdkit2ase.smiles2atoms(smiles)\n", + "atoms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are almost there in terms of computing energies on this system; we only need a calculator now." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### MACE Model Initialization\n", + "\n", + "We can now call up the machine-learning model, MACE-MP-0, download its parameters in the process, and attach it to the `ase.Atoms` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mace_mp_calc = mace_mp()\n", + "atoms.calc = mace_mp_calc" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To compute energies and forces with this model one can use the member functions of the `ase.Atoms` object called [`get_potential_energy()`](https://wiki.fysik.dtu.dk/ase/ase/atoms.html#ase.Atoms.get_potential_energy) and [`get_forces()`](https://wiki.fysik.dtu.dk/ase/ase/atoms.html#ase.Atoms.get_forces).\n", + "Both will be used later in the integration cycles.\n", + "Please note that the units depend on how the model was trained, in this case eV and eV per Angstrom." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Running the Minimization\n", + "\n", + "Now that we know how to compute properties, it is time to plug it into ESPResSo and peform the geometry optimization.\n", + "We will create a box of arbitrary size, define a simulation time step of 0.5 fs to resolve the movement of the hydrogens in the molecule, and run a geometry optimization.\n", + "\n", + "This would allow the generation of a relaxed structure and it would give us a good starting structure for an MD simulation.\n", + "\n", + "The first step is creating the simulation box and setting the time step.\n", + "This is also where the first element of compelxity arises.\n", + "ESPResSo uses simulation units, not real units, when performing simulations.\n", + "This means you will need to convert the units you get out of ASE, into something that makes sense for ESPResSo.\n", + "The good news is, you'll get plenty of practice doing so here." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Exercise:**\n", + "- Create a new instance of the ESPResSo-`System` class called `system` with box length of `16`.\n", + "- Disable periodic boundary conditions (PBC).\n", + "- Set the Verlet list skin to a standard value of `0.4`.\n", + "- Set the timestep to 0.5 femtoseconds. Don't forget to convert the units to simulation units first.\n", + "\n", + "**Hint:**\n", + "- The box length is somewhat arbitrary since we disable periodic boundary conditions.\n", + "- For the conversion of the timestep you should be familiar with the [ESPResSo Units](https://espressomd.github.io/doc/introduction.html#on-units) and you can use pint for the conversion. You can get the magnitude of the timestep in the correct unit system with pints' [`m_as()`](https://pint.readthedocs.io/en/stable/api/base.html#pint.Quantity.m_as) method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# SOLUTION CELL\n", + "system = espressomd.System(box_l=[16, 16, 16])\n", + "system.periodicity = [False, False, False]\n", + "system.cell_system.skin = 0.4\n", + "system.time_step = (0.5 * ureg.fs).m_as(((1 * ureg.u * ureg.angstrom**2) / ureg.electron_volt) ** 0.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next step is adding atoms to the simulation box.\n", + "ESPResSo, as it deals mostly with fictitious particle types, does not require physically realistic particle numbers when it comes to types.\n", + "ASE, however, as it only deals with atoms, does.\n", + "Because of this, we have to define the mapping between the ESPResSo particle types and the ASE atom types.\n", + "This is done by passing a type_mapping dictionary to the `ASEInterface` class.\n", + "An example for such a type map for a water system would be:\n", + "\n", + "```python\n", + "type_mapping = {1: 8, 2: 1}\n", + "```\n", + "\n", + "e.g., particle type 1 in ESPResSo is mapped to Oxygen (atomic number 8) and particle type 2 is mapped to Hydrogen (atomic number 1).\n", + "In this tutorial, for simplicity, we will use the same particle types as the atomic number in ASE.\n", + "This means the mapping can simply be created by:\n", + "\n", + "```python\n", + "type_mapping = {x: x for x in set(atoms.numbers)}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for atom in atoms:\n", + " system.part.add(pos=atom.position, type=atom.number, mass=atom.mass)\n", + "system.ase = ASEInterface(type_mapping={x: x for x in set(atoms.numbers)})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will use the [ZnDraw](https://github.com/zincware/ZnDraw) visualizer integrated with ESPResSo to follow the simulation. To see the effect of the geometry optimization, we display the atomic forces. You can either follow the simulation inside this Notebook or open the app in a dedicated window to the side, following the printed URL." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vis = Visualizer(system)\n", + "vis.zndraw.config.scene.vectors = \"forces\"\n", + "vis.zndraw.config.scene.vector_scale = 5\n", + "vis.zndraw.config.scene.simulation_box = False\n", + "vis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use the steepest descent algorithm of ESPResSo for the energy minimization." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "system.integrator.set_steepest_descent(f_max=(0.1 * ureg.eV / ureg.angstrom).magnitude,\n", + " gamma=4,\n", + " max_displacement=(0.001 * ureg.angstrom).magnitude)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In practice this means that we use the MACE Model to predict the forces on the atoms, which are then provided to ESPResSo as external forces and ESPResSo takes care of the atom displacements." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "energies = []\n", + "for _ in tqdm.trange(100, ncols=120):\n", + " atoms = system.ase.get()\n", + " atoms.calc = mace_mp_calc\n", + " system.part.all().ext_force = atoms.get_forces()\n", + " energies.append(atoms.get_potential_energy())\n", + " vis.zndraw.append(atoms)\n", + " system.integrator.run(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Exercise:**\n", + "- Take a look at the energies and the molecule in the visualizer during the minimisation.\n", + "- Why do we stop at $\\text{f}_{\\text{max}} = 0.1\\,\\text{eV}\\cdot\\unicode{xC5}^{-1}$?\n", + "- Compare the final energy to the value of water in the materials project database [mp-697111](https://next-gen.materialsproject.org/materials/mp-697111/tasks/mp-697111?chemsys=H-O) of $\\approx -14.89$ eV. The `mp-697111` structure is a periodic ice crystal belonging to the space group Cmc21." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# SOLUTION CELL\n", + "# Due to the PBC and interactions the energy of the ice crystal is lower than the energy of the isolated water molecule.\n", + "# Going beyond the given fmax would be outside the achievable accuracy of the MLIP.\n", + "plt.figure(figsize=(10, 6))\n", + "plt.plot(energies)\n", + "plt.xlabel(\"Frame\")\n", + "plt.ylabel(\"Energy (eV)\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have seen how we can use the MACE Model to do geometry optimization of an organic molecule in ESPResSo. We will continue with another system to showcase the dynamics for a more advanced system. For that we will start from a cleared system and also remove the frames from the visualizer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "system.part.clear()\n", + "system.thermostat.turn_off()\n", + "\n", + "del vis.zndraw[:]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MD Simulations of Sulfuric Acid and Water\n", + "\n", + "A particularly novel application of machine-learned potentials is the description of chemical reactions.\n", + "Molecular dynamics engines require special methods to model bond breaking and bond formation,\n", + "since it entails overcoming energy barriers and dynamically updating the connectivity information of atoms.\n", + "Methods that aid this process are plentiful but beyond the scope of this tutorial.\n", + "Therefore, we will examine the proton transfer from sulfuric acid to water, which can be seen without biasing the simulation." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this part of the tutorial, we generate two different molecular species - water and sulfuric acid - and use [packmol](https://github.com/m3g/packmol) to create a suitable starting structure.\n", + "Packmol takes an example molecule structure and fills a simulation box with it,\n", + "while taking periodic boundary conditions into account avoiding atom overlap.\n", + "We will use the Python interface to packmol provided by rdkit2ase." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To see the proton transfer we need two molecules, water and sulfuric acid.\n", + "We again make use of the SMILES representations to generate the `ase.Atoms` objects,\n", + "which are then used to fill the box with 100 water molecules and 1 sulfuric acid molecule." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "water = rdkit2ase.smiles2atoms(\"O\")\n", + "sulfuric_acid = rdkit2ase.smiles2atoms(\"OS(=O)(=O)O\")\n", + "box_of_atoms = rdkit2ase.pack([[water], [sulfuric_acid]], [100, 1], density=1000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will repeat the geometry optimization process. Now that we have a bulk structure, we need to set the correct cell vectors,\n", + "which we also get from the ase structure we just created.\n", + "We will use a slightly smaller timestep in this study as it will make watching the bond formation later a lot easier." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "system.box_l = box_of_atoms.get_cell().diagonal()\n", + "system.periodicity = [True, True, True]\n", + "system.time_step = (0.25 * ureg.fs).m_as(((1 * ureg.u * ureg.angstrom**2) / ureg.electron_volt) ** 0.5)\n", + "system.cell_system.skin = 0.4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "system.ase = ASEInterface(type_mapping={x: x for x in set(box_of_atoms.numbers)})\n", + "for atom in box_of_atoms:\n", + " system.part.add(pos=atom.position, type=atom.number, mass=atom.mass)\n", + "system.integrator.set_steepest_descent(f_max=(0.1 * ureg.eV / ureg.angstrom).magnitude,\n", + " gamma=4,\n", + " max_displacement=(0.001 * ureg.angstrom).magnitude)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We again use ZnDraw and enable showing the box this time. The next code cells will update the window that was created in the water exercise." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vis.zndraw.config.scene.simulation_box = True\n", + "vis.zndraw.config.scene.vector_scale = 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for _ in tqdm.trange(50, ncols=120):\n", + " atoms = system.ase.get()\n", + " atoms.calc = mace_mp_calc\n", + " system.part.all().ext_force = atoms.get_forces()\n", + " vis.zndraw.append(atoms)\n", + " system.integrator.run(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With this minimized structure, we can now run an MD simulation.\n", + "Let us highlight the two hydrogen atoms from the sulfuric acid in the visualizer. These are the particles we want to follow and will participate in the proton transfer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vis.zndraw.selection = [305, 306]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To run the MD we will switch to a Velocity Verlet integrator and set the thermostat to 400 K." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "system.integrator.set_vv()\n", + "system.thermostat.set_langevin(kT=(400 * ureg.K * ureg.boltzmann_constant).m_as(\"eV\"), gamma=2, seed=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Conceptually the process for the MD is the same with the geometry optimization. However, this time we only show every 5th frame to the visualizer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tbar = tqdm.trange(500, ncols=120)\n", + "for idx in tbar:\n", + " atoms = system.ase.get()\n", + " atoms.calc = mace_mp_calc\n", + " system.part.all().ext_force = atoms.get_forces()\n", + " if idx % 5 == 0:\n", + " vis.zndraw.append(atoms)\n", + " system.integrator.run(1)\n", + " tbar.set_description(f\"E_pot: {atoms.get_potential_energy():.3f} eV\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A good indicator to see the protonation events is the distance between the hydrogen and the oxygen atom in the hydroxyl group of the sulfuric acid.\n", + "We calculate these distances for each frame in the visualization together with the potential energy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "OH_1 = []\n", + "OH_2 = []\n", + "energies = []\n", + "for atoms in vis.zndraw:\n", + " OH_1.append(atoms.get_distance(300, 305, mic=True))\n", + " OH_2.append(atoms.get_distance(304, 306, mic=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 6))\n", + "plt.plot(OH_1, label=\"OH1\")\n", + "plt.plot(OH_2, label=\"OH2\")\n", + "plt.xlabel(\"Frame\")\n", + "plt.ylabel(\"O$-$H bond distance (Å)\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Exercise:**\n", + "- Indentify the highlighted atoms in the visualization window, which are the two hydrogen atoms of the sulfuric acid.\n", + "- Observe the moment they undergo the protonation event visually and also in the hydrogen-oxygen distance plots.\n", + "- Discuss the results and the potential for using MLIPs to aid in the simulation of chemical reactions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "In this tutorial, you have created simulations from simple SMILES representations and were able to run geometry optimization and MD simulations with (almost) DFT accuracy.\n", + "These approaches are not just theoretical, but highly practical. They can be used when a system requires a high degree of accuracy in its calculations, providing you with a reliable toolkit for your computational chemistry simulations." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "- Batatia, I.; Benner, P.; Chiang, Y.; Elena, A. M.; Kovács, D. P.; Riebesell, J.; Advincula, X. R.; Asta, M.; Baldwin, W. J.; Bernstein, N.; Bhowmik, A.; Blau, S. M.; Cărare, V.; Darby, J. P.; De, S.; Della Pia, F.; Deringer, V. L.; Elijošius, R.; El-Machachi, Z.; Fako, E.; Ferrari, A. C.; Genreith-Schriever, A.; George, J.; Goodall, R. E. A.; Grey, C. P.; Han, S.; Handley, W.; Heenen, H. H.; Hermansson, K.; Holm, C.; Jaafar, J.; Hofmann, S.; Jakob, K. S.; Jung, H.; Kapil, V.; Kaplan, A. D.; Karimitari, N.; Kroupa, N.; Kullgren, J.; Kuner, M. C.; Kuryla, D.; Liepuoniute, G.; Margraf, J. T.; Magdău, I.-B.; Michaelides, A.; Moore, J. H.; Naik, A. A.; Niblett, S. P.; Norwood, S. W.; O’Neill, N.; Ortner, C.; Persson, K. A.; Reuter, K.; Rosen, A. S.; Schaaf, L. L.; Schran, C.; Sivonxay, E.; Stenczel, T. K.; Svahn, V.; Sutton, C.; van der Oord, C.; Varga-Umbrich, E.; Vegge, T.; Vondrák, M.; Wang, Y.; Witt, W. C.; Zills, F.; Csányi, G. A Foundation Model for Atomistic Materials Chemistry. arXiv December 29, 2023. https://doi.org/10.48550/arXiv.2401.00096.\n", + "- Batatia, I.; Kovacs, D. P.; Simm, G.; Ortner, C.; Csanyi, G. MACE: Higher Order Equivariant Message Passing Neural Networks for Fast and Accurate Force Fields. In Advances in neural information processing systems; Koyejo, S., Mohamed, S., Agarwal, A., Belgrave, D., Cho, K., Oh, A., Eds.; Curran Associates, Inc., 2022; Vol. 35, pp 11423–11436. https://openreview.net/forum?id=YPpSngE-ZU.\n", + "- Landrum, G.; Tosco, P.; Kelley, B.; Ric; Cosgrove, D.; sriniker; gedeck; Vianello, R.; NadineSchneider; Kawashima, E.; N, D.; Jones, G.; Dalke, A.; Cole, B.; Swain, M.; Turk, S.; AlexanderSavelyev; Vaucher, A.; Wójcikowski, M.; Take, I.; Probst, D.; Ujihara, K.; Scalfani, V. F.; godin, guillaume; Lehtivarjo, J.; Pahl, A.; Walker, R.; Berenger, F.; jasondbiggs; strets123. Rdkit/Rdkit: 2023_03_2 (Q1 2023) Release, 2023. https://doi.org/10.5281/zenodo.8053810.\n", + "- Martínez, L.; Andrade, R.; Birgin, E. G.; Martínez, J. M. PACKMOL: A Package for Building Initial Configurations for Molecular Dynamics Simulations. J Comput Chem 2009, 30 (13), 2157–2164. https://doi.org/10.1002/jcc.21224.\n", + "- Zills, F.; Schäfer, M. R.; Segreto, N.; Kästner, J.; Holm, C.; Tovey, S. Collaboration on Machine-Learned Potentials with IPSuite: A Modular Framework for Learning-on-the-Fly. J. Phys. Chem. B 2024. https://doi.org/10.1021/acs.jpcb.3c07187." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}