Merge pull request #168 from TheoChem-VU/154-add-command-line-scripts

Added command line scripts for concatening irc paths
TheoChem-VU · Mar 21, 2024 · 3c743ff · 3c743ff
2 parents 0f126c6 + ae5955a
commit 3c743ff
Show file tree

Hide file tree

Showing 28 changed files with 1,516,245 additions and 6 deletions.
diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml
@@ -49,7 +49,7 @@ jobs:
           pip install -e .
 
       - name: Install sphinx
-        run: pip install sphinx sphinx-tabs sphinx-copybutton sphinx-autodoc-typehints
+        run: pip install sphinx sphinx-tabs sphinx-copybutton sphinx-autodoc-typehints sphinx-argparse
 
       - name: Install sphinx theme
         run: pip install pydata-sphinx-theme

diff --git a/.github/workflows/build_docs_no_deploy.yaml b/.github/workflows/build_docs_no_deploy.yaml
@@ -31,7 +31,7 @@ jobs:
           pip install -e .
 
       - name: Install sphinx
-        run: pip install sphinx sphinx-tabs sphinx-copybutton sphinx-autodoc-typehints
+        run: pip install sphinx sphinx-tabs sphinx-copybutton sphinx-autodoc-typehints sphinx-argparse
 
       - name: Install sphinx theme
         run: pip install pydata-sphinx-theme

diff --git a/docs/_templates/logo.html b/docs/_templates/logo.html
@@ -9,7 +9,7 @@
     }
 </style>
 <div class="myDiv">
-    <a href="./index.html">
+    <a href="/index.html">
         <img src="https://avatars.githubusercontent.com/u/119413491?s=60&v=4">
         TCutility
     </a>

diff --git a/docs/cli_scripts/main.rst b/docs/cli_scripts/main.rst
@@ -0,0 +1,15 @@
+Command-line interface (CLI) scripts
+====================================
+
+TCutility comes with some helpful CLI scripts for you to use. Please find an overview and examples here. You can also access information by using the ``tc -h`` command. This will show you an overview of the available CLI scripts. More detailed descriptions can be accessed using the ``tc {program name} -h`` commands.
+
+Overview
+--------
+
+CLI scripts are invoked using the parent ``tc`` command followed by the sub-program (see below). If you have suggestions for useful scripts please contact the developers or `open an issue <https://github.com/TheoChem-VU/TCutility/issues/new>`_ on our GitHub page.
+
+.. argparse::
+    :filename: ../src/tcutility/cli_scripts/tcparser.py
+    :func: create_parser
+    :prog: tc
+
diff --git a/docs/conf.py b/docs/conf.py
@@ -49,6 +49,7 @@
     "sphinx_copybutton",
     # 'sphinx.ext.autosummary',
     "sphinx_autodoc_typehints",
+    "sphinxarg.ext",
 ]
 napoleon_use_param = True
 templates_path = ["_templates"]

diff --git a/docs/index.rst b/docs/index.rst
@@ -15,6 +15,7 @@ TCutility |ProjectVersion| documentation
    results/results
    tcutility.job
    analysis/analysis
+   cli_scripts/main
 
 .. toctree::
    :maxdepth: 2

diff --git a/pyproject.toml b/pyproject.toml
@@ -59,3 +59,6 @@ exclude = [
   "docs*",
 ] # exclude packages matching these glob patterns (empty by default)
 namespaces = false # to disable scanning PEP 420 namespaces (true by default)
+
+[project.scripts]
+tc = "tcutility.cli_scripts.tcparser:main"
diff --git a/src/tcutility/cli_scripts/concatenate_irc.py b/src/tcutility/cli_scripts/concatenate_irc.py
@@ -0,0 +1,185 @@
+import argparse
+import pathlib as pl
+from typing import List, Tuple, Union
+
+import numpy as np
+from scm.plams import Molecule
+from tcutility.log import log
+from tcutility.results import read
+from tcutility.results.result import Result
+
+
+def _create_result_objects(job_dirs: Union[List[str], List[pl.Path]]) -> List[Result]:
+    """Creates a list of Result objects from a list of directories."""
+    return [read(pl.Path(file)) for file in job_dirs]
+
+
+def _get_converged_energies(res: Result) -> List[float]:
+    """Returns a list of energies of the converged geometries."""
+    return [energy for converged, energy in zip(res.history.converged, res.history.energy) if converged]  # type: ignore
+
+
+def _get_converged_molecules(res: Result) -> List[Molecule]:
+    """Returns a list of molecules of the converged geometries."""
+    return [mol for converged, mol in zip(res.history.converged, res.history.molecule) if converged]  # type: ignore
+
+
+def _concatenate_irc_trajectories_by_rmsd(irc_trajectories: List[List[Molecule]], energies: List[List[float]]) -> Tuple[List[Molecule], List[float]]:
+    """
+    Concatenates lists of molecules by comparing the RMSD values of the end and beginnings of the trajectory.
+    The entries that are closest to each other are used to concatenate the trajectories.
+
+    Parameters:
+        irc_trajectories: A list of lists of Molecule objects representing the trajectories.
+        energies: A list of lists of float values representing the energies.
+
+    Returns:
+        A tuple containing a list of Molecule objects and a list of energies.
+
+    Raises:
+        ValueError: If the RMSD values are not as expected.
+    """
+    concatenated_mols: List[Molecule] = irc_trajectories[0][::-1]
+    concatenated_energies: List[float] = energies[0][::-1]
+
+    for traj_index in range(len(irc_trajectories) - 1):
+        # Calculate RMSD values of two connected trajectories to compare the connection points / molecules
+        rmsd_matrix = np.array([[Molecule.rmsd(irc_trajectories[traj_index][i], irc_trajectories[traj_index + 1][j]) for j in [0, -1]] for i in [0, -1]])
+
+        # Flatten the matrix and find the index of the minimum value
+        lowest_index = np.argmin(rmsd_matrix.flatten())
+
+        log(f"Lowest RMSD values: {rmsd_matrix.flatten()}", 10)
+
+        # Starting points are connected
+        if lowest_index == 0:
+            concatenated_mols += irc_trajectories[traj_index + 1][1:]
+            concatenated_energies += energies[traj_index + 1][1:]
+        # Ending points are connected
+        elif lowest_index == 1:
+            concatenated_mols += irc_trajectories[traj_index + 1][::-1]
+            concatenated_energies += energies[traj_index + 1][::-1]
+        # Something went wrong
+        else:
+            raise ValueError(f"The RMSD values are not as expected: {rmsd_matrix.flatten()} with {lowest_index=}.")
+
+    return concatenated_mols, concatenated_energies
+
+
+def concatenate_irc_trajectories(result_objects: List[Result], user_log_level: int, reverse: bool = False) -> Tuple[List[Molecule], List[float]]:
+    """
+    Concatenates trajectories from irc calculations, often being forward and backward, through the RMSD values.
+
+    Parameters:
+        job_dirs: A list of directories containing the ams.rkf files.
+        user_log_level: The log level set by the user.
+        reverse: A boolean indicating whether to reverse the trajectory. Default is False.
+
+    Returns:
+        A tuple containing a list of Molecule objects and a list of energies.
+
+    Raises:
+        Exception: If an exception is raised in the try block, it is caught and printed.
+    """
+    traj_geometries: List[List[Molecule]] = [[] for _ in result_objects]
+    traj_energies: List[List[float]] = [[] for _ in result_objects]
+
+    for i, res_obj in enumerate(result_objects):
+        log(f"Processing {res_obj.files.root}", user_log_level)  # type: ignore # root is a valid attribute
+        traj_geometries[i] = _get_converged_molecules(res_obj)
+        traj_energies[i] = _get_converged_energies(res_obj)
+        log(f"IRC trajectory {i+1} has {len(traj_geometries[i])} geometries.", user_log_level)
+
+    log("Concatenating trajectories...", user_log_level)
+    concatenated_mols, concatenated_energies = _concatenate_irc_trajectories_by_rmsd(traj_geometries, traj_energies)
+
+    if reverse:
+        log("Reversing the trajectory...", user_log_level)
+        concatenated_mols = concatenated_mols[::-1]
+        concatenated_energies = concatenated_energies[::-1]
+    return concatenated_mols, concatenated_energies
+
+
+def _xyz_format(mol: Molecule) -> str:
+    """Returns a string representation of a molecule in the xyz format, e.g.:
+
+    Geometry 1, Energy: -0.5 Ha
+    C      0.00000000      0.00000000      0.00000000
+    ...
+    """
+    return "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])
+
+
+def _amv_format(mol: Molecule, step: int, energy: Union[float, None] = None) -> str:
+    """Returns a string representation of a molecule in the amv format, e.g.:
+
+    Geometry 1, Energy: -0.5 Ha
+    C      0.00000000      0.00000000      0.00000000
+    ...
+
+    If no energy is provided, the energy is not included in the string representation"""
+    if energy is None:
+        return f"Geometry {step}\n" + "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])
+    return f"Geometry {step}, Energy: {energy} Ha\n" + "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])
+
+
+def write_mol_to_xyz_file(mols: Union[List[Molecule], Molecule], filename: Union[str, pl.Path]) -> None:
+    """Writes a list of molecules to a file in xyz format."""
+    mols = mols if isinstance(mols, list) else [mols]
+    out_file = pl.Path(f"{filename}.xyz")
+
+    [mol.delete_all_bonds() for mol in mols]
+    write_string = "\n\n".join([_xyz_format(mol) for mol in mols])
+    out_file.write_text(write_string)
+
+    return None
+
+
+def write_mol_to_amv_file(mols: Union[List[Molecule], Molecule], energies: Union[List[float], None], filename: Union[str, pl.Path]) -> None:
+    """Writes a list of molecules to a file in amv format."""
+    mols = mols if isinstance(mols, list) else [mols]
+    out_file = pl.Path(f"{filename}.amv")
+    energies = energies if energies is not None else [0.0 for _ in mols]
+
+    [mol.delete_all_bonds() for mol in mols]
+    write_string = "\n\n".join([_amv_format(mol, step, energy) for step, (mol, energy) in enumerate(zip(mols, energies), 1)])
+    out_file.write_text(write_string)
+
+    return None
+
+
+def create_subparser(parent_parser: argparse.ArgumentParser):
+    subparser = parent_parser.add_parser(  # type: ignore # add_parser is a valid method
+        "concat-irc",
+        help="Combine separated IRC paths.",
+        description="""
+        Scripts that takes in two or more directories containing an IRC file ("ams.rkf") and concatenates them through the RMSD values. Produces a .xyz and .amv file in the specified output directory.
+        The output directory is specified with the -o flag. If not specified, the output will be written to the current working directory.
+        In addition, the -r flag can be used to reverse the trajectory.
+
+        Note: ALWAYS visualize the .amv file in AMSView to verify the trajectory.
+    """,
+    )
+
+    # Add the arguments
+    subparser.add_argument("jobs", nargs="*", type=str, help="Job directories containing the ams.rkf of the irc calculation(s)")
+    subparser.add_argument("-r", "--reverse", action="store_true", help="Reverses the trajectory")
+    subparser.add_argument("-o", "--output", type=str, default="./", help="Directory in which the outputfile will be saved")
+    subparser.add_argument("-l", "--log_level", type=int, default=20, help="Set the log level. The lower the value, the more is printed. Default is 20 (info).")
+
+
+def main(args):
+    outputdir = pl.Path(args.output).resolve()
+    job_dirs = [pl.Path(directory).resolve() for directory in args.jobs]
+
+    if len(job_dirs) > 2:
+        raise ValueError("Only two IRC paths can be concatenated at a time as is currently implemented.")
+
+    res_objects = _create_result_objects(job_dirs)
+    molecules, energies = concatenate_irc_trajectories(res_objects, args.log_level, args.reverse)
+
+    outputdir.mkdir(parents=True, exist_ok=True)
+    write_mol_to_amv_file(molecules, energies, outputdir / "read_concatenated_mols")
+    write_mol_to_xyz_file(molecules, outputdir / "read_concatenated_mols")
+
+    log(f"Output written to {outputdir / 'concatenated_mols'}")
diff --git a/src/tcutility/cli_scripts/job_script.py b/src/tcutility/cli_scripts/job_script.py
@@ -0,0 +1,67 @@
+""" Module containing functions for quickly submitting geometry optimization jobs via the command line """
+import argparse
+from tcutility import job as tcjob
+import tcutility
+import os
+
+
+def create_subparser(parent_parser: argparse.ArgumentParser):
+    desc = "Set up and run a geometry optimization on a given structure."
+    subparser = parent_parser.add_parser('optimize', help=desc, description=desc)
+    subparser.add_argument("-l", "--level", 
+                           type=str, 
+                           help="Set the level of theory for the optimization. For example, \"GFN1-xTB\" or \"BLYP-D3(BJ)/TZ2P\" Can be set in the xyz-file with the 'level_of_theory' flag.", 
+                           default=None)
+    subparser.add_argument("-c", "--charge",
+                           type=int,
+                           help="The charge of the system. Can be set in the xyz-file with the 'charge' flag.",
+                           default=None)
+    subparser.add_argument("-s", "--spinpol",
+                           type=int,
+                           help="The spin-polarization of the system. Can be set in the xyz-file with the 'spinpol' flag.",
+                           default=None)
+    subparser.add_argument("-o", "--output",
+                           type=str,
+                           help="The file to write the optimized result to. By default will be written to '{xyzfile}_optimized.xyz'.",
+                           default=None)
+    subparser.add_argument("-k", "--keep",
+                           help="Keep the calculation directory after finishing the calculation.",
+                           default=False,
+                           action="store_true")
+    subparser.add_argument("xyzfile",
+                           type=str,
+                           help="The molecule to optimize, in extended xyz-format. See https://theochem-vu.github.io/TCutility/api/tcutility.html#module-tcutility.molecule for more information.")
+
+
+def main(args: argparse.Namespace):
+    # set a default output
+    if args.output is None:
+        args.output = args.xyzfile.removesuffix('.xyz') + '_optimized.xyz'
+
+    mol = tcutility.molecule.load(args.xyzfile)
+
+    # get the correct job class object and set the level-of-theory
+    level = args.level or mol.flags.level_of_theory or 'GFN1-xTB'
+    if level.lower() == 'gfn1-xtb':
+        job = tcjob.DFTBJob(delete_on_finish=not args.keep)  # by default DFTBJob uses GFN1-xTB
+    else:
+        # if we are not using GFN1-xTB we will use ADF
+        parts = level.split('/')
+        job = tcjob.ADFJob(delete_on_finish=not args.keep)
+        job.functional(parts[0])
+        if len(parts) > 1:
+            job.basis_set(parts[1])
+
+    job.molecule(mol)
+    job.optimization()
+    job.charge(args.charge or mol.flags.charge or 0)
+    if isinstance(job, tcjob.ADFJob):
+        job.spin_polarization(args.spinpol or mol.flags.spinpol or 0)
+
+    job.rundir = os.path.split(args.xyzfile)[0]
+    job.name = f'.tmp.{os.getpid()}'
+
+    # copy the optimized mol to the output file
+    job.add_postamble(f'cp {job.output_mol_path} {args.output}')
+
+    job.run()
diff --git a/src/tcutility/cli_scripts/tcparser.py b/src/tcutility/cli_scripts/tcparser.py
@@ -0,0 +1,41 @@
+from tcutility.cli_scripts import concatenate_irc, job_script
+
+# to add a script:
+# 1. Add a create_subparser function and main function to your script.
+# 2. Import the script.
+# 3. Add it to the dictionary below {program_name: script-module}.
+sub_programs = {
+    "optimize": job_script,
+    "concat-irc": concatenate_irc,
+}
+
+
+def create_parser():
+    import argparse
+
+    parser = argparse.ArgumentParser(prog="tc")
+    # add the subparsers. dest ensures we can retrieve the subparser name later on
+    subparsers = parser.add_subparsers(dest="subprogram", title="TCutility command-line scripts")
+
+    # add the subparsers to the main parser
+    for sub_program in sub_programs.values():
+        sub_program.create_subparser(subparsers)
+
+    return parser
+
+
+def main():
+    parser = create_parser()
+    args = parser.parse_args()
+
+    # if the program was called without a subprogram we simply print the help message
+    if args.subprogram is None:
+        parser.print_help()
+        return
+
+    # call the main function of the subprogram that was called
+    sub_programs[args.subprogram].main(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/tcutility/job/generic.py b/src/tcutility/job/generic.py
@@ -21,7 +21,7 @@ class Job:
         overwrite: whether to overwrite a previously run job in the same working directory.
         wait_for_finish: whether to wait for this job to finish running before continuing your runscript.
     '''
-    def __init__(self, test_mode: bool = False, overwrite: bool = False, wait_for_finish: bool = False):
+    def __init__(self, test_mode: bool = False, overwrite: bool = False, wait_for_finish: bool = False, delete_on_finish: bool = False):
         self.settings = results.Result()
         self._sbatch = results.Result()
         self._molecule = None
@@ -32,6 +32,7 @@ def __init__(self, test_mode: bool = False, overwrite: bool = False, wait_for_fi
         self.test_mode = test_mode
         self.overwrite = overwrite
         self.wait_for_finish = wait_for_finish
+        self.delete_on_finish = delete_on_finish
         self._preambles = []
         self._postambles = []
         self._postscripts = []
@@ -106,6 +107,9 @@ def run(self):
             return
 
         # write the post-script calls to the post-ambles:
+        if self.delete_on_finish:
+            self.add_postamble(f'rm -r {self.workdir}')
+
         for postscript in self._postscripts:
             print(postscript)
             self._postambles.append(f'python {postscript[0]} {" ".join(postscript[1])}')

diff --git a/src/tcutility/pathfunc.py b/src/tcutility/pathfunc.py
@@ -166,8 +166,6 @@ def match(root: str, pattern: str) -> Dict[str, dict]:
             [2024/01/17 14:39:08] root/NH3-BH3/M06-2X_TZ2P   NH3-BH3   M06-2X       TZ2P
             [2024/01/17 14:39:08] root/SN2/BLYP_TZ2P         SN2       BLYP         TZ2P
             [2024/01/17 14:39:08] root/NH3-BH3/BLYP_QZ4P     NH3-BH3   BLYP         QZ4P
-
-
     """
     # get the number and names of substitutions in the given pattern
     substitutions = re.findall(r"{(\w+[+*?]?)}", pattern)

diff --git a/test/fixtures/irc_trajectories/backward/ams.rkf b/test/fixtures/irc_trajectories/backward/ams.rkf