Skip to content

Commit

Permalink
Merge pull request #168 from TheoChem-VU/154-add-command-line-scripts
Browse files Browse the repository at this point in the history
Added command line scripts for concatening irc paths
  • Loading branch information
SiebeLeDe authored Mar 21, 2024
2 parents 0f126c6 + ae5955a commit 3c743ff
Show file tree
Hide file tree
Showing 28 changed files with 1,516,245 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
pip install -e .
- name: Install sphinx
run: pip install sphinx sphinx-tabs sphinx-copybutton sphinx-autodoc-typehints
run: pip install sphinx sphinx-tabs sphinx-copybutton sphinx-autodoc-typehints sphinx-argparse

- name: Install sphinx theme
run: pip install pydata-sphinx-theme
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build_docs_no_deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
pip install -e .
- name: Install sphinx
run: pip install sphinx sphinx-tabs sphinx-copybutton sphinx-autodoc-typehints
run: pip install sphinx sphinx-tabs sphinx-copybutton sphinx-autodoc-typehints sphinx-argparse

- name: Install sphinx theme
run: pip install pydata-sphinx-theme
Expand Down
2 changes: 1 addition & 1 deletion docs/_templates/logo.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
}
</style>
<div class="myDiv">
<a href="./index.html">
<a href="/index.html">
<img src="https://avatars.githubusercontent.com/u/119413491?s=60&v=4">
TCutility
</a>
Expand Down
15 changes: 15 additions & 0 deletions docs/cli_scripts/main.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Command-line interface (CLI) scripts
====================================

TCutility comes with some helpful CLI scripts for you to use. Please find an overview and examples here. You can also access information by using the ``tc -h`` command. This will show you an overview of the available CLI scripts. More detailed descriptions can be accessed using the ``tc {program name} -h`` commands.

Overview
--------

CLI scripts are invoked using the parent ``tc`` command followed by the sub-program (see below). If you have suggestions for useful scripts please contact the developers or `open an issue <https://github.com/TheoChem-VU/TCutility/issues/new>`_ on our GitHub page.

.. argparse::
:filename: ../src/tcutility/cli_scripts/tcparser.py
:func: create_parser
:prog: tc

1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"sphinx_copybutton",
# 'sphinx.ext.autosummary',
"sphinx_autodoc_typehints",
"sphinxarg.ext",
]
napoleon_use_param = True
templates_path = ["_templates"]
Expand Down
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ TCutility |ProjectVersion| documentation
results/results
tcutility.job
analysis/analysis
cli_scripts/main

.. toctree::
:maxdepth: 2
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,6 @@ exclude = [
"docs*",
] # exclude packages matching these glob patterns (empty by default)
namespaces = false # to disable scanning PEP 420 namespaces (true by default)

[project.scripts]
tc = "tcutility.cli_scripts.tcparser:main"
185 changes: 185 additions & 0 deletions src/tcutility/cli_scripts/concatenate_irc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import argparse
import pathlib as pl
from typing import List, Tuple, Union

import numpy as np
from scm.plams import Molecule
from tcutility.log import log
from tcutility.results import read
from tcutility.results.result import Result


def _create_result_objects(job_dirs: Union[List[str], List[pl.Path]]) -> List[Result]:
"""Creates a list of Result objects from a list of directories."""
return [read(pl.Path(file)) for file in job_dirs]


def _get_converged_energies(res: Result) -> List[float]:
"""Returns a list of energies of the converged geometries."""
return [energy for converged, energy in zip(res.history.converged, res.history.energy) if converged] # type: ignore


def _get_converged_molecules(res: Result) -> List[Molecule]:
"""Returns a list of molecules of the converged geometries."""
return [mol for converged, mol in zip(res.history.converged, res.history.molecule) if converged] # type: ignore


def _concatenate_irc_trajectories_by_rmsd(irc_trajectories: List[List[Molecule]], energies: List[List[float]]) -> Tuple[List[Molecule], List[float]]:
"""
Concatenates lists of molecules by comparing the RMSD values of the end and beginnings of the trajectory.
The entries that are closest to each other are used to concatenate the trajectories.
Parameters:
irc_trajectories: A list of lists of Molecule objects representing the trajectories.
energies: A list of lists of float values representing the energies.
Returns:
A tuple containing a list of Molecule objects and a list of energies.
Raises:
ValueError: If the RMSD values are not as expected.
"""
concatenated_mols: List[Molecule] = irc_trajectories[0][::-1]
concatenated_energies: List[float] = energies[0][::-1]

for traj_index in range(len(irc_trajectories) - 1):
# Calculate RMSD values of two connected trajectories to compare the connection points / molecules
rmsd_matrix = np.array([[Molecule.rmsd(irc_trajectories[traj_index][i], irc_trajectories[traj_index + 1][j]) for j in [0, -1]] for i in [0, -1]])

# Flatten the matrix and find the index of the minimum value
lowest_index = np.argmin(rmsd_matrix.flatten())

log(f"Lowest RMSD values: {rmsd_matrix.flatten()}", 10)

# Starting points are connected
if lowest_index == 0:
concatenated_mols += irc_trajectories[traj_index + 1][1:]
concatenated_energies += energies[traj_index + 1][1:]
# Ending points are connected
elif lowest_index == 1:
concatenated_mols += irc_trajectories[traj_index + 1][::-1]
concatenated_energies += energies[traj_index + 1][::-1]
# Something went wrong
else:
raise ValueError(f"The RMSD values are not as expected: {rmsd_matrix.flatten()} with {lowest_index=}.")

return concatenated_mols, concatenated_energies


def concatenate_irc_trajectories(result_objects: List[Result], user_log_level: int, reverse: bool = False) -> Tuple[List[Molecule], List[float]]:
"""
Concatenates trajectories from irc calculations, often being forward and backward, through the RMSD values.
Parameters:
job_dirs: A list of directories containing the ams.rkf files.
user_log_level: The log level set by the user.
reverse: A boolean indicating whether to reverse the trajectory. Default is False.
Returns:
A tuple containing a list of Molecule objects and a list of energies.
Raises:
Exception: If an exception is raised in the try block, it is caught and printed.
"""
traj_geometries: List[List[Molecule]] = [[] for _ in result_objects]
traj_energies: List[List[float]] = [[] for _ in result_objects]

for i, res_obj in enumerate(result_objects):
log(f"Processing {res_obj.files.root}", user_log_level) # type: ignore # root is a valid attribute
traj_geometries[i] = _get_converged_molecules(res_obj)
traj_energies[i] = _get_converged_energies(res_obj)
log(f"IRC trajectory {i+1} has {len(traj_geometries[i])} geometries.", user_log_level)

log("Concatenating trajectories...", user_log_level)
concatenated_mols, concatenated_energies = _concatenate_irc_trajectories_by_rmsd(traj_geometries, traj_energies)

if reverse:
log("Reversing the trajectory...", user_log_level)
concatenated_mols = concatenated_mols[::-1]
concatenated_energies = concatenated_energies[::-1]
return concatenated_mols, concatenated_energies


def _xyz_format(mol: Molecule) -> str:
"""Returns a string representation of a molecule in the xyz format, e.g.:
Geometry 1, Energy: -0.5 Ha
C 0.00000000 0.00000000 0.00000000
...
"""
return "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])


def _amv_format(mol: Molecule, step: int, energy: Union[float, None] = None) -> str:
"""Returns a string representation of a molecule in the amv format, e.g.:
Geometry 1, Energy: -0.5 Ha
C 0.00000000 0.00000000 0.00000000
...
If no energy is provided, the energy is not included in the string representation"""
if energy is None:
return f"Geometry {step}\n" + "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])
return f"Geometry {step}, Energy: {energy} Ha\n" + "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])


def write_mol_to_xyz_file(mols: Union[List[Molecule], Molecule], filename: Union[str, pl.Path]) -> None:
"""Writes a list of molecules to a file in xyz format."""
mols = mols if isinstance(mols, list) else [mols]
out_file = pl.Path(f"{filename}.xyz")

[mol.delete_all_bonds() for mol in mols]
write_string = "\n\n".join([_xyz_format(mol) for mol in mols])
out_file.write_text(write_string)

return None


def write_mol_to_amv_file(mols: Union[List[Molecule], Molecule], energies: Union[List[float], None], filename: Union[str, pl.Path]) -> None:
"""Writes a list of molecules to a file in amv format."""
mols = mols if isinstance(mols, list) else [mols]
out_file = pl.Path(f"{filename}.amv")
energies = energies if energies is not None else [0.0 for _ in mols]

[mol.delete_all_bonds() for mol in mols]
write_string = "\n\n".join([_amv_format(mol, step, energy) for step, (mol, energy) in enumerate(zip(mols, energies), 1)])
out_file.write_text(write_string)

return None


def create_subparser(parent_parser: argparse.ArgumentParser):
subparser = parent_parser.add_parser( # type: ignore # add_parser is a valid method
"concat-irc",
help="Combine separated IRC paths.",
description="""
Scripts that takes in two or more directories containing an IRC file ("ams.rkf") and concatenates them through the RMSD values. Produces a .xyz and .amv file in the specified output directory.
The output directory is specified with the -o flag. If not specified, the output will be written to the current working directory.
In addition, the -r flag can be used to reverse the trajectory.
Note: ALWAYS visualize the .amv file in AMSView to verify the trajectory.
""",
)

# Add the arguments
subparser.add_argument("jobs", nargs="*", type=str, help="Job directories containing the ams.rkf of the irc calculation(s)")
subparser.add_argument("-r", "--reverse", action="store_true", help="Reverses the trajectory")
subparser.add_argument("-o", "--output", type=str, default="./", help="Directory in which the outputfile will be saved")
subparser.add_argument("-l", "--log_level", type=int, default=20, help="Set the log level. The lower the value, the more is printed. Default is 20 (info).")


def main(args):
outputdir = pl.Path(args.output).resolve()
job_dirs = [pl.Path(directory).resolve() for directory in args.jobs]

if len(job_dirs) > 2:
raise ValueError("Only two IRC paths can be concatenated at a time as is currently implemented.")

res_objects = _create_result_objects(job_dirs)
molecules, energies = concatenate_irc_trajectories(res_objects, args.log_level, args.reverse)

outputdir.mkdir(parents=True, exist_ok=True)
write_mol_to_amv_file(molecules, energies, outputdir / "read_concatenated_mols")
write_mol_to_xyz_file(molecules, outputdir / "read_concatenated_mols")

log(f"Output written to {outputdir / 'concatenated_mols'}")
67 changes: 67 additions & 0 deletions src/tcutility/cli_scripts/job_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
""" Module containing functions for quickly submitting geometry optimization jobs via the command line """
import argparse
from tcutility import job as tcjob
import tcutility
import os


def create_subparser(parent_parser: argparse.ArgumentParser):
desc = "Set up and run a geometry optimization on a given structure."
subparser = parent_parser.add_parser('optimize', help=desc, description=desc)
subparser.add_argument("-l", "--level",
type=str,
help="Set the level of theory for the optimization. For example, \"GFN1-xTB\" or \"BLYP-D3(BJ)/TZ2P\" Can be set in the xyz-file with the 'level_of_theory' flag.",
default=None)
subparser.add_argument("-c", "--charge",
type=int,
help="The charge of the system. Can be set in the xyz-file with the 'charge' flag.",
default=None)
subparser.add_argument("-s", "--spinpol",
type=int,
help="The spin-polarization of the system. Can be set in the xyz-file with the 'spinpol' flag.",
default=None)
subparser.add_argument("-o", "--output",
type=str,
help="The file to write the optimized result to. By default will be written to '{xyzfile}_optimized.xyz'.",
default=None)
subparser.add_argument("-k", "--keep",
help="Keep the calculation directory after finishing the calculation.",
default=False,
action="store_true")
subparser.add_argument("xyzfile",
type=str,
help="The molecule to optimize, in extended xyz-format. See https://theochem-vu.github.io/TCutility/api/tcutility.html#module-tcutility.molecule for more information.")


def main(args: argparse.Namespace):
# set a default output
if args.output is None:
args.output = args.xyzfile.removesuffix('.xyz') + '_optimized.xyz'

mol = tcutility.molecule.load(args.xyzfile)

# get the correct job class object and set the level-of-theory
level = args.level or mol.flags.level_of_theory or 'GFN1-xTB'
if level.lower() == 'gfn1-xtb':
job = tcjob.DFTBJob(delete_on_finish=not args.keep) # by default DFTBJob uses GFN1-xTB
else:
# if we are not using GFN1-xTB we will use ADF
parts = level.split('/')
job = tcjob.ADFJob(delete_on_finish=not args.keep)
job.functional(parts[0])
if len(parts) > 1:
job.basis_set(parts[1])

job.molecule(mol)
job.optimization()
job.charge(args.charge or mol.flags.charge or 0)
if isinstance(job, tcjob.ADFJob):
job.spin_polarization(args.spinpol or mol.flags.spinpol or 0)

job.rundir = os.path.split(args.xyzfile)[0]
job.name = f'.tmp.{os.getpid()}'

# copy the optimized mol to the output file
job.add_postamble(f'cp {job.output_mol_path} {args.output}')

job.run()
41 changes: 41 additions & 0 deletions src/tcutility/cli_scripts/tcparser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from tcutility.cli_scripts import concatenate_irc, job_script

# to add a script:
# 1. Add a create_subparser function and main function to your script.
# 2. Import the script.
# 3. Add it to the dictionary below {program_name: script-module}.
sub_programs = {
"optimize": job_script,
"concat-irc": concatenate_irc,
}


def create_parser():
import argparse

parser = argparse.ArgumentParser(prog="tc")
# add the subparsers. dest ensures we can retrieve the subparser name later on
subparsers = parser.add_subparsers(dest="subprogram", title="TCutility command-line scripts")

# add the subparsers to the main parser
for sub_program in sub_programs.values():
sub_program.create_subparser(subparsers)

return parser


def main():
parser = create_parser()
args = parser.parse_args()

# if the program was called without a subprogram we simply print the help message
if args.subprogram is None:
parser.print_help()
return

# call the main function of the subprogram that was called
sub_programs[args.subprogram].main(args)


if __name__ == "__main__":
main()
6 changes: 5 additions & 1 deletion src/tcutility/job/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class Job:
overwrite: whether to overwrite a previously run job in the same working directory.
wait_for_finish: whether to wait for this job to finish running before continuing your runscript.
'''
def __init__(self, test_mode: bool = False, overwrite: bool = False, wait_for_finish: bool = False):
def __init__(self, test_mode: bool = False, overwrite: bool = False, wait_for_finish: bool = False, delete_on_finish: bool = False):
self.settings = results.Result()
self._sbatch = results.Result()
self._molecule = None
Expand All @@ -32,6 +32,7 @@ def __init__(self, test_mode: bool = False, overwrite: bool = False, wait_for_fi
self.test_mode = test_mode
self.overwrite = overwrite
self.wait_for_finish = wait_for_finish
self.delete_on_finish = delete_on_finish
self._preambles = []
self._postambles = []
self._postscripts = []
Expand Down Expand Up @@ -106,6 +107,9 @@ def run(self):
return

# write the post-script calls to the post-ambles:
if self.delete_on_finish:
self.add_postamble(f'rm -r {self.workdir}')

for postscript in self._postscripts:
print(postscript)
self._postambles.append(f'python {postscript[0]} {" ".join(postscript[1])}')
Expand Down
2 changes: 0 additions & 2 deletions src/tcutility/pathfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,6 @@ def match(root: str, pattern: str) -> Dict[str, dict]:
[2024/01/17 14:39:08] root/NH3-BH3/M06-2X_TZ2P NH3-BH3 M06-2X TZ2P
[2024/01/17 14:39:08] root/SN2/BLYP_TZ2P SN2 BLYP TZ2P
[2024/01/17 14:39:08] root/NH3-BH3/BLYP_QZ4P NH3-BH3 BLYP QZ4P
"""
# get the number and names of substitutions in the given pattern
substitutions = re.findall(r"{(\w+[+*?]?)}", pattern)
Expand Down
Binary file added test/fixtures/irc_trajectories/backward/ams.rkf
Binary file not shown.
Loading

0 comments on commit 3c743ff

Please sign in to comment.