TheoChem-VU · YHordijk · Aug 22, 2024 · Jun 27, 2024 · Jul 19, 2024 · Jul 19, 2024
diff --git a/recipes/job/conformer_search.py b/recipes/job/conformer_search.py
diff --git a/recipes/job/preoptimize.py b/recipes/job/preoptimize.py
diff --git a/src/tcutility/analysis/task_specific/irc.py b/src/tcutility/analysis/task_specific/irc.py
@@ -0,0 +1,95 @@
+import pathlib as pl
+from typing import List, Tuple, Union
+
+import numpy as np
+from scm.plams import Molecule
+from tcutility.log import log
+from tcutility.results import read
+from tcutility.results.result import Result
+
+
+def create_result_objects(job_dirs: Union[List[str], List[pl.Path]]) -> List[Result]:
+    """Creates a list of Result objects from a list of directories."""
+    return [read(pl.Path(file)) for file in job_dirs]
+
+
+def _get_converged_energies(res: Result) -> List[float]:
+    """Returns a list of energies of the converged geometries."""
+    return [energy for converged, energy in zip(res.history.converged, res.history.energy) if converged]  # type: ignore
+
+
+def _get_converged_molecules(res: Result) -> List[Molecule]:
+    """Returns a list of molecules of the converged geometries."""
+    return [mol for converged, mol in zip(res.history.converged, res.history.molecule) if converged]  # type: ignore
+
+
+def _concatenate_irc_trajectories_by_rmsd(irc_trajectories: List[List[Molecule]], energies: Union[List[List[float]], None]) -> Tuple[List[Molecule], List[float]]:
+    """
+    Concatenates lists of molecules by comparing the RMSD values of the end and beginnings of the trajectory.
+    The entries that are closest to each other are used to concatenate the trajectories.
+
+    Parameters:
+        irc_trajectories: A list of lists of Molecule objects representing the trajectories.
+        energies: A list of lists of float values representing the energies.
+
+    Returns:
+        A tuple containing a list of Molecule objects and a list of energies.
+
+    Raises:
+        ValueError: If the RMSD values are not as expected.
+    """
+    concatenated_mols: List[Molecule] = irc_trajectories[0][::-1]
+    concatenated_energies: List[float] = energies[0][::-1] if energies is not None else []
+
+    for traj_index in range(len(irc_trajectories) - 1):
+        # Calculate RMSD values of two connected trajectories to compare the connection points / molecules
+        rmsd_matrix = np.array([[Molecule.rmsd(irc_trajectories[traj_index][i], irc_trajectories[traj_index + 1][j]) for j in [0, -1]] for i in [0, -1]])
+
+        # Flatten the matrix and find the index of the minimum value
+        lowest_index = np.argmin(rmsd_matrix.flatten())
+
+        log(f"Lowest RMSD values: {rmsd_matrix.flatten()}", 10)
+
+        # Starting points are connected
+        if lowest_index == 0:
+            concatenated_mols += irc_trajectories[traj_index + 1][1:]
+            concatenated_energies += energies[traj_index + 1][1:] if energies is not None else []
+        # Ending points are connected
+        elif lowest_index == 1:
+            concatenated_mols += irc_trajectories[traj_index + 1][::-1]
+            concatenated_energies += energies[traj_index + 1][::-1] if energies is not None else []
+        # Something went wrong
+        else:
+            raise ValueError(f"The RMSD values are not as expected: {rmsd_matrix.flatten()} with {lowest_index=}.")
+
+    return concatenated_mols, concatenated_energies
+
+
+def concatenate_irc_trajectories(result_objects: List[Result], reverse: bool = False) -> Tuple[List[Molecule], List[float]]:
+    """
+    Concatenates trajectories from irc calculations, often being forward and backward, through the RMSD values.
+
+    Parameters:
+        job_dirs: A list of directories containing the ams.rkf files.
+        user_log_level: The log level set by the user.
+        reverse: A boolean indicating whether to reverse the trajectory. Default is False.
+
+    Returns:
+        A tuple containing a list of Molecule objects and a list of energies.
+
+    Raises:
+        Exception: If an exception is raised in the try block, it is caught and printed.
+    """
+    traj_geometries: List[List[Molecule]] = [[] for _ in result_objects]
+    traj_energies: List[List[float]] = [[] for _ in result_objects]
+
+    for i, res_obj in enumerate(result_objects):
+        traj_geometries[i] = _get_converged_molecules(res_obj)
+        traj_energies[i] = _get_converged_energies(res_obj)
+
+    concatenated_mols, concatenated_energies = _concatenate_irc_trajectories_by_rmsd(traj_geometries, traj_energies)
+
+    if reverse:
+        concatenated_mols = concatenated_mols[::-1]
+        concatenated_energies = concatenated_energies[::-1]
+    return concatenated_mols, concatenated_energies
diff --git a/src/tcutility/cli_scripts/concatenate_irc.py b/src/tcutility/cli_scripts/concatenate_irc.py
@@ -1,151 +1,9 @@
 import argparse
 import pathlib as pl
-from typing import List, Tuple, Union
 
-import numpy as np
-from scm.plams import Molecule
+from tcutility.analysis.task_specific.irc import concatenate_irc_trajectories, create_result_objects
 from tcutility.log import log
-from tcutility.results import read
-from tcutility.results.result import Result
-
-
-def _create_result_objects(job_dirs: Union[List[str], List[pl.Path]]) -> List[Result]:
-    """Creates a list of Result objects from a list of directories."""
-    return [read(pl.Path(file)) for file in job_dirs]
-
-
-def _get_converged_energies(res: Result) -> List[float]:
-    """Returns a list of energies of the converged geometries."""
-    return [energy for converged, energy in zip(res.history.converged, res.history.energy) if converged]  # type: ignore
-
-
-def _get_converged_molecules(res: Result) -> List[Molecule]:
-    """Returns a list of molecules of the converged geometries."""
-    return [mol for converged, mol in zip(res.history.converged, res.history.molecule) if converged]  # type: ignore
-
-
-def _concatenate_irc_trajectories_by_rmsd(irc_trajectories: List[List[Molecule]], energies: List[List[float]]) -> Tuple[List[Molecule], List[float]]:
-    """
-    Concatenates lists of molecules by comparing the RMSD values of the end and beginnings of the trajectory.
-    The entries that are closest to each other are used to concatenate the trajectories.
-
-    Parameters:
-        irc_trajectories: A list of lists of Molecule objects representing the trajectories.
-        energies: A list of lists of float values representing the energies.
-
-    Returns:
-        A tuple containing a list of Molecule objects and a list of energies.
-
-    Raises:
-        ValueError: If the RMSD values are not as expected.
-    """
-    concatenated_mols: List[Molecule] = irc_trajectories[0][::-1]
-    concatenated_energies: List[float] = energies[0][::-1]
-
-    for traj_index in range(len(irc_trajectories) - 1):
-        # Calculate RMSD values of two connected trajectories to compare the connection points / molecules
-        rmsd_matrix = np.array([[Molecule.rmsd(irc_trajectories[traj_index][i], irc_trajectories[traj_index + 1][j]) for j in [0, -1]] for i in [0, -1]])
-
-        # Flatten the matrix and find the index of the minimum value
-        lowest_index = np.argmin(rmsd_matrix.flatten())
-
-        log(f"Lowest RMSD values: {rmsd_matrix.flatten()}", 10)
-
-        # Starting points are connected
-        if lowest_index == 0:
-            concatenated_mols += irc_trajectories[traj_index + 1][1:]
-            concatenated_energies += energies[traj_index + 1][1:]
-        # Ending points are connected
-        elif lowest_index == 1:
-            concatenated_mols += irc_trajectories[traj_index + 1][::-1]
-            concatenated_energies += energies[traj_index + 1][::-1]
-        # Something went wrong
-        else:
-            raise ValueError(f"The RMSD values are not as expected: {rmsd_matrix.flatten()} with {lowest_index=}.")
-
-    return concatenated_mols, concatenated_energies
-
-
-def concatenate_irc_trajectories(result_objects: List[Result], user_log_level: int, reverse: bool = False) -> Tuple[List[Molecule], List[float]]:
-    """
-    Concatenates trajectories from irc calculations, often being forward and backward, through the RMSD values.
-
-    Parameters:
-        job_dirs: A list of directories containing the ams.rkf files.
-        user_log_level: The log level set by the user.
-        reverse: A boolean indicating whether to reverse the trajectory. Default is False.
-
-    Returns:
-        A tuple containing a list of Molecule objects and a list of energies.
-
-    Raises:
-        Exception: If an exception is raised in the try block, it is caught and printed.
-    """
-    traj_geometries: List[List[Molecule]] = [[] for _ in result_objects]
-    traj_energies: List[List[float]] = [[] for _ in result_objects]
-
-    for i, res_obj in enumerate(result_objects):
-        log(f"Processing {res_obj.files.root}", user_log_level)  # type: ignore # root is a valid attribute
-        traj_geometries[i] = _get_converged_molecules(res_obj)
-        traj_energies[i] = _get_converged_energies(res_obj)
-        log(f"IRC trajectory {i+1} has {len(traj_geometries[i])} geometries.", user_log_level)
-
-    log("Concatenating trajectories...", user_log_level)
-    concatenated_mols, concatenated_energies = _concatenate_irc_trajectories_by_rmsd(traj_geometries, traj_energies)
-
-    if reverse:
-        log("Reversing the trajectory...", user_log_level)
-        concatenated_mols = concatenated_mols[::-1]
-        concatenated_energies = concatenated_energies[::-1]
-    return concatenated_mols, concatenated_energies
-
-
-def _xyz_format(mol: Molecule) -> str:
-    """Returns a string representation of a molecule in the xyz format, e.g.:
-
-    Geometry 1, Energy: -0.5 Ha
-    C      0.00000000      0.00000000      0.00000000
-    ...
-    """
-    return "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])
-
-
-def _amv_format(mol: Molecule, step: int, energy: Union[float, None] = None) -> str:
-    """Returns a string representation of a molecule in the amv format, e.g.:
-
-    Geometry 1, Energy: -0.5 Ha
-    C      0.00000000      0.00000000      0.00000000
-    ...
-
-    If no energy is provided, the energy is not included in the string representation"""
-    if energy is None:
-        return f"Geometry {step}\n" + "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])
-    return f"Geometry {step}, Energy: {energy} Ha\n" + "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])
-
-
-def write_mol_to_xyz_file(mols: Union[List[Molecule], Molecule], filename: Union[str, pl.Path]) -> None:
-    """Writes a list of molecules to a file in xyz format."""
-    mols = mols if isinstance(mols, list) else [mols]
-    out_file = pl.Path(f"{filename}.xyz")
-
-    [mol.delete_all_bonds() for mol in mols]
-    write_string = "\n\n".join([_xyz_format(mol) for mol in mols])
-    out_file.write_text(write_string)
-
-    return None
-
-
-def write_mol_to_amv_file(mols: Union[List[Molecule], Molecule], energies: Union[List[float], None], filename: Union[str, pl.Path]) -> None:
-    """Writes a list of molecules to a file in amv format."""
-    mols = mols if isinstance(mols, list) else [mols]
-    out_file = pl.Path(f"{filename}.amv")
-    energies = energies if energies is not None else [0.0 for _ in mols]
-
-    [mol.delete_all_bonds() for mol in mols]
-    write_string = "\n\n".join([_amv_format(mol, step, energy) for step, (mol, energy) in enumerate(zip(mols, energies), 1)])
-    out_file.write_text(write_string)
-
-    return None
+from tcutility.molecule import write_mol_to_amv_file, write_mol_to_xyz_file
 
 
 def create_subparser(parent_parser: argparse.ArgumentParser):
@@ -175,8 +33,10 @@ def main(args):
     if len(job_dirs) > 2:
         raise ValueError("Only two IRC paths can be concatenated at a time as is currently implemented.")
 
-    res_objects = _create_result_objects(job_dirs)
-    molecules, energies = concatenate_irc_trajectories(res_objects, args.log_level, args.reverse)
+    log(f"Concatenating trajectories... with reverse = {args.reverse}", args.log_level)
+    res_objects = create_result_objects(job_dirs)
+    molecules, energies = concatenate_irc_trajectories(res_objects, args.reverse)
+    log(f"Trajectories concatenated successfully (total length = {len(molecules)}) .", args.log_level)
 
     outputdir.mkdir(parents=True, exist_ok=True)
     write_mol_to_amv_file(molecules, energies, outputdir / "read_concatenated_mols")

diff --git a/src/tcutility/data/_atom_data_info/color.txt b/src/tcutility/data/_atom_data_info/color.txt
@@ -1,3 +1,4 @@
+0, 128, 0, 128
 1, 255, 255, 255
 2, 217, 255, 255
 3, 204, 128, 255

diff --git a/src/tcutility/data/_atom_data_info/name.txt b/src/tcutility/data/_atom_data_info/name.txt
@@ -1,3 +1,4 @@
+0, dummy
 1, hydrogen
 2, helium
 3, lithium

diff --git a/src/tcutility/data/_atom_data_info/radius.txt b/src/tcutility/data/_atom_data_info/radius.txt
@@ -1,3 +1,4 @@
+0, 0.15
 1, 0.31
 2, 0.28
 3, 1.28

diff --git a/src/tcutility/data/_atom_data_info/symbol.txt b/src/tcutility/data/_atom_data_info/symbol.txt
@@ -1,3 +1,4 @@
+0, Xx
 1, H
 2, He
 3, Li

diff --git a/src/tcutility/data/atom.py b/src/tcutility/data/atom.py
@@ -45,10 +45,10 @@ def parse_element(val):
     # if it is not an int it should be a string
     if val.lower() in _element_order:
         # first try to get it in the element name list
-        return _element_order.index(val.lower()) + 1
+        return _element_order.index(val.lower())
     if val in _symbol_order:
         # alternatively try to get it in the symbol list
-        return _symbol_order.index(val) + 1
+        return _symbol_order.index(val)
     raise KeyError(f'Element "{val}" not parsable.')
 
 
@@ -81,13 +81,13 @@ def atom_number(element):
 
 def symbol(element):
     num = parse_element(element)
-    return _symbol_order[num - 1]
+    return _symbol_order[num]
 
 
 def element(element):
     num = parse_element(element)
-    return _element_order[num - 1]
+    return _element_order[num]
 
 
 if __name__ == "__main__":
-    print(symbol(2))
+    print(symbol(0))
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
+, 0.15
 , 0.31
 , 0.28
 , 1.28
@@ Expand Down @@