Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parity with main #300

Merged
merged 13 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file modified recipes/job/conformer_search.py
100755 → 100644
Empty file.
Empty file modified recipes/job/preoptimize.py
100755 → 100644
Empty file.
95 changes: 95 additions & 0 deletions src/tcutility/analysis/task_specific/irc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import pathlib as pl
from typing import List, Tuple, Union

import numpy as np
from scm.plams import Molecule
from tcutility.log import log
from tcutility.results import read
from tcutility.results.result import Result


def create_result_objects(job_dirs: Union[List[str], List[pl.Path]]) -> List[Result]:
"""Creates a list of Result objects from a list of directories."""
return [read(pl.Path(file)) for file in job_dirs]


def _get_converged_energies(res: Result) -> List[float]:
"""Returns a list of energies of the converged geometries."""
return [energy for converged, energy in zip(res.history.converged, res.history.energy) if converged] # type: ignore


def _get_converged_molecules(res: Result) -> List[Molecule]:
"""Returns a list of molecules of the converged geometries."""
return [mol for converged, mol in zip(res.history.converged, res.history.molecule) if converged] # type: ignore


def _concatenate_irc_trajectories_by_rmsd(irc_trajectories: List[List[Molecule]], energies: Union[List[List[float]], None]) -> Tuple[List[Molecule], List[float]]:
"""
Concatenates lists of molecules by comparing the RMSD values of the end and beginnings of the trajectory.
The entries that are closest to each other are used to concatenate the trajectories.

Parameters:
irc_trajectories: A list of lists of Molecule objects representing the trajectories.
energies: A list of lists of float values representing the energies.

Returns:
A tuple containing a list of Molecule objects and a list of energies.

Raises:
ValueError: If the RMSD values are not as expected.
"""
concatenated_mols: List[Molecule] = irc_trajectories[0][::-1]
concatenated_energies: List[float] = energies[0][::-1] if energies is not None else []

for traj_index in range(len(irc_trajectories) - 1):
# Calculate RMSD values of two connected trajectories to compare the connection points / molecules
rmsd_matrix = np.array([[Molecule.rmsd(irc_trajectories[traj_index][i], irc_trajectories[traj_index + 1][j]) for j in [0, -1]] for i in [0, -1]])

# Flatten the matrix and find the index of the minimum value
lowest_index = np.argmin(rmsd_matrix.flatten())

log(f"Lowest RMSD values: {rmsd_matrix.flatten()}", 10)

# Starting points are connected
if lowest_index == 0:
concatenated_mols += irc_trajectories[traj_index + 1][1:]
concatenated_energies += energies[traj_index + 1][1:] if energies is not None else []
# Ending points are connected
elif lowest_index == 1:
concatenated_mols += irc_trajectories[traj_index + 1][::-1]
concatenated_energies += energies[traj_index + 1][::-1] if energies is not None else []
# Something went wrong
else:
raise ValueError(f"The RMSD values are not as expected: {rmsd_matrix.flatten()} with {lowest_index=}.")

return concatenated_mols, concatenated_energies


def concatenate_irc_trajectories(result_objects: List[Result], reverse: bool = False) -> Tuple[List[Molecule], List[float]]:
"""
Concatenates trajectories from irc calculations, often being forward and backward, through the RMSD values.

Parameters:
job_dirs: A list of directories containing the ams.rkf files.
user_log_level: The log level set by the user.
reverse: A boolean indicating whether to reverse the trajectory. Default is False.

Returns:
A tuple containing a list of Molecule objects and a list of energies.

Raises:
Exception: If an exception is raised in the try block, it is caught and printed.
"""
traj_geometries: List[List[Molecule]] = [[] for _ in result_objects]
traj_energies: List[List[float]] = [[] for _ in result_objects]

for i, res_obj in enumerate(result_objects):
traj_geometries[i] = _get_converged_molecules(res_obj)
traj_energies[i] = _get_converged_energies(res_obj)

concatenated_mols, concatenated_energies = _concatenate_irc_trajectories_by_rmsd(traj_geometries, traj_energies)

if reverse:
concatenated_mols = concatenated_mols[::-1]
concatenated_energies = concatenated_energies[::-1]
return concatenated_mols, concatenated_energies
152 changes: 6 additions & 146 deletions src/tcutility/cli_scripts/concatenate_irc.py
Original file line number Diff line number Diff line change
@@ -1,151 +1,9 @@
import argparse
import pathlib as pl
from typing import List, Tuple, Union

import numpy as np
from scm.plams import Molecule
from tcutility.analysis.task_specific.irc import concatenate_irc_trajectories, create_result_objects
from tcutility.log import log
from tcutility.results import read
from tcutility.results.result import Result


def _create_result_objects(job_dirs: Union[List[str], List[pl.Path]]) -> List[Result]:
"""Creates a list of Result objects from a list of directories."""
return [read(pl.Path(file)) for file in job_dirs]


def _get_converged_energies(res: Result) -> List[float]:
"""Returns a list of energies of the converged geometries."""
return [energy for converged, energy in zip(res.history.converged, res.history.energy) if converged] # type: ignore


def _get_converged_molecules(res: Result) -> List[Molecule]:
"""Returns a list of molecules of the converged geometries."""
return [mol for converged, mol in zip(res.history.converged, res.history.molecule) if converged] # type: ignore


def _concatenate_irc_trajectories_by_rmsd(irc_trajectories: List[List[Molecule]], energies: List[List[float]]) -> Tuple[List[Molecule], List[float]]:
"""
Concatenates lists of molecules by comparing the RMSD values of the end and beginnings of the trajectory.
The entries that are closest to each other are used to concatenate the trajectories.

Parameters:
irc_trajectories: A list of lists of Molecule objects representing the trajectories.
energies: A list of lists of float values representing the energies.

Returns:
A tuple containing a list of Molecule objects and a list of energies.

Raises:
ValueError: If the RMSD values are not as expected.
"""
concatenated_mols: List[Molecule] = irc_trajectories[0][::-1]
concatenated_energies: List[float] = energies[0][::-1]

for traj_index in range(len(irc_trajectories) - 1):
# Calculate RMSD values of two connected trajectories to compare the connection points / molecules
rmsd_matrix = np.array([[Molecule.rmsd(irc_trajectories[traj_index][i], irc_trajectories[traj_index + 1][j]) for j in [0, -1]] for i in [0, -1]])

# Flatten the matrix and find the index of the minimum value
lowest_index = np.argmin(rmsd_matrix.flatten())

log(f"Lowest RMSD values: {rmsd_matrix.flatten()}", 10)

# Starting points are connected
if lowest_index == 0:
concatenated_mols += irc_trajectories[traj_index + 1][1:]
concatenated_energies += energies[traj_index + 1][1:]
# Ending points are connected
elif lowest_index == 1:
concatenated_mols += irc_trajectories[traj_index + 1][::-1]
concatenated_energies += energies[traj_index + 1][::-1]
# Something went wrong
else:
raise ValueError(f"The RMSD values are not as expected: {rmsd_matrix.flatten()} with {lowest_index=}.")

return concatenated_mols, concatenated_energies


def concatenate_irc_trajectories(result_objects: List[Result], user_log_level: int, reverse: bool = False) -> Tuple[List[Molecule], List[float]]:
"""
Concatenates trajectories from irc calculations, often being forward and backward, through the RMSD values.

Parameters:
job_dirs: A list of directories containing the ams.rkf files.
user_log_level: The log level set by the user.
reverse: A boolean indicating whether to reverse the trajectory. Default is False.

Returns:
A tuple containing a list of Molecule objects and a list of energies.

Raises:
Exception: If an exception is raised in the try block, it is caught and printed.
"""
traj_geometries: List[List[Molecule]] = [[] for _ in result_objects]
traj_energies: List[List[float]] = [[] for _ in result_objects]

for i, res_obj in enumerate(result_objects):
log(f"Processing {res_obj.files.root}", user_log_level) # type: ignore # root is a valid attribute
traj_geometries[i] = _get_converged_molecules(res_obj)
traj_energies[i] = _get_converged_energies(res_obj)
log(f"IRC trajectory {i+1} has {len(traj_geometries[i])} geometries.", user_log_level)

log("Concatenating trajectories...", user_log_level)
concatenated_mols, concatenated_energies = _concatenate_irc_trajectories_by_rmsd(traj_geometries, traj_energies)

if reverse:
log("Reversing the trajectory...", user_log_level)
concatenated_mols = concatenated_mols[::-1]
concatenated_energies = concatenated_energies[::-1]
return concatenated_mols, concatenated_energies


def _xyz_format(mol: Molecule) -> str:
"""Returns a string representation of a molecule in the xyz format, e.g.:

Geometry 1, Energy: -0.5 Ha
C 0.00000000 0.00000000 0.00000000
...
"""
return "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])


def _amv_format(mol: Molecule, step: int, energy: Union[float, None] = None) -> str:
"""Returns a string representation of a molecule in the amv format, e.g.:

Geometry 1, Energy: -0.5 Ha
C 0.00000000 0.00000000 0.00000000
...

If no energy is provided, the energy is not included in the string representation"""
if energy is None:
return f"Geometry {step}\n" + "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])
return f"Geometry {step}, Energy: {energy} Ha\n" + "\n".join([f"{atom.symbol:6s}{atom.x:16.8f}{atom.y:16.8f}{atom.z:16.8f}" for atom in mol.atoms])


def write_mol_to_xyz_file(mols: Union[List[Molecule], Molecule], filename: Union[str, pl.Path]) -> None:
"""Writes a list of molecules to a file in xyz format."""
mols = mols if isinstance(mols, list) else [mols]
out_file = pl.Path(f"{filename}.xyz")

[mol.delete_all_bonds() for mol in mols]
write_string = "\n\n".join([_xyz_format(mol) for mol in mols])
out_file.write_text(write_string)

return None


def write_mol_to_amv_file(mols: Union[List[Molecule], Molecule], energies: Union[List[float], None], filename: Union[str, pl.Path]) -> None:
"""Writes a list of molecules to a file in amv format."""
mols = mols if isinstance(mols, list) else [mols]
out_file = pl.Path(f"{filename}.amv")
energies = energies if energies is not None else [0.0 for _ in mols]

[mol.delete_all_bonds() for mol in mols]
write_string = "\n\n".join([_amv_format(mol, step, energy) for step, (mol, energy) in enumerate(zip(mols, energies), 1)])
out_file.write_text(write_string)

return None
from tcutility.molecule import write_mol_to_amv_file, write_mol_to_xyz_file


def create_subparser(parent_parser: argparse.ArgumentParser):
Expand Down Expand Up @@ -175,8 +33,10 @@ def main(args):
if len(job_dirs) > 2:
raise ValueError("Only two IRC paths can be concatenated at a time as is currently implemented.")

res_objects = _create_result_objects(job_dirs)
molecules, energies = concatenate_irc_trajectories(res_objects, args.log_level, args.reverse)
log(f"Concatenating trajectories... with reverse = {args.reverse}", args.log_level)
res_objects = create_result_objects(job_dirs)
molecules, energies = concatenate_irc_trajectories(res_objects, args.reverse)
log(f"Trajectories concatenated successfully (total length = {len(molecules)}) .", args.log_level)

outputdir.mkdir(parents=True, exist_ok=True)
write_mol_to_amv_file(molecules, energies, outputdir / "read_concatenated_mols")
Expand Down
1 change: 1 addition & 0 deletions src/tcutility/data/_atom_data_info/color.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
0, 128, 0, 128
1, 255, 255, 255
2, 217, 255, 255
3, 204, 128, 255
Expand Down
1 change: 1 addition & 0 deletions src/tcutility/data/_atom_data_info/name.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
0, dummy
1, hydrogen
2, helium
3, lithium
Expand Down
1 change: 1 addition & 0 deletions src/tcutility/data/_atom_data_info/radius.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
0, 0.15
1, 0.31
2, 0.28
3, 1.28
Expand Down
1 change: 1 addition & 0 deletions src/tcutility/data/_atom_data_info/symbol.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
0, Xx
1, H
2, He
3, Li
Expand Down
10 changes: 5 additions & 5 deletions src/tcutility/data/atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ def parse_element(val):
# if it is not an int it should be a string
if val.lower() in _element_order:
# first try to get it in the element name list
return _element_order.index(val.lower()) + 1
return _element_order.index(val.lower())
if val in _symbol_order:
# alternatively try to get it in the symbol list
return _symbol_order.index(val) + 1
return _symbol_order.index(val)
raise KeyError(f'Element "{val}" not parsable.')


Expand Down Expand Up @@ -81,13 +81,13 @@ def atom_number(element):

def symbol(element):
num = parse_element(element)
return _symbol_order[num - 1]
return _symbol_order[num]


def element(element):
num = parse_element(element)
return _element_order[num - 1]
return _element_order[num]


if __name__ == "__main__":
print(symbol(2))
print(symbol(0))
Loading
Loading