Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new find_mcs function for exclusion of hydrogens #110

Merged
merged 10 commits into from
Apr 14, 2023
203 changes: 188 additions & 15 deletions transformato/mutate.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ def _check_for_lp(
]
lp_dict_dummy_region = defaultdict(list)
lp_dict_common_core = defaultdict(list)
print(f"die version {pm.__version__}")

for atom in psf.view[f":{tlc}"].atoms:
if atom.name.find("LP") == False:
print(f"die Atome {atom}")
Expand Down Expand Up @@ -959,6 +959,8 @@ def _find_mcs(
self,
mol1_name: str,
mol2_name: str,
iterate_over_matches: bool = False,
max_matches: int = 10,
):
"""
A class that proposes the mutation route between two molecules with a
Expand All @@ -980,15 +982,33 @@ def _find_mcs(

m1, m2 = [deepcopy(self.mols[mol1_name]), deepcopy(self.mols[mol2_name])]

# second copy of mols - to use as representation with removed hydrogens
remmol1 = deepcopy(m1)
remmol2 = deepcopy(m2)

# removal of hydrogens - if not removed, common core for molecule + hydrogens is computed!
remmol1 = Chem.rdmolops.RemoveAllHs(remmol1)
remmol2 = Chem.rdmolops.RemoveAllHs(remmol2)

# remmols contains both molecules with removed hydrogens
remmols = [remmol1, remmol2]

for m in [m1, m2]:
logger.debug("Mol in SMILES format: {}.".format(Chem.MolToSmiles(m, True)))

# make copy of mols
changed_mols = [Chem.Mol(x) for x in [m1, m2]]

# find substructure match (ignore bond order but enforce element matching)

# findmcs-function is called for mol-objects with removed hydrogens

# original Transformato-parameters (yield bad / for Transformato not usable results for molecules with cyclic structures, e.g., ccores between 2-CPI and 7-CPI)
# especially because completeRingsOnly is set to False
"""
mcs = rdFMCS.FindMCS(
changed_mols,
#changed_mols,
remmols,
bondCompare=self.bondCompare,
timeout=120,
atomCompare=self.atomCompare,
Expand All @@ -997,27 +1017,180 @@ def _find_mcs(
completeRingsOnly=self.completeRingsOnly,
ringMatchesRingOnly=self.ringMatchesRingOnly,
)
"""

# find_mcs-function from tf_routes:
# yields more reasonable common cores (e.g. for 2-CPI/7-CPI )
# in particular, completeRingsOnly=True is important

mcs = rdFMCS.FindMCS(
remmols,
timeout=120,
ringMatchesRingOnly=True,
completeRingsOnly=True,
ringCompare=Chem.rdFMCS.RingCompare.StrictRingFusion,
bondCompare=rdFMCS.BondCompare.CompareAny,
matchValences=False,
)

logger.debug("Substructure match: {}".format(mcs.smartsString))
# convert from SMARTS
mcsp = Chem.MolFromSmarts(mcs.smartsString, False)

s1 = m1.GetSubstructMatch(mcsp)
logger.debug("Substructere match idx: {}".format(s1))
# iterate_over_matches == False: the common core atoms for a single stubstructure match are determined
# possibly a different match yields a bigger ccore - i.e. a ccore with more hydrogens (neopentane - methane)
if iterate_over_matches == False:
s1 = m1.GetSubstructMatch(mcsp)
logger.debug("Substructere match idx: {}".format(s1))
self._show_common_core(
m1, self.get_common_core_idx_mol1(), show_atom_type=False, internal=True
)
s2 = m2.GetSubstructMatch(mcsp)
logger.debug("Substructere match idx: {}".format(s2))
self._show_common_core(
m2, self.get_common_core_idx_mol2(), show_atom_type=False, internal=True
)

self._show_common_core(
m1, self.get_common_core_idx_mol1(), show_atom_type=False, internal=True
)
# new code: add hydrogens to both common-core-on-molecule-projections
# set with all common core atom indices for both molecules
hit_ats1_compl = list(s1)
hit_ats2_compl = list(s2)

# check for each common core atom whether hydrogen atoms are in its neighbourhood
# s1/s2 contain the mapping of the common core (without hydrogens) to both molecules
# iterating over all mapped atoms, the number of hydrogens attached to the common core atom is determined
# the minimum number (i.e. if the atom of molecule 1 has one hydrogen bond, the atom of molecule 2 zero hydrogen bonds, it is zero) gives the number of hydrogen atoms to add to the common core

for indexpos, indexnr in enumerate(s1):
# get mapped atoms
atom1 = m1.GetAtomWithIdx(s1[indexpos])
atom2 = m2.GetAtomWithIdx(s2[indexpos])

# determine number of hydrogens in the neighbourhood of the atom from molecule1
h_atoms1 = 0
for x in atom1.GetNeighbors():
if x.GetSymbol() == "H":
h_atoms1 = h_atoms1 + 1

# determine number of hydrogens in the neighbourhood of the atom from molecule2
h_atoms2 = 0
for x in atom2.GetNeighbors():
if x.GetSymbol() == "H":
h_atoms2 = h_atoms2 + 1

# find minimum number of hydrogens
min_h_atoms = min(h_atoms1, h_atoms2)

# add minimum number of hydrogens to the ccore for molecule1
h_atoms1 = 0
for x in atom1.GetNeighbors():
if x.GetSymbol() == "H" and h_atoms1 < min_h_atoms:
hit_ats1_compl.append(x.GetIdx())
h_atoms1 = h_atoms1 + 1

# add minimum number of hydrogens to the ccore for molecule2
h_atoms2 = 0
for x in atom2.GetNeighbors():
if x.GetSymbol() == "H" and h_atoms2 < min_h_atoms:
hit_ats2_compl.append(x.GetIdx())
h_atoms2 = h_atoms2 + 1

# create new tuple of common core atom indices with additional hydrogens (molecule 1)
hit_ats1 = tuple(hit_ats1_compl)

# create new tuple of common core atom indices with additional hydrogens (molecule 2)
hit_ats2 = tuple(hit_ats2_compl)

self._substructure_match[mol1_name] = list(hit_ats1)
self._substructure_match[mol2_name] = list(hit_ats2)

# self._substructure_match[mol1_name] = list(s1)
# self._substructure_match[mol2_name] = list(s2)

s2 = m2.GetSubstructMatch(mcsp)
logger.debug("Substructere match idx: {}".format(s2))
return mcs

self._show_common_core(
m2, self.get_common_core_idx_mol2(), show_atom_type=False, internal=True
)
self._substructure_match[mol1_name] = list(s1)
self._substructure_match[mol2_name] = list(s2)
# iterate_over_matches == True: it is iterated over all pairs of substructure matches
# the substructure matches with the biggest emering common cores are finally chosen
# the common cores for different substructure match pairs contain the same heavy atoms, but differ in the number of hydrogens, i.e. the finally chosen matches have the common cores with most hydrogens
else:
s1s = m1.GetSubstructMatches(mcsp, maxMatches=max_matches)
logger.debug("Substructere match idx: {}".format(s1s))
self._show_common_core(
m1, self.get_common_core_idx_mol1(), show_atom_type=False, internal=True
)
s2s = m2.GetSubstructMatches(mcsp, maxMatches=max_matches)
logger.debug("Substructere match idx: {}".format(s2s))
self._show_common_core(
m2, self.get_common_core_idx_mol2(), show_atom_type=False, internal=True
)

curr_size_of_ccores = 0
for s1 in s1s:
for s2 in s2s:
# new code: add hydrogens to both common-core-on-molecule-projections
# set with all common core atom indices for both molecules
hit_ats1_compl = list(s1)
hit_ats2_compl = list(s2)

# check for each common core atom whether hydrogen atoms are in its neighbourhood
# s1/s2 contain the mapping of the common core (without hydrogens) to both molecules
# iterating over all mapped atoms, the number of hydrogens attached to the common core atom is determined
# the minimum number (i.e. if the atom of molecule 1 has one hydrogen bond, the atom of molecule 2 zero hydrogen bonds, it is zero) gives the number of hydrogen atoms to add to the common core

for indexpos, indexnr in enumerate(s1):
# get mapped atoms
atom1 = m1.GetAtomWithIdx(s1[indexpos])
atom2 = m2.GetAtomWithIdx(s2[indexpos])

# determine number of hydrogens in the neighbourhood of the atom from molecule1
h_atoms1 = 0
for x in atom1.GetNeighbors():
if x.GetSymbol() == "H":
h_atoms1 = h_atoms1 + 1

# determine number of hydrogens in the neighbourhood of the atom from molecule2
h_atoms2 = 0
for x in atom2.GetNeighbors():
if x.GetSymbol() == "H":
h_atoms2 = h_atoms2 + 1

# find minimum number of hydrogens
min_h_atoms = min(h_atoms1, h_atoms2)

# add minimum number of hydrogens to the ccore for molecule1
h_atoms1 = 0
for x in atom1.GetNeighbors():
if x.GetSymbol() == "H" and h_atoms1 < min_h_atoms:
hit_ats1_compl.append(x.GetIdx())
h_atoms1 = h_atoms1 + 1

# add minimum number of hydrogens to the ccore for molecule2
h_atoms2 = 0
for x in atom2.GetNeighbors():
if x.GetSymbol() == "H" and h_atoms2 < min_h_atoms:
hit_ats2_compl.append(x.GetIdx())
h_atoms2 = h_atoms2 + 1

# count whether the new common cores are bigger (i.e. contain more hydrogens) than the previous common cores
# if this is the case, the current substructure matches are chosen
if len(hit_ats1_compl) > curr_size_of_ccores:
curr_size_of_ccores = len(hit_ats1_compl)
hit_ats1_compl_final = hit_ats1_compl
hit_ats2_compl_final = hit_ats2_compl

# create new tuple of common core atom indices with additional hydrogens (molecule 1)
hit_ats1 = tuple(hit_ats1_compl_final)

# create new tuple of common core atom indices with additional hydrogens (molecule 2)
hit_ats2 = tuple(hit_ats2_compl_final)

self._substructure_match[mol1_name] = list(hit_ats1)
self._substructure_match[mol2_name] = list(hit_ats2)

# self._substructure_match[mol1_name] = list(s1)
# self._substructure_match[mol2_name] = list(s2)

return mcs
return mcs

def _return_atom_idx_from_bond_idx(self, mol: Chem.Mol, bond_idx: int):
return (
Expand Down
43 changes: 41 additions & 2 deletions transformato/tests/test_alchemical_path_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def test_generate_alchemical_path_for_2_CPI_to_common_core():
)

output_files = mutate_2_CPI_to_7_CPI_cc(configuration=configuration)
assert len(output_files) == 18
assert len(output_files) == 17


@pytest.mark.rsfe
Expand All @@ -244,7 +244,7 @@ def test_generate_alchemical_path_for_7_CPI_to_common_core():
)

output_files = mutate_7_CPI_to_2_CPI_cc(configuration=configuration)
assert len(output_files) == 13
assert len(output_files) == 12


@pytest.mark.rsfe
Expand All @@ -269,3 +269,42 @@ def test_generate_alchemical_path_for_1a0q_1a07(caplog):
)
perform_mutations(configuration=configuration, i=i, mutation_list=mutation_list)
assert i.current_step == 24


@pytest.mark.rbfe
def test_generate_path_for_ppar_system():
conf = f"{get_testsystems_dir()}/config/ppar-cpd31_cpd25.yaml"
configuration = load_config_yaml(
config=conf, input_dir=get_testsystems_dir(), output_dir=get_test_output_dir()
)

s1 = SystemStructure(configuration, "structure1")
s2 = SystemStructure(configuration, "structure2")
s1_to_s2 = ProposeMutationRoute(s1, s2)
s1_to_s2.propose_common_core()
s1_to_s2.finish_common_core()

assert (
len(s1_to_s2.get_common_core_idx_mol1())
== len(s1_to_s2.get_common_core_idx_mol2())
== 46
)

assert s1_to_s2.get_idx_not_in_common_core_for_mol1() == [31, 32, 33, 49, 50, 51]
assert s1_to_s2.get_idx_not_in_common_core_for_mol2() == [0, 1, 40, 49, 50, 51]

assert s1_to_s2.terminal_real_atom_cc1 == [19, 12]
assert s1_to_s2.terminal_real_atom_cc2 == [21, 14]

assert s1_to_s2.dummy_region_cc1.lj_default == [31, 33]
assert s1_to_s2.dummy_region_cc2.lj_default == [40, 1]
assert s1_to_s2.dummy_region_cc2.connected_dummy_regions == [
[49, 50, 51, 0, 1],
[40],
]
assert s1_to_s2.dummy_region_cc1.connected_dummy_regions == [
[33],
[51, 50, 49, 32, 31],
]

assert s1_to_s2.matching_terminal_atoms_between_cc == [(12, 14), (19, 21)]
Loading