diff --git a/transformato/mutate.py b/transformato/mutate.py index fdfa1361..14dbff3e 100644 --- a/transformato/mutate.py +++ b/transformato/mutate.py @@ -590,7 +590,7 @@ def _check_for_lp( ] lp_dict_dummy_region = defaultdict(list) lp_dict_common_core = defaultdict(list) - print(f"die version {pm.__version__}") + for atom in psf.view[f":{tlc}"].atoms: if atom.name.find("LP") == False: print(f"die Atome {atom}") @@ -959,6 +959,8 @@ def _find_mcs( self, mol1_name: str, mol2_name: str, + iterate_over_matches: bool = False, + max_matches: int = 10, ): """ A class that proposes the mutation route between two molecules with a @@ -980,6 +982,17 @@ def _find_mcs( m1, m2 = [deepcopy(self.mols[mol1_name]), deepcopy(self.mols[mol2_name])] + # second copy of mols - to use as representation with removed hydrogens + remmol1 = deepcopy(m1) + remmol2 = deepcopy(m2) + + # removal of hydrogens - if not removed, common core for molecule + hydrogens is computed! + remmol1 = Chem.rdmolops.RemoveAllHs(remmol1) + remmol2 = Chem.rdmolops.RemoveAllHs(remmol2) + + # remmols contains both molecules with removed hydrogens + remmols = [remmol1, remmol2] + for m in [m1, m2]: logger.debug("Mol in SMILES format: {}.".format(Chem.MolToSmiles(m, True))) @@ -987,8 +1000,15 @@ def _find_mcs( changed_mols = [Chem.Mol(x) for x in [m1, m2]] # find substructure match (ignore bond order but enforce element matching) + + # findmcs-function is called for mol-objects with removed hydrogens + + # original Transformato-parameters (yield bad / for Transformato not usable results for molecules with cyclic structures, e.g., ccores between 2-CPI and 7-CPI) + # especially because completeRingsOnly is set to False + """ mcs = rdFMCS.FindMCS( - changed_mols, + #changed_mols, + remmols, bondCompare=self.bondCompare, timeout=120, atomCompare=self.atomCompare, @@ -997,27 +1017,180 @@ def _find_mcs( completeRingsOnly=self.completeRingsOnly, ringMatchesRingOnly=self.ringMatchesRingOnly, ) + """ + + # find_mcs-function from tf_routes: + # yields more reasonable common cores (e.g. for 2-CPI/7-CPI ) + # in particular, completeRingsOnly=True is important + + mcs = rdFMCS.FindMCS( + remmols, + timeout=120, + ringMatchesRingOnly=True, + completeRingsOnly=True, + ringCompare=Chem.rdFMCS.RingCompare.StrictRingFusion, + bondCompare=rdFMCS.BondCompare.CompareAny, + matchValences=False, + ) + logger.debug("Substructure match: {}".format(mcs.smartsString)) # convert from SMARTS mcsp = Chem.MolFromSmarts(mcs.smartsString, False) - s1 = m1.GetSubstructMatch(mcsp) - logger.debug("Substructere match idx: {}".format(s1)) + # iterate_over_matches == False: the common core atoms for a single stubstructure match are determined + # possibly a different match yields a bigger ccore - i.e. a ccore with more hydrogens (neopentane - methane) + if iterate_over_matches == False: + s1 = m1.GetSubstructMatch(mcsp) + logger.debug("Substructere match idx: {}".format(s1)) + self._show_common_core( + m1, self.get_common_core_idx_mol1(), show_atom_type=False, internal=True + ) + s2 = m2.GetSubstructMatch(mcsp) + logger.debug("Substructere match idx: {}".format(s2)) + self._show_common_core( + m2, self.get_common_core_idx_mol2(), show_atom_type=False, internal=True + ) - self._show_common_core( - m1, self.get_common_core_idx_mol1(), show_atom_type=False, internal=True - ) + # new code: add hydrogens to both common-core-on-molecule-projections + # set with all common core atom indices for both molecules + hit_ats1_compl = list(s1) + hit_ats2_compl = list(s2) + + # check for each common core atom whether hydrogen atoms are in its neighbourhood + # s1/s2 contain the mapping of the common core (without hydrogens) to both molecules + # iterating over all mapped atoms, the number of hydrogens attached to the common core atom is determined + # the minimum number (i.e. if the atom of molecule 1 has one hydrogen bond, the atom of molecule 2 zero hydrogen bonds, it is zero) gives the number of hydrogen atoms to add to the common core + + for indexpos, indexnr in enumerate(s1): + # get mapped atoms + atom1 = m1.GetAtomWithIdx(s1[indexpos]) + atom2 = m2.GetAtomWithIdx(s2[indexpos]) + + # determine number of hydrogens in the neighbourhood of the atom from molecule1 + h_atoms1 = 0 + for x in atom1.GetNeighbors(): + if x.GetSymbol() == "H": + h_atoms1 = h_atoms1 + 1 + + # determine number of hydrogens in the neighbourhood of the atom from molecule2 + h_atoms2 = 0 + for x in atom2.GetNeighbors(): + if x.GetSymbol() == "H": + h_atoms2 = h_atoms2 + 1 + + # find minimum number of hydrogens + min_h_atoms = min(h_atoms1, h_atoms2) + + # add minimum number of hydrogens to the ccore for molecule1 + h_atoms1 = 0 + for x in atom1.GetNeighbors(): + if x.GetSymbol() == "H" and h_atoms1 < min_h_atoms: + hit_ats1_compl.append(x.GetIdx()) + h_atoms1 = h_atoms1 + 1 + + # add minimum number of hydrogens to the ccore for molecule2 + h_atoms2 = 0 + for x in atom2.GetNeighbors(): + if x.GetSymbol() == "H" and h_atoms2 < min_h_atoms: + hit_ats2_compl.append(x.GetIdx()) + h_atoms2 = h_atoms2 + 1 + + # create new tuple of common core atom indices with additional hydrogens (molecule 1) + hit_ats1 = tuple(hit_ats1_compl) + + # create new tuple of common core atom indices with additional hydrogens (molecule 2) + hit_ats2 = tuple(hit_ats2_compl) + + self._substructure_match[mol1_name] = list(hit_ats1) + self._substructure_match[mol2_name] = list(hit_ats2) + + # self._substructure_match[mol1_name] = list(s1) + # self._substructure_match[mol2_name] = list(s2) - s2 = m2.GetSubstructMatch(mcsp) - logger.debug("Substructere match idx: {}".format(s2)) + return mcs - self._show_common_core( - m2, self.get_common_core_idx_mol2(), show_atom_type=False, internal=True - ) - self._substructure_match[mol1_name] = list(s1) - self._substructure_match[mol2_name] = list(s2) + # iterate_over_matches == True: it is iterated over all pairs of substructure matches + # the substructure matches with the biggest emering common cores are finally chosen + # the common cores for different substructure match pairs contain the same heavy atoms, but differ in the number of hydrogens, i.e. the finally chosen matches have the common cores with most hydrogens + else: + s1s = m1.GetSubstructMatches(mcsp, maxMatches=max_matches) + logger.debug("Substructere match idx: {}".format(s1s)) + self._show_common_core( + m1, self.get_common_core_idx_mol1(), show_atom_type=False, internal=True + ) + s2s = m2.GetSubstructMatches(mcsp, maxMatches=max_matches) + logger.debug("Substructere match idx: {}".format(s2s)) + self._show_common_core( + m2, self.get_common_core_idx_mol2(), show_atom_type=False, internal=True + ) + + curr_size_of_ccores = 0 + for s1 in s1s: + for s2 in s2s: + # new code: add hydrogens to both common-core-on-molecule-projections + # set with all common core atom indices for both molecules + hit_ats1_compl = list(s1) + hit_ats2_compl = list(s2) + + # check for each common core atom whether hydrogen atoms are in its neighbourhood + # s1/s2 contain the mapping of the common core (without hydrogens) to both molecules + # iterating over all mapped atoms, the number of hydrogens attached to the common core atom is determined + # the minimum number (i.e. if the atom of molecule 1 has one hydrogen bond, the atom of molecule 2 zero hydrogen bonds, it is zero) gives the number of hydrogen atoms to add to the common core + + for indexpos, indexnr in enumerate(s1): + # get mapped atoms + atom1 = m1.GetAtomWithIdx(s1[indexpos]) + atom2 = m2.GetAtomWithIdx(s2[indexpos]) + + # determine number of hydrogens in the neighbourhood of the atom from molecule1 + h_atoms1 = 0 + for x in atom1.GetNeighbors(): + if x.GetSymbol() == "H": + h_atoms1 = h_atoms1 + 1 + + # determine number of hydrogens in the neighbourhood of the atom from molecule2 + h_atoms2 = 0 + for x in atom2.GetNeighbors(): + if x.GetSymbol() == "H": + h_atoms2 = h_atoms2 + 1 + + # find minimum number of hydrogens + min_h_atoms = min(h_atoms1, h_atoms2) + + # add minimum number of hydrogens to the ccore for molecule1 + h_atoms1 = 0 + for x in atom1.GetNeighbors(): + if x.GetSymbol() == "H" and h_atoms1 < min_h_atoms: + hit_ats1_compl.append(x.GetIdx()) + h_atoms1 = h_atoms1 + 1 + + # add minimum number of hydrogens to the ccore for molecule2 + h_atoms2 = 0 + for x in atom2.GetNeighbors(): + if x.GetSymbol() == "H" and h_atoms2 < min_h_atoms: + hit_ats2_compl.append(x.GetIdx()) + h_atoms2 = h_atoms2 + 1 + + # count whether the new common cores are bigger (i.e. contain more hydrogens) than the previous common cores + # if this is the case, the current substructure matches are chosen + if len(hit_ats1_compl) > curr_size_of_ccores: + curr_size_of_ccores = len(hit_ats1_compl) + hit_ats1_compl_final = hit_ats1_compl + hit_ats2_compl_final = hit_ats2_compl + + # create new tuple of common core atom indices with additional hydrogens (molecule 1) + hit_ats1 = tuple(hit_ats1_compl_final) + + # create new tuple of common core atom indices with additional hydrogens (molecule 2) + hit_ats2 = tuple(hit_ats2_compl_final) + + self._substructure_match[mol1_name] = list(hit_ats1) + self._substructure_match[mol2_name] = list(hit_ats2) + + # self._substructure_match[mol1_name] = list(s1) + # self._substructure_match[mol2_name] = list(s2) - return mcs + return mcs def _return_atom_idx_from_bond_idx(self, mol: Chem.Mol, bond_idx: int): return ( diff --git a/transformato/tests/test_alchemical_path_generation.py b/transformato/tests/test_alchemical_path_generation.py index 6566e59c..55396161 100644 --- a/transformato/tests/test_alchemical_path_generation.py +++ b/transformato/tests/test_alchemical_path_generation.py @@ -230,7 +230,7 @@ def test_generate_alchemical_path_for_2_CPI_to_common_core(): ) output_files = mutate_2_CPI_to_7_CPI_cc(configuration=configuration) - assert len(output_files) == 18 + assert len(output_files) == 17 @pytest.mark.rsfe @@ -244,7 +244,7 @@ def test_generate_alchemical_path_for_7_CPI_to_common_core(): ) output_files = mutate_7_CPI_to_2_CPI_cc(configuration=configuration) - assert len(output_files) == 13 + assert len(output_files) == 12 @pytest.mark.rsfe @@ -269,3 +269,42 @@ def test_generate_alchemical_path_for_1a0q_1a07(caplog): ) perform_mutations(configuration=configuration, i=i, mutation_list=mutation_list) assert i.current_step == 24 + + +@pytest.mark.rbfe +def test_generate_path_for_ppar_system(): + conf = f"{get_testsystems_dir()}/config/ppar-cpd31_cpd25.yaml" + configuration = load_config_yaml( + config=conf, input_dir=get_testsystems_dir(), output_dir=get_test_output_dir() + ) + + s1 = SystemStructure(configuration, "structure1") + s2 = SystemStructure(configuration, "structure2") + s1_to_s2 = ProposeMutationRoute(s1, s2) + s1_to_s2.propose_common_core() + s1_to_s2.finish_common_core() + + assert ( + len(s1_to_s2.get_common_core_idx_mol1()) + == len(s1_to_s2.get_common_core_idx_mol2()) + == 46 + ) + + assert s1_to_s2.get_idx_not_in_common_core_for_mol1() == [31, 32, 33, 49, 50, 51] + assert s1_to_s2.get_idx_not_in_common_core_for_mol2() == [0, 1, 40, 49, 50, 51] + + assert s1_to_s2.terminal_real_atom_cc1 == [19, 12] + assert s1_to_s2.terminal_real_atom_cc2 == [21, 14] + + assert s1_to_s2.dummy_region_cc1.lj_default == [31, 33] + assert s1_to_s2.dummy_region_cc2.lj_default == [40, 1] + assert s1_to_s2.dummy_region_cc2.connected_dummy_regions == [ + [49, 50, 51, 0, 1], + [40], + ] + assert s1_to_s2.dummy_region_cc1.connected_dummy_regions == [ + [33], + [51, 50, 49, 32, 31], + ] + + assert s1_to_s2.matching_terminal_atoms_between_cc == [(12, 14), (19, 21)] diff --git a/transformato/tests/test_mutation.py b/transformato/tests/test_mutation.py index 0000173d..493e9342 100644 --- a/transformato/tests/test_mutation.py +++ b/transformato/tests/test_mutation.py @@ -5,6 +5,8 @@ import copy import shutil import logging +import pytest +import os import numpy as np import parmed as pm @@ -159,12 +161,6 @@ def test_proposed_mutation_mcs(): 28, 27, 29, - 46, - 47, - 48, - 45, - 41, - 44, 2, 7, 11, @@ -173,16 +169,22 @@ def test_proposed_mutation_mcs(): 10, 13, 12, - 39, - 38, - 36, - 37, + 31, + 33, + 32, + 44, + 41, + 45, + 46, + 47, + 48, + 30, 34, 35, - 30, - 32, - 33, - 31, + 37, + 36, + 38, + 39, ] ) cc2 = set( @@ -203,12 +205,6 @@ def test_proposed_mutation_mcs(): 23, 22, 24, - 43, - 44, - 45, - 42, - 40, - 41, 2, 7, 11, @@ -217,16 +213,22 @@ def test_proposed_mutation_mcs(): 10, 13, 12, - 38, - 37, - 35, - 36, + 30, + 32, + 31, + 41, + 40, + 42, + 43, + 44, + 45, + 29, 33, 34, - 29, - 31, - 32, - 30, + 36, + 35, + 37, + 38, ] ) @@ -244,8 +246,8 @@ def test_proposed_mutation_mcs(): 0, 3, 6, - 33, - 31, + 5, + 4, 14, 24, 23, @@ -257,12 +259,30 @@ def test_proposed_mutation_mcs(): 28, 27, 29, + 2, + 7, + 11, + 9, + 8, + 10, + 13, + 12, + 31, + 33, + 32, + 44, + 41, + 45, 46, 47, 48, - 45, - 41, - 44, + 30, + 34, + 35, + 37, + 36, + 38, + 39, ] ) cc2 = set( @@ -270,8 +290,8 @@ def test_proposed_mutation_mcs(): 0, 3, 6, - 32, - 30, + 5, + 4, 14, 19, 18, @@ -283,12 +303,30 @@ def test_proposed_mutation_mcs(): 23, 22, 24, + 2, + 7, + 11, + 9, + 8, + 10, + 13, + 12, + 30, + 32, + 31, + 41, + 40, + 42, 43, 44, 45, - 42, - 40, - 41, + 29, + 33, + 34, + 36, + 35, + 37, + 38, ] ) @@ -349,6 +387,11 @@ def test_mutation_with_multiple_dummy_regions(caplog): s1_to_s2.finish_common_core() +@pytest.mark.rsfe +@pytest.mark.skipif( + os.getenv("CI") == "true", + reason="Skipping tests that cannot pass in github actions", +) def test_proposed_mutation_terminal_dummy_real_atom_match(): from rdkit.Chem import rdFMCS @@ -395,6 +438,11 @@ def test_proposed_mutation_terminal_dummy_real_atom_match(): # INFO transformato.mutate:mutate.py:139 Matching terminal atoms from cc1 to cc2. cc1: 16 : cc2: 15 +@pytest.mark.rsfe +@pytest.mark.skipif( + os.getenv("CI") == "true", + reason="Skipping tests that cannot pass in github actions", +) def test_find_connected_dummy_regions1(): workdir = get_test_output_dir() from rdkit.Chem import rdFMCS @@ -548,7 +596,7 @@ def test_common_core_for_multiple_systems(): a.propose_common_core() a.finish_common_core( connected_dummy_regions_cc1=[ - {0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} ] ) else: @@ -624,11 +672,7 @@ def test_generate_mutation_list_for_multiple_systems(): s1_to_s2 = ProposeMutationRoute(s1, s2) s1_to_s2.propose_common_core() - s1_to_s2.finish_common_core( - connected_dummy_regions_cc1=[ - {0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} - ] - ) + s1_to_s2.finish_common_core(connected_dummy_regions_cc2=[{1, 2, 3, 4}]) mutation_list = s1_to_s2.generate_mutations_to_common_core_for_mol1() assert set(mutation_list.keys()) == set( ["charge", "hydrogen-lj", "lj", "transform", "default-lj"] @@ -761,11 +805,7 @@ def test_charge_mutation_for_multiple_systems(): for a, system in zip([s1_to_s2], [s1]): if system_name == "neopentane-methane": a.propose_common_core() - a.finish_common_core( - connected_dummy_regions_cc1=[ - {0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} - ] - ) + a.finish_common_core(connected_dummy_regions_cc2=[{1, 2, 3, 4}]) else: a.calculate_common_core() @@ -819,7 +859,7 @@ def test_vdw_mutation_for_hydrogens_system1(): s1_to_s2.propose_common_core() s1_to_s2.finish_common_core( connected_dummy_regions_cc1=[ - {0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} ] ) else: @@ -898,8 +938,8 @@ def test_vdw_mutation_for_hydrogens_system2(): s1_to_s2 = ProposeMutationRoute(s1, s2) s1_to_s2.completeRingsOnly = True s1_to_s2.propose_common_core() - s1_to_s2.remove_idx_from_common_core_of_mol1([14]) - s1_to_s2.remove_idx_from_common_core_of_mol2([6]) + # s1_to_s2.remove_idx_from_common_core_of_mol1([14]) + # s1_to_s2.remove_idx_from_common_core_of_mol2([6]) s1_to_s2.finish_common_core() mutation_list = s1_to_s2.generate_mutations_to_common_core_for_mol1() @@ -1678,7 +1718,7 @@ def test_vdw_mutation_for_hydrogens_and_heavy_atoms(): s1_to_s2.propose_common_core() s1_to_s2.finish_common_core( connected_dummy_regions_cc1=[ - {0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} ] ) elif system_name == "7-CPI-2-CPI": @@ -1890,7 +1930,7 @@ def test_full_mutation_system1(caplog): s1_to_s2.propose_common_core() s1_to_s2.finish_common_core( connected_dummy_regions_cc1=[ - {0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} ] ) else: @@ -1953,7 +1993,7 @@ def test_full_mutation_system2(): s1_to_s2.propose_common_core() s1_to_s2.finish_common_core( connected_dummy_regions_cc1=[ - {0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} ] ) else: @@ -2055,7 +2095,9 @@ def test_generate_list_of_heavy_atoms_to_mutate(): s1_to_s2 = ProposeMutationRoute(s1, s2) s1_to_s2.propose_common_core() s1_to_s2.finish_common_core( - connected_dummy_regions_cc1=[{0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}] + connected_dummy_regions_cc1=[ + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} + ] ) mutation_list = s1_to_s2.generate_mutations_to_common_core_for_mol1()