From 08ef70d9dfd1666162d55a3d4931f8203feb2528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Sun, 13 Nov 2022 23:57:51 +0100 Subject: [PATCH 01/12] keep original hydrogens --- prolif/molecule.py | 35 ++++++++++++++++++++++++++++++----- tests/test_molecule.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/prolif/molecule.py b/prolif/molecule.py index b8d6f47..582c468 100644 --- a/prolif/molecule.py +++ b/prolif/molecule.py @@ -3,6 +3,7 @@ ======================================================= """ import copy +import warnings from collections.abc import Sequence from operator import attrgetter @@ -237,11 +238,35 @@ def pdbqt_to_mol(self, pdbqt_path): pdbqt.add_TopologyAttr("chainIDs", pdbqt.atoms.segids) pdbqt.atoms.types = pdbqt.atoms.elements # convert without infering bond orders and charges - mol = pdbqt.atoms.convert_to.rdkit(NoImplicit=False, - **self.converter_kwargs) - # assign BO from template then add hydrogens - mol = Chem.RemoveHs(mol, sanitize=False) - mol = AssignBondOrdersFromTemplate(self.template, mol) + pdbqt_mol = pdbqt.atoms.convert_to.rdkit( + NoImplicit=False, **self.converter_kwargs + ) + # remove explicit hydrogens and assign BO from template + pdbqt_noH = Chem.RemoveAllHs(pdbqt_mol, sanitize=False) + pdbqt_noH.UpdatePropertyCache(strict=False) + mol = AssignBondOrdersFromTemplate(self.template, pdbqt_noH) + mol = Chem.RWMol(mol) + # mapping between identifier of atom bearing H and H atom + hydrogens = { + atom.GetNeighbors()[0].GetProp("_MDAnalysis_index"): atom + for atom in pdbqt_mol.GetAtoms() + if atom.GetAtomicNum() == 1 + } + # mapping between atom that should be bearing a H in RWMol and corresponding H + hydrogen_mapping = { + atom.GetIdx(): hydrogens[idx] + for atom in mol.GetAtoms() + if (idx := atom.GetProp("_MDAnalysis_index")) in hydrogens + } + # add removed Hs + mol.BeginBatchEdit() + for atom_idx, hydrogen in hydrogen_mapping.items(): + h_idx = mol.AddAtom(hydrogen) + mol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE) + mol.GetAtomWithIdx(atom_idx).SetNoImplicit(True) + mol.CommitBatchEdit() + mol.UpdatePropertyCache(strict=False) + Chem.SanitizeMol(mol) mol = Chem.AddHs(mol, addCoords=True, addResidueInfo=True) return Molecule.from_rdkit(mol, **self._kwargs) diff --git a/tests/test_molecule.py b/tests/test_molecule.py index 0af16e9..63e9533 100644 --- a/tests/test_molecule.py +++ b/tests/test_molecule.py @@ -1,3 +1,4 @@ +from collections import defaultdict import pytest from MDAnalysis import SelectionError from numpy.testing import assert_array_equal @@ -6,6 +7,7 @@ sdf_supplier) from prolif.residue import ResidueId from rdkit import Chem +from rdkit.Chem import AllChem from .test_base import TestBaseRDKitMol, ligand_rdkit, rdkit_mol, u @@ -120,3 +122,42 @@ def test_mol2_starting_with_comment(self): suppl = mol2_supplier(path) mol = next(iter(suppl)) assert mol is not None + + +def test_pdbqt(): + smiles_params = Chem.SmilesParserParams() + smiles_params.sanitize = False + smiles_params.removeHs = False + pdb_mol = Chem.MolFromSmiles( + "[C@@]1(C)(N(C(O)[C@H]2C[N@](C)([H])[C@@H]3CC4CN([H])C5[C@@H]4[C@H]([C@@H]3C2)CCC5)[H])[C@H](O)N2[C@@H](CC3CCCCC3)[C@@H](O)N3CCC[C@H]3[C@]2(O[H])O1", + smiles_params + ) + for atom in pdb_mol.GetAtoms(): + atom.SetUnsignedProp("pdbindex", atom.GetIdx()) + # remove Hs to match with template + pdb_noh = Chem.RemoveAllHs(pdb_mol, sanitize=False) + template = Chem.MolFromSmiles("C[NH+]1CC(C(=O)NC2(C)OC3(O)C4CCCN4C(=O)C(Cc4ccccc4)N3C2=O)C=C2c3cccc4[nH]cc(c34)CC21") + mol = AllChem.AssignBondOrdersFromTemplate(template, pdb_noh) + mol = Chem.RWMol(mol) + # mapping between pdbindex of atom bearing H --> H atom(s) + atoms_with_hydrogens = defaultdict(list) + for atom in pdb_mol.GetAtoms(): + if atom.GetAtomicNum() == 1: + atoms_with_hydrogens[atom.GetNeighbors()[0].GetProp("pdbindex")].append(atom) + # mapping between atom that should be bearing a H in RWMol and corresponding H(s) + reverse_mapping = { + atom.GetIdx(): atoms_with_hydrogens[idx] + for atom in mol.GetAtoms() + if (idx := atom.GetProp("pdbindex")) in atoms_with_hydrogens + } + # add missing Hs + mol.BeginBatchEdit() + for atom_idx, hydrogens in reverse_mapping.items(): + for hydrogen in hydrogens: + h_idx = mol.AddAtom(hydrogen) + mol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE) + mol.GetAtomWithIdx(atom_idx).SetNoImplicit(True) + mol.CommitBatchEdit() + mol.UpdatePropertyCache(strict=False) + # sanitize fails with Explicit valence for atom # 7 N, 4, is greater than permitted + Chem.SanitizeMol(mol) From 140fc764bf3a46c256aa96145a5b79e7db031de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 16 Nov 2022 00:31:51 +0100 Subject: [PATCH 02/12] fix hydrogens from pdbqt --- prolif/molecule.py | 53 +++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/prolif/molecule.py b/prolif/molecule.py index 582c468..2f3e880 100644 --- a/prolif/molecule.py +++ b/prolif/molecule.py @@ -3,7 +3,7 @@ ======================================================= """ import copy -import warnings +from collections import defaultdict from collections.abc import Sequence from operator import attrgetter @@ -241,34 +241,43 @@ def pdbqt_to_mol(self, pdbqt_path): pdbqt_mol = pdbqt.atoms.convert_to.rdkit( NoImplicit=False, **self.converter_kwargs ) + mol = self._adjust_hydrogens(self.template, pdbqt_mol) + return Molecule.from_rdkit(mol, **self._kwargs) + + @staticmethod + def _adjust_hydrogens(template, pdbqt_mol): # remove explicit hydrogens and assign BO from template pdbqt_noH = Chem.RemoveAllHs(pdbqt_mol, sanitize=False) - pdbqt_noH.UpdatePropertyCache(strict=False) - mol = AssignBondOrdersFromTemplate(self.template, pdbqt_noH) + mol = AssignBondOrdersFromTemplate(template, pdbqt_noH) mol = Chem.RWMol(mol) - # mapping between identifier of atom bearing H and H atom - hydrogens = { - atom.GetNeighbors()[0].GetProp("_MDAnalysis_index"): atom - for atom in pdbqt_mol.GetAtoms() - if atom.GetAtomicNum() == 1 - } - # mapping between atom that should be bearing a H in RWMol and corresponding H - hydrogen_mapping = { - atom.GetIdx(): hydrogens[idx] - for atom in mol.GetAtoms() - if (idx := atom.GetProp("_MDAnalysis_index")) in hydrogens - } - # add removed Hs + # mapping between pdbindex of atom bearing H --> H atom(s) + atoms_with_hydrogens = defaultdict(list) + for atom in pdbqt_mol.GetAtoms(): + if atom.GetAtomicNum() == 1: + atoms_with_hydrogens[ + atom.GetNeighbors()[0].GetIntProp("_MDAnalysis_index") + ].append(atom) + # mapping between atom that should be bearing a H in RWMol and corresponding H(s) + reverse_mapping = {} + for atom in mol.GetAtoms(): + if (idx := atom.GetIntProp("_MDAnalysis_index")) in atoms_with_hydrogens: + reverse_mapping[atom.GetIdx()] = atoms_with_hydrogens[idx] + atom.SetNumExplicitHs(0) + elif atom.GetNumRadicalElectrons() > 0: + atom.SetNumRadicalElectrons(0) + atom.SetNoImplicit(False) + # add missing Hs mol.BeginBatchEdit() - for atom_idx, hydrogen in hydrogen_mapping.items(): - h_idx = mol.AddAtom(hydrogen) - mol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE) - mol.GetAtomWithIdx(atom_idx).SetNoImplicit(True) + for atom_idx, hydrogens in reverse_mapping.items(): + for hydrogen in hydrogens: + h_idx = mol.AddAtom(hydrogen) + mol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE) mol.CommitBatchEdit() - mol.UpdatePropertyCache(strict=False) + # sanitize + mol.UpdatePropertyCache() Chem.SanitizeMol(mol) mol = Chem.AddHs(mol, addCoords=True, addResidueInfo=True) - return Molecule.from_rdkit(mol, **self._kwargs) + return mol def __len__(self): return len(self.paths) From 2db3a5cba9e3eb3a017b7c5ed896447bbc604ae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 16 Nov 2022 00:32:07 +0100 Subject: [PATCH 03/12] test hydrogen preservation in pdbqt_suppl --- tests/test_molecule.py | 46 +++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/tests/test_molecule.py b/tests/test_molecule.py index 63e9533..891b1d4 100644 --- a/tests/test_molecule.py +++ b/tests/test_molecule.py @@ -1,4 +1,3 @@ -from collections import defaultdict import pytest from MDAnalysis import SelectionError from numpy.testing import assert_array_equal @@ -7,7 +6,6 @@ sdf_supplier) from prolif.residue import ResidueId from rdkit import Chem -from rdkit.Chem import AllChem from .test_base import TestBaseRDKitMol, ligand_rdkit, rdkit_mol, u @@ -124,7 +122,7 @@ def test_mol2_starting_with_comment(self): assert mol is not None -def test_pdbqt(): +def test_pdbqt_hydrogens_stay_in_mol(): smiles_params = Chem.SmilesParserParams() smiles_params.sanitize = False smiles_params.removeHs = False @@ -132,32 +130,20 @@ def test_pdbqt(): "[C@@]1(C)(N(C(O)[C@H]2C[N@](C)([H])[C@@H]3CC4CN([H])C5[C@@H]4[C@H]([C@@H]3C2)CCC5)[H])[C@H](O)N2[C@@H](CC3CCCCC3)[C@@H](O)N3CCC[C@H]3[C@]2(O[H])O1", smiles_params ) + h_indices = set() for atom in pdb_mol.GetAtoms(): - atom.SetUnsignedProp("pdbindex", atom.GetIdx()) - # remove Hs to match with template - pdb_noh = Chem.RemoveAllHs(pdb_mol, sanitize=False) - template = Chem.MolFromSmiles("C[NH+]1CC(C(=O)NC2(C)OC3(O)C4CCCN4C(=O)C(Cc4ccccc4)N3C2=O)C=C2c3cccc4[nH]cc(c34)CC21") - mol = AllChem.AssignBondOrdersFromTemplate(template, pdb_noh) - mol = Chem.RWMol(mol) - # mapping between pdbindex of atom bearing H --> H atom(s) - atoms_with_hydrogens = defaultdict(list) - for atom in pdb_mol.GetAtoms(): + atom.SetIntProp("_MDAnalysis_index", atom.GetIdx()) if atom.GetAtomicNum() == 1: - atoms_with_hydrogens[atom.GetNeighbors()[0].GetProp("pdbindex")].append(atom) - # mapping between atom that should be bearing a H in RWMol and corresponding H(s) - reverse_mapping = { - atom.GetIdx(): atoms_with_hydrogens[idx] - for atom in mol.GetAtoms() - if (idx := atom.GetProp("pdbindex")) in atoms_with_hydrogens - } - # add missing Hs - mol.BeginBatchEdit() - for atom_idx, hydrogens in reverse_mapping.items(): - for hydrogen in hydrogens: - h_idx = mol.AddAtom(hydrogen) - mol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE) - mol.GetAtomWithIdx(atom_idx).SetNoImplicit(True) - mol.CommitBatchEdit() - mol.UpdatePropertyCache(strict=False) - # sanitize fails with Explicit valence for atom # 7 N, 4, is greater than permitted - Chem.SanitizeMol(mol) + h_indices.add(atom.GetIdx()) + atom.SetBoolProp("flagged", True) + else: + atom.SetBoolProp("flagged", False) + template = Chem.MolFromSmiles("C[NH+]1CC(C(=O)NC2(C)OC3(O)C4CCCN4C(=O)C(Cc4ccccc4)N3C2=O)C=C2c3cccc4[nH]cc(c34)CC21") + mol = pdbqt_supplier._adjust_hydrogens(template, pdb_mol) + hydrogens = [ + atom for atom in mol.GetAtoms() + if atom.HasProp("_MDAnalysis_index") + and atom.GetIntProp("_MDAnalysis_index") in h_indices + ] + assert all(atom.GetBoolProp("flagged") for atom in hydrogens) + From a986cde59a773a6257aebd487dfb0994dcb0a07f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 16 Nov 2022 00:34:13 +0100 Subject: [PATCH 04/12] add pdbqt fix to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c6403c..001f2f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Dead link in the quickstart notebook for the MDAnalysis quickstart (PR #75, @radifar). +- The `pdbqt_supplier` now correctly preserves hydrogens from the input PDBQT file. ## [1.0.0] - 2022-06-07 ### Added From 2df62876ca4d0666769c1993d1302dd8b8390454 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 16 Nov 2022 00:36:32 +0100 Subject: [PATCH 05/12] fix CI coverage upload --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 66b78c4..65aa862 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -70,9 +70,9 @@ jobs: pytest --color=yes --disable-pytest-warnings --cov=prolif --cov-report=xml tests/ - name: Measure tests coverage - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 with: - file: coverage.xml + files: ./coverage.xml fail_ci_if_error: True verbose: True From 63db316801077cc86f71f4c555cc12707d34bd54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 16 Nov 2022 00:52:00 +0100 Subject: [PATCH 06/12] update rdkit version --- .github/workflows/ci.yml | 6 +++--- CHANGELOG.md | 1 + docs/source/installation.rst | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 65aa862..12436a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,13 +25,13 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.8, 3.9, "3.10"] - rdkit-version: ["rdkit>=2020.09.1"] + python-version: [3.9, "3.10"] + rdkit-version: ["rdkit>2021.03.1"] include: - name: Min version os: ubuntu-latest python-version: 3.8 - rdkit-version: "rdkit=2020.03.1 boost-cpp=1.72.0=h359cf19_6" + rdkit-version: "rdkit=2021.03.1" steps: - uses: actions/checkout@v2 diff --git a/CHANGELOG.md b/CHANGELOG.md index 001f2f3..686b05b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Metal ligand: exclude amides and some amines. - The Pi stacking interactions have been changed for a more accurate implementation (PR #97). +- Updated the minimal RDKit version to `2021.03.1` ### Fixed - Dead link in the quickstart notebook for the MDAnalysis quickstart (PR #75, @radifar). diff --git a/docs/source/installation.rst b/docs/source/installation.rst index e852ba9..82a1a83 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -4,7 +4,7 @@ Installation Requirements: * Python 3.8+ -* `RDKit `_ (2020.03+) +* `RDKit `_ (2021.03+) * `MDAnalysis `_ (2.2+) * `Pandas `_ (1.0+) * `NumPy `_ From 3ce48e5bf36c8b806b7236bdd8a71b0adb300f9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 16 Nov 2022 22:21:26 +0100 Subject: [PATCH 07/12] transfer coordinates from PDBQT --- prolif/molecule.py | 36 ++++++++++++++--------- tests/test_molecule.py | 67 ++++++++++++++++++++---------------------- 2 files changed, 54 insertions(+), 49 deletions(-) diff --git a/prolif/molecule.py b/prolif/molecule.py index 2f3e880..402dfcd 100644 --- a/prolif/molecule.py +++ b/prolif/molecule.py @@ -174,7 +174,7 @@ def n_residues(self): class pdbqt_supplier(Sequence): - """Supplies molecules, given a path to PDBQT files + """Supplies molecules, given paths to PDBQT files Parameters ---------- @@ -211,6 +211,14 @@ class pdbqt_supplier(Sequence): .. versionchanged:: 1.0.0 Molecule suppliers are now sequences that can be reused, indexed, and can return their length, instead of single-use generators. + + .. versionchanged:: 1.1.0 + Because the PDBQT supplier needs to strip hydrogen atoms before + assigning bond orders from the template, it used to replace them + entirely with hydrogens containing new coordinates. It now directly + uses the hydrogen atoms present in the file and won't add explicit + ones anymore, to prevent the fingerprint from detecting hydrogen bonds + with "random" hydrogen atoms. """ def __init__(self, paths, template, converter_kwargs=None, **kwargs): @@ -249,7 +257,6 @@ def _adjust_hydrogens(template, pdbqt_mol): # remove explicit hydrogens and assign BO from template pdbqt_noH = Chem.RemoveAllHs(pdbqt_mol, sanitize=False) mol = AssignBondOrdersFromTemplate(template, pdbqt_noH) - mol = Chem.RWMol(mol) # mapping between pdbindex of atom bearing H --> H atom(s) atoms_with_hydrogens = defaultdict(list) for atom in pdbqt_mol.GetAtoms(): @@ -263,20 +270,21 @@ def _adjust_hydrogens(template, pdbqt_mol): if (idx := atom.GetIntProp("_MDAnalysis_index")) in atoms_with_hydrogens: reverse_mapping[atom.GetIdx()] = atoms_with_hydrogens[idx] atom.SetNumExplicitHs(0) - elif atom.GetNumRadicalElectrons() > 0: - atom.SetNumRadicalElectrons(0) - atom.SetNoImplicit(False) # add missing Hs - mol.BeginBatchEdit() - for atom_idx, hydrogens in reverse_mapping.items(): - for hydrogen in hydrogens: - h_idx = mol.AddAtom(hydrogen) - mol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE) - mol.CommitBatchEdit() + pdb_conf = pdbqt_mol.GetConformer() + mol_conf = mol.GetConformer() + with Chem.RWMol(mol) as rwmol: + for atom_idx, hydrogens in reverse_mapping.items(): + for hydrogen in hydrogens: + h_idx = rwmol.AddAtom(hydrogen) + xyz = pdb_conf.GetAtomPosition(hydrogen.GetIdx()) + mol_conf.SetAtomPosition(h_idx, xyz) + rwmol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE) + mol = rwmol.GetMol() # sanitize - mol.UpdatePropertyCache() - Chem.SanitizeMol(mol) - mol = Chem.AddHs(mol, addCoords=True, addResidueInfo=True) + rwmol.UpdatePropertyCache() + Chem.SanitizeMol(rwmol) + # mol = Chem.AddHs(rwmol, addCoords=True, addResidueInfo=True) return mol def __len__(self): diff --git a/tests/test_molecule.py b/tests/test_molecule.py index 891b1d4..ae119dc 100644 --- a/tests/test_molecule.py +++ b/tests/test_molecule.py @@ -1,4 +1,5 @@ import pytest +from copy import deepcopy from MDAnalysis import SelectionError from numpy.testing import assert_array_equal from prolif.datafiles import datapath @@ -79,14 +80,11 @@ def test_monomer_info(self, suppl): resid = ResidueId.from_atom(mol.GetAtomWithIdx(0)) assert resid == self.resid - @pytest.mark.parametrize("index", [0, 2, 8, -1]) - def test_index(self, suppl, index): - index %= 9 - mol_i = suppl[index] - for i, mol in enumerate(suppl): - if i == index: - break - assert_array_equal(mol.xyz, mol_i.xyz) + def test_index(self, suppl): + mols = list(suppl) + for index in [0, 2, 8, -1]: + mol_i = suppl[index] + assert_array_equal(mols[index].xyz, mol_i.xyz) class TestPDBQTSupplier(SupplierBase): @@ -101,6 +99,32 @@ def suppl(self): "(c34)CC21") return pdbqt_supplier(pdbqts, template) + def test_pdbqt_hydrogens_stay_in_mol(self): + template = Chem.RemoveHs(ligand_rdkit) + indices = [] + rwmol = Chem.RWMol(ligand_rdkit) + rwmol.BeginBatchEdit() + for atom in rwmol.GetAtoms(): + idx = atom.GetIdx() + atom.SetIntProp("_MDAnalysis_index", idx) + if atom.GetAtomicNum() == 1: + if idx % 2: + indices.append(idx) + else: + neighbor = atom.GetNeighbors()[0] + rwmol.RemoveBond(idx, neighbor.GetIdx()) + rwmol.RemoveAtom(idx) + neighbor.SetNumExplicitHs(1) + rwmol.CommitBatchEdit() + pdbqt_mol = rwmol.GetMol() + mol = pdbqt_supplier._adjust_hydrogens(template, pdbqt_mol) + hydrogens = [ + idx for atom in mol.GetAtoms() + if atom.HasProp("_MDAnalysis_index") + and (idx := atom.GetIntProp("_MDAnalysis_index")) in indices + ] + assert hydrogens == indices + class TestSDFSupplier(SupplierBase): @pytest.fixture @@ -120,30 +144,3 @@ def test_mol2_starting_with_comment(self): suppl = mol2_supplier(path) mol = next(iter(suppl)) assert mol is not None - - -def test_pdbqt_hydrogens_stay_in_mol(): - smiles_params = Chem.SmilesParserParams() - smiles_params.sanitize = False - smiles_params.removeHs = False - pdb_mol = Chem.MolFromSmiles( - "[C@@]1(C)(N(C(O)[C@H]2C[N@](C)([H])[C@@H]3CC4CN([H])C5[C@@H]4[C@H]([C@@H]3C2)CCC5)[H])[C@H](O)N2[C@@H](CC3CCCCC3)[C@@H](O)N3CCC[C@H]3[C@]2(O[H])O1", - smiles_params - ) - h_indices = set() - for atom in pdb_mol.GetAtoms(): - atom.SetIntProp("_MDAnalysis_index", atom.GetIdx()) - if atom.GetAtomicNum() == 1: - h_indices.add(atom.GetIdx()) - atom.SetBoolProp("flagged", True) - else: - atom.SetBoolProp("flagged", False) - template = Chem.MolFromSmiles("C[NH+]1CC(C(=O)NC2(C)OC3(O)C4CCCN4C(=O)C(Cc4ccccc4)N3C2=O)C=C2c3cccc4[nH]cc(c34)CC21") - mol = pdbqt_supplier._adjust_hydrogens(template, pdb_mol) - hydrogens = [ - atom for atom in mol.GetAtoms() - if atom.HasProp("_MDAnalysis_index") - and atom.GetIntProp("_MDAnalysis_index") in h_indices - ] - assert all(atom.GetBoolProp("flagged") for atom in hydrogens) - From 7f8ac137c487b911ae0bce14b2ff9c43c8bcf822 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 16 Nov 2022 22:36:49 +0100 Subject: [PATCH 08/12] fix hydrogen positions in rwmol --- prolif/molecule.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/prolif/molecule.py b/prolif/molecule.py index 402dfcd..5de2974 100644 --- a/prolif/molecule.py +++ b/prolif/molecule.py @@ -272,19 +272,19 @@ def _adjust_hydrogens(template, pdbqt_mol): atom.SetNumExplicitHs(0) # add missing Hs pdb_conf = pdbqt_mol.GetConformer() - mol_conf = mol.GetConformer() - with Chem.RWMol(mol) as rwmol: - for atom_idx, hydrogens in reverse_mapping.items(): - for hydrogen in hydrogens: - h_idx = rwmol.AddAtom(hydrogen) - xyz = pdb_conf.GetAtomPosition(hydrogen.GetIdx()) - mol_conf.SetAtomPosition(h_idx, xyz) - rwmol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE) - mol = rwmol.GetMol() + rwmol = Chem.RWMol(mol) + mol_conf = rwmol.GetConformer() + for atom_idx, hydrogens in reverse_mapping.items(): + for hydrogen in hydrogens: + h_idx = rwmol.AddAtom(hydrogen) + xyz = pdb_conf.GetAtomPosition(hydrogen.GetIdx()) + mol_conf.SetAtomPosition(h_idx, xyz) + rwmol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE) + mol = rwmol.GetMol() # sanitize - rwmol.UpdatePropertyCache() - Chem.SanitizeMol(rwmol) - # mol = Chem.AddHs(rwmol, addCoords=True, addResidueInfo=True) + mol.UpdatePropertyCache() + Chem.SanitizeMol(mol) + # mol = Chem.AddHs(mol, addCoords=True, addResidueInfo=False) return mol def __len__(self): From 863507d26c61e5c65164fd2683f1fb42fdccc724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 16 Nov 2022 23:17:53 +0100 Subject: [PATCH 09/12] block logs and warnings --- CHANGELOG.md | 3 +++ prolif/molecule.py | 23 +++++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 686b05b..dc9d03a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Metal ligand: exclude amides and some amines. - The Pi stacking interactions have been changed for a more accurate implementation (PR #97). +- The `pdbqt_supplier` will not add explicit hydrogen atoms anymore, to avoid detecting + hydrogen bonds with "random" hydrogens that weren't in the PDBQT file. +- When using the `pdbqt_supplier`, irrelevant warnings and logs have been disabled. - Updated the minimal RDKit version to `2021.03.1` ### Fixed diff --git a/prolif/molecule.py b/prolif/molecule.py index 5de2974..625935b 100644 --- a/prolif/molecule.py +++ b/prolif/molecule.py @@ -3,12 +3,13 @@ ======================================================= """ import copy +import warnings from collections import defaultdict from collections.abc import Sequence from operator import attrgetter import MDAnalysis as mda -from rdkit import Chem +from rdkit import Chem, rdBase from rdkit.Chem.AllChem import AssignBondOrdersFromTemplate from .rdkitmol import BaseRDKitMol @@ -219,6 +220,7 @@ class pdbqt_supplier(Sequence): uses the hydrogen atoms present in the file and won't add explicit ones anymore, to prevent the fingerprint from detecting hydrogen bonds with "random" hydrogen atoms. + A lot of irrelevant warnings and logs have been disabled as well. """ def __init__(self, paths, template, converter_kwargs=None, **kwargs): @@ -238,7 +240,11 @@ def __getitem__(self, index): return self.pdbqt_to_mol(pdbqt_path) def pdbqt_to_mol(self, pdbqt_path): - pdbqt = mda.Universe(pdbqt_path) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", message=r"^Failed to guess the mass" + ) + pdbqt = mda.Universe(pdbqt_path) # set attributes needed by the converter elements = [mda.topology.guessers.guess_atom_element(x) for x in pdbqt.atoms.names] @@ -246,9 +252,13 @@ def pdbqt_to_mol(self, pdbqt_path): pdbqt.add_TopologyAttr("chainIDs", pdbqt.atoms.segids) pdbqt.atoms.types = pdbqt.atoms.elements # convert without infering bond orders and charges - pdbqt_mol = pdbqt.atoms.convert_to.rdkit( - NoImplicit=False, **self.converter_kwargs - ) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", message=r"^Could not sanitize molecule" + ) + pdbqt_mol = pdbqt.atoms.convert_to.rdkit( + NoImplicit=False, **self.converter_kwargs + ) mol = self._adjust_hydrogens(self.template, pdbqt_mol) return Molecule.from_rdkit(mol, **self._kwargs) @@ -256,7 +266,8 @@ def pdbqt_to_mol(self, pdbqt_path): def _adjust_hydrogens(template, pdbqt_mol): # remove explicit hydrogens and assign BO from template pdbqt_noH = Chem.RemoveAllHs(pdbqt_mol, sanitize=False) - mol = AssignBondOrdersFromTemplate(template, pdbqt_noH) + with rdBase.BlockLogs(): + mol = AssignBondOrdersFromTemplate(template, pdbqt_noH) # mapping between pdbindex of atom bearing H --> H atom(s) atoms_with_hydrogens = defaultdict(list) for atom in pdbqt_mol.GetAtoms(): From a776c625e00187934ae1fe0763454590cfe09e5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 16 Nov 2022 23:32:52 +0100 Subject: [PATCH 10/12] fix BlockLogs not contextmanaged in older version --- prolif/molecule.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/prolif/molecule.py b/prolif/molecule.py index 625935b..606e64e 100644 --- a/prolif/molecule.py +++ b/prolif/molecule.py @@ -266,8 +266,9 @@ def pdbqt_to_mol(self, pdbqt_path): def _adjust_hydrogens(template, pdbqt_mol): # remove explicit hydrogens and assign BO from template pdbqt_noH = Chem.RemoveAllHs(pdbqt_mol, sanitize=False) - with rdBase.BlockLogs(): - mol = AssignBondOrdersFromTemplate(template, pdbqt_noH) + block = rdBase.BlockLogs() + mol = AssignBondOrdersFromTemplate(template, pdbqt_noH) + del block # mapping between pdbindex of atom bearing H --> H atom(s) atoms_with_hydrogens = defaultdict(list) for atom in pdbqt_mol.GetAtoms(): From 7191df3611326260540457bc6cb9a5a532dbf799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Thu, 17 Nov 2022 01:12:51 +0100 Subject: [PATCH 11/12] fix docs description --- docs/notebooks/how-to.ipynb | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/notebooks/how-to.ipynb b/docs/notebooks/how-to.ipynb index f0cf3c2..fa0d52b 100644 --- a/docs/notebooks/how-to.ipynb +++ b/docs/notebooks/how-to.ipynb @@ -604,9 +604,7 @@ "Next, we'll use the PDBQT supplier which loads each file from a list of paths, and assigns bond orders and charges using the template. The template and PDBQT file must have the exact same atoms, **even hydrogens**, otherwise no match will be found. Since PDBQT files partially keep the hydrogen atoms, we have the choice between:\n", "\n", "* Manually selecting where to add the hydrogens on the template, do the matching, then add the remaining hydrogens (not covered here)\n", - "* Or just remove the hydrogens from the PDBQT file, do the matching, then add all hydrogens.\n", - "\n", - "This last option will delete the coordinates of your hydrogens atoms and replace them by the ones generated by RDKit, but unless you're working with an exotic system this should be fine.\n", + "* Or just remove the hydrogens from the PDBQT file, do the matching, then re-add the original hydrogens.\n", "\n", "For the protein, there's usually no need to load the PDBQT that was used by Vina. The original file that was used to generate the PDBQT can be used directly, but **it must contain explicit hydrogen atoms**:" ] @@ -626,6 +624,13 @@ "df = fp.to_dataframe()\n", "df" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 4dad51a8541cb705b89d0e8c18977f6dec90d40d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Thu, 17 Nov 2022 01:13:13 +0100 Subject: [PATCH 12/12] add contextmanagers for catching warnings --- prolif/molecule.py | 23 +++++++++-------------- prolif/utils.py | 24 ++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/prolif/molecule.py b/prolif/molecule.py index 606e64e..9ce8764 100644 --- a/prolif/molecule.py +++ b/prolif/molecule.py @@ -3,18 +3,17 @@ ======================================================= """ import copy -import warnings from collections import defaultdict from collections.abc import Sequence from operator import attrgetter import MDAnalysis as mda -from rdkit import Chem, rdBase +from rdkit import Chem from rdkit.Chem.AllChem import AssignBondOrdersFromTemplate from .rdkitmol import BaseRDKitMol from .residue import Residue, ResidueGroup -from .utils import split_mol_by_residues +from .utils import catch_rdkit_logs, catch_warning, split_mol_by_residues class Molecule(BaseRDKitMol): @@ -240,10 +239,7 @@ def __getitem__(self, index): return self.pdbqt_to_mol(pdbqt_path) def pdbqt_to_mol(self, pdbqt_path): - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", message=r"^Failed to guess the mass" - ) + with catch_warning(message=r"^Failed to guess the mass"): pdbqt = mda.Universe(pdbqt_path) # set attributes needed by the converter elements = [mda.topology.guessers.guess_atom_element(x) @@ -252,10 +248,10 @@ def pdbqt_to_mol(self, pdbqt_path): pdbqt.add_TopologyAttr("chainIDs", pdbqt.atoms.segids) pdbqt.atoms.types = pdbqt.atoms.elements # convert without infering bond orders and charges - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", message=r"^Could not sanitize molecule" - ) + with catch_rdkit_logs(), catch_warning( + message=r"^(Could not sanitize molecule)|" + r"(No `bonds` attribute in this AtomGroup)" + ): pdbqt_mol = pdbqt.atoms.convert_to.rdkit( NoImplicit=False, **self.converter_kwargs ) @@ -266,9 +262,8 @@ def pdbqt_to_mol(self, pdbqt_path): def _adjust_hydrogens(template, pdbqt_mol): # remove explicit hydrogens and assign BO from template pdbqt_noH = Chem.RemoveAllHs(pdbqt_mol, sanitize=False) - block = rdBase.BlockLogs() - mol = AssignBondOrdersFromTemplate(template, pdbqt_noH) - del block + with catch_rdkit_logs(): + mol = AssignBondOrdersFromTemplate(template, pdbqt_noH) # mapping between pdbindex of atom bearing H --> H atom(s) atoms_with_hydrogens = defaultdict(list) for atom in pdbqt_mol.GetAtoms(): diff --git a/prolif/utils.py b/prolif/utils.py index 11d0eda..a68220a 100644 --- a/prolif/utils.py +++ b/prolif/utils.py @@ -2,8 +2,10 @@ Helper functions --- :mod:`prolif.utils` ======================================== """ +import warnings from collections import defaultdict from collections.abc import Iterable +from contextlib import contextmanager from copy import deepcopy from functools import wraps from importlib.util import find_spec @@ -11,6 +13,7 @@ import numpy as np import pandas as pd +from rdkit import rdBase from rdkit.Chem import FragmentOnBonds, GetMolFrags, SplitMolByPDBResidues from rdkit.DataStructs import ExplicitBitVect from rdkit.Geometry import Point3D @@ -34,6 +37,27 @@ def wrapper(*args, **kwargs): return inner +@contextmanager +def catch_rdkit_logs(): + log_status = rdBase.LogStatus() + rdBase.DisableLog("rdApp.*") + yield + log_status = {st.split(":")[0]: st.split(":")[1] for st in log_status.split("\n")} + log_status = {k: True if v == "enabled" else False for k, v in log_status.items()} + for k, v in log_status.items(): + if v is True: + rdBase.EnableLog(k) + else: + rdBase.DisableLog(k) + + +@contextmanager +def catch_warning(**kwargs): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", **kwargs) + yield + + def get_centroid(coordinates): """Centroid for an array of XYZ coordinates""" return np.mean(coordinates, axis=0)