From 9ab0e5cbfa783a633e46a8c9ac1b1052711e46d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 25 Oct 2024 00:52:03 +0200 Subject: [PATCH] VdwContact presets (#223) * add presets for VdWContact * fix intersphinx_mapping --- CHANGELOG.md | 2 + docs/conf.py | 12 +-- prolif/fingerprint.py | 6 +- prolif/interactions/constants.py | 114 ++++++++++++++++++++++++++++ prolif/interactions/interactions.py | 46 +++++++++-- tests/test_interactions.py | 20 +++++ 6 files changed, 186 insertions(+), 14 deletions(-) create mode 100644 prolif/interactions/constants.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 531486d..8b81247 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `VdWContact` now accepts a `preset` parameter to easily use different van der Waals radii values: + one of `mdanalysis` (default), `rdkit`, or `csd`. - `IFP.interactions()` iterator that yields all interaction data for a given frame in a single flat structure. This makes iterating over the `fp.ifp` results a bit easier / less nested. diff --git a/docs/conf.py b/docs/conf.py index 6649708..8fa1155 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,12 +110,12 @@ html_static_path = [] # ['_static'] intersphinx_mapping = { - "https://docs.python.org/3/": None, - "https://numpy.org/doc/stable/": None, - "https://docs.mdanalysis.org/stable/": None, - "https://www.rdkit.org/docs/": None, - "https://pandas.pydata.org/docs/": None, - "https://multiprocess.readthedocs.io/en/latest/": None, + "python": ("https://docs.python.org/3/", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "mdanalysis": ("https://docs.mdanalysis.org/stable/", None), + "rdkit": ("https://www.rdkit.org/docs/", None), + "pandas": ("https://pandas.pydata.org/docs/", None), + "multiprocess": ("https://multiprocess.readthedocs.io/en/latest/", None), } diff --git a/prolif/fingerprint.py b/prolif/fingerprint.py index d66ccbf..29c059e 100644 --- a/prolif/fingerprint.py +++ b/prolif/fingerprint.py @@ -81,7 +81,11 @@ class Fingerprint: PiStacking, Anionic, Cationic, CationPi, PiCation, VdWContact. parameters : dict, optional New parameters for the interactions. Mapping between an interaction name and a - dict of parameters as they appear in the interaction class. + dict of parameters as they appear in the interaction class (see the + :mod:`prolif.interactions` module's documentation for all classes parameters):: + + >>> fp = plf.Fingerprint(parameters={"Hydrophobic": {"distance": 3.8}}) + count : bool For a given interaction class and pair of residues, there might be multiple combinations of atoms that satisfy the interaction constraints. This parameter diff --git a/prolif/interactions/constants.py b/prolif/interactions/constants.py new file mode 100644 index 0000000..4a5c64e --- /dev/null +++ b/prolif/interactions/constants.py @@ -0,0 +1,114 @@ +""" +Constants used by the package for interactions. +""" + +from MDAnalysis.topology.tables import vdwradii +from rdkit.Chem import GetPeriodicTable + +VDWRADII = {symbol.capitalize(): radius for symbol, radius in vdwradii.items()} + +PT = GetPeriodicTable() +RDKIT_VDWRADII = {PT.GetElementSymbol(i): PT.GetRvdw(i) for i in range(1, 119)} + +# Table 1 of https://doi.org/10.1039/C3DT50599E +CSD_VDWRADII = { + "H": 1.2, + "He": 1.43, + "Li": 2.12, + "Be": 1.98, + "B": 1.91, + "C": 1.77, + "N": 1.66, + "O": 1.5, + "F": 1.46, + "Ne": 1.58, + "Na": 2.5, + "Mg": 2.51, + "Al": 2.25, + "Si": 2.19, + "P": 1.9, + "S": 1.89, + "Cl": 1.82, + "Ar": 1.83, + "K": 2.73, + "Ca": 2.62, + "Sc": 2.58, + "Ti": 2.46, + "V": 2.42, + "Cr": 2.45, + "Mn": 2.45, + "Fe": 2.44, + "Co": 2.4, + "Ni": 2.4, + "Cu": 2.38, + "Zn": 2.39, + "Ga": 2.32, + "Ge": 2.29, + "As": 1.88, + "Se": 1.82, + "Br": 1.86, + "Kr": 2.25, + "Rb": 3.21, + "Sr": 2.84, + "Y": 2.75, + "Zr": 2.52, + "Nb": 2.56, + "Mo": 2.45, + "Tc": 2.44, + "Ru": 2.46, + "Rh": 2.44, + "Pd": 2.15, + "Ag": 2.53, + "Cd": 2.49, + "In": 2.43, + "Sn": 2.42, + "Sb": 2.47, + "Te": 1.99, + "I": 2.04, + "Xe": 2.06, + "Cs": 3.48, + "Ba": 3.03, + "La": 2.98, + "Ce": 2.88, + "Pr": 2.92, + "Nd": 2.95, + "Sm": 2.9, + "Eu": 2.87, + "Gd": 2.83, + "Tb": 2.79, + "Dy": 2.87, + "Ho": 2.81, + "Er": 2.83, + "Tm": 2.79, + "Yb": 2.8, + "Lu": 2.74, + "Hf": 2.63, + "Ta": 2.53, + "W": 2.57, + "Re": 2.49, + "Os": 2.48, + "Ir": 2.41, + "Pt": 2.29, + "Au": 2.32, + "Hg": 2.45, + "Tl": 2.47, + "Pb": 2.6, + "Bi": 2.54, + "Ac": 2.8, + "Th": 2.93, + "Pa": 2.88, + "U": 2.71, # noqa + "Np": 2.82, + "Pu": 2.81, + "Am": 2.83, + "Cm": 3.05, + "Bk": 3.4, + "Cf": 3.05, + "Es": 2.7, +} + +VDW_PRESETS = { + "mdanalysis": VDWRADII, + "rdkit": RDKIT_VDWRADII, + "csd": CSD_VDWRADII, +} diff --git a/prolif/interactions/interactions.py b/prolif/interactions/interactions.py index 9404274..5d30c99 100644 --- a/prolif/interactions/interactions.py +++ b/prolif/interactions/interactions.py @@ -12,8 +12,8 @@ from itertools import product from math import degrees, radians +from typing import Dict, Literal, Optional -from MDAnalysis.topology.tables import vdwradii from rdkit import Geometry from rdkit.Chem import MolFromSmarts @@ -24,6 +24,7 @@ Interaction, SingleAngle, ) +from prolif.interactions.constants import VDW_PRESETS, VDWRADII # noqa from prolif.utils import angle_between_limits, get_centroid, get_ring_normal_vector __all__ = [ @@ -43,7 +44,6 @@ "XBAcceptor", "XBDonor", ] -VDWRADII = {symbol.capitalize(): radius for symbol, radius in vdwradii.items()} class Hydrophobic(Distance): @@ -438,13 +438,19 @@ class VdWContact(Interaction): Parameters ---------- - tolerance : float + tolerance : float, optional Tolerance added to the sum of vdW radii of atoms before comparing to the interatomic distance. If ``distance <= sum_vdw + tolerance`` the - atoms are identified as a contact + atoms are identified as a contact. vdwradii : dict, optional Updates to the vdW radii dictionary, with elements (first letter uppercase) as a key and the radius as a value. + preset : str, optional + Which preset of vdW radii to use. ``mdanalysis`` and ``rdkit`` correspond to the + values used by the corresponding package, ``csd`` uses values calculated from + the Cambridge Structural Database in doi.org/10.1039/C3DT50599E. ``rdkit`` and + ``csd`` should contain almost all elements, as opposed to ``mdanalysis`` which + only defines a limited subset. Raises ------ @@ -455,15 +461,41 @@ class VdWContact(Interaction): .. versionchanged:: 2.0.0 Added the ``vdwradii`` parameter. + .. versionchanged:: 2.1.0 + Added the ``preset`` parameter. + """ - def __init__(self, tolerance=0.0, vdwradii=None): + def __init__( + self, + tolerance: float = 0.0, + vdwradii: Optional[Dict[str, float]] = None, + preset: Literal["mdanalysis", "rdkit", "csd"] = "mdanalysis", + ) -> None: if tolerance >= 0: self.tolerance = tolerance else: raise ValueError("`tolerance` must be 0 or positive") self._vdw_cache = {} - self.vdwradii = {**VDWRADII, **vdwradii} if vdwradii else VDWRADII + self.preset = preset.lower() + preset_vdw = VDW_PRESETS[self.preset] + self.vdwradii = {**preset_vdw, **vdwradii} if vdwradii else preset_vdw + + def _get_radii_sum(self, atom1: str, atom2: str) -> float: + try: + return self.vdwradii[atom1] + self.vdwradii[atom2] + except KeyError: + missing = [] + if atom1 not in self.vdwradii: + missing.append(f"{atom1!r}") + if atom2 not in self.vdwradii: + missing.append(f"{atom2!r}") + raise ValueError( + f"van der Waals radius for atom {' and '.join(missing)} not found." + " Either specify the missing radii in the `vdwradii` parameter for the" + " VdWContact interaction, or use a preset different from the current" + f" {self.preset!r}." + ) from None def detect(self, ligand, residue): lxyz = ligand.GetConformer() @@ -475,7 +507,7 @@ def detect(self, ligand, residue): try: vdw = self._vdw_cache[elements] except KeyError: - vdw = self.vdwradii[lig] + self.vdwradii[res] + self.tolerance + vdw = self._get_radii_sum(lig, res) + self.tolerance self._vdw_cache[elements] = vdw dist = lxyz.GetAtomPosition(la.GetIdx()).Distance( rxyz.GetAtomPosition(ra.GetIdx()), diff --git a/tests/test_interactions.py b/tests/test_interactions.py index 640cb3a..4216c8c 100644 --- a/tests/test_interactions.py +++ b/tests/test_interactions.py @@ -8,6 +8,7 @@ from prolif.fingerprint import Fingerprint from prolif.interactions import VdWContact from prolif.interactions.base import _INTERACTIONS, Interaction, get_mapindex +from prolif.interactions.constants import VDW_PRESETS # disable rdkit warnings lg = RDLogger.logger() @@ -183,6 +184,25 @@ def test_vdwcontact_vdwradii_update(self, any_mol, any_other_mol): metadata = vdw.detect(any_mol[0], any_other_mol[0]) assert next(metadata, None) is None + @pytest.mark.parametrize( + ("any_mol", "any_other_mol"), + [("benzene", "cation")], + indirect=["any_mol", "any_other_mol"], + ) + @pytest.mark.parametrize("preset", ["mdanalysis", "rdkit", "csd"]) + def test_vdwcontact_preset(self, any_mol, any_other_mol, preset): + vdw = VdWContact(preset=preset) + metadata = vdw.detect(any_mol[0], any_other_mol[0]) + assert next(metadata, None) is not None + assert vdw.vdwradii == VDW_PRESETS[preset] + + def test_vdwcontact_radii_missing(self): + vdw = VdWContact(preset="mdanalysis") + with pytest.raises( + ValueError, match=r"van der Waals radius for atom .+ not found" + ): + vdw._get_radii_sum("X", "Y") + @pytest.mark.parametrize( ("interaction_qmol", "smiles", "expected"), [