Skip to content

Commit

Permalink
Interaction changes (#84)
Browse files Browse the repository at this point in the history
- The SMARTS for the following groups have been updated to a more accurate definition:
  - Hydrophobic: exclude F, Cl, tetracoordinated C and S, and C connected to N, O or F.
  - HBond donor: exclude charged O, S and charged aromatic N; only accept nitrogen
    that has valence 3 or is an ammonium (valence 4, charge +1)
  - HBond acceptor: include some aromatic oxygen
  • Loading branch information
cbouy authored Oct 5, 2022
1 parent f35a8f7 commit ad51a73
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 22 deletions.
13 changes: 9 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,24 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [1.1.0] - 2022-10-XX
### Added
- `Fingerprint.run` now has a `converter_kwargs` parameter that can pass kwargs to the
underlying RDKitConverter from MDAnalysis (Issue #57).

### Changed
- The SMARTS for the following groups have been updated to a more accurate definition
(PR #73, @DrrDom):
(Issue #68, PR #73 by @DrrDom, and PR #84):
- Hydrophobic: exclude F, Cl, tetracoordinated C and S, and C connected to N, O or F,
- HBond donor: exclude charged O, S and charged aromatic N; only accept nitrogen
that has valence 3 or is an ammonium (valence 4, charge +1),
- HBond acceptor: exclude amides and some amines, exclude biaryl ethers and alkoxy
oxygen from esters, include aromatic nitrogens,
oxygen from esters, include some aromatic oxygen and nitrogen,
- Anion: include resonance forms of carboxylic, sulfonic and phosphorus acids,
- Cation: include amidine and guanidine,
- Metal ligand: exclude amides and some amines.

- Metal ligand: exclude amides and some amines,
### Fixed
- Dead link to the MDAnalysis quickstart in the quickstart notebook (PR #75, @radifar).

Expand Down
10 changes: 10 additions & 0 deletions prolif/data/bromine.mol2
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
@<TRIPOS>MOLECULE
bromine
2 1 1
SMALL
USER_CHARGES
@<TRIPOS>ATOM
1 Br1 1.366 1.067 1.272 Br 1 UNK1 0.000
2 H01 2.005 0.876 2.134 H 1 UNK1 0.000
@<TRIPOS>BOND
1 1 2 1
2 changes: 1 addition & 1 deletion prolif/fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def run(self, traj, lig, prot, residues=None, converter_kwargs=None, progress=Tr
.. versionchanged:: 1.0.0
Added support for multiprocessing
.. versionadded:: 1.0.1
.. versionadded:: 1.1.0
Added support for passing kwargs to the RDKitConverter through
the ``converter_kwargs`` parameter
Expand Down
60 changes: 49 additions & 11 deletions prolif/interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ def detect(self, res1, res2, threshold=2.0):
>>> fp.closecontact(lmol, pmol["ASP129.A"])
True
Note that some of the SMARTS patterns used in the interaction classes are inspired by
`Pharmit`_ and `RDKit`_.
.. _Pharmit: https://sourceforge.net/p/pharmit/code/ci/master/tree/src/pharmarec.cpp
.. _RDKit: https://github.com/rdkit/rdkit/blob/master/Data/BaseFeatures.fdef
"""

import warnings
Expand Down Expand Up @@ -127,10 +133,19 @@ class Hydrophobic(_Distance):
SMARTS query for hydrophobic atoms
distance : float
Cutoff distance for the interaction
.. versionchanged:: 1.1.0
The initial SMARTS pattern was too broad.
"""
def __init__(self,
hydrophobic="[#6,#16,F,Cl,Br,I,At;+0]",
distance=4.5):
def __init__(
self,
hydrophobic=(
"[c,s,Br,I,S&H0&v2,"
"$([D3,D4;#6])&!$([#6]~[#7,#8,#9])&!$([#6X4H0]);+0]"
),
distance=4.5
):
super().__init__(hydrophobic, hydrophobic, distance)


Expand All @@ -147,12 +162,22 @@ class _BaseHBond(Interaction):
Cutoff distance between the donor and acceptor atoms
angles : tuple
Min and max values for the ``[Donor]-[Hydrogen]...[Acceptor]`` angle
.. versionchanged:: 1.1.0
The initial SMARTS pattern was too broad.
"""
def __init__(self,
donor="[#7,O,#16][H]",
acceptor="[#7&!$([nX3])&!$([NX3]-*=[!#6])&!$([NX3]-[a])&!$([NX4]),O&!$([OX2](C)C=O)&!$(O(~a)~a),-{1-};!+{1-}]",
distance=3.5,
angles=(130, 180)):
def __init__(
self,
donor="[$([O,S;+0]),$([N;v3,v4&+1]),n+0]-[H]",
acceptor=(
"[#7&!$([nX3])&!$([NX3]-*=[O,N,P,S])&!$([NX3]-[a])&!$([Nv4&+1]),"
"O&!$([OX2](C)C=O)&!$(O(~a)~a)&!$(O=N-*)&!$([O-]-N=O),o+0,"
"F&$(F-[#6])&!$(F-[#6][F,Cl,Br,I])]"
),
distance=3.5,
angles=(130, 180)
):
self.donor = MolFromSmarts(donor)
self.acceptor = MolFromSmarts(acceptor)
self.distance = distance
Expand Down Expand Up @@ -263,7 +288,12 @@ def detect(self, ligand, residue):


class _BaseIonic(_Distance):
"""Base class for ionic interactions"""
"""Base class for ionic interactions
.. versionchanged:: 1.1.0
Handles resonance forms for common acids, amidine and guanidine.
"""
def __init__(self,
cation="[+{1-},$([NX3&!$([NX3]-O)]-[C]=[NX3+])]",
anion="[-{1-},$(O=[C,S,P]-[O-])]",
Expand Down Expand Up @@ -298,10 +328,14 @@ class _BaseCationPi(Interaction):
angles : tuple
Min and max values for the angle between the vector normal to the ring
plane and the vector going from the centroid to the cation
.. versionchanged:: 1.1.0
Handles resonance forms for amidine and guanidine as cations.
"""
def __init__(self,
cation="[+{1-},$([NX3&!$([NX3]-O)]-[C]=[NX3+])]",
pi_ring=("a1:a:a:a:a:a:1", "a1:a:a:a:a:1"),
pi_ring=("[a;r6]1:[a;r6]:[a;r6]:[a;r6]:[a;r6]:[a;r6]:1", "[a;r5]1:[a;r5]:[a;r5]:[a;r5]:[a;r5]:1"),
distance=4.5,
angles=(0, 30)):
self.cation = MolFromSmarts(cation)
Expand Down Expand Up @@ -369,7 +403,7 @@ def __init__(self,
centroid_distance=6.0,
shortest_distance=3.8,
plane_angles=(0, 90),
pi_ring=("a1:a:a:a:a:a:1", "a1:a:a:a:a:1")):
pi_ring=("[a;r6]1:[a;r6]:[a;r6]:[a;r6]:[a;r6]:[a;r6]:1", "[a;r5]1:[a;r5]:[a;r5]:[a;r5]:[a;r5]:1")):
self.pi_ring = [MolFromSmarts(s) for s in pi_ring]
self.centroid_distance = centroid_distance
self.shortest_distance = shortest_distance**2
Expand Down Expand Up @@ -435,6 +469,10 @@ class _BaseMetallic(_Distance):
SMARTS for a ligand
distance : float
Cutoff distance
.. versionchanged:: 1.1.0
The initial SMARTS pattern was too broad.
"""
def __init__(self,
metal="[Ca,Cd,Co,Cu,Fe,Mg,Mn,Ni,Zn]",
Expand Down
2 changes: 2 additions & 0 deletions prolif/residue.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import List, Optional

import numpy as np
from rdkit.Chem.rdmolops import FastFindRings

from .rdkitmol import BaseRDKitMol

Expand Down Expand Up @@ -130,6 +131,7 @@ class Residue(BaseRDKitMol):
"""
def __init__(self, mol):
super().__init__(mol)
FastFindRings(self)
self.resid = ResidueId.from_atom(self.GetAtomWithIdx(0))

def __repr__(self): # pragma: no cover
Expand Down
3 changes: 3 additions & 0 deletions tests/mol2factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def etf():
def chlorine():
return from_mol2("chlorine.mol2")

def bromine():
return from_mol2("bromine.mol2")

def hb_donor():
return from_mol2("donor.mol2")

Expand Down
2 changes: 1 addition & 1 deletion tests/test_fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def test_run_residues(self, fp_simple):

def test_generate(self, fp_simple):
ifp = fp_simple.generate(ligand_mol, protein_mol)
key = (ResidueId("LIG", 1, "G"), ResidueId("THR", 355, "B"))
key = (ResidueId("LIG", 1, "G"), ResidueId("VAL", 201, "A"))
bv = ifp[key]
assert isinstance(bv, np.ndarray)
assert bv[0] is np.True_
Expand Down
24 changes: 19 additions & 5 deletions tests/test_interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ def fingerprint(self):
("edgetoface", "ftf", "benzene", False),
("hydrophobic", "benzene", "etf", True),
("hydrophobic", "benzene", "ftf", True),
("hydrophobic", "benzene", "chlorine", True),
("hydrophobic", "benzene", "chlorine", False),
("hydrophobic", "benzene", "bromine", True),
("hydrophobic", "benzene", "anion", False),
("hydrophobic", "benzene", "cation", False),
("hbdonor", "hb_donor", "hb_acceptor", True),
Expand Down Expand Up @@ -148,23 +149,36 @@ def test_vdwcontact_cache(self, lig_mol, prot_mol):

@pytest.mark.parametrize(["interaction_qmol", "smiles", "expected"], [
("Hydrophobic.lig_pattern", "C", 1),
("Hydrophobic.lig_pattern", "O", 0),
("Hydrophobic.lig_pattern", "C=[SH2]", 1),
("Hydrophobic.lig_pattern", "c1cscc1", 5),
("Hydrophobic.lig_pattern", "CSC", 3),
("Hydrophobic.lig_pattern", "CS(C)(C)C", 4),
("Hydrophobic.lig_pattern", "FC(F)(F)F", 0),
("Hydrophobic.lig_pattern", "BrI", 2),
("Hydrophobic.lig_pattern", "C=O", 0),
("Hydrophobic.lig_pattern", "C=N", 0),
("Hydrophobic.lig_pattern", "CF", 0),
("_BaseHBond.donor", "[OH2]", 2),
("_BaseHBond.donor", "[NH3]", 3),
("_BaseHBond.donor", "[NH4+]", 4),
("_BaseHBond.donor", "[SH2]", 2),
("_BaseHBond.donor", "O=C=O", 0),
("_BaseHBond.donor", "c1c[nH+]ccc1", 1),
("_BaseHBond.donor", "c1c[nH+]ccc1", 0),
("_BaseHBond.donor", "c1c[nH]cc1", 1),
("_BaseHBond.acceptor", "O", 1),
("_BaseHBond.acceptor", "N", 1),
("_BaseHBond.acceptor", "[NH+]", 0),
("_BaseHBond.acceptor", "[NH4+]", 0),
("_BaseHBond.acceptor", "N-C=O", 1),
("_BaseHBond.acceptor", "N-C=[SH2]", 0),
("_BaseHBond.acceptor", "[nH+]1ccccc1", 0),
("_BaseHBond.acceptor", "n1ccccc1", 1),
("_BaseHBond.acceptor", "Nc1ccccc1", 0),
("_BaseHBond.acceptor", "o1cccc1", 0),
("_BaseHBond.acceptor", "o1cccc1", 1),
("_BaseHBond.acceptor", "COC=O", 1),
("_BaseHBond.acceptor", "c1ccccc1Oc1ccccc1", 0),
("_BaseHBond.acceptor", "FC", 1),
("_BaseHBond.acceptor", "Fc1ccccc1", 1),
("_BaseHBond.acceptor", "FCF", 0),
("_BaseXBond.donor", "CCl", 1),
("_BaseXBond.donor", "c1ccccc1Cl", 1),
("_BaseXBond.donor", "NCl", 1),
Expand Down

0 comments on commit ad51a73

Please sign in to comment.