Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added function to get the number of stereoisomers #217

Merged
merged 8 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions datamol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
"save_df": "datamol.io",
# isomers
"enumerate_stereoisomers": "datamol.isomers",
"count_stereoisomers": "datamol.isomers",
"enumerate_tautomers": "datamol.isomers",
"enumerate_structisomers": "datamol.isomers",
"canonical_tautomer": "datamol.isomers",
Expand Down Expand Up @@ -329,6 +330,7 @@ def __dir__():
from .io import open_df

from .isomers import enumerate_stereoisomers
from .isomers import count_stereoisomers
from .isomers import enumerate_tautomers
from .isomers import enumerate_structisomers
from .isomers import canonical_tautomer
Expand Down
1 change: 1 addition & 0 deletions datamol/isomers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
from ._enumerate import enumerate_stereoisomers
from ._enumerate import enumerate_tautomers
from ._enumerate import enumerate_structisomers
from ._enumerate import count_stereoisomers
from ._enumerate import remove_stereochemistry
from ._enumerate import canonical_tautomer
41 changes: 40 additions & 1 deletion datamol/isomers/_enumerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers
from rdkit.Chem.EnumerateStereoisomers import GetStereoisomerCount
from rdkit.Chem.EnumerateStereoisomers import StereoEnumerationOptions

import datamol as dm
Expand Down Expand Up @@ -82,7 +83,8 @@ def enumerate_stereoisomers(

# in case any bonds/centers are missing stereo chem flag it here
Chem.AssignStereochemistry(mol, force=False, flagPossibleStereoCenters=True, cleanIt=clean_it) # type: ignore
Chem.FindPotentialStereoBonds(mol, cleanIt=clean_it) # type: ignore
# lu: do not clean (overwrite bond stereo information) when `undefined_only=True` is set
Chem.FindPotentialStereoBonds(mol, cleanIt=not undefined_only and clean_it)

# set up the options
stereo_opts = StereoEnumerationOptions(
Expand Down Expand Up @@ -117,6 +119,43 @@ def enumerate_stereoisomers(
return variants


def count_stereoisomers(
    mol: dm.Mol,
    undefined_only: bool = False,
    rationalise: bool = True,
    clean_it: bool = True,
) -> int:
    """Get an estimate (upper bound) of the number of possible stereoisomers for a molecule.

    Warning: this function is an estimation, therefore it might be less accurate than
    enumerating the stereoisomers with `enumerate_stereoisomers`.

    Args:
        mol: The molecule whose stereoisomers should be counted. It is copied first,
            so the input molecule is left untouched.
        undefined_only: If we should count all stereocenters and bonds or only those
            with undefined stereochemistry.
        rationalise: Forwarded to RDKit as the `tryEmbedding` enumeration option.
            NOTE(review): whether this option influences the estimated count depends
            on RDKit's `GetStereoisomerCount` - confirm against the RDKit documentation.
        clean_it: A flag for assigning stereochemistry. If True, it will remove previous
            stereochemistry markings on the bonds. Existing bond stereo markings are
            never cleaned when `undefined_only` is True.

    Returns:
        The estimated number of stereoisomers, as reported by RDKit.
    """
    # safety first: work on a copy so the stereo flags set below do not leak to the caller
    mol = dm.copy_mol(mol)

    # in case any bonds/centers are missing stereo chem flag it here
    Chem.AssignStereochemistry(mol, force=False, flagPossibleStereoCenters=True, cleanIt=clean_it)  # type: ignore
    # lu: do not clean (overwrite bond stereo information) when `undefined_only=True` is set,
    # otherwise the already-defined bonds would be counted as undefined
    Chem.FindPotentialStereoBonds(mol, cleanIt=not undefined_only and clean_it)

    # set up the options
    stereo_opts = StereoEnumerationOptions(
        tryEmbedding=rationalise,
        onlyUnassigned=undefined_only,
        unique=True,
    )

    num_variants = GetStereoisomerCount(mol, options=stereo_opts)

    return num_variants


def enumerate_structisomers(
mol: dm.Mol,
n_variants: int = 20,
Expand Down
8 changes: 8 additions & 0 deletions tests/test_isomers.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ def test_enumerate_stereo_timeout():
dm.enumerate_stereoisomers(mol, n_variants=2, timeout_seconds=1)


def test_count_stereoisomers():
    """Check `dm.count_stereoisomers` on an unmarked and on a stereo-defined alkene."""
    # "CC=CC" carries no stereo markings in its SMILES: both modes must report the same count.
    only_undefined_count, all_sites_count = (
        dm.count_stereoisomers(dm.to_mol("CC=CC"), undefined_only=flag) for flag in (True, False)
    )
    assert only_undefined_count == all_sites_count

    # The double-bond stereo is written explicitly here, so counting only undefined
    # sites is expected to give a single isomer.
    defined_alkene = dm.to_mol("Br/C=C\\Br")
    assert dm.count_stereoisomers(defined_alkene, undefined_only=True) == 1


def test_enumerate_structural():
mol = dm.to_mol("CCCCC") # pentane has only three structural isomers

Expand Down
Loading