Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to control running time per molecule #35

Merged
merged 3 commits into from
Sep 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 48 additions & 12 deletions padelpy/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from csv import DictReader
from datetime import datetime
from os import remove
from re import compile, IGNORECASE
from re import IGNORECASE, compile
from time import sleep

# PaDELPy imports
Expand All @@ -27,9 +27,14 @@
]


def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
fingerprints: bool = False, timeout: int = 60) -> OrderedDict:
""" from_smiles: converts SMILES string to QSPR descriptors/fingerprints
def from_smiles(smiles,
output_csv: str = None,
descriptors: bool = True,
fingerprints: bool = False,
timeout: int = 60,
maxruntime: int = -1,
) -> OrderedDict:
""" from_smiles: converts SMILES string to QSPR descriptors/fingerprints.

Args:
smiles (str, list): SMILES string for a given molecule, or a list of
Expand All @@ -38,13 +43,18 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
descriptors (bool): if `True`, calculates descriptors
fingerprints (bool): if `True`, calculates fingerprints
timeout (int): maximum time, in seconds, for conversion
maxruntime (int): maximum running time per molecule, in seconds; -1 (the default) means no limit

Returns:
list or OrderedDict: if multiple SMILES strings provided, returns a
list of OrderedDicts, else single OrderedDict; each OrderedDict
contains labels and values for each descriptor generated for each
supplied molecule
"""
# PaDEL-Descriptor takes the per-molecule running time limit in milliseconds,
# so convert from seconds; the -1 "no limit" sentinel is passed through unchanged
if maxruntime != -1:
maxruntime = maxruntime * 1000

timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3]

Expand Down Expand Up @@ -75,6 +85,7 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
d_3d=descriptors,
fingerprints=fingerprints,
sp_timeout=timeout,
maxruntime=maxruntime,
retainorder=True
)
break
Expand Down Expand Up @@ -124,8 +135,13 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
return rows


def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True,
fingerprints: bool = False, timeout: int = 60) -> list:
def from_mdl(mdl_file: str,
output_csv: str = None,
descriptors: bool = True,
fingerprints: bool = False,
timeout: int = 60,
maxruntime: int = -1,
) -> list:
""" from_mdl: converts MDL file into QSPR descriptors/fingerprints;
multiple molecules may be represented in the MDL file

Expand All @@ -135,6 +151,7 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True,
descriptors (bool): if `True`, calculates descriptors
fingerprints (bool): if `True`, calculates fingerprints
timeout (int): maximum time, in seconds, for conversion
maxruntime (int): maximum running time per molecule, in seconds; -1 (the default) means no limit

Returns:
list: list of dicts, where each dict corresponds sequentially to a
Expand All @@ -151,15 +168,19 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True,
output_csv=output_csv,
descriptors=descriptors,
fingerprints=fingerprints,
timeout=timeout)
timeout=timeout,
maxruntime=maxruntime,
)
return rows


def from_sdf(sdf_file: str,
output_csv: str = None,
descriptors: bool = True,
fingerprints: bool = False,
timeout: int = 60) -> list:
timeout: int = 60,
maxruntime: int = -1,
) -> list:
""" Converts sdf file into QSPR descriptors/fingerprints.
Multiple molecules may be represented in the sdf file

Expand All @@ -169,6 +190,8 @@ def from_sdf(sdf_file: str,
descriptors (bool): if `True`, calculates descriptors
fingerprints (bool): if `True`, calculates fingerprints
timeout (int): maximum time, in seconds, for conversion
maxruntime (int): maximum running time per molecule, in seconds; -1 (the default) means no limit


Returns:
list: list of dicts, where each dict corresponds sequentially to a compound in the
Expand All @@ -185,12 +208,24 @@ def from_sdf(sdf_file: str,
output_csv=output_csv,
descriptors=descriptors,
fingerprints=fingerprints,
timeout=timeout)
sp_timeout=timeout,
maxruntime=maxruntime,
)
return rows


def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = True,
fingerprints: bool = False, timeout: int = 60) -> list:
def _from_mdl_lower(mol_file: str,
output_csv: str = None,
descriptors: bool = True,
fingerprints: bool = False,
sp_timeout: int = 60,
maxruntime: int = -1,
) -> list:
# PaDEL-Descriptor takes the per-molecule running time limit in milliseconds,
# so convert from seconds; the -1 "no limit" sentinel is passed through unchanged
if maxruntime != -1:
maxruntime = maxruntime * 1000

save_csv = True
if output_csv is None:
save_csv = False
Expand All @@ -201,6 +236,7 @@ def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = T
for attempt in range(3):
try:
padeldescriptor(
maxruntime=maxruntime,
mol_dir=mol_file,
d_file=output_csv,
convert3d=True,
Expand All @@ -209,7 +245,7 @@ def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = T
d_2d=descriptors,
d_3d=descriptors,
fingerprints=fingerprints,
sp_timeout=timeout
sp_timeout=sp_timeout,
)
break
except RuntimeError as exception:
Expand Down
37 changes: 23 additions & 14 deletions padelpy/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@


def _popen_timeout(command: str, timeout: int) -> tuple:
''' Calls PaDEL-Descriptor, with optional subprocess timeout
"""Calls PaDEL-Descriptor, with optional subprocess timeout

Args:
command (str): command to execute via subprocess.Popen
timeout (int): if not None, times out after this many seconds

Returns:
tuple: (stdout of process, stderr of process)
'''
"""

p = Popen(command.split(), stdout=PIPE, stderr=PIPE)
if timeout is not None:
Expand All @@ -49,21 +49,31 @@ def _popen_timeout(command: str, timeout: int) -> tuple:
return p.communicate()


def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1,
threads: int = -1, d_2d: bool = False, d_3d: bool = False,
config: str = None, convert3d: bool = False,
def padeldescriptor(maxruntime: int = -1,
waitingjobs: int = -1,
threads: int = -1,
d_2d: bool = False,
d_3d: bool = False,
config: str = None,
convert3d: bool = False,
descriptortypes: str = None,
detectaromaticity: bool = False, mol_dir: str = None,
d_file: str = None, fingerprints: bool = False,
log: bool = False, maxcpdperfile: int = 0,
removesalt: bool = False, retain3d: bool = False,
retainorder: bool = False, standardizenitro: bool = False,
detectaromaticity: bool = False,
mol_dir: str = None,
d_file: str = None,
fingerprints: bool = False,
log: bool = False,
maxcpdperfile: int = 0,
removesalt: bool = False,
retain3d: bool = False,
retainorder: bool = False,
standardizenitro: bool = False,
standardizetautomers: bool = False,
tautomerlist: str = None,
usefilenameasmolname: bool = False,
sp_timeout: int = None,
headless: bool = True) -> None:
''' padeldescriptor: complete wrapper for PaDEL-Descriptor descriptor/
headless: bool = True
) -> None:
"""padeldescriptor: complete wrapper for PaDEL-Descriptor descriptor/
fingerprint generation software

Args:
Expand Down Expand Up @@ -103,7 +113,7 @@ def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1,

Returns:
None
'''
"""

if which('java') is None:
raise ReferenceError(
Expand Down Expand Up @@ -158,4 +168,3 @@ def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1,
err.decode('utf-8')
))
return