Skip to content

Commit

Permalink
Add option to control running time per molecule
Browse files: browse the repository at this point in the history
  • Loading branch information
FanwangM committed Mar 10, 2022
1 parent 901772d commit 67207d5
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 26 deletions.
56 changes: 44 additions & 12 deletions padelpy/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from csv import DictReader
from datetime import datetime
from os import remove
from re import compile, IGNORECASE
from re import IGNORECASE, compile
from time import sleep

# PaDELPy imports
Expand All @@ -27,9 +27,14 @@
]


def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
fingerprints: bool = False, timeout: int = 60) -> OrderedDict:
""" from_smiles: converts SMILES string to QSPR descriptors/fingerprints
def from_smiles(smiles,
output_csv: str = None,
descriptors: bool = True,
fingerprints: bool = False,
timeout: int = 60,
maxruntime: int = -1,
) -> OrderedDict:
""" from_smiles: converts SMILES string to QSPR descriptors/fingerprints.
Args:
smiles (str, list): SMILES string for a given molecule, or a list of
Expand All @@ -38,6 +43,7 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
descriptors (bool): if `True`, calculates descriptors
fingerprints (bool): if `True`, calculates fingerprints
timeout (int): maximum time, in seconds, for conversion
maxruntime (int): maximum running time per molecule in seconds. default=-1.
Returns:
list or OrderedDict: if multiple SMILES strings provided, returns a
Expand Down Expand Up @@ -75,6 +81,7 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
d_3d=descriptors,
fingerprints=fingerprints,
sp_timeout=timeout,
maxruntime=maxruntime,
retainorder=True
)
break
Expand Down Expand Up @@ -124,8 +131,13 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
return rows


def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True,
fingerprints: bool = False, timeout: int = 60) -> list:
def from_mdl(mdl_file: str,
output_csv: str = None,
descriptors: bool = True,
fingerprints: bool = False,
timeout: int = 60,
maxruntime: int = -1,
) -> list:
""" from_mdl: converts MDL file into QSPR descriptors/fingerprints;
multiple molecules may be represented in the MDL file
Expand All @@ -135,6 +147,7 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True,
descriptors (bool): if `True`, calculates descriptors
fingerprints (bool): if `True`, calculates fingerprints
timeout (int): maximum time, in seconds, for conversion
maxruntime (int): maximum running time per molecule in seconds. default=-1.
Returns:
list: list of dicts, where each dict corresponds sequentially to a
Expand All @@ -151,15 +164,19 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True,
output_csv=output_csv,
descriptors=descriptors,
fingerprints=fingerprints,
timeout=timeout)
timeout=timeout,
maxruntime=maxruntime,
)
return rows


def from_sdf(sdf_file: str,
output_csv: str = None,
descriptors: bool = True,
fingerprints: bool = False,
timeout: int = 60) -> list:
timeout: int = 60,
maxruntime: int = -1,
) -> list:
""" Converts sdf file into QSPR descriptors/fingerprints.
Multiple molecules may be represented in the sdf file
Expand All @@ -169,6 +186,8 @@ def from_sdf(sdf_file: str,
descriptors (bool): if `True`, calculates descriptors
fingerprints (bool): if `True`, calculates fingerprints
timeout (int): maximum time, in seconds, for conversion
maxruntime (int): maximum running time per molecule in seconds. default=-1.
Returns:
list: list of dicts, where each dict corresponds sequentially to a compound in the
Expand All @@ -185,12 +204,24 @@ def from_sdf(sdf_file: str,
output_csv=output_csv,
descriptors=descriptors,
fingerprints=fingerprints,
timeout=timeout)
timeout=timeout,
maxruntime=maxruntime,
)
return rows


def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = True,
fingerprints: bool = False, timeout: int = 60) -> list:
def _from_mdl_lower(mol_file: str,
output_csv: str = None,
descriptors: bool = True,
fingerprints: bool = False,
sp_timeout: int = 60,
maxruntime: int = -1,
) -> list:
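# Unit conversion for the maximum running time per molecule: callers pass
# seconds, the value handed to padeldescriptor is in milliseconds.
# The sentinel -1 is passed through unchanged.
# NOTE(review): this helper now names its timeout parameter `sp_timeout`, but
# the from_mdl / from_sdf call sites shown in this diff still pass
# `timeout=timeout` -- if they call this helper, that raises TypeError; confirm
# the callers use `sp_timeout=timeout`.
# NOTE(review): the from_smiles call shown in this diff forwards `maxruntime`
# to padeldescriptor with no visible seconds -> milliseconds conversion --
# confirm it is converted there as well.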
if maxruntime != -1:
maxruntime = maxruntime * 1000

save_csv = True
if output_csv is None:
save_csv = False
Expand All @@ -201,6 +232,7 @@ def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = T
for attempt in range(3):
try:
padeldescriptor(
maxruntime=maxruntime,
mol_dir=mol_file,
d_file=output_csv,
convert3d=True,
Expand All @@ -209,7 +241,7 @@ def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = T
d_2d=descriptors,
d_3d=descriptors,
fingerprints=fingerprints,
sp_timeout=timeout
sp_timeout=sp_timeout,
)
break
except RuntimeError as exception:
Expand Down
37 changes: 23 additions & 14 deletions padelpy/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@


def _popen_timeout(command: str, timeout: int) -> tuple:
''' Calls PaDEL-Descriptor, with optional subprocess timeout
"""Calls PaDEL-Descriptor, with optional subprocess timeout
Args:
command (str): command to execute via subprocess.Popen
timeout (int): if not None, times out after this many seconds
Returns:
tuple: (stdout of process, stderr of process)
'''
"""

p = Popen(command.split(), stdout=PIPE, stderr=PIPE)
if timeout is not None:
Expand All @@ -49,21 +49,31 @@ def _popen_timeout(command: str, timeout: int) -> tuple:
return p.communicate()


def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1,
threads: int = -1, d_2d: bool = False, d_3d: bool = False,
config: str = None, convert3d: bool = False,
def padeldescriptor(maxruntime: int = -1,
waitingjobs: int = -1,
threads: int = -1,
d_2d: bool = False,
d_3d: bool = False,
config: str = None,
convert3d: bool = False,
descriptortypes: str = None,
detectaromaticity: bool = False, mol_dir: str = None,
d_file: str = None, fingerprints: bool = False,
log: bool = False, maxcpdperfile: int = 0,
removesalt: bool = False, retain3d: bool = False,
retainorder: bool = False, standardizenitro: bool = False,
detectaromaticity: bool = False,
mol_dir: str = None,
d_file: str = None,
fingerprints: bool = False,
log: bool = False,
maxcpdperfile: int = 0,
removesalt: bool = False,
retain3d: bool = False,
retainorder: bool = False,
standardizenitro: bool = False,
standardizetautomers: bool = False,
tautomerlist: str = None,
usefilenameasmolname: bool = False,
sp_timeout: int = None,
headless: bool = True) -> None:
''' padeldescriptor: complete wrapper for PaDEL-Descriptor descriptor/
headless: bool = True
) -> None:
"""padeldescriptor: complete wrapper for PaDEL-Descriptor descriptor/
fingerprint generation software
Args:
Expand Down Expand Up @@ -103,7 +113,7 @@ def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1,
Returns:
None
'''
"""

if which('java') is None:
raise ReferenceError(
Expand Down Expand Up @@ -158,4 +168,3 @@ def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1,
err.decode('utf-8')
))
return

0 comments on commit 67207d5

Please sign in to comment.