Merge pull request #35 from FanwangM/timeout

Add option to control running time per molecule
ecrl · Sep 8, 2022 · 903a04e
2 parents 9feb460 + 90c30a3
commit 903a04e

Showing 2 changed files with 71 additions and 26 deletions.
diff --git a/padelpy/functions.py b/padelpy/functions.py
@@ -14,7 +14,7 @@
 from csv import DictReader
 from datetime import datetime
 from os import remove
-from re import compile, IGNORECASE
+from re import IGNORECASE, compile
 from time import sleep
 
 # PaDELPy imports
@@ -27,9 +27,14 @@
 ]
 
 
-def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
-                fingerprints: bool = False, timeout: int = 60) -> OrderedDict:
-    """ from_smiles: converts SMILES string to QSPR descriptors/fingerprints
+def from_smiles(smiles,
+                output_csv: str = None,
+                descriptors: bool = True,
+                fingerprints: bool = False,
+                timeout: int = 60,
+                maxruntime: int = -1,
+                ) -> OrderedDict:
+    """ from_smiles: converts SMILES string to QSPR descriptors/fingerprints.
 
     Args:
         smiles (str, list): SMILES string for a given molecule, or a list of
@@ -38,13 +43,18 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
         descriptors (bool): if `True`, calculates descriptors
         fingerprints (bool): if `True`, calculates fingerprints
         timeout (int): maximum time, in seconds, for conversion
+        maxruntime (int): maximum running time per molecule in seconds. default=-1.
 
     Returns:
         list or OrderedDict: if multiple SMILES strings provided, returns a
             list of OrderedDicts, else single OrderedDict; each OrderedDict
             contains labels and values for each descriptor generated for each
             supplied molecule
     """
+    # unit conversion for maximum running time per molecule
+    # seconds -> milliseconds
+    if maxruntime != -1:
+        maxruntime = maxruntime * 1000
 
     timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3]
 
@@ -75,6 +85,7 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
                 d_3d=descriptors,
                 fingerprints=fingerprints,
                 sp_timeout=timeout,
+                maxruntime=maxruntime,
                 retainorder=True
             )
             break
@@ -132,8 +143,13 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True,
     return rows
 
 
-def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True,
-             fingerprints: bool = False, timeout: int = 60) -> list:
+def from_mdl(mdl_file: str,
+             output_csv: str = None,
+             descriptors: bool = True,
+             fingerprints: bool = False,
+             timeout: int = 60,
+             maxruntime: int = -1,
+             ) -> list:
     """ from_mdl: converts MDL file into QSPR descriptors/fingerprints;
     multiple molecules may be represented in the MDL file
 
@@ -143,6 +159,7 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True,
         descriptors (bool): if `True`, calculates descriptors
         fingerprints (bool): if `True`, calculates fingerprints
         timeout (int): maximum time, in seconds, for conversion
+        maxruntime (int): maximum running time per molecule in seconds. default=-1.
 
     Returns:
         list: list of dicts, where each dict corresponds sequentially to a
@@ -159,15 +176,19 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True,
                            output_csv=output_csv,
                            descriptors=descriptors,
                            fingerprints=fingerprints,
-                           timeout=timeout)
+                           timeout=timeout,
+                           maxruntime=maxruntime,
+                           )
     return rows
 
 
 def from_sdf(sdf_file: str,
              output_csv: str = None,
              descriptors: bool = True,
              fingerprints: bool = False,
-             timeout: int = 60) -> list:
+             timeout: int = 60,
+             maxruntime: int = -1,
+             ) -> list:
     """ Converts sdf file into QSPR descriptors/fingerprints.
     Multiple molecules may be represented in the sdf file
 
@@ -177,6 +198,8 @@ def from_sdf(sdf_file: str,
         descriptors (bool): if `True`, calculates descriptors
         fingerprints (bool): if `True`, calculates fingerprints
         timeout (int): maximum time, in seconds, for conversion
+        maxruntime (int): maximum running time per molecule in seconds. default=-1.
+
 
     Returns:
         list: list of dicts, where each dict corresponds sequentially to a compound in the
@@ -193,12 +216,24 @@ def from_sdf(sdf_file: str,
                            output_csv=output_csv,
                            descriptors=descriptors,
                            fingerprints=fingerprints,
-                           timeout=timeout)
+                           sp_timeout=timeout,
+                           maxruntime=maxruntime,
+                           )
     return rows
 
 
-def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = True,
-                    fingerprints: bool = False, timeout: int = 60) -> list:
+def _from_mdl_lower(mol_file: str,
+                    output_csv: str = None,
+                    descriptors: bool = True,
+                    fingerprints: bool = False,
+                    sp_timeout: int = 60,
+                    maxruntime: int = -1,
+                    ) -> list:
+    # unit conversion for maximum running time per molecule
+    # seconds -> milliseconds
+    if maxruntime != -1:
+        maxruntime = maxruntime * 1000
+
     save_csv = True
     if output_csv is None:
         save_csv = False
@@ -209,6 +244,7 @@ def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = T
     for attempt in range(3):
         try:
             padeldescriptor(
+                maxruntime=maxruntime,
                 mol_dir=mol_file,
                 d_file=output_csv,
                 convert3d=True,
@@ -217,7 +253,7 @@ def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = T
                 d_2d=descriptors,
                 d_3d=descriptors,
                 fingerprints=fingerprints,
-                sp_timeout=timeout
+                sp_timeout=sp_timeout,
             )
             break
         except RuntimeError as exception:

diff --git a/padelpy/wrapper.py b/padelpy/wrapper.py
@@ -27,15 +27,15 @@
 
 
 def _popen_timeout(command: str, timeout: int) -> tuple:
-    ''' Calls PaDEL-Descriptor, with optional subprocess timeout
+    """Calls PaDEL-Descriptor, with optional subprocess timeout
 
     Args:
         command (str): command to execute via subprocess.Popen
         timeout (int): if not None, times out after this many seconds
 
     Returns:
         tuple: (stdout of process, stderr of process)
-    '''
+    """
 
     p = Popen(command.split(), stdout=PIPE, stderr=PIPE)
     if timeout is not None:
@@ -49,21 +49,31 @@ def _popen_timeout(command: str, timeout: int) -> tuple:
         return p.communicate()
 
 
-def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1,
-                    threads: int = -1, d_2d: bool = False, d_3d: bool = False,
-                    config: str = None, convert3d: bool = False,
+def padeldescriptor(maxruntime: int = -1,
+                    waitingjobs: int = -1,
+                    threads: int = -1,
+                    d_2d: bool = False,
+                    d_3d: bool = False,
+                    config: str = None,
+                    convert3d: bool = False,
                     descriptortypes: str = None,
-                    detectaromaticity: bool = False, mol_dir: str = None,
-                    d_file: str = None, fingerprints: bool = False,
-                    log: bool = False, maxcpdperfile: int = 0,
-                    removesalt: bool = False, retain3d: bool = False,
-                    retainorder: bool = False, standardizenitro: bool = False,
+                    detectaromaticity: bool = False,
+                    mol_dir: str = None,
+                    d_file: str = None,
+                    fingerprints: bool = False,
+                    log: bool = False,
+                    maxcpdperfile: int = 0,
+                    removesalt: bool = False,
+                    retain3d: bool = False,
+                    retainorder: bool = False,
+                    standardizenitro: bool = False,
                     standardizetautomers: bool = False,
                     tautomerlist: str = None,
                     usefilenameasmolname: bool = False,
                     sp_timeout: int = None,
-                    headless: bool = True) -> None:
-    ''' padeldescriptor: complete wrapper for PaDEL-Descriptor descriptor/
+                    headless: bool = True
+                    ) -> None:
+    """padeldescriptor: complete wrapper for PaDEL-Descriptor descriptor/
     fingerprint generation software
 
     Args:
@@ -103,7 +113,7 @@ def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1,
 
     Returns:
         None
-    '''
+    """
 
     if which('java') is None:
         raise ReferenceError(
@@ -158,4 +168,3 @@ def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1,
             err.decode('utf-8')
         ))
     return
-