From 67207d5b82d58d233cd564dd7b426c73cfcadd9d Mon Sep 17 00:00:00 2001 From: Fanwang Meng Date: Thu, 10 Mar 2022 11:59:37 -0500 Subject: [PATCH] Add option to control running time per molecule --- padelpy/functions.py | 56 ++++++++++++++++++++++++++++++++++---------- padelpy/wrapper.py | 37 ++++++++++++++++++----------- 2 files changed, 67 insertions(+), 26 deletions(-) diff --git a/padelpy/functions.py b/padelpy/functions.py index f4675c8..d46875c 100644 --- a/padelpy/functions.py +++ b/padelpy/functions.py @@ -14,7 +14,7 @@ from csv import DictReader from datetime import datetime from os import remove -from re import compile, IGNORECASE +from re import IGNORECASE, compile from time import sleep # PaDELPy imports @@ -27,9 +27,14 @@ ] -def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, - fingerprints: bool = False, timeout: int = 60) -> OrderedDict: - """ from_smiles: converts SMILES string to QSPR descriptors/fingerprints +def from_smiles(smiles, + output_csv: str = None, + descriptors: bool = True, + fingerprints: bool = False, + timeout: int = 60, + maxruntime: int = -1, + ) -> OrderedDict: + """ from_smiles: converts SMILES string to QSPR descriptors/fingerprints. Args: smiles (str, list): SMILES string for a given molecule, or a list of @@ -38,6 +43,7 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, descriptors (bool): if `True`, calculates descriptors fingerprints (bool): if `True`, calculates fingerprints timeout (int): maximum time, in seconds, for conversion + maxruntime (int): maximum running time per molecule in seconds. default=-1. Returns: list or OrderedDict: if multiple SMILES strings provided, returns a @@ -75,6 +81,7 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, d_3d=descriptors, fingerprints=fingerprints, sp_timeout=timeout, + maxruntime=maxruntime, retainorder=True ) break @@ -124,8 +131,13 @@ def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, return rows -def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True, - fingerprints: bool = False, timeout: int = 60) -> list: +def from_mdl(mdl_file: str, + output_csv: str = None, + descriptors: bool = True, + fingerprints: bool = False, + timeout: int = 60, + maxruntime: int = -1, + ) -> list: """ from_mdl: converts MDL file into QSPR descriptors/fingerprints; multiple molecules may be represented in the MDL file @@ -135,6 +147,7 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True, descriptors (bool): if `True`, calculates descriptors fingerprints (bool): if `True`, calculates fingerprints timeout (int): maximum time, in seconds, for conversion + maxruntime (int): maximum running time per molecule in seconds. default=-1. Returns: list: list of dicts, where each dict corresponds sequentially to a @@ -151,7 +164,9 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True, output_csv=output_csv, descriptors=descriptors, fingerprints=fingerprints, - timeout=timeout) + timeout=timeout, + maxruntime=maxruntime, + ) return rows @@ -159,7 +174,9 @@ def from_sdf(sdf_file: str, output_csv: str = None, descriptors: bool = True, fingerprints: bool = False, - timeout: int = 60) -> list: + timeout: int = 60, + maxruntime: int = -1, + ) -> list: """ Converts sdf file into QSPR descriptors/fingerprints. Multiple molecules may be represented in the sdf file @@ -169,6 +186,8 @@ def from_sdf(sdf_file: str, descriptors (bool): if `True`, calculates descriptors fingerprints (bool): if `True`, calculates fingerprints timeout (int): maximum time, in seconds, for conversion + maxruntime (int): maximum running time per molecule in seconds. default=-1. + Returns: list: list of dicts, where each dict corresponds sequentially to a compound in the @@ -185,12 +204,24 @@ def from_sdf(sdf_file: str, output_csv=output_csv, descriptors=descriptors, fingerprints=fingerprints, - timeout=timeout) + timeout=timeout, + maxruntime=maxruntime, + ) return rows -def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = True, - fingerprints: bool = False, timeout: int = 60) -> list: +def _from_mdl_lower(mol_file: str, + output_csv: str = None, + descriptors: bool = True, + fingerprints: bool = False, + sp_timeout: int = 60, + maxruntime: int = -1, + ) -> list: + # unit conversion for maximum running time per molecule + # seconds -> milliseconds + if maxruntime != -1: + maxruntime = maxruntime * 1000 + save_csv = True if output_csv is None: save_csv = False @@ -201,6 +232,7 @@ def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = T for attempt in range(3): try: padeldescriptor( + maxruntime=maxruntime, mol_dir=mol_file, d_file=output_csv, convert3d=True, @@ -209,7 +241,7 @@ def _from_mdl_lower(mol_file: str, output_csv: str = None, descriptors: bool = T d_2d=descriptors, d_3d=descriptors, fingerprints=fingerprints, - sp_timeout=timeout + sp_timeout=sp_timeout, ) break except RuntimeError as exception: diff --git a/padelpy/wrapper.py b/padelpy/wrapper.py index 65e18ba..9c1f927 100644 --- a/padelpy/wrapper.py +++ b/padelpy/wrapper.py @@ -27,7 +27,7 @@ def _popen_timeout(command: str, timeout: int) -> tuple: - ''' Calls PaDEL-Descriptor, with optional subprocess timeout + """Calls PaDEL-Descriptor, with optional subprocess timeout Args: command (str): command to execute via subprocess.Popen @@ -35,7 +35,7 @@ def _popen_timeout(command: str, timeout: int) -> tuple: Returns: tuple: (stdout of process, stderr of process) - ''' + """ p = Popen(command.split(), stdout=PIPE, stderr=PIPE) if timeout is not None: @@ -49,21 +49,31 @@ def _popen_timeout(command: str, timeout: int) -> tuple: return p.communicate() -def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1, - threads: int = -1, d_2d: bool = False, d_3d: bool = False, - config: str = None, convert3d: bool = False, +def padeldescriptor(maxruntime: int = -1, + waitingjobs: int = -1, + threads: int = -1, + d_2d: bool = False, + d_3d: bool = False, + config: str = None, + convert3d: bool = False, descriptortypes: str = None, - detectaromaticity: bool = False, mol_dir: str = None, - d_file: str = None, fingerprints: bool = False, - log: bool = False, maxcpdperfile: int = 0, - removesalt: bool = False, retain3d: bool = False, - retainorder: bool = False, standardizenitro: bool = False, + detectaromaticity: bool = False, + mol_dir: str = None, + d_file: str = None, + fingerprints: bool = False, + log: bool = False, + maxcpdperfile: int = 0, + removesalt: bool = False, + retain3d: bool = False, + retainorder: bool = False, + standardizenitro: bool = False, standardizetautomers: bool = False, tautomerlist: str = None, usefilenameasmolname: bool = False, sp_timeout: int = None, - headless: bool = True) -> None: - ''' padeldescriptor: complete wrapper for PaDEL-Descriptor descriptor/ + headless: bool = True + ) -> None: + """padeldescriptor: complete wrapper for PaDEL-Descriptor descriptor/ fingerprint generation software Args: @@ -103,7 +113,7 @@ def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1, Returns: None - ''' + """ if which('java') is None: raise ReferenceError( @@ -158,4 +168,3 @@ def padeldescriptor(maxruntime: int = -1, waitingjobs: int = -1, err.decode('utf-8') )) return -