From b9a80160464dc66e5373c284be330b5b2cc053d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nahuel=20Unai=20Rosell=C3=B3=20Beneitez?= Date: Wed, 3 Jul 2024 11:22:09 -0400 Subject: [PATCH] Revert code, don't catch TimeoutExpired inside system_call --- sisyphus/aws_batch_engine.py | 7 ++----- sisyphus/load_sharing_facility_engine.py | 19 ++++++++++--------- ..._utility_for_resource_management_engine.py | 19 ++++++++++--------- sisyphus/son_of_grid_engine.py | 19 ++++++++++--------- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/sisyphus/aws_batch_engine.py b/sisyphus/aws_batch_engine.py index baff7b6..2ad4e90 100644 --- a/sisyphus/aws_batch_engine.py +++ b/sisyphus/aws_batch_engine.py @@ -90,11 +90,8 @@ def system_call(self, command, send_to_stdin=None): logging.debug("shell_cmd: %s" % " ".join(system_command)) if send_to_stdin: send_to_stdin = send_to_stdin.encode() - try: - p = subprocess.run(system_command, input=send_to_stdin, capture_output=True, timeout=30) - except subprocess.TimeoutExpired: - logging.warning("Timeout expired for command: %s" % " ".join(system_command)) - return [], ["TimeoutExpired".encode()], -1 + + p = subprocess.run(system_command, input=send_to_stdin, capture_output=True, timeout=30) def fix_output(o): """ diff --git a/sisyphus/load_sharing_facility_engine.py b/sisyphus/load_sharing_facility_engine.py index 09f0b13..eae2c5a 100644 --- a/sisyphus/load_sharing_facility_engine.py +++ b/sisyphus/load_sharing_facility_engine.py @@ -58,11 +58,8 @@ def system_call(self, command, send_to_stdin=None): logging.debug("shell_cmd: %s" % " ".join(system_command)) if send_to_stdin: send_to_stdin = send_to_stdin.encode() - try: - p = subprocess.run(system_command, input=send_to_stdin, capture_output=True, timeout=30) - except subprocess.TimeoutExpired: - logging.warning(self._system_call_timeout_warn_msg(system_command)) - return [], ["TimeoutExpired".encode()], -1 + + p = subprocess.run(system_command, input=send_to_stdin, capture_output=True, timeout=30) def fix_output(o): # split output and drop last empty line @@ -189,8 +186,10 @@ def submit_helper(self, call, logpath, rqmt, name, task_name, rangestring): while True: logging.info("bsub_call: %s" % bsub_call) logging.info("command: %s" % command) - out, err, retval = self.system_call(bsub_call, command) - if retval != 0: + try: + out, err, retval = self.system_call(bsub_call, command) + except subprocess.TimeoutExpired: + logging.warning(self._system_call_timeout_warn_msg(bsub_call)) time.sleep(gs.WAIT_PERIOD_SSH_TIMEOUT) continue break @@ -248,8 +247,10 @@ def queue_state(self): # get bjobs output system_command = ["bjobs", "-w"] while True: - out, err, retval = self.system_call(system_command) - if retval != 0: + try: + out, err, retval = self.system_call(system_command) + except subprocess.TimeoutExpired: + logging.warning(self._system_call_timeout_warn_msg(system_command)) time.sleep(gs.WAIT_PERIOD_SSH_TIMEOUT) continue break diff --git a/sisyphus/simple_linux_utility_for_resource_management_engine.py b/sisyphus/simple_linux_utility_for_resource_management_engine.py index f27b9c4..063de9a 100644 --- a/sisyphus/simple_linux_utility_for_resource_management_engine.py +++ b/sisyphus/simple_linux_utility_for_resource_management_engine.py @@ -96,11 +96,8 @@ def system_call(self, command, send_to_stdin=None): logging.debug("shell_cmd: %s" % " ".join(system_command)) if send_to_stdin: send_to_stdin = send_to_stdin.encode() - try: - p = subprocess.run(system_command, input=send_to_stdin, capture_output=True, timeout=30) - except subprocess.TimeoutExpired: - logging.warning(self._system_call_timeout_warn_msg(system_command)) - return [], ["TimeoutExpired".encode()], -1 + + p = subprocess.run(system_command, input=send_to_stdin, capture_output=True, timeout=30) def fix_output(o): """ @@ -236,8 +233,10 @@ def submit_helper(self, call, logpath, rqmt, name, task_name, start_id, end_id, sbatch_call += ["-a", "%i-%i:%i" % (start_id, end_id, step_size)] sbatch_call += ["--wrap=%s" % " ".join(call)] while True: - out, err, retval = self.system_call(sbatch_call) - if retval != 0: + try: + out, err, retval = self.system_call(sbatch_call) + except subprocess.TimeoutExpired: + logging.warning(self._system_call_timeout_warn_msg(sbatch_call)) time.sleep(gs.WAIT_PERIOD_SSH_TIMEOUT) continue break @@ -306,8 +305,10 @@ def queue_state(self): "arrayjobid,arraytaskid,state,name:1000", ] while True: - out, err, retval = self.system_call(system_command) - if retval != 0: + try: + out, err, retval = self.system_call(system_command) + except subprocess.TimeoutExpired: + logging.warning(self._system_call_timeout_warn_msg(system_command)) time.sleep(gs.WAIT_PERIOD_SSH_TIMEOUT) continue break diff --git a/sisyphus/son_of_grid_engine.py b/sisyphus/son_of_grid_engine.py index be20f5d..84387ae 100644 --- a/sisyphus/son_of_grid_engine.py +++ b/sisyphus/son_of_grid_engine.py @@ -97,11 +97,8 @@ def system_call(self, command, send_to_stdin=None): logging.debug("shell_cmd: %s" % " ".join(system_command)) if send_to_stdin: send_to_stdin = send_to_stdin.encode() - try: - p = subprocess.run(system_command, input=send_to_stdin, capture_output=True, timeout=30) - except subprocess.TimeoutExpired: - logging.warning(self._system_call_timeout_warn_msg(system_command)) - return [], ["TimeoutExpired".encode()], -1 + + p = subprocess.run(system_command, input=send_to_stdin, capture_output=True, timeout=30) def fix_output(o): """ @@ -252,8 +249,10 @@ def submit_helper(self, call, logpath, rqmt, name, task_name, start_id, end_id, qsub_call += ["-t", "%i-%i:%i" % (start_id, end_id, step_size)] command = " ".join(call) + "\n" while True: - out, err, retval = self.system_call(qsub_call, command) - if retval != 0: + try: + out, err, retval = self.system_call(qsub_call, command) + except subprocess.TimeoutExpired: + logging.warning(self._system_call_timeout_warn_msg(qsub_call)) time.sleep(gs.WAIT_PERIOD_SSH_TIMEOUT) continue break @@ -317,8 +316,10 @@ def queue_state(self): # get qstat output system_command = ["qstat", "-xml", "-u", getpass.getuser()] while True: - out, err, retval = self.system_call(system_command) - if retval != 0: + try: + out, err, retval = self.system_call(system_command) + except subprocess.TimeoutExpired: + logging.warning(self._system_call_timeout_warn_msg(system_command)) time.sleep(gs.WAIT_PERIOD_SSH_TIMEOUT) continue break