From b354a539b96a23f40e2b2bdc732eb64ea6b64676 Mon Sep 17 00:00:00 2001
From: Yu Shan
Date: Wed, 18 Aug 2021 10:28:31 +0800
Subject: [PATCH] Kill process group instead of iterator of pids in shutdown hook (#4494)

* kill process group instead of process iter
* change name
* change name
* update doc
* fix style
* change to string
---
NOTE(review): line breaks, indentation and blank context lines in this patch
were restored from a whitespace-collapsed copy; the blank context lines in the
first hunk are inferred from the hunk line counts. Verify against the upstream
commit before applying.

 python/orca/src/bigdl/orca/ray/raycontext.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/python/orca/src/bigdl/orca/ray/raycontext.py b/python/orca/src/bigdl/orca/ray/raycontext.py
index d949ef86ac2..b6f339bafba 100755
--- a/python/orca/src/bigdl/orca/ray/raycontext.py
+++ b/python/orca/src/bigdl/orca/ray/raycontext.py
@@ -33,22 +33,21 @@
 
 class JVMGuard:
     """
-    The registered pids would be put into the killing list of Spark Executor.
+    The process group id would be registered and killed in the shutdown hook of Spark Executor.
     """
     @staticmethod
-    def register_pids(pids):
+    def register_pgid(pgid):
         import traceback
         try:
             from zoo.common.utils import callZooFunc
             import zoo
             callZooFunc("float",
-                        "jvmGuardRegisterPids",
-                        pids)
+                        "jvmGuardRegisterPgid",
+                        pgid)
         except Exception as err:
             print(traceback.format_exc())
             print("Cannot successfully register pid into JVMGuard")
-            for pid in pids:
-                os.kill(pid, signal.SIGKILL)
+            os.killpg(pgid, signal.SIGKILL)
             raise err
 
 
@@ -205,7 +204,7 @@ def _start_ray_node(self, command, tag):
         modified_env = self._prepare_env()
         print("Starting {} by running: {}".format(tag, command))
         process_info = session_execute(command=command, env=modified_env, tag=tag)
-        JVMGuard.register_pids(process_info.pids)
+        JVMGuard.register_pgid(process_info.pgid)
         import ray._private.services as rservices
         process_info.node_ip = rservices.get_node_ip_address()
         return process_info