Skip to content

Commit

Permalink
Kill process group instead of iterator of pids in shutdown hook (intel-analytics#4494)
Browse files Browse the repository at this point in the history

* kill process group instead of process iter

* change name

* change name

* update doc

* fix style

* change to string
  • Loading branch information
shanyu-sys committed Aug 18, 2021
1 parent fc85374 commit cbe55eb
Showing 1 changed file with 6 additions and 7 deletions.
13 changes: 6 additions & 7 deletions python/orca/src/bigdl/orca/ray/raycontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,21 @@

class JVMGuard:
"""
The registered pids would be put into the killing list of Spark Executor.
The process group id would be registered and killed in the shutdown hook of Spark Executor.
"""
@staticmethod
def register_pids(pids):
def register_pgid(pgid):
import traceback
try:
from zoo.common.utils import callZooFunc
import zoo
callZooFunc("float",
"jvmGuardRegisterPids",
pids)
"jvmGuardRegisterPgid",
pgid)
except Exception as err:
print(traceback.format_exc())
print("Cannot successfully register pid into JVMGuard")
for pid in pids:
os.kill(pid, signal.SIGKILL)
os.killpg(pgid, signal.SIGKILL)
raise err


Expand Down Expand Up @@ -205,7 +204,7 @@ def _start_ray_node(self, command, tag):
modified_env = self._prepare_env()
print("Starting {} by running: {}".format(tag, command))
process_info = session_execute(command=command, env=modified_env, tag=tag)
JVMGuard.register_pids(process_info.pids)
JVMGuard.register_pgid(process_info.pgid)
import ray._private.services as rservices
process_info.node_ip = rservices.get_node_ip_address()
return process_info
Expand Down

0 comments on commit cbe55eb

Please sign in to comment.