From d3462158d58b60edddcc6c28c5281b0499ccf9a0 Mon Sep 17 00:00:00 2001 From: Myungjin Lee Date: Fri, 24 May 2024 16:12:26 -0700 Subject: [PATCH] temp fix: disable destroy_process_group (#5) When a broken world (process group) is detected, attempting to destroy the process group causes the program to hang. We temporarily disable this call to prevent this deadlock situation. We will revisit this later. --- multiworld/world_manager.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/multiworld/world_manager.py b/multiworld/world_manager.py index 4b36b42..acdb44e 100644 --- a/multiworld/world_manager.py +++ b/multiworld/world_manager.py @@ -173,7 +173,10 @@ def remove_world(self, world_name): logger.debug(f"destory process group for {world_name}") self.set_world(world_name) del self._worlds[world_name] - dist.destroy_process_group() + # FIXME: calling destroy_process_group() here causes program hang. + # we need to find out a right timing/way to call this function. + # calling this function is temporarily disabled. + # dist.destroy_process_group() logger.debug(f"done removing world {world_name}") def set_world(self, world_name):