diff --git a/vllm/engine/ray_utils.py b/vllm/engine/ray_utils.py
index 742f3dc575190..27414f085b45a 100644
--- a/vllm/engine/ray_utils.py
+++ b/vllm/engine/ray_utils.py
@@ -33,8 +33,23 @@ def __getattr__(self, name):
             return getattr(self.worker, name)
 
         def execute_method(self, method, *args, **kwargs):
-            executor = getattr(self, method)
-            return executor(*args, **kwargs)
+            """Call the attribute named ``method`` with the given arguments.
+
+            Exceptions are logged with their traceback and then re-raised:
+            an error in a ray worker may cause a deadlock in distributed
+            execution, see https://github.com/vllm-project/vllm/issues/3455
+            """
+            try:
+                executor = getattr(self, method)
+                return executor(*args, **kwargs)
+            except Exception:
+                # Log the failure so the user can see and fix the root
+                # cause, then propagate the original exception unchanged.
+                logger.exception(
+                    "Error executing method %s. "
+                    "This might cause deadlock in distributed execution.",
+                    method)
+                raise
 
         def get_node_ip(self) -> str:
             return get_ip()