diff --git a/CHANGELOG.md b/CHANGELOG.md index d88a299704525b..09d21d85f57cdc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed -- +- Raise exception in `init_dist_connection()` when torch distributed is not available ([#10418](https://github.com/PyTorchLightning/pytorch-lightning/issues/10418)) - diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py index ef0f0b5ef3e97d..b99f5213d02d8e 100644 --- a/pytorch_lightning/utilities/distributed.py +++ b/pytorch_lightning/utilities/distributed.py @@ -381,7 +381,9 @@ def init_dist_connection( world_size = world_size if world_size is not None else cluster_environment.world_size() os.environ["MASTER_ADDR"] = cluster_environment.main_address os.environ["MASTER_PORT"] = str(cluster_environment.main_port) - if torch.distributed.is_available() and not torch.distributed.is_initialized(): + if not torch.distributed.is_available(): + raise RuntimeError("torch.distributed is not available. Cannot initialize distributed process group") + if not torch.distributed.is_initialized(): log.info(f"initializing distributed: GLOBAL_RANK: {global_rank}, MEMBER: {global_rank + 1}/{world_size}") torch.distributed.init_process_group( torch_distributed_backend, rank=global_rank, world_size=world_size, **kwargs