diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c34b2b49dc0fc..2eaf080f6374d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,240 +4,6 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - -## [1.7.0] - 2022-MM-DD - -### Added - -- Added a flag named `log_rank_zero_only` to `EarlyStopping` to disable logging to non-zero rank processes ([#13233](https://github.com/PyTorchLightning/pytorch-lightning/pull/13233)) - - -- Added support for reloading the last checkpoint saved by passing `ckpt_path="last"` ([#12816](https://github.com/PyTorchLightning/pytorch-lightning/pull/12816)) - - -- Added `LightningDataModule.load_from_checkpoint` to support loading datamodules directly from checkpoint ([#12550](https://github.com/PyTorchLightning/pytorch-lightning/pull/12550)) - - -- Added a friendly error message when attempting to call `Trainer.save_checkpoint()` without a model attached ([#12772](https://github.com/PyTorchLightning/pytorch-lightning/pull/12772)) - - -- Added a friendly error message when attempting to use `DeepSpeedStrategy` on unsupported accelerators ([#12699](https://github.com/PyTorchLightning/pytorch-lightning/pull/12699)) - - -- Enabled `torch.inference_mode` for evaluation and prediction ([#12715](https://github.com/PyTorchLightning/pytorch-lightning/pull/12715)) - - -- Added support for setting `val_check_interval` to a value higher than the amount of training batches when `check_val_every_n_epoch=None` ([#11993](https://github.com/PyTorchLightning/pytorch-lightning/pull/11993)) - - -- Include the `pytorch_lightning` version as a header in the CLI config files ([#12532](https://github.com/PyTorchLightning/pytorch-lightning/pull/12532)) - - -- Added support for `Callback` registration through entry points ([#12739](https://github.com/PyTorchLightning/pytorch-lightning/pull/12739)) - - -- Added support for `Trainer(deterministic="warn")` to warn instead of fail when a non-deterministic operation is encountered ([#12588](https://github.com/PyTorchLightning/pytorch-lightning/pull/12588)) - - -- Added profiling to the loops' dataloader `__next__` calls ([#12124](https://github.com/PyTorchLightning/pytorch-lightning/pull/12124)) - - -- Added `CollaborativeStrategy` ([#12842](https://github.com/PyTorchLightning/pytorch-lightning/pull/12842)) - - -- Include a version suffix for new "last" checkpoints of later runs in the same directory ([#12902](https://github.com/PyTorchLightning/pytorch-lightning/pull/12902)) - - -- Added missing `predict_dataset` argument in `LightningDataModule.from_datasets` to create predict dataloaders ([#12942](https://github.com/PyTorchLightning/pytorch-lightning/pull/12942)) - - -- Added class name prefix to metrics logged by `DeviceStatsMonitor` ([#12228](https://github.com/PyTorchLightning/pytorch-lightning/pull/12228)) - - -- Added profiling of `LightningDataModule` hooks ([#12971](https://github.com/PyTorchLightning/pytorch-lightning/pull/12971)) - - -- Added Native FSDP Strategy ([#12447](https://github.com/PyTorchLightning/pytorch-lightning/pull/12447)) - - -- Added breaking of lazy graph across training, validation, test and predict steps when training with habana accelerators to ensure better performance ([#12938](https://github.com/PyTorchLightning/pytorch-lightning/pull/12938)) - - -- Added CPU metric tracking to `DeviceStatsMonitor` ([#11795](https://github.com/PyTorchLightning/pytorch-lightning/pull/11795)) - - -- Added `teardown()` method to `Accelerator` ([#11935](https://github.com/PyTorchLightning/pytorch-lightning/pull/11935)) -- - - -- Added a `timeout` argument to `DDPStrategy`. ([#13244](https://github.com/PyTorchLightning/pytorch-lightning/pull/13244)) -- - - -### Changed - -- Enable validation during overfitting ([#12527](https://github.com/PyTorchLightning/pytorch-lightning/pull/12527)) - - -- Added dataclass support to `extract_batch_size` ([#12573](https://github.com/PyTorchLightning/pytorch-lightning/pull/12573)) - - -- Changed checkpoints save path in the case of one logger and user-provided weights_save_path from `weights_save_path/name/version/checkpoints` to `weights_save_path/checkpoints` ([#12372](https://github.com/PyTorchLightning/pytorch-lightning/pull/12372)) - - -- Changed checkpoints save path in the case of multiple loggers and user-provided weights_save_path from `weights_save_path/name1_name2/version1_version2/checkpoints` to `weights_save_path/checkpoints` ([#12372](https://github.com/PyTorchLightning/pytorch-lightning/pull/12372)) - - -- Marked `swa_lrs` argument in `StochasticWeightAveraging` callback as required ([#12556](https://github.com/PyTorchLightning/pytorch-lightning/pull/12556)) - - -- `LightningCLI`'s shorthand notation changed to use jsonargparse native feature ([#12614](https://github.com/PyTorchLightning/pytorch-lightning/pull/12614)) - - -- `LightningCLI` changed to use jsonargparse native support for list append ([#13129](https://github.com/PyTorchLightning/pytorch-lightning/pull/13129)) - - -- Changed `seed_everything_default` argument in the `LightningCLI` to type `Union[bool, int]`. If set to `True` a seed is automatically generated for the parser argument `--seed_everything`. ([#12822](https://github.com/PyTorchLightning/pytorch-lightning/pull/12822), [#13110](https://github.com/PyTorchLightning/pytorch-lightning/pull/13110)) - - -- Make positional arguments required for classes passed into the `add_argparse_args` function. ([#12504](https://github.com/PyTorchLightning/pytorch-lightning/pull/12504)) - - -- Raise an error if there are insufficient training batches when using a float value of `limit_train_batches` ([#12885](https://github.com/PyTorchLightning/pytorch-lightning/pull/12885)) - - -- The `WandbLogger` will now use the run name in the logs folder if it is provided, and otherwise the project name ([#12604](https://github.com/PyTorchLightning/pytorch-lightning/pull/12604)) - - - -### Deprecated - -- Deprecated `pytorch_lightning.loggers.base.LightningLoggerBase` in favor of `pytorch_lightning.loggers.logger.Logger`, and deprecated `pytorch_lightning.loggers.base` in favor of `pytorch_lightning.loggers.logger` ([#120148](https://github.com/PyTorchLightning/pytorch-lightning/pull/12014)) - - -- Deprecated `pytorch_lightning.callbacks.base.Callback` in favor of `pytorch_lightning.callbacks.callback.Callback` ([#13031](https://github.com/PyTorchLightning/pytorch-lightning/pull/13031)) - - -- Deprecated `num_processes`, `gpus`, `tpu_cores,` and `ipus` from the `Trainer` constructor in favor of using the `accelerator` and `devices` arguments ([#11040](https://github.com/PyTorchLightning/pytorch-lightning/pull/11040)) - - -- Deprecated setting `LightningCLI(seed_everything_default=None)` in favor of `False` ([#12804](https://github.com/PyTorchLightning/pytorch-lightning/issues/12804)). - - -- Deprecated `pytorch_lightning.core.lightning.LightningModule` in favor of `pytorch_lightning.core.module.LightningModule` ([#12740](https://github.com/PyTorchLightning/pytorch-lightning/pull/12740)) - - -- Deprecated `pytorch_lightning.loops.base.Loop` in favor of `pytorch_lightning.loops.loop.Loop` ([#13043](https://github.com/PyTorchLightning/pytorch-lightning/pull/13043)) - - -- Deprecated `Trainer.reset_train_val_dataloaders()` in favor of `Trainer.reset_{train,val}_dataloader` ([#12184](https://github.com/PyTorchLightning/pytorch-lightning/pull/12184)) - - -- Deprecated LightningCLI's registries in favor of importing the respective package ([#13221](https://github.com/PyTorchLightning/pytorch-lightning/pull/13221)) - - -### Removed - -- Removed the deprecated `Logger.close` method ([#13149](https://github.com/PyTorchLightning/pytorch-lightning/pull/13149)) - - -- Removed the deprecated `weights_summary` argument from the `Trainer` constructor ([#13070](https://github.com/PyTorchLightning/pytorch-lightning/pull/13070)) - - -- Removed the deprecated `flush_logs_every_n_steps` argument from the `Trainer` constructor ([#13074](https://github.com/PyTorchLightning/pytorch-lightning/pull/13074)) - - -- Removed the deprecated `process_position` argument from the `Trainer` constructor ([13071](https://github.com/PyTorchLightning/pytorch-lightning/pull/13071)) - - -- Removed the deprecated `checkpoint_callback` argument from the `Trainer` constructor ([#13027](https://github.com/PyTorchLightning/pytorch-lightning/pull/13027)) - - -- Removed the deprecated `on_{train,val,test,predict}_dataloader` hooks from the `LightningModule` and `LightningDataModule` ([#13033](https://github.com/PyTorchLightning/pytorch-lightning/pull/13033)) - - -- Removed the deprecated `TestTubeLogger` ([#12859](https://github.com/PyTorchLightning/pytorch-lightning/pull/12859)) - - -- Removed the deprecated `pytorch_lightning.core.memory.LayerSummary` and `pytorch_lightning.core.memory.ModelSummary` ([#12593](https://github.com/PyTorchLightning/pytorch-lightning/pull/12593)) - - -- Removed the deprecated `summarize` method from the `LightningModule` ([#12559](https://github.com/PyTorchLightning/pytorch-lightning/pull/12559)) - - -- Removed the deprecated `model_size` property from the `LightningModule` class ([#12641](https://github.com/PyTorchLightning/pytorch-lightning/pull/12641)) - - -- Removed the deprecated `stochastic_weight_avg` argument from the `Trainer` constructor ([#12535](https://github.com/PyTorchLightning/pytorch-lightning/pull/12535)) - - -- Removed the deprecated `progress_bar_refresh_rate` argument from the `Trainer` constructor ([#12514](https://github.com/PyTorchLightning/pytorch-lightning/pull/12514)) - - -- Removed the deprecated `prepare_data_per_node` argument from the `Trainer` constructor ([#12536](https://github.com/PyTorchLightning/pytorch-lightning/pull/12536)) - - -- Removed the deprecated `pytorch_lightning.core.memory.{get_gpu_memory_map,get_memory_profile}` ([#12659](https://github.com/PyTorchLightning/pytorch-lightning/pull/12659)) - - -- Removed the deprecated `terminate_on_nan` argument from the `Trainer` constructor ([#12553](https://github.com/PyTorchLightning/pytorch-lightning/pull/12553)) - - -- Removed the deprecated `XLAStatsMonitor` callback ([#12688](https://github.com/PyTorchLightning/pytorch-lightning/pull/12688)) - - -- Remove deprecated `pytorch_lightning.callbacks.progress.progress` ([#12658](https://github.com/PyTorchLightning/pytorch-lightning/pull/12658)) - - -- Removed the deprecated `dim` and `size` arguments from the `LightningDataModule` constructor([#12780](https://github.com/PyTorchLightning/pytorch-lightning/pull/12780)) - - -- Removed the deprecated `train_transforms` argument from the `LightningDataModule` constructor([#12662](https://github.com/PyTorchLightning/pytorch-lightning/pull/12662)) - - -- Removed the deprecated `log_gpu_memory` argument from the `Trainer` constructor ([#12657](https://github.com/PyTorchLightning/pytorch-lightning/pull/12657)) - - -- Removed the deprecated automatic logging of GPU stats by the logger connector ([#12657](https://github.com/PyTorchLightning/pytorch-lightning/pull/12657)) - - -- Removed deprecated `GPUStatsMonitor` callback ([#12554](https://github.com/PyTorchLightning/pytorch-lightning/pull/12554)) - - -- Removed support for passing strategy names or strategy instances to the accelerator Trainer argument ([#12696](https://github.com/PyTorchLightning/pytorch-lightning/pull/12696)) - - -- Removed support for passing strategy names or strategy instances to the plugins Trainer argument ([#12700](https://github.com/PyTorchLightning/pytorch-lightning/pull/12700)) - - -- Removed the deprecated `val_transforms` argument from the `LightningDataModule` constructor ([#12763](https://github.com/PyTorchLightning/pytorch-lightning/pull/12763)) - - -- Removed the deprecated `test_transforms` argument from the `LightningDataModule` constructor ([#12773](https://github.com/PyTorchLightning/pytorch-lightning/pull/12773)) - - -- Removed deprecated `dataloader_idx` argument from `on_train_batch_start/end` hooks `Callback` and `LightningModule` ([#12769](https://github.com/PyTorchLightning/pytorch-lightning/pull/12769), [#12977](https://github.com/PyTorchLightning/pytorch-lightning/pull/12977)) - - -- Removed deprecated `get_progress_bar_dict` property from `LightningModule` ([#12839](https://github.com/PyTorchLightning/pytorch-lightning/pull/12839)) - -- Removed sanity check for multi-optimizer support with habana backends ([#13217](https://github.com/PyTorchLightning/pytorch-lightning/pull/13217)) - - -- Removed the need to explicitly load habana module ([#13338](https://github.com/PyTorchLightning/pytorch-lightning/pull/13338)) - - -### Fixed - -- Fixed an issue with unsupported torch.inference_mode() on hpu backends by making it use no_grad ([#13014](https://github.com/PyTorchLightning/pytorch-lightning/pull/13014)) - - -- The model wrapper returned by `LightningLite.setup()` now properly supports pass-through when looking up attributes ([#12597](https://github.com/PyTorchLightning/pytorch-lightning/pull/12597)) - - -- Fixed issue where the CLI fails with certain torch objects ([#13153](https://github.com/PyTorchLightning/pytorch-lightning/pull/13153)) - - ## [1.6.5] - 2022-07-05 ### Fixed @@ -247,6 +13,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed Model Summary when using DeepSpeed Stage 3 ([#13427](https://github.com/PyTorchLightning/pytorch-lightning/pull/13427)) - Fixed `pytorch_lightning.utilities.distributed.gather_all_tensors` to handle tensors of different dimensions ([#12630](https://github.com/PyTorchLightning/pytorch-lightning/pull/12630)) - The loops now call `.set_epoch()` also on batch samplers if the dataloader has one wrapped in a distributed sampler ([#13396](https://github.com/PyTorchLightning/pytorch-lightning/pull/13396)) +- Fixed the input validation for the accelerator Trainer argument when passed as a string ([#13417](https://github.com/PyTorchLightning/pytorch-lightning/pull/13417)) ## [1.6.4] - 2022-06-01 diff --git a/_notebooks b/_notebooks index 8a36a41548f34c..290fb466de1fcc 160000 --- a/_notebooks +++ b/_notebooks @@ -1 +1 @@ -Subproject commit 8a36a41548f34c44ac455d515a72994487e85813 +Subproject commit 290fb466de1fcc2ac6025f74b56906592911e856 diff --git a/pytorch_lightning/__about__.py b/pytorch_lightning/__about__.py index fdaa499392c7eb..ec1d1701bebd29 100644 --- a/pytorch_lightning/__about__.py +++ b/pytorch_lightning/__about__.py @@ -1,7 +1,7 @@ import time _this_year = time.strftime("%Y") -__version__ = "1.6.4" +__version__ = "1.6.5" __author__ = "William Falcon et al." __author_email__ = "waf2107@columbia.edu" __license__ = "Apache-2.0" diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index d242236e8317e3..3f9f0163fbd575 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -290,17 +290,18 @@ def _check_config_and_set_final_flags( f" and you can only specify one strategy, but you have passed {plugin} as a plugin." ) - if accelerator is not None: - if accelerator in self._accelerator_types or accelerator == "auto" or isinstance(accelerator, Accelerator): - self._accelerator_flag = accelerator - elif accelerator in self._registered_strategies or isinstance(accelerator, Strategy): - rank_zero_deprecation( - f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated" - f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead." - ) - self._strategy_flag = accelerator - elif accelerator == "ddp_cpu" and not self._strategy_flag: - self._strategy_flag = accelerator + if ( + accelerator is not None + and accelerator not in self._accelerator_types + and accelerator != "auto" + and not isinstance(accelerator, Accelerator) + ): + raise ValueError( + f"You selected an invalid accelerator name: `accelerator={accelerator!r}`." + f" Available names are: {', '.join(self._accelerator_types)}." + ) + + self._accelerator_flag = accelerator if precision is not None: if str(precision) not in self._precision_types: @@ -504,12 +505,6 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: self.accelerator: Accelerator = self._accelerator_flag else: assert self._accelerator_flag is not None - self._accelerator_flag = self._accelerator_flag.lower() - if self._accelerator_flag not in AcceleratorRegistry: - raise MisconfigurationException( - "When passing string value for the `accelerator` argument of `Trainer`," - f" it can only be one of {self._accelerator_types}." - ) self.accelerator = AcceleratorRegistry.get(self._accelerator_flag) if not self.accelerator.is_available(): diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 48b6c67c6d06b5..9ab176fd36e5b3 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -56,229 +56,9 @@ def test_accelerator_choice_cpu(tmpdir): assert isinstance(trainer.strategy, SingleDeviceStrategy) -@pytest.mark.parametrize(("devices", "num_nodes"), ([(1, 1), (1, 2), (2, 1), (2, 2)])) -def test_accelerator_choice_ddp_cpu(tmpdir, devices: int, num_nodes: int): - trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", devices=devices, num_nodes=num_nodes) - assert isinstance(trainer.accelerator, CPUAccelerator) - no_spawn = devices == 1 and num_nodes > 1 - assert isinstance(trainer.strategy, DDPStrategy if no_spawn else DDPSpawnStrategy) - assert isinstance(trainer.strategy.cluster_environment, LightningEnvironment) - - -@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"}) -@mock.patch("torch.cuda.device_count", return_value=2) -@mock.patch("torch.cuda.is_available", return_value=True) -def test_accelerator_choice_ddp(cuda_available_mock, device_count_mock): - with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated"): - trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1) - assert isinstance(trainer.accelerator, GPUAccelerator) - assert isinstance(trainer.strategy, DDPStrategy) - assert isinstance(trainer.strategy.cluster_environment, LightningEnvironment) - - -@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"}) -@mock.patch("torch.cuda.device_count", return_value=2) -@mock.patch("torch.cuda.is_available", return_value=True) -def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock): - with pytest.deprecated_call(match=r"accelerator='ddp_spawn'\)` has been deprecated"): - trainer = Trainer(fast_dev_run=True, accelerator="ddp_spawn", gpus=1) - assert isinstance(trainer.accelerator, GPUAccelerator) - assert isinstance(trainer.strategy, DDPSpawnStrategy) - assert isinstance(trainer.strategy.cluster_environment, LightningEnvironment) - - -@mock.patch.dict( - os.environ, - { - "CUDA_VISIBLE_DEVICES": "0,1", - "SLURM_NTASKS": "2", - "SLURM_JOB_NAME": "SOME_NAME", - "SLURM_NODEID": "0", - "SLURM_PROCID": "1", - "SLURM_LOCALID": "1", - }, -) -@mock.patch("torch.cuda.set_device") -@mock.patch("torch.cuda.device_count", return_value=2) -@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -@mock.patch("torch.cuda.is_available", return_value=True) -def test_accelerator_choice_ddp_slurm(*_): - with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"): - trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2) - assert trainer._accelerator_connector._is_slurm_managing_tasks() - assert isinstance(trainer.accelerator, GPUAccelerator) - assert isinstance(trainer.strategy, DDPStrategy) - assert isinstance(trainer.strategy.cluster_environment, SLURMEnvironment) - assert trainer.strategy.cluster_environment.local_rank() == 1 - assert trainer.strategy.local_rank == 1 - - -@mock.patch.dict( - os.environ, - { - "CUDA_VISIBLE_DEVICES": "0,1", - "SLURM_NTASKS": "2", - "SLURM_JOB_NAME": "SOME_NAME", - "SLURM_NODEID": "0", - "SLURM_PROCID": "1", - "SLURM_LOCALID": "1", - }, -) -@mock.patch("torch.cuda.set_device") -@mock.patch("torch.cuda.device_count", return_value=2) -@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -@mock.patch("torch.cuda.is_available", return_value=True) -def test_accelerator_choice_ddp2_slurm(*_): - with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"): - trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2) - assert trainer._accelerator_connector._is_slurm_managing_tasks() - assert isinstance(trainer.accelerator, GPUAccelerator) - assert isinstance(trainer.strategy, DDP2Strategy) - assert isinstance(trainer.strategy.cluster_environment, SLURMEnvironment) - assert trainer.strategy.cluster_environment.local_rank() == 1 - assert trainer.strategy.local_rank == 1 - - -@mock.patch.dict( - os.environ, - { - "CUDA_VISIBLE_DEVICES": "0,1", - "WORLD_SIZE": "2", - "LOCAL_WORLD_SIZE": "2", - "RANK": "1", - "LOCAL_RANK": "1", - "GROUP_RANK": "0", - "TORCHELASTIC_RUN_ID": "1", # present for torch >= 1.9.1 - }, -) -@mock.patch("torch.cuda.set_device") -@mock.patch("torch.cuda.device_count", return_value=1) -@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -@mock.patch("torch.cuda.is_available", return_value=True) -def test_accelerator_choice_ddp_te(*_): - with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"): - trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2) - assert isinstance(trainer.accelerator, GPUAccelerator) - assert isinstance(trainer.strategy, DDPStrategy) - assert isinstance(trainer.strategy.cluster_environment, TorchElasticEnvironment) - assert trainer.strategy.cluster_environment.local_rank() == 1 - assert trainer.strategy.local_rank == 1 - - -@mock.patch.dict( - os.environ, - { - "CUDA_VISIBLE_DEVICES": "0,1", - "WORLD_SIZE": "2", - "LOCAL_WORLD_SIZE": "2", - "RANK": "1", - "LOCAL_RANK": "1", - "GROUP_RANK": "0", - "TORCHELASTIC_RUN_ID": "1", - }, -) -@mock.patch("torch.cuda.set_device") -@mock.patch("torch.cuda.device_count", return_value=1) -@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -@mock.patch("torch.cuda.is_available", return_value=True) -def test_accelerator_choice_ddp2_te(*_): - with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"): - trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2) - assert isinstance(trainer.accelerator, GPUAccelerator) - assert isinstance(trainer.strategy, DDP2Strategy) - assert isinstance(trainer.strategy.cluster_environment, TorchElasticEnvironment) - assert trainer.strategy.cluster_environment.local_rank() == 1 - assert trainer.strategy.local_rank == 1 - - -@mock.patch.dict( - os.environ, - { - "WORLD_SIZE": "2", - "LOCAL_WORLD_SIZE": "2", - "RANK": "1", - "LOCAL_RANK": "1", - "GROUP_RANK": "0", - "TORCHELASTIC_RUN_ID": "1", - }, -) -@mock.patch("torch.cuda.device_count", return_value=0) -@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -def test_accelerator_choice_ddp_cpu_te(*_): - trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", devices=2) - assert isinstance(trainer.accelerator, CPUAccelerator) - assert isinstance(trainer.strategy, DDPStrategy) - assert isinstance(trainer.strategy.cluster_environment, TorchElasticEnvironment) - assert trainer.strategy.cluster_environment.local_rank() == 1 - assert trainer.strategy.local_rank == 1 - - -@mock.patch.dict( - os.environ, - { - "CUDA_VISIBLE_DEVICES": "0", - "KUBERNETES_PORT": "tcp://127.0.0.1:443", - "MASTER_ADDR": "1.2.3.4", - "MASTER_PORT": "500", - "WORLD_SIZE": "20", - "RANK": "1", - }, -) -@mock.patch("torch.cuda.set_device") -@mock.patch("torch.cuda.device_count", return_value=1) -@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -@mock.patch("torch.cuda.is_available", return_value=True) -def test_accelerator_choice_ddp_kubeflow(*_): - with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"): - trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1) - assert isinstance(trainer.accelerator, GPUAccelerator) - assert isinstance(trainer.strategy, DDPStrategy) - assert isinstance(trainer.strategy.cluster_environment, KubeflowEnvironment) - assert trainer.strategy.cluster_environment.local_rank() == 0 - assert trainer.strategy.local_rank == 0 - - -@mock.patch.dict( - os.environ, - { - "KUBERNETES_PORT": "tcp://127.0.0.1:443", - "MASTER_ADDR": "1.2.3.4", - "MASTER_PORT": "500", - "WORLD_SIZE": "20", - "RANK": "1", - }, -) -@mock.patch("torch.cuda.device_count", return_value=0) -@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -def test_accelerator_choice_ddp_cpu_kubeflow(*_): - trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", devices=1) - assert isinstance(trainer.accelerator, CPUAccelerator) - assert isinstance(trainer.strategy, DDPStrategy) - assert isinstance(trainer.strategy.cluster_environment, KubeflowEnvironment) - assert trainer.strategy.cluster_environment.local_rank() == 0 - assert trainer.strategy.local_rank == 0 - - -@mock.patch.dict( - os.environ, - { - "SLURM_NTASKS": "2", - "SLURM_JOB_NAME": "SOME_NAME", - "SLURM_NODEID": "0", - "LOCAL_RANK": "0", - "SLURM_PROCID": "0", - "SLURM_LOCALID": "0", - }, -) -@mock.patch("torch.cuda.device_count", return_value=0) -@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -def test_accelerator_choice_ddp_cpu_slurm(*_): - trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", devices=2) - assert trainer._accelerator_connector._is_slurm_managing_tasks() - assert isinstance(trainer.accelerator, CPUAccelerator) - assert isinstance(trainer.strategy, DDPStrategy) - assert isinstance(trainer.strategy.cluster_environment, SLURMEnvironment) - assert trainer.strategy.local_rank == 0 +def test_accelerator_invalid_choice(): + with pytest.raises(ValueError, match="You selected an invalid accelerator name: `accelerator='invalid'`"): + Trainer(accelerator="invalid") @RunIf(skip_windows=True, standalone=True) @@ -551,13 +331,6 @@ def test_accelerator_auto_with_devices_gpu(): assert trainer.num_devices == 1 -def test_validate_accelerator_and_devices(): - - trainer = Trainer(accelerator="ddp_cpu", devices=2) - assert isinstance(trainer.accelerator, CPUAccelerator) - assert trainer.num_devices == 2 - - def test_set_devices_if_none_cpu(): trainer = Trainer(accelerator="cpu", devices=3)