From 477baaaf47d62a01b96fd712616f611f886d8513 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Tue, 8 Nov 2022 15:50:13 +0000
Subject: [PATCH 01/22] Fix issues with PL 1.8

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 nemo/collections/nlp/parts/nlp_overrides.py |  4 ++--
 nemo/collections/tts/models/fastpitch.py    | 11 +++++------
 nemo/collections/tts/models/radtts.py       | 11 +++++------
 nemo/collections/tts/models/tacotron2.py    | 11 +++++------
 nemo/collections/tts/models/waveglow.py     | 11 +++++------
 nemo/utils/exp_manager.py                   | 22 ---------------------
 requirements/requirements_lightning.txt     |  2 +-
 tests/core/test_optimizers_schedulers.py    |  3 ++-
 tests/core_ptl/check_for_ranks.py           |  2 +-
 tests/core_ptl/test_ptl_stateless_timer.py  |  1 -
 10 files changed, 26 insertions(+), 52 deletions(-)

diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py
index 2e52be81ce34..c31be8a9109e 100644
--- a/nemo/collections/nlp/parts/nlp_overrides.py
+++ b/nemo/collections/nlp/parts/nlp_overrides.py
@@ -22,16 +22,16 @@
 
 import pytorch_lightning as pl
 import torch
+from lightning_lite.plugins import ClusterEnvironment
+from lightning_lite.utilities.types import _PATH
 from omegaconf import OmegaConf
 from pytorch_lightning.overrides import LightningDistributedModule
-from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
 from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin
 from pytorch_lightning.strategies.ddp import DDPStrategy
 from pytorch_lightning.trainer.trainer import Trainer
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.fetching import DataFetcher
-from pytorch_lightning.utilities.types import _PATH
 from torch.distributed.algorithms.ddp_comm_hooks.debugging_hooks import noop_hook
 from torch.nn.parallel import DistributedDataParallel
 
diff --git a/nemo/collections/tts/models/fastpitch.py b/nemo/collections/tts/models/fastpitch.py
index 880004a47d1d..a173dda87a4e 100644
--- a/nemo/collections/tts/models/fastpitch.py
+++ b/nemo/collections/tts/models/fastpitch.py
@@ -19,7 +19,7 @@
 from hydra.utils import instantiate
 from omegaconf import DictConfig, OmegaConf, open_dict
 from pytorch_lightning import Trainer
-from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger
+from pytorch_lightning.loggers import TensorBoardLogger
 
 from nemo.collections.common.parts.preprocessing import parsers
 from nemo.collections.tts.helpers.helpers import plot_alignment_to_numpy, plot_spectrogram_to_numpy, process_batch
@@ -228,11 +228,10 @@ def tb_logger(self):
             if self.logger is None and self.logger.experiment is None:
                 return None
             tb_logger = self.logger.experiment
-            if isinstance(self.logger, LoggerCollection):
-                for logger in self.logger:
-                    if isinstance(logger, TensorBoardLogger):
-                        tb_logger = logger.experiment
-                        break
+            for logger in self.trainer.loggers:
+                if isinstance(logger, TensorBoardLogger):
+                    tb_logger = logger.experiment
+                    break
             self._tb_logger = tb_logger
         return self._tb_logger
 
diff --git a/nemo/collections/tts/models/radtts.py b/nemo/collections/tts/models/radtts.py
index 47251b4a3f61..c94467491e19 100644
--- a/nemo/collections/tts/models/radtts.py
+++ b/nemo/collections/tts/models/radtts.py
@@ -21,7 +21,7 @@
 from hydra.utils import instantiate
 from omegaconf import DictConfig, OmegaConf
 from pytorch_lightning import Trainer
-from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger
+from pytorch_lightning.loggers import TensorBoardLogger
 
 from nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers import BaseTokenizer
 from nemo.collections.tts.helpers.helpers import plot_alignment_to_numpy
@@ -389,11 +389,10 @@ def tb_logger(self):
             if self.logger is None and self.logger.experiment is None:
                 return None
             tb_logger = self.logger.experiment
-            if isinstance(self.logger, LoggerCollection):
-                for logger in self.logger:
-                    if isinstance(logger, TensorBoardLogger):
-                        tb_logger = logger.experiment
-                        break
+            for logger in self.trainer.loggers:
+                if isinstance(logger, TensorBoardLogger):
+                    tb_logger = logger.experiment
+                    break
             self._tb_logger = tb_logger
         return self._tb_logger
 
diff --git a/nemo/collections/tts/models/tacotron2.py b/nemo/collections/tts/models/tacotron2.py
index d5e7747d4cbc..bbcc7d48af79 100644
--- a/nemo/collections/tts/models/tacotron2.py
+++ b/nemo/collections/tts/models/tacotron2.py
@@ -20,7 +20,7 @@
 from hydra.utils import instantiate
 from omegaconf import MISSING, DictConfig, OmegaConf, open_dict
 from omegaconf.errors import ConfigAttributeError
-from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger, WandbLogger
+from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
 from torch import nn
 
 from nemo.collections.common.parts.preprocessing import parsers
@@ -284,11 +284,10 @@ def validation_step(self, batch, batch_idx):
     def validation_epoch_end(self, outputs):
         if self.logger is not None and self.logger.experiment is not None:
             logger = self.logger.experiment
-            if isinstance(self.logger, LoggerCollection):
-                for logger in self.logger:
-                    if isinstance(logger, TensorBoardLogger):
-                        logger = logger.experiment
-                        break
+            for logger in self.trainer.loggers:
+                if isinstance(logger, TensorBoardLogger):
+                    logger = logger.experiment
+                    break
             if isinstance(logger, TensorBoardLogger):
                 tacotron2_log_to_tb_func(
                     logger, outputs[0].values(), self.global_step, tag="val", log_images=True, add_audio=False,
diff --git a/nemo/collections/tts/models/waveglow.py b/nemo/collections/tts/models/waveglow.py
index a1a522a44c0c..d54b77b61721 100644
--- a/nemo/collections/tts/models/waveglow.py
+++ b/nemo/collections/tts/models/waveglow.py
@@ -16,7 +16,7 @@
 import torch
 from hydra.utils import instantiate
 from omegaconf import DictConfig, open_dict
-from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger
+from pytorch_lightning.loggers import TensorBoardLogger
 
 from nemo.collections.tts.helpers.helpers import OperationMode, waveglow_log_to_tb_func
 from nemo.collections.tts.losses.waveglowloss import WaveGlowLoss
@@ -124,11 +124,10 @@ def validation_step(self, batch, batch_idx):
     def validation_epoch_end(self, outputs):
         if self.logger is not None and self.logger.experiment is not None:
             tb_logger = self.logger.experiment
-            if isinstance(self.logger, LoggerCollection):
-                for logger in self.logger:
-                    if isinstance(logger, TensorBoardLogger):
-                        tb_logger = logger.experiment
-                        break
+            for logger in self.trainer.loggers:
+                if isinstance(logger, TensorBoardLogger):
+                    tb_logger = logger.experiment
+                    break
             waveglow_log_to_tb_func(
                 tb_logger,
                 outputs[0].values(),
diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py
index 4e15943b5e2e..18a3b1f6ae94 100644
--- a/nemo/utils/exp_manager.py
+++ b/nemo/utils/exp_manager.py
@@ -32,7 +32,6 @@
 from omegaconf import DictConfig, OmegaConf, open_dict
 from pytorch_lightning.callbacks import Callback, ModelCheckpoint
 from pytorch_lightning.callbacks.timer import Interval, Timer
-from pytorch_lightning.loggers import LoggerCollection as _LoggerCollection
 from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
 from pytorch_lightning.loops import TrainingEpochLoop
 from pytorch_lightning.strategies.ddp import DDPStrategy
@@ -658,24 +657,6 @@ def get_git_diff():
         return "{}\n".format(err.output.decode("utf-8"))
 
 
-class LoggerList(_LoggerCollection):
-    """ A thin wrapper on Lightning's LoggerCollection such that name and version are better aligned with exp_manager
-    """
-
-    def __init__(self, _logger_iterable, nemo_name=None, nemo_version=""):
-        super().__init__(_logger_iterable)
-        self._nemo_name = nemo_name
-        self._nemo_version = nemo_version
-
-    @property
-    def name(self) -> str:
-        return self._nemo_name
-
-    @property
-    def version(self) -> str:
-        return self._nemo_version
-
-
 def configure_loggers(
     trainer: 'pytorch_lightning.Trainer',
     exp_dir: [Path, str],
@@ -718,9 +699,6 @@ def configure_loggers(
         logger_list.append(wandb_logger)
         logging.info("WandBLogger has been set up")
 
-    logger_list = (
-        LoggerList(logger_list, nemo_name=name, nemo_version=version) if len(logger_list) > 1 else logger_list[0]
-    )
     trainer._logger_connector.configure_logger(logger_list)
 
 
diff --git a/requirements/requirements_lightning.txt b/requirements/requirements_lightning.txt
index 8610e3f992f0..b4da4c37a4dc 100644
--- a/requirements/requirements_lightning.txt
+++ b/requirements/requirements_lightning.txt
@@ -1,6 +1,6 @@
 hydra-core>=1.2.0,<1.3
 omegaconf>=2.2,<2.3
-pytorch-lightning>=1.7.0,<1.8
+pytorch-lightning>=1.8.0
 pyyaml<6  # Pinned until omegaconf works with pyyaml>=6
 torchmetrics>=0.4.1rc0
 transformers>=4.0.1,<=4.21.2
diff --git a/tests/core/test_optimizers_schedulers.py b/tests/core/test_optimizers_schedulers.py
index 1e3bf2896c99..6c822cf6add0 100644
--- a/tests/core/test_optimizers_schedulers.py
+++ b/tests/core/test_optimizers_schedulers.py
@@ -20,6 +20,7 @@
 import pytorch_lightning as pl
 import torch
 import torch.optim
+from pytorch_lightning.utilities import rank_zero_only
 
 from nemo.core import config, optim
 from nemo.core.optim.lr_scheduler import AVAILABLE_SCHEDULERS
@@ -85,7 +86,7 @@ def configure_optimizers(self):
 
 
 class Callback(pl.callbacks.Callback):
-    @pl.utilities.distributed.rank_zero_only
+    @rank_zero_only
     def on_train_end(self, trainer, module):
         count = module.my_opt.param_groups[0]['count']
         if trainer.global_step != count or trainer.global_step != module.max_steps:
diff --git a/tests/core_ptl/check_for_ranks.py b/tests/core_ptl/check_for_ranks.py
index 4ae967593bc1..d8f785957131 100644
--- a/tests/core_ptl/check_for_ranks.py
+++ b/tests/core_ptl/check_for_ranks.py
@@ -18,7 +18,7 @@
 import torch
 from omegaconf import OmegaConf
 from pytorch_lightning import Trainer
-from pytorch_lightning.utilities.distributed import rank_zero_only
+from pytorch_lightning.utilities import rank_zero_only
 
 from nemo.core import ModelPT
 from nemo.utils import logging
diff --git a/tests/core_ptl/test_ptl_stateless_timer.py b/tests/core_ptl/test_ptl_stateless_timer.py
index f63f56bdf446..c20cac4fecf0 100644
--- a/tests/core_ptl/test_ptl_stateless_timer.py
+++ b/tests/core_ptl/test_ptl_stateless_timer.py
@@ -19,7 +19,6 @@
 import torch
 from omegaconf import OmegaConf
 from pytorch_lightning import Trainer
-from pytorch_lightning.utilities.distributed import rank_zero_only
 
 from nemo.core import ModelPT
 from nemo.utils import logging

From c6eb78c04d0815a87605dd6914ee86ef91acd990 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Tue, 8 Nov 2022 16:19:09 +0000
Subject: [PATCH 02/22] Wrap export in PL's _jit_is_scripting context

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 nemo/core/classes/exportable.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nemo/core/classes/exportable.py b/nemo/core/classes/exportable.py
index 5a9ab55a4ee7..f31a92f2a411 100644
--- a/nemo/core/classes/exportable.py
+++ b/nemo/core/classes/exportable.py
@@ -16,6 +16,7 @@
 from typing import List, Union
 
 import torch
+from pytorch_lightning.core.module import _jit_is_scripting
 from torch.onnx import TrainingMode
 
 from nemo.core.classes import typecheck
@@ -128,7 +129,7 @@ def _export(
             # Set module mode
             with torch.onnx.select_model_mode_for_export(
                 self, training
-            ), torch.inference_mode(), torch.jit.optimized_execution(True):
+            ), torch.inference_mode(), torch.jit.optimized_execution(True), _jit_is_scripting():
 
                 if input_example is None:
                     input_example = self.input_module.input_example()

From 4cc63013ea5a2606898d7057adb1140d503eb047 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Tue, 8 Nov 2022 16:50:02 +0000
Subject: [PATCH 03/22] Add missing inference_mode arg to TrainerConfig

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 nemo/core/config/pytorch_lightning.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nemo/core/config/pytorch_lightning.py b/nemo/core/config/pytorch_lightning.py
index 46a294bd1cca..70f3d6cad0a9 100644
--- a/nemo/core/config/pytorch_lightning.py
+++ b/nemo/core/config/pytorch_lightning.py
@@ -84,6 +84,7 @@ class TrainerConfig:
     strategy: Any = None
     enable_checkpointing: bool = False
     enable_model_summary: bool = True
+    inference_mode: bool = True
 
 
 # Register the trainer config.

From 25e0f77c4a379ce904d18553ba7b4fb1b3d6f6a1 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Tue, 8 Nov 2022 16:57:09 +0000
Subject: [PATCH 04/22] Clean up PTL_DEPRECATED list in test_ptl_config

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 tests/core/test_config_utils.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/core/test_config_utils.py b/tests/core/test_config_utils.py
index a1922a3c6f86..4eccd90afada 100644
--- a/tests/core/test_config_utils.py
+++ b/tests/core/test_config_utils.py
@@ -116,11 +116,8 @@ class DummyDataClass:
     @pytest.mark.unit
     def test_ptl_config(self):
         PTL_DEPRECATED = [
-            'distributed_backend',
-            'automatic_optimization',
             'gpus',
             'num_processes',
-            'weights_save_path',
         ]
 
         result = config_utils.assert_dataclass_signature_match(ptl.Trainer, TrainerConfig, ignore_args=PTL_DEPRECATED)

From 60db72b02f7a0b592ab0a7c53b8d8a799c7ab0da Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 9 Nov 2022 10:44:02 +0000
Subject: [PATCH 05/22] Use trainer.lr_scheduler_configs in scheduler test

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 tests/core/test_optimizers_schedulers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/core/test_optimizers_schedulers.py b/tests/core/test_optimizers_schedulers.py
index 6c822cf6add0..968bbea11d7a 100644
--- a/tests/core/test_optimizers_schedulers.py
+++ b/tests/core/test_optimizers_schedulers.py
@@ -111,13 +111,13 @@ class SchedulerNoOpCallback(Callback):
     def on_train_batch_end(self, trainer: pl.Trainer, pl_module, outputs, batch, batch_idx):
         # pl_module.max_steps is "original" max steps without trainer extra steps.
         if (trainer.global_step + 1) % 3 == 0 and (trainer.global_step + 1) < pl_module.max_steps:
-            schedulers = trainer.lr_schedulers
+            schedulers = trainer.lr_scheduler_configs
 
             for scheduler in schedulers:
                 # Decrement the counter by 2, then perform a scheduler.step() to perform a no-up
                 # as well as update the optimizer lr in all param groups
-                scheduler['scheduler'].last_epoch -= 2
-                scheduler['scheduler'].step()
+                scheduler.scheduler.last_epoch -= 2
+                scheduler.scheduler.step()
 
             # Increase the max step count by 1
             trainer.fit_loop.max_steps = trainer.fit_loop.max_steps + 1

From 6ef98e776500d57e22f8fa5ea86d5eeed8e6a340 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 9 Nov 2022 12:03:42 +0000
Subject: [PATCH 06/22] Try to fix hanging EMA test

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 Jenkinsfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index c7d16ec5560e..3ad689d46eff 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -225,6 +225,7 @@ pipeline {
         stage('Speech to Text EMA') {
           steps {
             sh 'python examples/asr/asr_ctc/speech_to_text_ctc.py \
+            --config-path="../conf/" --config-name="config"
             model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
             model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
             trainer.devices=2 \

From e25704ca8fc581a71dac07d5a1bb12b930d23aea Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 9 Nov 2022 14:54:16 +0000
Subject: [PATCH 07/22] Add missing line-continuation backslash in Jenkinsfile

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 3ad689d46eff..add629fcb288 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -225,7 +225,7 @@ pipeline {
         stage('Speech to Text EMA') {
           steps {
             sh 'python examples/asr/asr_ctc/speech_to_text_ctc.py \
-            --config-path="../conf/" --config-name="config"
+            --config-path="../conf/" --config-name="config" \
             model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
             model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
             trainer.devices=2 \

From 4da05ebfe32fd5a9163ed5735534e8a7dad0ef24 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 9 Nov 2022 15:33:37 +0000
Subject: [PATCH 08/22] Set trainer.strategy=ddp in Speech to Text EMA stage

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 Jenkinsfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index add629fcb288..54584022d287 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -229,6 +229,7 @@ pipeline {
             model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
             model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
             trainer.devices=2 \
+            trainer.strategy="ddp" \
             trainer.accelerator="gpu" \
             +trainer.fast_dev_run=True \
             +exp_manager.ema.enable=True \

From 4e6913cc3acfe4bf60895cc4cd1d5c00756ac2f1 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 9 Nov 2022 16:38:33 +0000
Subject: [PATCH 09/22] See if setting the chdir fixes the hanging DDP test

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 nemo/core/config/hydra_runner.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nemo/core/config/hydra_runner.py b/nemo/core/config/hydra_runner.py
index 41d4557d6f36..de65e6b968df 100644
--- a/nemo/core/config/hydra_runner.py
+++ b/nemo/core/config/hydra_runner.py
@@ -67,6 +67,9 @@ def wrapper(cfg_passthrough: Optional[DictConfig] = None) -> Any:
                 # Set run.dir ONLY for ExpManager "compatibility" - to be removed.
                 overrides.append("hydra.run.dir=.")
 
+                # Set working directory to the job's output directory
+                overrides.append("hydra.job.chdir=True")
+
                 # Check if user set the schema.
                 if schema is not None:
                     # Create config store.

From 07d429210aa100b0bec626363d99aa22185e071c Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Thu, 10 Nov 2022 11:49:38 +0000
Subject: [PATCH 10/22] See if removing the subdir setter fixes the issue

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 nemo/core/config/hydra_runner.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/nemo/core/config/hydra_runner.py b/nemo/core/config/hydra_runner.py
index de65e6b968df..16376507a774 100644
--- a/nemo/core/config/hydra_runner.py
+++ b/nemo/core/config/hydra_runner.py
@@ -57,9 +57,6 @@ def wrapper(cfg_passthrough: Optional[DictConfig] = None) -> Any:
                 # Get overriding args in dot string format
                 overrides = parsed_args.overrides  # type: list
 
-                # Disable the creation of .hydra subdir
-                # https://hydra.cc/docs/tutorials/basic/running_your_app/working_directory
-                overrides.append("hydra.output_subdir=null")
                 # Hydra logging outputs only to stdout (no log file).
                 # https://hydra.cc/docs/configure_hydra/logging
                 overrides.append("hydra/job_logging=stdout")
@@ -67,9 +64,6 @@ def wrapper(cfg_passthrough: Optional[DictConfig] = None) -> Any:
                 # Set run.dir ONLY for ExpManager "compatibility" - to be removed.
                 overrides.append("hydra.run.dir=.")
 
-                # Set working directory to the job's output directory
-                overrides.append("hydra.job.chdir=True")
-
                 # Check if user set the schema.
                 if schema is not None:
                     # Create config store.

From 15087e515b7b148fa56c6d0871d3f6e7fb86212b Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Thu, 10 Nov 2022 12:09:13 +0000
Subject: [PATCH 11/22] Remove .hydra dir checks from hydra runner tests

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 tests/hydra/test_hydra_runner.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/hydra/test_hydra_runner.py b/tests/hydra/test_hydra_runner.py
index 1da0a914cfaf..803d6aa6af8a 100644
--- a/tests/hydra/test_hydra_runner.py
+++ b/tests/hydra/test_hydra_runner.py
@@ -41,8 +41,6 @@ def test_config1(self):
         # Run the call as subprocess.
         subprocess.check_call(call, shell=True, stdout=sys.stdout, stderr=sys.stdout)
 
-        # Make sure that .hydra dir is not present.
-        assert not path.exists(f".hydra")
         # Make sure that default hydra log file is not present.
         assert not path.exists(f"my_app.log")
 
@@ -67,8 +65,6 @@ def test_config2(self):
         # Run the call as subprocess.
         subprocess.check_call(call, shell=True, stdout=sys.stdout, stderr=sys.stdout)
 
-        # Make sure that .hydra dir is not present.
-        assert not path.exists(f".hydra")
         # Make sure that default hydra log file is not present.
         assert not path.exists(f"my_app.log")
 

From 4a01cec27a7ec1063a43a0ce1ca0a9fc50aba5c0 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Thu, 10 Nov 2022 12:55:50 +0000
Subject: [PATCH 12/22] Try [0,1] for devices

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 2239b5cb698c..b677855d003d 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -228,7 +228,7 @@ pipeline {
             --config-path="../conf/" --config-name="config" \
             model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
             model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
-            trainer.devices=2 \
+            trainer.devices=[0,1] \
             trainer.strategy="ddp" \
             trainer.accelerator="gpu" \
             +trainer.fast_dev_run=True \

From b1add4039ec559bb1f9264cf7b7eabe074d9162a Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Tue, 15 Nov 2022 13:08:19 +0000
Subject: [PATCH 13/22] Restore hydra.output_subdir=null override

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 nemo/core/config/hydra_runner.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/nemo/core/config/hydra_runner.py b/nemo/core/config/hydra_runner.py
index daa11643098d..4f61b9275e15 100644
--- a/nemo/core/config/hydra_runner.py
+++ b/nemo/core/config/hydra_runner.py
@@ -58,6 +58,10 @@ def wrapper(cfg_passthrough: Optional[DictConfig] = None) -> Any:
                 # Get overriding args in dot string format
                 overrides = parsed_args.overrides  # type: list
 
+                # Disable the creation of .hydra subdir
+                # https://hydra.cc/docs/tutorials/basic/running_your_app/working_directory
+                overrides.append("hydra.output_subdir=null")
+
                 # Hydra logging outputs only to stdout (no log file).
                 # https://hydra.cc/docs/configure_hydra/logging
                 overrides.append("hydra/job_logging=stdout")

From 8c7d7e00c7b66abe09859d811b0870c680823e5f Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Mon, 21 Nov 2022 16:56:46 +0000
Subject: [PATCH 14/22] Remove extra blank line in hydra_runner

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 nemo/core/config/hydra_runner.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/nemo/core/config/hydra_runner.py b/nemo/core/config/hydra_runner.py
index 4f61b9275e15..6c6c9b47e0fd 100644
--- a/nemo/core/config/hydra_runner.py
+++ b/nemo/core/config/hydra_runner.py
@@ -61,7 +61,6 @@ def wrapper(cfg_passthrough: Optional[DictConfig] = None) -> Any:
                 # Disable the creation of .hydra subdir
                 # https://hydra.cc/docs/tutorials/basic/running_your_app/working_directory
                 overrides.append("hydra.output_subdir=null")
-
                 # Hydra logging outputs only to stdout (no log file).
                 # https://hydra.cc/docs/configure_hydra/logging
                 overrides.append("hydra/job_logging=stdout")

From 9e8ab6b00d1c3c20f37d0e05ae63b3d7b8082d29 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 23 Nov 2022 11:10:58 +0000
Subject: [PATCH 15/22] Update requirements

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 requirements/requirements_lightning.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/requirements_lightning.txt b/requirements/requirements_lightning.txt
index b4da4c37a4dc..dcde2c6bd810 100644
--- a/requirements/requirements_lightning.txt
+++ b/requirements/requirements_lightning.txt
@@ -1,6 +1,6 @@
 hydra-core>=1.2.0,<1.3
 omegaconf>=2.2,<2.3
-pytorch-lightning>=1.8.0
+pytorch-lightning>=1.8.3
 pyyaml<6  # Pinned until omegaconf works with pyyaml>=6
 torchmetrics>=0.4.1rc0
 transformers>=4.0.1,<=4.21.2

From b33501d0f3f594e34c11482fc0af76b6e23f80e7 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 23 Nov 2022 13:47:10 +0000
Subject: [PATCH 16/22] Import TorchElasticEnvironment from lightning_lite

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 examples/nlp/language_modeling/megatron_bart_pretraining.py     | 2 +-
 examples/nlp/language_modeling/megatron_bert_pretraining.py     | 2 +-
 examples/nlp/language_modeling/megatron_ckpt_to_nemo.py         | 2 +-
 examples/nlp/language_modeling/megatron_gpt_pretraining.py      | 2 +-
 examples/nlp/language_modeling/megatron_gpt_prompt_learning.py  | 2 +-
 examples/nlp/language_modeling/megatron_retro_cal_shape.py      | 2 +-
 .../nlp/language_modeling/megatron_retro_mutransfer_pretrain.py | 2 +-
 examples/nlp/language_modeling/megatron_retro_pretraining.py    | 2 +-
 .../nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py | 2 +-
 examples/nlp/language_modeling/megatron_t5_pretraining.py       | 2 +-
 examples/nlp/language_modeling/megatron_t5_prompt_learning.py   | 2 +-
 examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py      | 2 +-
 examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py  | 2 +-
 .../nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py | 2 +-
 .../nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py     | 2 +-
 .../nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py  | 2 +-
 examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py | 2 +-
 examples/nlp/machine_translation/megatron_nmt_training.py       | 2 +-
 tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb                | 2 +-
 19 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/examples/nlp/language_modeling/megatron_bart_pretraining.py b/examples/nlp/language_modeling/megatron_bart_pretraining.py
index 9a7300656f99..b08772c24348 100644
--- a/examples/nlp/language_modeling/megatron_bart_pretraining.py
+++ b/examples/nlp/language_modeling/megatron_bart_pretraining.py
@@ -13,11 +13,11 @@
 # limitations under the License.
 
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import ModelSummary
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector
 
 from nemo.collections.nlp.models.language_modeling.megatron_bart_model import MegatronBARTModel
diff --git a/examples/nlp/language_modeling/megatron_bert_pretraining.py b/examples/nlp/language_modeling/megatron_bert_pretraining.py
index b5c26259f711..e21a29a6f77a 100644
--- a/examples/nlp/language_modeling/megatron_bert_pretraining.py
+++ b/examples/nlp/language_modeling/megatron_bert_pretraining.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector
 
 from nemo.collections.nlp.models.language_modeling.megatron_bert_model import MegatronBertModel
diff --git a/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py b/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py
index 2e4987f2e18c..a0eec29f38fb 100644
--- a/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py
+++ b/examples/nlp/language_modeling/megatron_ckpt_to_nemo.py
@@ -29,7 +29,7 @@
 
 import torch
 from apex.transformer import parallel_state
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from pytorch_lightning.trainer.trainer import Trainer
 
 from nemo.collections.nlp.models.language_modeling.megatron_bart_model import MegatronBARTModel
diff --git a/examples/nlp/language_modeling/megatron_gpt_pretraining.py b/examples/nlp/language_modeling/megatron_gpt_pretraining.py
index 811d8833a99e..0563cdf703b1 100644
--- a/examples/nlp/language_modeling/megatron_gpt_pretraining.py
+++ b/examples/nlp/language_modeling/megatron_gpt_pretraining.py
@@ -13,10 +13,10 @@
 # limitations under the License.
 
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector
 
 from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
diff --git a/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py b/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py
index ddd6b8eb8d97..1d0debb924f1 100644
--- a/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py
+++ b/examples/nlp/language_modeling/megatron_gpt_prompt_learning.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 
 from nemo.collections.nlp.models.language_modeling.megatron_gpt_prompt_learning_model import (
     MegatronGPTPromptLearningModel,
diff --git a/examples/nlp/language_modeling/megatron_retro_cal_shape.py b/examples/nlp/language_modeling/megatron_retro_cal_shape.py
index 06bec216e925..7e8cf1dca755 100644
--- a/examples/nlp/language_modeling/megatron_retro_cal_shape.py
+++ b/examples/nlp/language_modeling/megatron_retro_cal_shape.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin
 
 from nemo.collections.nlp.models.language_modeling.megatron_retrieval_model import MegatronRetrievalModel
diff --git a/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py b/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py
index 80804a2602e5..d755da52fe2f 100644
--- a/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py
+++ b/examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin
 from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector
 
diff --git a/examples/nlp/language_modeling/megatron_retro_pretraining.py b/examples/nlp/language_modeling/megatron_retro_pretraining.py
index 374bb938583e..f9bde24ca1ba 100644
--- a/examples/nlp/language_modeling/megatron_retro_pretraining.py
+++ b/examples/nlp/language_modeling/megatron_retro_pretraining.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin
 from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector
 
diff --git a/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py b/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py
index 063147d66abb..3550d5e2918c 100644
--- a/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py
+++ b/examples/nlp/language_modeling/megatron_t5_lm_adaptation_finetune.py
@@ -13,11 +13,11 @@
 # limitations under the License.
 
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import ModelSummary
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector
 
 from nemo.collections.nlp.models.language_modeling.megatron_t5_model import MegatronT5Model
diff --git a/examples/nlp/language_modeling/megatron_t5_pretraining.py b/examples/nlp/language_modeling/megatron_t5_pretraining.py
index 4f044cb3c34d..018cdeae4c24 100644
--- a/examples/nlp/language_modeling/megatron_t5_pretraining.py
+++ b/examples/nlp/language_modeling/megatron_t5_pretraining.py
@@ -13,11 +13,11 @@
 # limitations under the License.
 
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import ModelSummary
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector
 
 from nemo.collections.nlp.models.language_modeling.megatron_t5_model import MegatronT5Model
diff --git a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py
index e91c7c178c94..68c9f2cf5b30 100644
--- a/examples/nlp/language_modeling/megatron_t5_prompt_learning.py
+++ b/examples/nlp/language_modeling/megatron_t5_prompt_learning.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 
 from nemo.collections.nlp.models.language_modeling.megatron_t5_prompt_learning_model import (
     MegatronT5PromptLearningModel,
diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py
index e78d34adee65..2d1e104660a2 100644
--- a/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py
+++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_eval.py
@@ -12,11 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from megatron_t5_seq2seq_finetune import load_from_checkpoint_dir, load_from_nemo, validate_checkpoint_loading_args
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin
 
 from nemo.collections.nlp.models.language_modeling.megatron_finetune_model import MegatronT5FinetuneModel
diff --git a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py
index 84b78739f673..84dec0fac387 100644
--- a/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py
+++ b/examples/nlp/language_modeling/megatron_t5_seq2seq_finetune.py
@@ -15,10 +15,10 @@
 import os
 import tempfile
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector
 
 from nemo.collections.nlp.models.language_modeling.megatron_finetune_model import MegatronT5FinetuneModel
diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py
index 325d9767e167..aeabe18b3d9a 100644
--- a/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py
+++ b/examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py
@@ -13,10 +13,10 @@
 # limitations under the License.
 
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 
 from nemo.collections.nlp.models.language_modeling.megatron_gpt_adapter_model import MegatronGPTAdapterLearningModel
 from nemo.collections.nlp.parts.nlp_overrides import (
diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py
index db1b8ef723d5..8103be100b10 100644
--- a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py
+++ b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_tuning.py
@@ -13,10 +13,10 @@
 # limitations under the License.
 
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 
 from nemo.collections.nlp.models.language_modeling.megatron_gpt_adapter_model import MegatronGPTInfusedAdapterModel
 from nemo.collections.nlp.parts.nlp_overrides import (
diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py
index 5f3cae47024c..50e126e0de52 100644
--- a/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py
+++ b/examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py
@@ -13,10 +13,10 @@
 # limitations under the License.
 
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 
 from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5AdapterLearningModel
 from nemo.collections.nlp.parts.nlp_overrides import (
diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py
index 5a72b7829bfe..6230231638c7 100644
--- a/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py
+++ b/examples/nlp/language_modeling/tuning/megatron_t5_ia3_tuning.py
@@ -13,10 +13,10 @@
 # limitations under the License.
 
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 
 from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5InfusedAdapterModel
 from nemo.collections.nlp.parts.nlp_overrides import (
diff --git a/examples/nlp/machine_translation/megatron_nmt_training.py b/examples/nlp/machine_translation/megatron_nmt_training.py
index 0bd349225fb2..9299996efc24 100644
--- a/examples/nlp/machine_translation/megatron_nmt_training.py
+++ b/examples/nlp/machine_translation/megatron_nmt_training.py
@@ -13,11 +13,11 @@
 # limitations under the License.
 
 
+from lightning_lite.plugins.environments import TorchElasticEnvironment
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import ModelSummary
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment
 from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector
 
 from nemo.collections.nlp.models.language_modeling.megatron_bart_model import MegatronBARTModel
diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb
index b03316bfce02..512a38bc90cc 100644
--- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb
+++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb
@@ -826,7 +826,7 @@
     "import torch\n",
     "import pytorch_lightning as pl\n",
     "from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy\n",
-    "from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment\n",
+    "from lightning_lite.plugins.environments import TorchElasticEnvironment\n",
     "\n",
     "# lets modify some trainer configs\n",
     "# checks if we have GPU available and uses it\n",

From 580654cfaf279d1050b9d6cf7d74f140423debea Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 23 Nov 2022 15:09:47 +0000
Subject: [PATCH 17/22] Update lr_schedulers references to lr_scheduler_configs

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 .../models/language_modeling/megatron_base_model.py    | 10 +++++-----
 .../language_modeling/megatron_retrieval_model.py      | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
index 9d09020fc137..7119e47acb98 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
@@ -302,16 +302,16 @@ def on_train_batch_end(self, outputs, batch, batch_idx: int, unused: Optional[in
                 # If the grad scaler skipped its optimizer step due to infs/nans,
                 # decrement the step of all schedulers.
                 if grad_scaler.optimizer_update_skipped is not None and grad_scaler.optimizer_update_skipped is True:
-                    schedulers = self.trainer.lr_schedulers
+                    scheduler_cfgs = self.trainer.lr_scheduler_configs
 
-                    if not schedulers or not self.trainer.lightning_module.automatic_optimization:
+                    if not scheduler_cfgs or not self.trainer.lightning_module.automatic_optimization:
                         return
 
-                    for scheduler in schedulers:
+                    for scheduler_cfg in scheduler_cfgs:
                         # Decrement the counter by 2, then perform a scheduler.step() to perform a no-up
                         # as well as update the optimizer lr in all param groups
-                        scheduler['scheduler'].last_epoch -= 2
-                        scheduler['scheduler'].step()
+                        scheduler_cfg.scheduler.last_epoch -= 2
+                        scheduler_cfg.scheduler.step()
 
                     # Removing the line below because it messes up train_valid_test_num_samples calculation.
                     # self.trainer.fit_loop.max_steps = self.trainer.fit_loop.max_steps + 1
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py
index 97efa254459a..66e43458d20e 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py
@@ -309,16 +309,16 @@ def on_train_batch_end(self, outputs, batch, batch_idx: int, unused: Optional[in
                 # If the grad scaler skipped its optimizer step due to infs/nans,
                 # decrement the step of all schedulers.
                 if grad_scaler.optimizer_update_skipped is not None and grad_scaler.optimizer_update_skipped is True:
-                    schedulers = self.trainer.lr_schedulers
+                    scheduler_cfgs = self.trainer.lr_scheduler_configs
 
-                    if not schedulers or not self.trainer.lightning_module.automatic_optimization:
+                    if not scheduler_cfgs or not self.trainer.lightning_module.automatic_optimization:
                         return
 
-                    for scheduler in schedulers:
+                    for scheduler_cfg in scheduler_cfgs:
                         # Decrement the counter by 2, then perform a scheduler.step() to perform a no-up
                         # as well as update the optimizer lr in all param groups
-                        scheduler['scheduler'].last_epoch -= 2
-                        scheduler['scheduler'].step()
+                        scheduler_cfg.scheduler.last_epoch -= 2
+                        scheduler_cfg.scheduler.step()
 
                     # Increase the max step count by 1
 

From ed8df6f317901be00fae6b66f6ea52fb984f3291 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 23 Nov 2022 16:00:04 +0000
Subject: [PATCH 18/22] Replace deprecated trainer.num_gpus with num_devices

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 nemo_text_processing/g2p/models/ctc_g2p.py | 2 +-
 nemo_text_processing/g2p/models/t5_g2p.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/nemo_text_processing/g2p/models/ctc_g2p.py b/nemo_text_processing/g2p/models/ctc_g2p.py
index 7f77ed6595e5..a456942c1ad8 100644
--- a/nemo_text_processing/g2p/models/ctc_g2p.py
+++ b/nemo_text_processing/g2p/models/ctc_g2p.py
@@ -69,7 +69,7 @@ class CTCG2PModel(G2PModel, ASRBPEMixin):
     def __init__(self, cfg: DictConfig, trainer: Trainer = None):
         self.world_size = 1
         if trainer is not None:
-            self.world_size = trainer.num_nodes * trainer.num_gpus
+            self.world_size = trainer.num_nodes * trainer.num_devices
 
         self.mode = cfg.model_name.lower()
 
diff --git a/nemo_text_processing/g2p/models/t5_g2p.py b/nemo_text_processing/g2p/models/t5_g2p.py
index 437b9bf57475..7ed1c917a880 100644
--- a/nemo_text_processing/g2p/models/t5_g2p.py
+++ b/nemo_text_processing/g2p/models/t5_g2p.py
@@ -58,7 +58,7 @@ def output_types(self) -> Optional[Dict[str, NeuralType]]:
     def __init__(self, cfg: DictConfig, trainer: Trainer = None):
         self.world_size = 1
         if trainer is not None:
-            self.world_size = trainer.num_nodes * trainer.num_gpus
+            self.world_size = trainer.num_nodes * trainer.num_devices
 
         # Load appropriate tokenizer from HuggingFace
         self.model_name = cfg.model_name

From 12d7ea21f552a68a5b5d4cdf33bf4ae889252e4a Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 23 Nov 2022 16:38:40 +0000
Subject: [PATCH 19/22] Pass stage to super().setup() in dialogue GPT models

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 .../nlp/models/dialogue/dialogue_gpt_classification_model.py    | 2 +-
 .../nlp/models/dialogue/dialogue_gpt_generation_model.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/nemo/collections/nlp/models/dialogue/dialogue_gpt_classification_model.py b/nemo/collections/nlp/models/dialogue/dialogue_gpt_classification_model.py
index 9608a0320bd6..223b9238bb8a 100644
--- a/nemo/collections/nlp/models/dialogue/dialogue_gpt_classification_model.py
+++ b/nemo/collections/nlp/models/dialogue/dialogue_gpt_classification_model.py
@@ -710,7 +710,7 @@ def prepare_data(self):
         self.data_prepared = True
 
     def setup(self, stage=None):
-        super().setup()
+        super().setup(stage)
         if self.cfg.library == "megatron" and self.prompt_learning and stage == "fit":
             if self.cfg.virtual_prompt_style == VirtualPromptStyle.PROMPT_TUNING:
                 self.language_model.init_new_prompts()
diff --git a/nemo/collections/nlp/models/dialogue/dialogue_gpt_generation_model.py b/nemo/collections/nlp/models/dialogue/dialogue_gpt_generation_model.py
index 69ff6d37527e..c6c976a501a1 100644
--- a/nemo/collections/nlp/models/dialogue/dialogue_gpt_generation_model.py
+++ b/nemo/collections/nlp/models/dialogue/dialogue_gpt_generation_model.py
@@ -216,7 +216,7 @@ def mask_and_reduce_loss(self, loss_mask, output_tensor):
         return loss
 
     def setup(self, stage=None):
-        super().setup()
+        super().setup(stage)
         if self.cfg.library == "megatron" and self.prompt_learning:
             self.language_model.init_new_prompts()
 

From 0e8e59ab31049fa3e88f15dce51d7aaabd31ed44 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 23 Nov 2022 18:25:13 +0000
Subject: [PATCH 20/22] Use trainer.num_devices in SGDQA eval_step_helper

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 nemo/collections/nlp/models/dialogue/sgdqa_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nemo/collections/nlp/models/dialogue/sgdqa_model.py b/nemo/collections/nlp/models/dialogue/sgdqa_model.py
index c8b6c468b988..2dd4011d74ac 100644
--- a/nemo/collections/nlp/models/dialogue/sgdqa_model.py
+++ b/nemo/collections/nlp/models/dialogue/sgdqa_model.py
@@ -226,7 +226,7 @@ def eval_step_helper(self, batch: List[torch.Tensor]):
         all_start_char_idx = []
         all_end_char_idx = []
 
-        if self.trainer.devices and self.trainer.world_size > 1:
+        if self.trainer.num_devices and self.trainer.world_size > 1:
             world_size = self.trainer.world_size
             for ind in range(world_size):
                 all_example_id_num.append(torch.empty_like(example_id_num))

From d672304e6656b597aadec779da7edaf3ce89be04 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Wed, 23 Nov 2022 21:00:03 +0000
Subject: [PATCH 21/22] Revert Jenkinsfile changes to Speech to Text EMA stage

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 Jenkinsfile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 36c23ecc9c08..2003e468e6a9 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -225,11 +225,9 @@ pipeline {
         stage('Speech to Text EMA') {
           steps {
             sh 'python examples/asr/asr_ctc/speech_to_text_ctc.py \
-            --config-path="../conf/" --config-name="config" \
             model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
             model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
-            trainer.devices=[0,1] \
-            trainer.strategy="ddp" \
+            trainer.devices=2 \
             trainer.accelerator="gpu" \
             +trainer.fast_dev_run=True \
             +exp_manager.ema.enable=True \

From d6143783c0f134d219337b6e2f5e43359cd9aa10 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Thu, 24 Nov 2022 10:31:35 +0000
Subject: [PATCH 22/22] Address review: drop os import, assert no .hydra dir

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 nemo/core/classes/exportable.py  | 1 -
 tests/hydra/test_hydra_runner.py | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/nemo/core/classes/exportable.py b/nemo/core/classes/exportable.py
index 53b47cf48bad..50266dab3dbe 100644
--- a/nemo/core/classes/exportable.py
+++ b/nemo/core/classes/exportable.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
 from abc import ABC
 from typing import List, Union
 
diff --git a/tests/hydra/test_hydra_runner.py b/tests/hydra/test_hydra_runner.py
index 803d6aa6af8a..1da0a914cfaf 100644
--- a/tests/hydra/test_hydra_runner.py
+++ b/tests/hydra/test_hydra_runner.py
@@ -41,6 +41,8 @@ def test_config1(self):
         # Run the call as subprocess.
         subprocess.check_call(call, shell=True, stdout=sys.stdout, stderr=sys.stdout)
 
+        # Make sure that .hydra dir is not present.
+        assert not path.exists(f".hydra")
         # Make sure that default hydra log file is not present.
         assert not path.exists(f"my_app.log")
 
@@ -65,6 +67,8 @@ def test_config2(self):
         # Run the call as subprocess.
         subprocess.check_call(call, shell=True, stdout=sys.stdout, stderr=sys.stdout)
 
+        # Make sure that .hydra dir is not present.
+        assert not path.exists(f".hydra")
         # Make sure that default hydra log file is not present.
         assert not path.exists(f"my_app.log")