
Commit

Merge pull request #895 from NVIDIA/flatten_pl_config
Flatten pl.trainer to trainer
ericharper authored Jul 23, 2020
2 parents 54472fd + d4af44c commit 32f659e
Showing 18 changed files with 140 additions and 152 deletions.
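The change is mechanical but touches every entry point: the PyTorch Lightning Trainer arguments, previously nested under a top-level "pl" key (pl.trainer.*), now sit directly under a top-level "trainer" section. Hydra command-line overrides shorten accordingly (pl.trainer.gpus=[0] becomes trainer.gpus=[0]), and the example scripts build the trainer from cfg.trainer instead of cfg.pl.trainer. Below is a minimal sketch of the before/after layout; the values are illustrative rather than copied from any particular NeMo config, and it assumes omegaconf and a 2020-era pytorch_lightning are installed.

# Sketch of the layout change; keys/values here are illustrative only.
from omegaconf import OmegaConf
import pytorch_lightning as pl

old_cfg = OmegaConf.create(
"""
pl:            # old layout: Trainer args nested under a 'pl' key
  trainer:
    gpus: 0
    max_epochs: 5
"""
)

new_cfg = OmegaConf.create(
"""
trainer:       # new layout: 'trainer' is a top-level section
  gpus: 0
  max_epochs: 5
"""
)

# Scripts previously did pl.Trainer(**cfg.pl.trainer); after this PR they do:
trainer = pl.Trainer(**new_cfg.trainer)   # gpus/max_epochs match the PL API of this era

# Hydra CLI overrides change the same way:
#   before: python speech_to_text.py pl.trainer.gpus=[0] +pl.trainer.fast_dev_run=True
#   after:  python speech_to_text.py trainer.gpus=[0] +trainer.fast_dev_run=True

The diffs below apply this same substitution across the Jenkins CI commands, the example YAML configs, and the training scripts.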
58 changes: 29 additions & 29 deletions Jenkinsfile
@@ -103,8 +103,8 @@ pipeline {
sh 'python examples/asr/speech_to_text.py \
model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
- pl.trainer.gpus=[0] \
- +pl.trainer.fast_dev_run=True \
+ trainer.gpus=[0] \
+ +trainer.fast_dev_run=True \
exp_manager.root_dir=examples/asr/speech_to_text_results'
sh 'rm -rf examples/asr/speech_to_text_results'
}
@@ -115,8 +115,8 @@ pipeline {
sh 'python examples/asr/speech_to_label.py \
model.train_ds.manifest_filepath=/home/TestData/speech_commands/train_manifest.json \
model.validation_ds.manifest_filepath=/home/TestData/speech_commands/test_manifest.json \
- pl.trainer.gpus=[1] \
- +pl.trainer.fast_dev_run=True \
+ trainer.gpus=[1] \
+ +trainer.fast_dev_run=True \
model.preprocessor.cls=nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor \
model.preprocessor.params=null \
exp_manager.root_dir=examples/asr/speech_to_label_results'
@@ -131,8 +131,8 @@ pipeline {
model.validation_ds.batch_size=2 \
model.train_ds.manifest_filepath=/home/TestData/an4_speaker/train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_speaker/dev.json \
- pl.trainer.gpus=[1] \
- +pl.trainer.fast_dev_run=True \
+ trainer.gpus=[1] \
+ +trainer.fast_dev_run=True \
exp_manager.root_dir=examples/speaker_recognition/speaker_recognition_results'
sh 'rm -rf examples/speaker_recognition/speaker_recognition_results'
}
@@ -161,10 +161,10 @@ pipeline {
model.validation_ds.use_cache=false \
model.language_model.pretrained_model_name=bert-base-uncased \
model.version_2_with_negative=false \
- pl.trainer.precision=16 \
- pl.trainer.amp_level=O1 \
- pl.trainer.gpus=[0] \
- +pl.trainer.fast_dev_run=true \
+ trainer.precision=16 \
+ trainer.amp_level=O1 \
+ trainer.gpus=[0] \
+ +trainer.fast_dev_run=true \
exp_manager.root_dir=exp_bert_squad_1.1 \
'
sh 'rm -rf examples/nlp/question_answering/exp_bert_squad_1.1'
@@ -179,10 +179,10 @@ pipeline {
model.validation_ds.file=/home/TestData/nlp/squad_mini/v2.0/dev-v2.0.json \
model.language_model.pretrained_model_name=bert-base-uncased \
model.version_2_with_negative=true \
- pl.trainer.precision=16 \
- pl.trainer.amp_level=O1 \
- pl.trainer.gpus=[1] \
- +pl.trainer.fast_dev_run=true \
+ trainer.precision=16 \
+ trainer.amp_level=O1 \
+ trainer.gpus=[1] \
+ +trainer.fast_dev_run=true \
exp_manager.root_dir=exp_bert_squad_2.0 \
'
sh 'rm -rf examples/nlp/question_answering/exp_bert_squad_2.0'
@@ -210,10 +210,10 @@ pipeline {
model.language_model.do_lower_case=true \
model.language_model.pretrained_model_name=roberta-base \
model.version_2_with_negative=false \
- pl.trainer.precision=16 \
- pl.trainer.amp_level=O1 \
- pl.trainer.gpus=[0] \
- +pl.trainer.fast_dev_run=true \
+ trainer.precision=16 \
+ trainer.amp_level=O1 \
+ trainer.gpus=[0] \
+ +trainer.fast_dev_run=true \
exp_manager.root_dir=exp_roberta_squad_1.1 \
'
sh 'rm -rf examples/nlp/question_answering/exp_roberta_squad_1.1'
@@ -229,10 +229,10 @@ pipeline {
model.language_model.do_lower_case=true \
model.language_model.pretrained_model_name=roberta-base \
model.version_2_with_negative=true \
- pl.trainer.precision=16 \
- pl.trainer.amp_level=O1 \
- pl.trainer.gpus=[1] \
- +pl.trainer.fast_dev_run=true \
+ trainer.precision=16 \
+ trainer.amp_level=O1 \
+ trainer.gpus=[1] \
+ +trainer.fast_dev_run=true \
exp_manager.root_dir=exp_roberta_squad_2.0 \
'
sh 'rm -rf examples/nlp/question_answering/exp_roberta_squad_2.0'
@@ -261,8 +261,8 @@ pipeline {
model.train_ds.batch_size=10 \
model.train_ds.use_cache=false \
model.language_model.do_lower_case=true \
- pl.trainer.gpus=[0] \
- +pl.trainer.fast_dev_run=true \
+ trainer.gpus=[0] \
+ +trainer.fast_dev_run=true \
exp_manager.root_dir=exp_bert_base_uncased \
'
sh 'rm -rf examples/nlp/text_classification/exp_bert_base_uncased'
@@ -309,8 +309,8 @@ pipeline {
sh 'cd examples/nlp/token_classification && \
python ner.py \
model.data_dir=/home/TestData/nlp/token_classification_punctuation/ \
- pl.trainer.gpus=[0] \
- +pl.trainer.fast_dev_run=true \
+ trainer.gpus=[0] \
+ +trainer.fast_dev_run=true \
model.use_cache=false \
'
}
@@ -329,9 +329,9 @@ pipeline {
model.data_dir=/home/TestData/nlp/token_classification_punctuation/ \
model.language_model.pretrained_model_name=distilbert-base-uncased \
model.use_cache=false \
- pl.trainer.gpus=[0,1] \
- pl.trainer.distributed_backend=ddp \
- +pl.trainer.fast_dev_run=true \
+ trainer.gpus=[0,1] \
+ trainer.distributed_backend=ddp \
+ +trainer.fast_dev_run=true \
exp_manager.root_dir=exp_distilbert_base_uncased \
'
sh 'rm -rf examples/nlp/token_classification/exp_distilbert_base_uncased'
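One detail worth noting in the CI commands above: fast_dev_run is not declared in any of the trainer blocks in the configs below, and Hydra refuses to override a key that is not already present in the composed config, so the CI appends it with the leading "+" (+trainer.fast_dev_run=true). The sketch below shows the same guard at the OmegaConf level; the keys and values are illustrative, and it assumes omegaconf is installed.

# Illustrative sketch: why the CI uses '+trainer.fast_dev_run=true' rather than a plain override.
from omegaconf import OmegaConf, open_dict

cfg = OmegaConf.create({"trainer": {"gpus": 0, "max_epochs": 5}})
OmegaConf.set_struct(cfg, True)      # Hydra hands scripts struct-mode configs

cfg.trainer.gpus = 1                 # declared key: a plain override works
try:
    cfg.trainer.fast_dev_run = True  # undeclared key: struct mode rejects it
except Exception as err:
    print(type(err).__name__)        # e.g. an attribute/key error from omegaconf

with open_dict(cfg):                 # roughly what the '+' prefix allows
    cfg.trainer.fast_dev_run = True
print(cfg.trainer.fast_dev_run)      # True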
19 changes: 9 additions & 10 deletions examples/asr/conf/config.yaml
@@ -158,16 +158,15 @@ model:
min_lr: 0.0
last_epoch: -1

- pl:
-   trainer:
-     gpus: 0 # number of gpus
-     max_epochs: 5
-     max_steps: null # computed at runtime if not set
-     num_nodes: 1
-     distributed_backend: ddp
-     accumulate_grad_batches: 1
-     checkpoint_callback: False # Provided by exp_manager
-     logger: False # Provided by exp_manager
+ trainer:
+   gpus: 0 # number of gpus
+   max_epochs: 5
+   max_steps: null # computed at runtime if not set
+   num_nodes: 1
+   distributed_backend: ddp
+   accumulate_grad_batches: 1
+   checkpoint_callback: False # Provided by exp_manager
+   logger: False # Provided by exp_manager

exp_manager:
root_dir: null
19 changes: 9 additions & 10 deletions examples/asr/conf/matchboxnet_3x1x64_v1.yaml
@@ -160,16 +160,15 @@ model:
min_lr: 0.001
last_epoch: -1

- pl:
-   trainer:
-     gpus: 0 # number of gpus
-     max_epochs: 200
-     max_steps: null # computed at runtime if not set
-     num_nodes: 1
-     distributed_backend: ddp
-     accumulate_grad_batches: 1
-     checkpoint_callback: False # Provided by exp_manager
-     logger: False # Provided by exp_manager
+ trainer:
+   gpus: 0 # number of gpus
+   max_epochs: 200
+   max_steps: null # computed at runtime if not set
+   num_nodes: 1
+   distributed_backend: ddp
+   accumulate_grad_batches: 1
+   checkpoint_callback: False # Provided by exp_manager
+   logger: False # Provided by exp_manager

exp_manager:
root_dir: null
19 changes: 9 additions & 10 deletions examples/asr/conf/matchboxnet_3x1x64_v2.yaml
@@ -160,16 +160,15 @@ model:
min_lr: 0.001
last_epoch: -1

- pl:
-   trainer:
-     gpus: 0 # number of gpus
-     max_epochs: 200
-     max_steps: null # computed at runtime if not set
-     num_nodes: 1
-     distributed_backend: ddp
-     accumulate_grad_batches: 1
-     checkpoint_callback: False # Provided by exp_manager
-     logger: False # Provided by exp_manager
+ trainer:
+   gpus: 0 # number of gpus
+   max_epochs: 200
+   max_steps: null # computed at runtime if not set
+   num_nodes: 1
+   distributed_backend: ddp
+   accumulate_grad_batches: 1
+   checkpoint_callback: False # Provided by exp_manager
+   logger: False # Provided by exp_manager

exp_manager:
root_dir: null
19 changes: 9 additions & 10 deletions examples/asr/experimental/configs/config_bpe.yaml
@@ -155,16 +155,15 @@ model:
min_lr: 1e-6
last_epoch: -1

- pl:
-   trainer:
-     gpus: 0 # number of gpus
-     max_epochs: 5
-     max_steps: null # computed at runtime if not set
-     num_nodes: 1
-     distributed_backend: ddp
-     accumulate_grad_batches: 1
-     checkpoint_callback: False # Provided by exp_manager
-     logger: False # Provided by exp_manager
+ trainer:
+   gpus: 0 # number of gpus
+   max_epochs: 5
+   max_steps: null # computed at runtime if not set
+   num_nodes: 1
+   distributed_backend: ddp
+   accumulate_grad_batches: 1
+   checkpoint_callback: False # Provided by exp_manager
+   logger: False # Provided by exp_manager

exp_manager:
root_dir: null
2 changes: 1 addition & 1 deletion examples/asr/speech_to_label.py
@@ -33,7 +33,7 @@

@hydra_runner(config_path="conf", config_name="matchboxnet_3x1x64_v1.yaml")
def main(cfg):
- trainer = pl.Trainer(**cfg.pl.trainer)
+ trainer = pl.Trainer(**cfg.trainer)
exp_manager(trainer, cfg.get("exp_manager", None))
asr_model = EncDecClassificationModel(cfg=cfg.model, trainer=trainer)

16 changes: 8 additions & 8 deletions examples/asr/speech_to_text.py
@@ -25,14 +25,14 @@
model.train_ds.manifest_filepath="/Users/okuchaiev/Data/an4_dataset/an4_train.json" \
model.validation_ds.manifest_filepath="/Users/okuchaiev/Data/an4_dataset/an4_val.json" \
hydra.run.dir="." \
- pl.trainer.gpus=0 \
- pl.trainer.max_epochs=50
+ trainer.gpus=0 \
+ trainer.max_epochs=50
Add PyTorch Lightning Trainer arguments from CLI:
python speech_to_text.py \
... \
- +pl.trainer.fast_dev_run=true
+ +trainer.fast_dev_run=true
Hydra logs will be found in "$(./outputs/$(date +"%y-%m-%d")/$(date +"%H-%M-%S")/.hydra)"
PTL logs will be found in "$(./outputs/$(date +"%y-%m-%d")/$(date +"%H-%M-%S")/lightning_logs)"
@@ -42,8 +42,8 @@
model.train_ds.manifest_filepath="./an4/train_manifest.json" \
model.validation_ds.manifest_filepath="./an4/test_manifest.json" \
hydra.run.dir="." \
- pl.trainer.gpus=2 \
- pl.trainer.max_epochs=2 \
+ trainer.gpus=2 \
+ trainer.max_epochs=2 \
model.optim.args.params.betas=[0.8,0.5] \
model.optim.args.params.weight_decay=0.0001
@@ -52,8 +52,8 @@
model.train_ds.manifest_filepath="./an4/train_manifest.json" \
model.validation_ds.manifest_filepath="./an4/test_manifest.json" \
hydra.run.dir="." \
- pl.trainer.gpus=2 \
- pl.trainer.max_epochs=2 \
+ trainer.gpus=2 \
+ trainer.max_epochs=2 \
model.optim.name=adamw \
model.optim.lr=0.001 \
~model.optim.args \
@@ -65,7 +65,7 @@

@hydra_runner(config_path="conf", config_name="config")
def main(cfg):
- trainer = pl.Trainer(**cfg.pl.trainer)
+ trainer = pl.Trainer(**cfg.trainer)
exp_manager(trainer, cfg.get("exp_manager", None))
asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

8 changes: 4 additions & 4 deletions examples/asr/speech_to_text_bpe.py
@@ -17,9 +17,9 @@
model.train_ds.manifest_filepath="./an4/train_manifest.json" \
model.validation_ds.manifest_filepath="./an4/test_manifest.json" \
model.tokenizer.path="./an4/tokenizer/LibriSpeechTokenizer/librispeech_tokenizer_bpe_v1024/" \
- pl.trainer.gpus=2 \
- pl.trainer.distributed_backend="ddp" \
- pl.trainer.max_epochs=100 \
+ trainer.gpus=2 \
+ trainer.distributed_backend="ddp" \
+ trainer.max_epochs=100 \
model.optim.name="adamw" \
model.optim.lr=0.1 \
model.optim.args.params.betas=[0.9,0.999] \
@@ -40,7 +40,7 @@
@hydra_runner(config_path="experimental/configs/", config_name="config_bpe")
def main(cfg):
logging.info(f'Hydra config: {cfg.pretty()}')
- trainer = pl.Trainer(**cfg.pl.trainer)
+ trainer = pl.Trainer(**cfg.trainer)
exp_manager(trainer, cfg.get("exp_manager", None))
asr_model = EncDecCTCModelBPE(cfg=cfg.model, trainer=trainer)

28 changes: 13 additions & 15 deletions examples/nlp/question_answering/conf/config.yaml
@@ -1,20 +1,19 @@
# Question Answering with SQUAD
name: &name QA

- pl:
-   trainer:
-     gpus: 1 # the number of gpus, 0 for CPU, or list with gpu indices
-     num_nodes: 1
-     max_epochs: 2 # the number of training epochs
-     max_steps: null # precedence over max_epochs
-     accumulate_grad_batches: 1 # accumulates grads every k batches
-     precision: 16 # 16 to use AMP
-     amp_level: O1 # O1 or O2 if using AMP
-     distributed_backend: ddp
-     gradient_clip_val: 0.0
-     val_check_interval: 1.0 # check once per epoch .25 for 4 times per epoch
-     checkpoint_callback: false # provided by exp_manager
-     logger: false # provided by exp_manager
+ trainer:
+   gpus: 1 # the number of gpus, 0 for CPU, or list with gpu indices
+   num_nodes: 1
+   max_epochs: 2 # the number of training epochs
+   max_steps: null # precedence over max_epochs
+   accumulate_grad_batches: 1 # accumulates grads every k batches
+   precision: 16 # 16 to use AMP
+   amp_level: O1 # O1 or O2 if using AMP
+   distributed_backend: ddp
+   gradient_clip_val: 0.0
+   val_check_interval: 1.0 # check once per epoch .25 for 4 times per epoch
+   checkpoint_callback: false # provided by exp_manager
+   logger: false # provided by exp_manager

model:

@@ -127,7 +126,6 @@ model:
log_softmax: false
use_transformer_init: true

- pl: null # used at runtime

exp_manager:
root_dir: null # where to store logs and checkpoints
@@ -25,7 +25,7 @@
@hydra_runner(config_path="conf", config_name="config")
def main(cfg: DictConfig) -> None:
logging.info(f'Config: {cfg.pretty()}')
- trainer = pl.Trainer(**cfg.pl.trainer)
+ trainer = pl.Trainer(**cfg.trainer)
exp_manager(trainer, cfg.get("exp_manager", None))
question_answering_model = QAModel(cfg.model, trainer=trainer)
trainer.fit(question_answering_model)
@@ -16,17 +16,16 @@

# Config file for text classification with pretrained BERT models

- pl:
-   trainer:
-     gpus: 1 # the number of gpus, 0 for CPU
-     num_nodes: 1
-     max_epochs: 100
-     max_steps: null # precedence over max_epochs
-     accumulate_grad_batches: 1 # accumulates grads every k batches
-     amp_level: O0 # O1/O2 for mixed precision
-     distributed_backend: ddp
-     checkpoint_callback: False # Provided by exp_manager
-     logger: False # Provided by exp_manager
+ trainer:
+   gpus: 1 # the number of gpus, 0 for CPU
+   num_nodes: 1
+   max_epochs: 100
+   max_steps: null # precedence over max_epochs
+   accumulate_grad_batches: 1 # accumulates grads every k batches
+   amp_level: O0 # O1/O2 for mixed precision
+   distributed_backend: ddp
+   checkpoint_callback: False # Provided by exp_manager
+   logger: False # Provided by exp_manager

model:
data_dir: ??? # /path/to/data
@@ -91,7 +90,6 @@ model:
warmup_ratio: 0.1
last_epoch: -1

- pl: null # used at runtime

exp_manager:
root_dir: null # root_dir for your experiment, if None, defaults to "./NeMo_experiments"