diff --git a/megatron/global_vars.py b/megatron/global_vars.py
index 1584a72b8..1b52815f2 100644
--- a/megatron/global_vars.py
+++ b/megatron/global_vars.py
@@ -152,7 +152,11 @@ def _set_tensorboard_writer(args):
               'no TensorBoard logs will be written.', flush=True)
 
 
+# Important: codecarbon is very unstable, and its latest incarnation uses the python scheduler, which interferes with the asyncio library our test suite relies on and breaks everything, so this is a no-op for now.
 def _set_codecarbon_tracker(args):
+
+    return # turned off
+
     global _GLOBAL_CODECARBON_TRACKER
     if not hasattr(args, 'codecarbon_dir') or args.codecarbon_dir is None:
         return
@@ -187,6 +191,9 @@ def _set_codecarbon_tracker(args):
 
 
 def codecarbon_tracker_start():
+
+    return # turned off, see the notes above
+
     global _GLOBAL_CODECARBON_TRACKER
     if _GLOBAL_CODECARBON_TRACKER is None:
         return
@@ -196,6 +203,9 @@ def codecarbon_tracker_start():
 
 
 def codecarbon_tracker_stop():
+
+    return # turned off, see the notes above
+
     global _GLOBAL_CODECARBON_TRACKER
     if _GLOBAL_CODECARBON_TRACKER is None:
         return
@@ -205,6 +215,9 @@ def codecarbon_tracker_stop():
 
 
 def codecarbon_tracker_flush():
+
+    return # turned off, see the notes above
+
     global _GLOBAL_CODECARBON_TRACKER
     if _GLOBAL_CODECARBON_TRACKER is None:
         return
diff --git a/requirements.txt b/requirements.txt
index ef6c6af42..e65c51c4a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,8 +9,6 @@ tensorboard
 torch
 transformers
 DeepSpeed @ git+https://github.com/microsoft/DeepSpeed.git
-# at some point when it starts working freeze with ether min version or sha using the syntax codecarbon.git@deadbeaf
-codecarbon @ git+https://github.com/mlco2/codecarbon.git
 # versions from HF transformers
 black==21.4b0
 isort>=5.5.4
diff --git a/tests/test_training.py b/tests/test_training.py
index 8e02052df..0952c0afb 100644
--- a/tests/test_training.py
+++ b/tests/test_training.py
@@ -111,7 +111,6 @@ def get_variation_config(self, variation, output_dir):
             --save {output_dir}/checkpoints
             --load {output_dir}/checkpoints
             --data-path {data_dir}/meg-gpt2-openwebtext_text_document
-            --codecarbon-dir {output_dir}/codecarbon
             --tensorboard-dir {output_dir}/tensorboard
             --tensorboard-queue-size 5
             --log-timers-to-tensorboard
@@ -314,7 +313,6 @@ def test_training_prefix_lm_all(self):
             --save {output_dir}/checkpoints
             --load {output_dir}/checkpoints
             --data-path {data_dir}/meg-gpt2-openwebtext_text_document
-            --codecarbon-dir {output_dir}/codecarbon
             --tensorboard-dir {output_dir}/tensorboard
             --tensorboard-queue-size 5
             --log-timers-to-tensorboard
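
For context, the patch above disables the trackers with an early-return stub: each codecarbon entry point keeps its signature and its original body, but bails out on the first line, so call sites elsewhere in the codebase need no edits and re-enabling later is a one-line deletion per function. A minimal standalone sketch of that pattern follows; the module name is hypothetical, the bodies past the `None` guard are elided in the diff, so `...` stands in for the original tracking logic:

    # no_op_stub_sketch.py -- illustrative only, not megatron/global_vars.py
    _GLOBAL_CODECARBON_TRACKER = None  # would normally hold a codecarbon tracker

    def codecarbon_tracker_start():
        return  # turned off: codecarbon's scheduler thread clashes with asyncio

        # everything below is unreachable while disabled, but is kept intact
        # so that re-enabling is just a matter of removing the early return
        global _GLOBAL_CODECARBON_TRACKER
        if _GLOBAL_CODECARBON_TRACKER is None:
            return
        ...  # original tracking logic, elided in the diff above

    codecarbon_tracker_start()  # existing call sites keep working as silent no-ops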