From 4e27f6d2e48d914e8dca45a6044094eed30e6795 Mon Sep 17 00:00:00 2001 From: Shaden Smith Date: Mon, 9 Aug 2021 16:42:45 -0700 Subject: [PATCH] query deepspeed global grad norm (#8) --- megatron/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/training.py b/megatron/training.py index 22ab5f242..21ef13b94 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -380,7 +380,7 @@ def train_step(forward_step_func, data_iterator, assert isinstance(model[0], deepspeed.PipelineEngine), model loss = model[0].train_batch(data_iter=data_iterator) skipped_iter = 0 - grad_norm = 0. + grad_norm = model[0].get_global_grad_norm() num_zeros_in_grad = 0 return {'lm loss' : loss}, skipped_iter, grad_norm, num_zeros_in_grad