From 41434e6ad0c3f73539ba8b4f9788e68410099467 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 11 Dec 2024 13:01:07 -0500
Subject: [PATCH] fix no_sync context for deepspeed across all zero types

---
 src/transformers/trainer.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index b6b0c7d19aae19..876551c22b51a3 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -2513,13 +2513,9 @@ def _inner_training_loop(
                         self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
 
                     # We explicitly want to avoid relying on `accelerator.accumulate` for generation training
-                    disable_deepspeed_no_sync = (
-                        self.accelerator.distributed_type == DistributedType.DEEPSPEED
-                        and self.accelerator.deepspeed_engine_wrapped.engine.zero_optimization_partition_gradients()
-                    )
                     context = (
                         functools.partial(self.accelerator.no_sync, model=model)
-                        if i != len(batch_samples) - 1 and not disable_deepspeed_no_sync
+                        if i != len(batch_samples) - 1 and self.accelerator.distributed_type != DistributedType.DEEPSPEED
                         else contextlib.nullcontext
                     )
                     with context():
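
As a reading aid, here is a minimal sketch of the selection logic the patch installs. It is illustrative only: the helper name `pick_sync_context`, its parameters, and the string-based DeepSpeed check are stand-ins for the Trainer's actual use of `functools.partial(self.accelerator.no_sync, model=model)` and `DistributedType.DEEPSPEED`.

```python
import contextlib
import functools


def pick_sync_context(accelerator, model, i, num_batch_samples):
    """Return the context manager guarding one micro-batch (illustrative sketch).

    On every micro-batch except the last of the gradient-accumulation window,
    gradient synchronization is skipped via ``accelerator.no_sync``. When the
    backend is DeepSpeed, the engine handles gradient accumulation itself, so
    the no-op context is returned instead, mirroring the patched condition.
    """
    is_last_micro_batch = i == num_batch_samples - 1
    # Stand-in for `accelerator.distributed_type != DistributedType.DEEPSPEED`;
    # a string comparison keeps this sketch free of an `accelerate` import.
    is_deepspeed = str(getattr(accelerator, "distributed_type", "")).endswith("DEEPSPEED")
    if not is_last_micro_batch and not is_deepspeed:
        return functools.partial(accelerator.no_sync, model=model)
    return contextlib.nullcontext
```

Compared with the removed `zero_optimization_partition_gradients()` check, which only disabled `no_sync` for ZeRO stages that partition gradients, the patched condition opts out of `no_sync` for every DeepSpeed configuration, matching the subject line's "across all zero types".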