Fix `no_sync` context for DeepSpeed across all ZeRO stages

winglian · Dec 11, 2024 · 41434e6
1 parent 37773bd
commit 41434e6
Showing 1 changed file with 1 addition and 5 deletions.
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
@@ -2513,13 +2513,9 @@ def _inner_training_loop(
                         self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
 
                     # We explicitly want to avoid relying on `accelerator.accumulate` for generation training
-                    disable_deepspeed_no_sync = (
-                        self.accelerator.distributed_type == DistributedType.DEEPSPEED
-                        and self.accelerator.deepspeed_engine_wrapped.engine.zero_optimization_partition_gradients()
-                    )
                     context = (
                         functools.partial(self.accelerator.no_sync, model=model)
-                        if i != len(batch_samples) - 1 and not disable_deepspeed_no_sync
+                        if i != len(batch_samples) - 1 and self.accelerator.distributed_type != DistributedType.DEEPSPEED
                         else contextlib.nullcontext
                     )
                     with context():