diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 06879cbce72228..945b557021c785 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -1850,12 +1850,6 @@ def _inner_training_loop(
 
                         if is_sagemaker_mp_enabled() and args.fp16:
                             self.optimizer.clip_master_grads(args.max_grad_norm)
-                        elif hasattr(self.optimizer, "clip_grad_norm"):
-                            # Some optimizers (like the sharded optimizer) have a specific way to do gradient clipping
-                            self.optimizer.clip_grad_norm(args.max_grad_norm)
-                        elif hasattr(model, "clip_grad_norm_"):
-                            # Some models (like FullyShardedDDP) have a specific way to do gradient clipping
-                            model.clip_grad_norm_(args.max_grad_norm)
                         elif self.use_apex:
                             # Revert to normal clipping otherwise, handling Apex or full precision
                             nn.utils.clip_grad_norm_(
diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py
index cc8a3de56b21bb..507515c696af2e 100644
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -1384,10 +1384,7 @@ def __post_init__(self):
 
         if self.bf16:
             if self.half_precision_backend == "apex":
-                raise ValueError(
-                    " `--half_precision_backend apex`: GPU bf16 is not supported by apex. Use"
-                    " `--half_precision_backend cuda_amp` instead"
-                )
+                raise ValueError(" `--half_precision_backend apex`: GPU bf16 is not supported by apex.")
 
         if self.lr_scheduler_type == SchedulerType.REDUCE_ON_PLATEAU:
             if self.evaluation_strategy == IntervalStrategy.NO:
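
For context (not part of the patch): with the optimizer- and model-specific branches removed, gradient clipping falls through to the generic `torch.nn.utils.clip_grad_norm_` path. The following is a minimal standalone sketch of that behavior using a toy `nn.Linear` model and a hypothetical `max_grad_norm` of 1.0; it is not the Trainer's actual code.

```python
import torch
from torch import nn

# Toy model and optimizer standing in for the Trainer's model/optimizer.
model = nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# One forward/backward pass to populate gradients.
loss = model(torch.randn(4, 10)).sum()
loss.backward()

# clip_grad_norm_ rescales all gradients in place so their combined
# (global) norm does not exceed max_norm, then returns the pre-clip norm.
total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

optimizer.step()
optimizer.zero_grad()
```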