diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 06879cbce72228..945b557021c785 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -1850,12 +1850,6 @@ def _inner_training_loop(
 
                         if is_sagemaker_mp_enabled() and args.fp16:
                             self.optimizer.clip_master_grads(args.max_grad_norm)
-                        elif hasattr(self.optimizer, "clip_grad_norm"):
-                            # Some optimizers (like the sharded optimizer) have a specific way to do gradient clipping
-                            self.optimizer.clip_grad_norm(args.max_grad_norm)
-                        elif hasattr(model, "clip_grad_norm_"):
-                            # Some models (like FullyShardedDDP) have a specific way to do gradient clipping
-                            model.clip_grad_norm_(args.max_grad_norm)
                         elif self.use_apex:
                             # Revert to normal clipping otherwise, handling Apex or full precision
                             nn.utils.clip_grad_norm_(
diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py
index cc8a3de56b21bb..507515c696af2e 100644
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -1384,10 +1384,7 @@ def __post_init__(self):
 
         if self.bf16:
             if self.half_precision_backend == "apex":
-                raise ValueError(
-                    " `--half_precision_backend apex`: GPU bf16 is not supported by apex. Use"
-                    " `--half_precision_backend cuda_amp` instead"
-                )
+                raise ValueError(" `--half_precision_backend apex`: GPU bf16 is not supported by apex.")
 
         if self.lr_scheduler_type == SchedulerType.REDUCE_ON_PLATEAU:
             if self.evaluation_strategy == IntervalStrategy.NO:
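
For context (not part of the patch): with the optimizer- and model-specific branches removed, gradient clipping falls through to the generic `torch.nn.utils.clip_grad_norm_` path. The following is a minimal standalone sketch of that behavior using a toy `nn.Linear` model and a hypothetical `max_grad_norm` of 1.0; it is not the Trainer's actual code.

```python
import torch
from torch import nn

# Toy model and optimizer standing in for the Trainer's model/optimizer.
model = nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# One forward/backward pass to populate gradients.
loss = model(torch.randn(4, 10)).sum()
loss.backward()

# clip_grad_norm_ rescales all gradients in place so their combined
# (global) norm does not exceed max_norm, then returns the pre-clip norm.
total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

optimizer.step()
optimizer.zero_grad()
```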