Commit

use Tensor.grad_ instead of Tensor.grad to optimize clip_grad_norm (#…
HydrogenSulfate authored Nov 25, 2024
1 parent 1c2e9e4 commit 3e56bff
Showing 1 changed file with 4 additions and 3 deletions.
python/paddle/nn/utils/clip_grad_norm_.py: 4 additions & 3 deletions
Expand Up @@ -80,7 +80,7 @@ def clip_grad_norm_(
if norm_type not in support_norm_type:
raise ValueError(f'norm_type only support {support_norm_type}')

grads = [p.grad for p in parameters if p.grad is not None]
grads = [p.grad_ for p in parameters if p.grad_ is not None]
max_norm = float(max_norm)
norm_type = float(norm_type)
if len(grads) == 0:
@@ -113,6 +113,7 @@ def clip_grad_norm_(
     clip_coef_clamped = clip_coef.clip_(max=1.0)

     for _, p in enumerate(parameters):
-        if p.grad is not None:
-            p.grad = paddle.multiply(x=p.grad, y=clip_coef_clamped)
+        if p.grad_ is not None:
+            p.grad_.multiply_(y=clip_coef_clamped)
+
     return total_norm
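
Below is a minimal sketch of how the optimized function is exercised in a training step. The toy Linear model, SGD optimizer, batch shape, and max_norm value are illustrative assumptions, not part of the commit; only clip_grad_norm_ itself and the in-place behavior it gains here come from the source.

import paddle
from paddle.nn.utils import clip_grad_norm_

# Hypothetical toy model and optimizer, for illustration only.
linear = paddle.nn.Linear(10, 10)
opt = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters())

x = paddle.rand([4, 10])
loss = linear(x).sum()
loss.backward()

# After this commit the scaling step mutates each gradient in place via
# p.grad_.multiply_(y=clip_coef_clamped) instead of allocating a fresh
# tensor with paddle.multiply and reassigning p.grad, so clipping adds
# no extra gradient copies.
total_norm = clip_grad_norm_(linear.parameters(), max_norm=1.0)

opt.step()
opt.clear_grad()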
