Commit

use Tensor.grad_ instead of Tensor.grad to optimize clip_grad_norm (#…
HydrogenSulfate authored Nov 25, 2024
1 parent 1c2e9e4 commit 3e56bff
Showing 1 changed file with 4 additions and 3 deletions.
python/paddle/nn/utils/clip_grad_norm_.py: 4 additions & 3 deletions
Expand Up @@ -80,7 +80,7 @@ def clip_grad_norm_(
if norm_type not in support_norm_type:
raise ValueError(f'norm_type only support {support_norm_type}')

grads = [p.grad for p in parameters if p.grad is not None]
grads = [p.grad_ for p in parameters if p.grad_ is not None]
max_norm = float(max_norm)
norm_type = float(norm_type)
if len(grads) == 0:
@@ -113,6 +113,7 @@ def clip_grad_norm_(
     clip_coef_clamped = clip_coef.clip_(max=1.0)

     for _, p in enumerate(parameters):
-        if p.grad is not None:
-            p.grad = paddle.multiply(x=p.grad, y=clip_coef_clamped)
+        if p.grad_ is not None:
+            p.grad_.multiply_(y=clip_coef_clamped)
+
     return total_norm
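
Below is a minimal sketch of how the optimized function is exercised in a training step. The toy Linear model, SGD optimizer, batch shape, and max_norm value are illustrative assumptions, not part of the commit; only clip_grad_norm_ itself and the in-place behavior it gains here come from the source.

import paddle
from paddle.nn.utils import clip_grad_norm_

# Hypothetical toy model and optimizer, for illustration only.
linear = paddle.nn.Linear(10, 10)
opt = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters())

x = paddle.rand([4, 10])
loss = linear(x).sum()
loss.backward()

# After this commit the scaling step mutates each gradient in place via
# p.grad_.multiply_(y=clip_coef_clamped) instead of allocating a fresh
# tensor with paddle.multiply and reassigning p.grad, so clipping adds
# no extra gradient copies.
total_norm = clip_grad_norm_(linear.parameters(), max_norm=1.0)

opt.step()
opt.clear_grad()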
