diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
index 70003b48cc897..1bc700d5f53ec 100644
--- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
+++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
@@ -73,6 +73,7 @@
     "add_triple_grad",
     "silu_double_grad",
     "tanh_triple_grad",
+    "minimum_double_grad",
 ]
 
 # white ops list whose kernel can automaically do type promotion.
diff --git a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h
index abafca001a354..4e9f09a0c52f3 100644
--- a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h
+++ b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h
@@ -89,6 +89,32 @@ void cos_double_grad(const Tensor& x,
   }
 }
 
+template <typename T>
+void minimum_double_grad(const Tensor& x,
+                         const Tensor& y,
+                         const paddle::optional<Tensor>& grad_x_grad,
+                         const paddle::optional<Tensor>& grad_y_grad,
+                         Tensor* grad_out_grad) {
+  if (grad_out_grad) {
+    if (grad_x_grad && grad_y_grad) {
+      auto x_mask = cast<T>(less_than<T>(x, y), grad_x_grad.get().dtype());
+      auto ddout =
+          grad_x_grad.get() * x_mask + grad_y_grad.get() * (1 - x_mask);
+      set_output<T>(ddout, grad_out_grad);
+    } else if (grad_x_grad) {
+      auto x_mask = cast<T>(less_than<T>(x, y), grad_x_grad.get().dtype());
+      auto ddout = grad_x_grad.get() * x_mask;
+      set_output<T>(ddout, grad_out_grad);
+    } else if (grad_y_grad) {
+      auto y_mask = cast<T>(greater_equal<T>(x, y), grad_y_grad.get().dtype());
+      auto ddout = grad_y_grad.get() * y_mask;
+      set_output<T>(ddout, grad_out_grad);
+    } else {
+      grad_out_grad = nullptr;
+    }
+  }
+}
+
 template <typename T>
 void tanh_triple_grad(const Tensor& out,
                       const Tensor& grad_out_forward,
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index e5529aa6c5efa..2ca26f1efbdd5 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -421,6 +421,7 @@
   kernel :
     func : minimum_grad
   composite : minimum_grad(x, y, out_grad, axis, x_grad, y_grad)
+  backward : minimum_double_grad
 
 - backward_op : mish_grad
   forward : mish (Tensor x, float lambda) -> Tensor(out)
@@ -876,6 +877,13 @@
     func : fused_gemm_epilogue_grad
   optional : reserve_space
 
+- backward_op: minimum_double_grad
+  forward: minimum_grad(Tensor x, Tensor y, Tensor grad_out) -> Tensor(grad_x), Tensor(grad_y)
+  args: (Tensor x, Tensor y, Tensor grad_x_grad, Tensor grad_y_grad)
+  output: Tensor(grad_out_grad)
+  composite: minimum_double_grad(x, y, grad_x_grad, grad_y_grad, grad_out_grad)
+  optional : grad_x_grad, grad_y_grad
+
 - backward_op: unpool_grad
   forward: unpool (Tensor x, Tensor indices, int[] ksize, int[] strides, int[] padding, IntArray output_size, str data_format) -> Tensor(out)
   args: (Tensor x, Tensor indices, Tensor out, Tensor out_grad, int[] ksize, int[] strides, int[] padding, IntArray output_size, str data_format)
diff --git a/test/prim/prim/vjp/test_comp_high_grad.py b/test/prim/prim/vjp/test_comp_high_grad.py
index 96762679df519..204999c9ff05c 100644
--- a/test/prim/prim/vjp/test_comp_high_grad.py
+++ b/test/prim/prim/vjp/test_comp_high_grad.py
@@ -411,5 +411,79 @@ def test_high_grad(self):
         self.func_triple(p)
 
 
+@param.parameterized_class(
+    ('shape1', 'shape2'),
+    [
+        (
+            [2, 3, 4],
+            [2, 3, 4],
+        ),
+        (
+            [2, 3, 3, 4],
+            [3, 1, 4],
+        ),
+        (
+            [2, 3, 3, 4],
+            [3, 1, 1],
+        ),
+        (
+            [2, 3, 3, 4],
+            [2, 3, 1, 4],
+        ),
+        (
+            [2, 3, 3, 4],
+            [2, 3, 1, 1],
+        ),
+    ],
+)
+class TestMinimumHighGradCheck(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.shape1 = cls.shape1
+        cls.shape2 = cls.shape2
+
+    def minimum_wrapper(self, x):
+        return paddle.minimum(x[0], x[1])
+
+    @prog_scope()
+    def func_double(self, place):
+        shape1 = self.shape1
+        shape2 = self.shape2
+        eps = 0.0005
+        dtype = np.float64
+        x = paddle.static.data('x', shape1, dtype=dtype)
+        y = paddle.static.data('y', shape2, dtype=dtype)
+        x.persistable = True
+        y.persistable = True
+        out = paddle.minimum(x, y)
+        x_arr = np.random.uniform(-1, 1, shape1).astype(dtype)
+        y_arr = np.random.uniform(-2, 2, shape2).astype(dtype)
+        x_arr[np.abs(x_arr) < 0.005] = 0.002
+        y_arr[np.abs(y_arr) < 0.005] = 0.002
+        from paddle.base import core
+
+        core._set_prim_backward_enabled(True)
+        core._set_prim_backward_blacklist("minimum_grad")
+        gradient_checker.double_grad_check(
+            [x, y], y=out, x_init=[x_arr, y_arr], place=place, eps=eps
+        )
+        gradient_checker.double_grad_check_for_dygraph(
+            self.minimum_wrapper,
+            [x, y],
+            y=out,
+            x_init=[x_arr, y_arr],
+            place=place,
+        )
+        core._set_prim_backward_enabled(False)
+
+    def test_high_grad(self):
+        paddle.enable_static()
+        places = [base.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(base.CUDAPlace(0))
+        for p in places:
+            self.func_double(p)
+
+
 if __name__ == '__main__':
     unittest.main()
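
For reference, the composite rule added in composite_double_backward_api.h routes the incoming second-order gradients through the same selection mask that minimum_grad itself uses: positions where x < y take their contribution from grad_x_grad, and all remaining positions, including ties via greater_equal(x, y), take it from grad_y_grad. Below is a minimal NumPy sketch of that rule; the helper name is hypothetical, same-shape inputs are assumed (the broadcasting handled by the real kernel is ignored), and the zero-filled return when both grads are absent is a simplification of the C++ branch that sets no output.

    import numpy as np

    def minimum_double_grad_ref(x, y, ddx=None, ddy=None):
        # Selection mask of the forward minimum: 1.0 where x is picked (x < y),
        # 0.0 where y is picked (x >= y, which also covers ties).
        x_mask = (x < y).astype(x.dtype)
        y_mask = 1.0 - x_mask
        ddout = np.zeros_like(x)
        if ddx is not None:
            ddout += ddx * x_mask  # second-order gradient flowing through x
        if ddy is not None:
            ddout += ddy * y_mask  # second-order gradient flowing through y
        return ddout

    # Ties (x == y) route the gradient to y, matching the greater_equal mask above.
    x = np.array([0.5, 2.0, 1.0])
    y = np.array([1.0, 1.0, 1.0])
    print(minimum_double_grad_ref(x, y, ddx=np.ones(3), ddy=np.zeros(3)))  # [1. 0. 0.]

The new TestMinimumHighGradCheck case exercises exactly this path: it blacklists the hand-written minimum_grad kernel for prim decomposition and then runs both the static-graph and dygraph double-gradient checkers against the composite rule.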