【prim】add forward output for Silu grad signature #53632

Merged: 9 commits on May 12, 2023
@@ -60,9 +60,12 @@ void leaky_relu_grad(const Tensor& out,
}

template <typename T>
-void silu_grad(const Tensor& x, const Tensor& out_grad, Tensor* x_grad) {
+void silu_grad(const Tensor& x,
+               const Tensor& out,
+               const Tensor& out_grad,
+               Tensor* x_grad) {
  if (x_grad) {
-    auto sigmoid = 1.0 / (1.0 + exp<T>(-x));
+    auto sigmoid = out / x;
    auto res = out_grad * sigmoid * (1.0 + x * (1.0 - sigmoid));
    set_output<T>(res, x_grad);
  }
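The substance of this hunk is that the composite rule now recovers the sigmoid from the saved forward output instead of recomputing `exp<T>(-x)`. The algebra behind it, written out as a short derivation (plain math, not Paddle code):

```latex
% With \sigma(x) = 1/(1 + e^{-x}) and the forward output out = silu(x):
\mathrm{silu}(x) = x\,\sigma(x) = \mathrm{out}
  \quad\Longrightarrow\quad
  \sigma(x) = \frac{\mathrm{out}}{x},
\qquad
\frac{d}{dx}\,\mathrm{silu}(x)
  = \sigma(x) + x\,\sigma(x)\bigl(1 - \sigma(x)\bigr)
  = \sigma(x)\bigl(1 + x\,(1 - \sigma(x))\bigr).
```

The final expression is exactly `out_grad * sigmoid * (1.0 + x * (1.0 - sigmoid))` from the code; only the way `sigmoid` is obtained changes.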
4 changes: 2 additions & 2 deletions paddle/phi/api/yaml/backward.yaml
@@ -1588,14 +1588,14 @@

- backward_op : silu_grad
forward : silu (Tensor x) -> Tensor(out)
-args : (Tensor x, Tensor out_grad)
+args : (Tensor x, Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : silu_grad
-composite : silu_grad(x, out_grad, x_grad)
+composite : silu_grad(x, out, out_grad, x_grad)
inplace : (out_grad -> x_grad)

- backward_op : sin_double_grad
7 changes: 6 additions & 1 deletion paddle/phi/kernels/activation_grad_kernel.h
@@ -75,6 +75,12 @@ namespace phi {
DenseTensor* dx);

+template <typename T, typename Context>
+void SiluGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out,
+                    const DenseTensor& dout,
+                    DenseTensor* dx);
template <typename T, typename Context>
void ReluDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& out,
const DenseTensor& ddx,
@@ -277,7 +283,6 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Asinh);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Acosh);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Atanh);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink);
-DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Silu);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Square);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Softsign);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(LogSigmoid);
12 changes: 11 additions & 1 deletion paddle/phi/kernels/cpu/activation_grad_kernel.cc
@@ -128,7 +128,6 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Asinh, AsinhGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, AcoshGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, AtanhGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, TanhShrinkGradFunctor);
-DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, SiluGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, SquareGradFunctor);

DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, ExpGradFunctor);
@@ -190,6 +189,17 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid,
slope,
offset);

+template <typename T, typename Context>
+void SiluGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out,
+                    const DenseTensor& dout,
+                    DenseTensor* dx) {
+  funcs::SiluGradFunctor<T> functor;
+  ActivationGradImpl<T, Context, funcs::SiluGradFunctor<T>>(
+      dev_ctx, &x, &out, &dout, dx, functor);
+}
+
template <typename T, typename Context>
void EluGradKernel(const Context& dev_ctx,
const DenseTensor& x,
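As a quick sanity check that feeding `out` into the grad kernels does not change the numerics, the gradient can be evaluated both ways in a small standalone program (plain C++ with illustrative values, not Paddle code; the sample points avoid x = 0, where out / x is indeterminate):

```cpp
#include <cmath>
#include <cstdio>

int main() {
  const double xs[] = {-3.0, -0.5, 0.25, 1.0, 4.0};
  const double dout = 0.7;  // arbitrary upstream gradient
  for (double x : xs) {
    const double sigmoid_ref = 1.0 / (1.0 + std::exp(-x));  // old path: recompute sigmoid
    const double out = x * sigmoid_ref;                      // saved forward output
    const double sigmoid_new = out / x;                      // new path: recover from out
    const double grad_old = dout * sigmoid_ref * (1.0 + x * (1.0 - sigmoid_ref));
    const double grad_new = dout * sigmoid_new * (1.0 + x * (1.0 - sigmoid_new));
    std::printf("x=% .2f  old=% .8f  new=% .8f  |diff|=%.2e\n",
                x, grad_old, grad_new, std::fabs(grad_old - grad_new));
  }
  return 0;
}
```

Both columns should agree up to rounding, since `out / x` equals the sigmoid whenever `x != 0`.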
11 changes: 10 additions & 1 deletion paddle/phi/kernels/gpu/activation_grad_kernel.cu
@@ -190,7 +190,6 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Asinh, CudaAsinhGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, CudaAcoshGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, CudaAtanhGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, CudaTanhShrinkGradFunctor);
-DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, CudaSiluGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, CudaSquareGradFunctor);

DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, CudaExpGradFunctor);
@@ -249,6 +248,16 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid,
slope,
offset);

+template <typename T, typename Context>
+void SiluGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out,
+                    const DenseTensor& dout,
+                    DenseTensor* dx) {
+  funcs::CudaSiluGradFunctor<T> functor;
+  ActivationGradGPUImpl<T, Context, funcs::CudaSiluGradFunctor<T>>(
+      dev_ctx, &x, &out, &dout, dx, functor);
+}
template <typename T, typename Context>
void EluGradKernel(const Context& dev_ctx,
const DenseTensor& x,
11 changes: 10 additions & 1 deletion paddle/phi/kernels/xpu/activation_grad_kernel.cc
@@ -567,7 +567,6 @@ DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, XPUTanhGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, XPUReluGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu6, XPURelu6GradFunctor);

-DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, XPUSiluGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, XPULogGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, XPUSquareGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Swish, XPUSwishGradFunctor);
@@ -605,6 +604,16 @@ void HardSwishGradKernel(const Context& dev_ctx,
dev_ctx, &x, nullptr, &dout, dx, functor);
}

+template <typename T, typename Context>
+void SiluGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out,
+                    const DenseTensor& dout,
+                    DenseTensor* dx) {
+  XPUSiluGradFunctor<T> functor;
+  ActivationGradXPUImpl<T, Context, XPUSiluGradFunctor<T>>(
+      dev_ctx, &x, &out, &dout, dx, functor);
+}
} // namespace phi

PD_REGISTER_KERNEL(relu_grad,