Update the reduce interface call
Zjq9409 committed Dec 20, 2021
1 parent 080bf95 · commit f0f1cf3
Showing 2 changed files with 14 additions and 16 deletions.
26 changes: 12 additions & 14 deletions paddle/fluid/operators/elementwise/elementwise_div_op.cu
@@ -117,15 +117,16 @@ default_elementwise_div_grad(const framework::ExecutionContext& ctx,
       ElementwiseComputeEx<DivGradFunctor<T>, DeviceContext, T>(
           ctx, dout, y, axis, DivGradFunctor<T>(), dx);
     } else {
-      framework::Tensor res_dx;
-      res_dx.Resize(dout->dims());
+      framework::Tensor tmp_dx;
+      tmp_dx.Resize(dout->dims());

       ElementwiseComputeEx<DivGradFunctor<T>, DeviceContext, T>(
-          ctx, dout, y, axis, DivGradFunctor<T>(), &res_dx);
+          ctx, dout, y, axis, DivGradFunctor<T>(), &tmp_dx);

       std::vector<int> reduce_dims = GetReduceDim(x->dims(), out->dims(), axis);
       gpuStream_t stream = ctx.cuda_device_context().stream();
-      TensorReduceFunctorImpl<T, T, CustomSum>(res_dx, dx, reduce_dims, stream);
+      TensorReduceFunctorImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+          tmp_dx, dx, kps::IdentityFunctor<T>(), reduce_dims, stream);
     }
   }
   // dy
@@ -144,24 +145,21 @@ default_elementwise_div_grad(const framework::ExecutionContext& ctx,
             nullptr, dy->mutable_data<T>(ctx.GetPlace()));
       }
     } else {
-      framework::Tensor res_dy;
-      res_dy.mutable_data<T>(dout->dims(), ctx.GetPlace());
+      framework::Tensor tmp_dy;
+      tmp_dy.mutable_data<T>(dout->dims(), ctx.GetPlace());

-      std::vector<const framework::Tensor*> ins;
-      std::vector<framework::Tensor*> outs;
-      ins.emplace_back(dout);
-      ins.emplace_back(out);
-      ins.emplace_back(y);
-      outs.emplace_back(&res_dy);
+      std::vector<const framework::Tensor*> ins = {dout, out, y};
+      std::vector<framework::Tensor*> outs = {&tmp_dy};

       const auto& dev_ctx =
           ctx.template device_context<platform::CUDADeviceContext>();
-      LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
+      LaunchElementwiseCudaKernel<ElementwiseType::kTernary, T, T>(
           dev_ctx, ins, &outs, axis, DivGradYFunctor<T>());

       std::vector<int> reduce_dims = GetReduceDim(y->dims(), out->dims(), axis);
       gpuStream_t stream = ctx.cuda_device_context().stream();
-      TensorReduceFunctorImpl<T, T, CustomSub>(res_dy, dy, reduce_dims, stream);
+      TensorReduceFunctorImpl<T, T, kps::AddFunctor, kps::InverseFunctor<T>>(
+          tmp_dy, dy, kps::InverseFunctor<T>(), reduce_dims, stream);
     }
   }
 }
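Taken together, the two hunks above show the shape of the new reduce interface: instead of a single fused functor (CustomSum for dx, CustomSub for dy), TensorReduceFunctorImpl now takes a reduction op (kps::AddFunctor) and a per-element transform (kps::IdentityFunctor for dx, kps::InverseFunctor for dy) as template parameters, with an instance of the transform also passed at the call site. The sketch below is a minimal CPU-side analogue of that calling convention, not Paddle's implementation; every name it defines, and the bucket-by-modulo reduction, are illustrative assumptions.

// Simplified CPU analogue of the new reduce calling convention: the reduce op
// and a per-element transform are supplied separately instead of being fused
// into one CustomSum/CustomSub-style functor. All names here are illustrative.
#include <cstddef>
#include <vector>

template <typename T>
struct AddFunctor {
  T operator()(const T& a, const T& b) const { return a + b; }
};

template <typename T>
struct IdentityFunctor {
  T operator()(const T& x) const { return x; }
};

// Assumed to mirror kps::InverseFunctor on the dy path: negate each element
// so that a plain sum reproduces the old CustomSub behaviour.
template <typename T>
struct InverseFunctor {
  T operator()(const T& x) const { return -x; }
};

// Toy reduction: element i of `in` is passed through `transform` and then
// folded into bucket (i % out->size()) with `reduce`.
template <typename T, typename ReduceOp, typename TransformOp>
void TransformThenReduce(const std::vector<T>& in, std::vector<T>* out,
                         ReduceOp reduce, TransformOp transform) {
  for (std::size_t i = 0; i < in.size(); ++i) {
    T& acc = (*out)[i % out->size()];
    acc = reduce(acc, transform(in[i]));
  }
}

int main() {
  std::vector<float> dout = {1.f, 2.f, 3.f, 4.f};
  std::vector<float> dx(2, 0.f);
  std::vector<float> dy(2, 0.f);
  // dx path: identity transform, plain sum.
  TransformThenReduce(dout, &dx, AddFunctor<float>(), IdentityFunctor<float>());
  // dy path: negate before summing.
  TransformThenReduce(dout, &dy, AddFunctor<float>(), InverseFunctor<float>());
  // dx == {4, 6}, dy == {-4, -6}
  return 0;
}

The same hunk also switches LaunchElementwiseCudaKernel from ElementwiseType::kBinary to ElementwiseType::kTernary, which matches the three inputs {dout, out, y} that DivGradYFunctor reads per element.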
4 changes: 2 additions & 2 deletions paddle/fluid/operators/elementwise/elementwise_functor.h
@@ -165,9 +165,9 @@ struct DivGradYFunctor<paddle::platform::complex<T>> {
       const paddle::platform::complex<T>& c) const {
     paddle::platform::complex<T> c_conj(c.real, -c.imag);
     return a * b / c_conj;
   }
 };

 // Fmax
 template <typename T>
 struct FMaxFunctor {
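The surrounding context kept by this hunk shows how the complex specialization of DivGradYFunctor forms its result: a * b / conj(c), with the conjugate built by hand as (c.real, -c.imag). A small std::complex sketch of that arithmetic follows; the helper name and sample values are hypothetical, and the sign handling and reduction for dy live in the .cu changes above.

// Sketch of the complex dy term shown above: a * b / conj(c), with the
// conjugate built by hand from the real and imaginary parts. The function
// name and the sample values are made up for illustration.
#include <cassert>
#include <complex>

template <typename T>
std::complex<T> div_grad_y_term(const std::complex<T>& a,
                                const std::complex<T>& b,
                                const std::complex<T>& c) {
  std::complex<T> c_conj(c.real(), -c.imag());  // same value as std::conj(c)
  return a * b / c_conj;
}

int main() {
  const std::complex<double> dout(1.0, 2.0), out(0.5, -1.0), y(2.0, 1.0);
  const std::complex<double> term = div_grad_y_term(dout, out, y);
  // Agrees with the std::conj formulation of the same expression.
  assert(std::abs(term - dout * out / std::conj(y)) < 1e-12);
  return 0;
}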
