Update the reduce interface call
Zjq9409 committed Dec 20, 2021
1 parent 080bf95 · commit f0f1cf3
Showing 2 changed files with 14 additions and 16 deletions.
26 changes: 12 additions & 14 deletions paddle/fluid/operators/elementwise/elementwise_div_op.cu
@@ -117,15 +117,16 @@ default_elementwise_div_grad(const framework::ExecutionContext& ctx,
       ElementwiseComputeEx<DivGradFunctor<T>, DeviceContext, T>(
           ctx, dout, y, axis, DivGradFunctor<T>(), dx);
     } else {
-      framework::Tensor res_dx;
-      res_dx.Resize(dout->dims());
+      framework::Tensor tmp_dx;
+      tmp_dx.Resize(dout->dims());

       ElementwiseComputeEx<DivGradFunctor<T>, DeviceContext, T>(
-          ctx, dout, y, axis, DivGradFunctor<T>(), &res_dx);
+          ctx, dout, y, axis, DivGradFunctor<T>(), &tmp_dx);

       std::vector<int> reduce_dims = GetReduceDim(x->dims(), out->dims(), axis);
       gpuStream_t stream = ctx.cuda_device_context().stream();
-      TensorReduceFunctorImpl<T, T, CustomSum>(res_dx, dx, reduce_dims, stream);
+      TensorReduceFunctorImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+          tmp_dx, dx, kps::IdentityFunctor<T>(), reduce_dims, stream);
     }
   }
   // dy
@@ -144,24 +145,21 @@ default_elementwise_div_grad(const framework::ExecutionContext& ctx,
             nullptr, dy->mutable_data<T>(ctx.GetPlace()));
       }
     } else {
-      framework::Tensor res_dy;
-      res_dy.mutable_data<T>(dout->dims(), ctx.GetPlace());
+      framework::Tensor tmp_dy;
+      tmp_dy.mutable_data<T>(dout->dims(), ctx.GetPlace());

-      std::vector<const framework::Tensor*> ins;
-      std::vector<framework::Tensor*> outs;
-      ins.emplace_back(dout);
-      ins.emplace_back(out);
-      ins.emplace_back(y);
-      outs.emplace_back(&res_dy);
+      std::vector<const framework::Tensor*> ins = {dout, out, y};
+      std::vector<framework::Tensor*> outs = {&tmp_dy};

       const auto& dev_ctx =
           ctx.template device_context<platform::CUDADeviceContext>();
-      LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
+      LaunchElementwiseCudaKernel<ElementwiseType::kTernary, T, T>(
           dev_ctx, ins, &outs, axis, DivGradYFunctor<T>());

       std::vector<int> reduce_dims = GetReduceDim(y->dims(), out->dims(), axis);
       gpuStream_t stream = ctx.cuda_device_context().stream();
-      TensorReduceFunctorImpl<T, T, CustomSub>(res_dy, dy, reduce_dims, stream);
+      TensorReduceFunctorImpl<T, T, kps::AddFunctor, kps::InverseFunctor<T>>(
+          tmp_dy, dy, kps::InverseFunctor<T>(), reduce_dims, stream);
     }
   }
 }
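Taken together, the two hunks above show the shape of the new reduce interface: instead of a single fused functor (CustomSum for dx, CustomSub for dy), TensorReduceFunctorImpl now takes a reduction op (kps::AddFunctor) and a per-element transform (kps::IdentityFunctor for dx, kps::InverseFunctor for dy) as template parameters, with an instance of the transform also passed at the call site. The sketch below is a minimal CPU-side analogue of that calling convention, not Paddle's implementation; every name it defines, and the bucket-by-modulo reduction, are illustrative assumptions.

// Simplified CPU analogue of the new reduce calling convention: the reduce op
// and a per-element transform are supplied separately instead of being fused
// into one CustomSum/CustomSub-style functor. All names here are illustrative.
#include <cstddef>
#include <vector>

template <typename T>
struct AddFunctor {
  T operator()(const T& a, const T& b) const { return a + b; }
};

template <typename T>
struct IdentityFunctor {
  T operator()(const T& x) const { return x; }
};

// Assumed to mirror kps::InverseFunctor on the dy path: negate each element
// so that a plain sum reproduces the old CustomSub behaviour.
template <typename T>
struct InverseFunctor {
  T operator()(const T& x) const { return -x; }
};

// Toy reduction: element i of `in` is passed through `transform` and then
// folded into bucket (i % out->size()) with `reduce`.
template <typename T, typename ReduceOp, typename TransformOp>
void TransformThenReduce(const std::vector<T>& in, std::vector<T>* out,
                         ReduceOp reduce, TransformOp transform) {
  for (std::size_t i = 0; i < in.size(); ++i) {
    T& acc = (*out)[i % out->size()];
    acc = reduce(acc, transform(in[i]));
  }
}

int main() {
  std::vector<float> dout = {1.f, 2.f, 3.f, 4.f};
  std::vector<float> dx(2, 0.f);
  std::vector<float> dy(2, 0.f);
  // dx path: identity transform, plain sum.
  TransformThenReduce(dout, &dx, AddFunctor<float>(), IdentityFunctor<float>());
  // dy path: negate before summing.
  TransformThenReduce(dout, &dy, AddFunctor<float>(), InverseFunctor<float>());
  // dx == {4, 6}, dy == {-4, -6}
  return 0;
}

The same hunk also switches LaunchElementwiseCudaKernel from ElementwiseType::kBinary to ElementwiseType::kTernary, which matches the three inputs {dout, out, y} that DivGradYFunctor reads per element.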
4 changes: 2 additions & 2 deletions paddle/fluid/operators/elementwise/elementwise_functor.h
@@ -165,9 +165,9 @@ struct DivGradYFunctor<paddle::platform::complex<T>> {
       const paddle::platform::complex<T>& c) const {
     paddle::platform::complex<T> c_conj(c.real, -c.imag);
     return a * b / c_conj;
   }
 };

 // Fmax
 template <typename T>
 struct FMaxFunctor {
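The surrounding context kept by this hunk shows how the complex specialization of DivGradYFunctor forms its result: a * b / conj(c), with the conjugate built by hand as (c.real, -c.imag). A small std::complex sketch of that arithmetic follows; the helper name and sample values are hypothetical, and the sign handling and reduction for dy live in the .cu changes above.

// Sketch of the complex dy term shown above: a * b / conj(c), with the
// conjugate built by hand from the real and imaginary parts. The function
// name and the sample values are made up for illustration.
#include <cassert>
#include <complex>

template <typename T>
std::complex<T> div_grad_y_term(const std::complex<T>& a,
                                const std::complex<T>& b,
                                const std::complex<T>& c) {
  std::complex<T> c_conj(c.real(), -c.imag());  // same value as std::conj(c)
  return a * b / c_conj;
}

int main() {
  const std::complex<double> dout(1.0, 2.0), out(0.5, -1.0), y(2.0, 1.0);
  const std::complex<double> term = div_grad_y_term(dout, out, y);
  // Agrees with the std::conj formulation of the same expression.
  assert(std::abs(term - dout * out / std::conj(y)) < 1e-12);
  return 0;
}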
