elementwise max min #7538

Merged · 13 commits · Jan 18, 2018
16 changes: 8 additions & 8 deletions paddle/gserver/tests/sequence_recurrent_group.py
@@ -1,16 +1,16 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *

######################## data source ################################
34 changes: 1 addition & 33 deletions paddle/operators/elementwise_add_op.h
@@ -28,39 +28,7 @@ template <typename DeviceContext, typename T>
class ElementwiseAddKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
using Tensor = framework::Tensor;

auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* z = ctx.Output<Tensor>("Out");
z->mutable_data<T>(ctx.GetPlace());
TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
x, y, z, ctx.template device_context<DeviceContext>(), AddFunctor<T>());

auto x_dims = x->dims();
auto y_dims = y->dims();
PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
"Rank of first input must >= rank of second input.");

if (x_dims == y_dims) {
functor.Run();
return;
}

int axis = ctx.Attr<int>("axis");
axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
"Axis should be in range [0, x_dims)");

int pre, n, post;
get_mid_dims(x_dims, y_dims, axis, pre, n, post);
if (post == 1) {
functor.RunRowWise(n, pre);
return;
} else {
functor.RunMidWise(n, pre, post);
return;
}
ElementwiseComputeEx<AddFunctor<T>, DeviceContext, T>(ctx);
}
};
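The new one-liner delegates to ElementwiseComputeEx from elementwise_op_function.h. Judging from the body it replaces, the helper presumably generalizes exactly that broadcasting dispatch over the functor type; a hedged reconstruction follows (illustrative, not the verbatim helper — all names come from the deleted code above):

// A sketch, under assumptions, of what ElementwiseComputeEx wraps: the
// dispatch deleted above, parameterized on the functor. The real helper
// lives in paddle/operators/elementwise_op_function.h and may differ.
template <typename Functor, typename DeviceContext, typename T>
void ElementwiseComputeExSketch(const framework::ExecutionContext& ctx) {
  using Tensor = framework::Tensor;
  auto* x = ctx.Input<Tensor>("X");
  auto* y = ctx.Input<Tensor>("Y");
  auto* z = ctx.Output<Tensor>("Out");
  z->mutable_data<T>(ctx.GetPlace());
  TransformFunctor<Functor, T, DeviceContext> functor(
      x, y, z, ctx.template device_context<DeviceContext>(), Functor());

  auto x_dims = x->dims();
  auto y_dims = y->dims();
  PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
                    "Rank of first input must >= rank of second input.");

  if (x_dims == y_dims) {
    functor.Run();  // same shape: one flat elementwise pass
    return;
  }

  int axis = ctx.Attr<int>("axis");
  axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
  PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
                 "Axis should be in range [0, x_dims)");

  int pre, n, post;
  get_mid_dims(x_dims, y_dims, axis, pre, n, post);
  if (post == 1) {
    functor.RunRowWise(n, pre);        // y broadcast along trailing dims
  } else {
    functor.RunMidWise(n, pre, post);  // y broadcast on both sides
  }
}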

45 changes: 45 additions & 0 deletions paddle/operators/elementwise_max_op.cc
@@ -0,0 +1,45 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/elementwise_max_op.h"
#include "paddle/operators/elementwise_op.h"

namespace paddle {
namespace operators {
class ElementwiseMaxOpMaker : public ElementwiseOpMaker {
public:
ElementwiseMaxOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) {
SetComment("Max", "Out = max(X, Y)");
AddComment(comment_);
}
};
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP(elementwise_max, ops::ElementwiseOp, ops::ElementwiseMaxOpMaker,
elementwise_max_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL(
elementwise_max,
ops::ElementwiseMaxKernel<paddle::platform::CPUDeviceContext, float>,
ops::ElementwiseMaxKernel<paddle::platform::CPUDeviceContext, double>,
ops::ElementwiseMaxKernel<paddle::platform::CPUDeviceContext, int>,
ops::ElementwiseMaxKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
elementwise_max_grad,
ops::ElementwiseMaxGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ElementwiseMaxGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::ElementwiseMaxGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::ElementwiseMaxGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
32 changes: 32 additions & 0 deletions paddle/operators/elementwise_max_op.cu
@@ -0,0 +1,32 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#define EIGEN_USE_GPU
#include "paddle/operators/elementwise_max_op.h"

namespace ops = paddle::operators;

REGISTER_OP_CUDA_KERNEL(
elementwise_max,
ops::ElementwiseMaxKernel<paddle::platform::CUDADeviceContext, float>,
ops::ElementwiseMaxKernel<paddle::platform::CUDADeviceContext, double>,
ops::ElementwiseMaxKernel<paddle::platform::CUDADeviceContext, int>,
ops::ElementwiseMaxKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
elementwise_max_grad,
ops::ElementwiseMaxGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::ElementwiseMaxGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::ElementwiseMaxGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::ElementwiseMaxGradKernel<paddle::platform::CUDADeviceContext,
int64_t>);
120 changes: 120 additions & 0 deletions paddle/operators/elementwise_max_op.h
@@ -0,0 +1,120 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/operators/elementwise_op_function.h"

namespace paddle {
namespace operators {

template <typename T>
struct MaxFunctor {
inline HOSTDEVICE T operator()(T a, T b) const { return a > b ? a : b; }
};

template <typename DeviceContext, typename T>
class ElementwiseMaxKernel : public framework::OpKernel<T> {
Collaborator Author: Sounds great.

Collaborator Author: fixed

public:
void Compute(const framework::ExecutionContext& ctx) const override {
ElementwiseComputeEx<MaxFunctor<T>, DeviceContext, T>(ctx);
}
};

template <typename T>
struct ElementwiseMaxGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
auto x_e = framework::EigenVector<T>::Flatten(*x);
auto y_e = framework::EigenVector<T>::Flatten(*y);
auto dz_e = framework::EigenVector<T>::Flatten(*dz);

if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = (x_e > y_e).template cast<T>() * dz_e;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = (x_e <= y_e).template cast<T>() * dz_e;
}
}
};
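A scalar-level reading of the gradient rule above: the gradient of out = max(x, y) flows to whichever input supplied the maximum, and ties go to y, since the x branch uses a strict > while the y branch uses <=. A standalone sketch of that behavior (illustrative only, not part of this PR):

#include <cassert>

// Mirrors ElementwiseMaxGradFunctor element by element:
//   dx = (x > y)  ? dz : 0
//   dy = (x <= y) ? dz : 0
void MaxGradScalar(float x, float y, float dz, float* dx, float* dy) {
  *dx = (x > y) ? dz : 0.0f;
  *dy = (x <= y) ? dz : 0.0f;
}

int main() {
  float dx, dy;
  MaxGradScalar(2.0f, 2.0f, 1.0f, &dx, &dy);  // tie: gradient routed to y
  assert(dx == 0.0f && dy == 1.0f);
  return 0;
}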

Member (QiJune): @JiayiFeng We can reduce more code. But we can do it later, if we have more element-wise operators to implement. Anyway, let's find a general pattern and clean up the code later.

template <typename T>
struct MaxGradFunctor1 {
  template <typename Device, typename Dxe, typename Xe, typename Ye, typename Dze>
  void operator()(Device d, Dxe dx_e, Xe x_e, Ye y_e, Dze dz_e) {
    dx_e.device(d) = (x_e > y_e).template cast<T>() * dz_e;
  }
};

template <typename T>
struct MinGradFunctor1 {
  template <typename Device, typename Dxe, typename Xe, typename Ye, typename Dze>
  void operator()(Device d, Dxe dx_e, Xe x_e, Ye y_e, Dze dz_e) {
    dx_e.device(d) = (x_e < y_e).template cast<T>() * dz_e;
  }
};

template <typename T, typename Functor1, typename Functor2>
struct ElementwiseMinMaxGradFunctor {
  template <typename Device, typename X, typename Y, typename Z, typename dX,
            typename dY, typename dZ>
  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
    auto x_e = framework::EigenVector<T>::Flatten(*x);
    auto y_e = framework::EigenVector<T>::Flatten(*y);
    auto dz_e = framework::EigenVector<T>::Flatten(*dz);

    if (dx) {
      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
      Functor1 functor;
      functor(d, dx_e, x_e, y_e, dz_e);
    }
    if (dy) {
      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
      Functor2 functor;
      functor(d, dy_e, x_e, y_e, dz_e);
    }
  }
};

Collaborator Author (JiayiFeng, Jan 17, 2018): @QiJune I agree. Finding a general pattern in the backward pass is not as easy as in the forward pass, because the backward op takes Z, dZ, X, and Y, while the forward op only takes X and Y.
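To make the proposal concrete, one hedged way to complete the sketch above; MaxGradFunctor2 and MinGradFunctor2 are hypothetical names, and the non-strict comparisons keep tie gradients on the second operand, matching ElementwiseMaxGradFunctor earlier in this file:

template <typename T>
struct MaxGradFunctor2 {
  template <typename Device, typename Dye, typename Xe, typename Ye,
            typename Dze>
  void operator()(Device d, Dye dy_e, Xe x_e, Ye y_e, Dze dz_e) {
    dy_e.device(d) = (x_e <= y_e).template cast<T>() * dz_e;
  }
};

template <typename T>
struct MinGradFunctor2 {
  template <typename Device, typename Dye, typename Xe, typename Ye,
            typename Dze>
  void operator()(Device d, Dye dy_e, Xe x_e, Ye y_e, Dze dz_e) {
    dy_e.device(d) = (x_e >= y_e).template cast<T>() * dz_e;
  }
};

// The two grad kernels could then share the generic functor, e.g.:
//   ElementwiseMinMaxGradFunctor<T, MaxGradFunctor1<T>, MaxGradFunctor2<T>>
//   ElementwiseMinMaxGradFunctor<T, MinGradFunctor1<T>, MinGradFunctor2<T>>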

template <typename T>
struct ElementwiseMaxBroadCastGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ, typename Pre, typename N>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n) {
auto x_e = framework::EigenVector<T>::Flatten(*x);
auto y_e = framework::EigenVector<T>::Flatten(*y);
auto dz_e = framework::EigenVector<T>::Flatten(*dz);

auto y_e_bcast = y_e.reshape(Eigen::DSizes<int, 2>(1, n))
.broadcast(Eigen::DSizes<int, 2>(pre, 1))
.reshape(Eigen::DSizes<int, 1>(x_e.size()));

if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = (x_e > y_e_bcast).template cast<T>() * dz_e;
}

if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = ((x_e <= y_e_bcast).template cast<T>() * dz_e)
.reshape(Eigen::DSizes<int, 2>(pre, n))
.sum(Eigen::array<int, 1>{{0}});
}
}
};

template <typename T>
struct ElementwiseMaxBroadCast2GradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ, typename Pre, typename N, typename Post>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n,
Post post) {
auto x_e = framework::EigenVector<T>::Flatten(*x);
auto y_e = framework::EigenVector<T>::Flatten(*y);
auto dz_e = framework::EigenVector<T>::Flatten(*dz);

auto y_e_bcast = y_e.reshape(Eigen::DSizes<int, 3>(1, n, 1))
.broadcast(Eigen::DSizes<int, 3>(pre, 1, post))
.reshape(Eigen::DSizes<int, 1>(x_e.size()));
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = (x_e > y_e_bcast).template cast<T>() * dz_e;
}

if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = ((x_e <= y_e_bcast).template cast<T>() * dz_e)
.reshape(Eigen::DSizes<int, 3>(pre, n, post))
.sum(Eigen::array<int, 2>{{0, 2}});
}
}
};
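The pre/n/post triple that parameterizes these broadcast functors comes from get_mid_dims. A minimal standalone sketch of that decomposition, reconstructed from the elementwise_add logic removed in this PR (GetMidDimsSketch is an illustrative stand-in, not Paddle's actual helper):

#include <cassert>
#include <cstdint>
#include <vector>

// pre  = product of x dims before `axis`
// n    = total element count of y
// post = product of x dims after the block aligned with y
void GetMidDimsSketch(const std::vector<int64_t>& x_dims,
                      const std::vector<int64_t>& y_dims, int axis, int* pre,
                      int* n, int* post) {
  *pre = *n = *post = 1;
  for (int i = 0; i < axis; ++i) *pre *= x_dims[i];
  for (size_t i = 0; i < y_dims.size(); ++i) *n *= y_dims[i];
  for (size_t i = axis + y_dims.size(); i < x_dims.size(); ++i)
    *post *= x_dims[i];
}

int main() {
  int pre, n, post;
  // x: [2, 3, 4], y: [3], axis = 1 -> y is viewed as (1, 3, 1) and broadcast
  // to (2, 3, 4): exactly the reshape + broadcast chain in the functor above.
  GetMidDimsSketch({2, 3, 4}, {3}, 1, &pre, &n, &post);
  assert(pre == 2 && n == 3 && post == 4);
  return 0;
}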

template <typename DeviceContext, typename T>
class ElementwiseMaxGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
ElementwiseGradCompute<DeviceContext, T, ElementwiseMaxGradFunctor<T>,
ElementwiseMaxBroadCastGradFunctor<T>,
ElementwiseMaxBroadCast2GradFunctor<T>>(ctx);
}
};

} // namespace operators
} // namespace paddle
45 changes: 45 additions & 0 deletions paddle/operators/elementwise_min_op.cc
@@ -0,0 +1,45 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/elementwise_min_op.h"
#include "paddle/operators/elementwise_op.h"

namespace paddle {
namespace operators {
class ElementwiseMinOpMaker : public ElementwiseOpMaker {
public:
ElementwiseMinOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) {
SetComment("Max", "Out = min(X, Y)");
AddComment(comment_);
}
};
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP(elementwise_min, ops::ElementwiseOp, ops::ElementwiseMinOpMaker,
elementwise_min_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL(
elementwise_min,
ops::ElementwiseMinKernel<paddle::platform::CPUDeviceContext, float>,
ops::ElementwiseMinKernel<paddle::platform::CPUDeviceContext, double>,
ops::ElementwiseMinKernel<paddle::platform::CPUDeviceContext, int>,
ops::ElementwiseMinKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
elementwise_min_grad,
ops::ElementwiseMinGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ElementwiseMinGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::ElementwiseMinGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::ElementwiseMinGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
32 changes: 32 additions & 0 deletions paddle/operators/elementwise_min_op.cu
@@ -0,0 +1,32 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#define EIGEN_USE_GPU
#include "paddle/operators/elementwise_min_op.h"

namespace ops = paddle::operators;

REGISTER_OP_CUDA_KERNEL(
elementwise_min,
ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, float>,
ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, double>,
ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, int>,
ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
elementwise_min_grad,
ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext,
int64_t>);