
Seq expand op #4740

Merged: 20 commits, Oct 30, 2017. Changes shown below are from 11 of the 20 commits.

Commits:
901b041
Add seq_expand op
wanghaoshuang Oct 11, 2017
acd1aae
fix issues
wanghaoshuang Oct 11, 2017
f984cba
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
wanghaoshuang Oct 11, 2017
23701ff
Refine op
wanghaoshuang Oct 18, 2017
555ab3f
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
wanghaoshuang Oct 18, 2017
8de04be
Fix unitest
wanghaoshuang Oct 18, 2017
31531ab
Add backward kernel
wanghaoshuang Oct 18, 2017
a5adffd
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
wanghaoshuang Oct 18, 2017
a94b3dd
Refine comments and function name
wanghaoshuang Oct 19, 2017
74b283c
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
wanghaoshuang Oct 19, 2017
00ad751
Use stream while memory::Copy in GPU mode
wanghaoshuang Oct 20, 2017
d697b6a
Modified code using LoDTensor
wanghaoshuang Oct 23, 2017
4e8fccf
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
wanghaoshuang Oct 23, 2017
2961674
Rewrite sequence expand op
wanghaoshuang Oct 24, 2017
97f1b98
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
wanghaoshuang Oct 24, 2017
35e7944
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
wanghaoshuang Oct 29, 2017
fab6f30
Add empty sequence case in unitest
wanghaoshuang Oct 30, 2017
9f32b61
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
wanghaoshuang Oct 30, 2017
8d4e2d4
1. Add unitest for empty sequence case
wanghaoshuang Oct 30, 2017
84f471b
Fix comments
wanghaoshuang Oct 30, 2017
29 changes: 29 additions & 0 deletions paddle/framework/lod_tensor.cc
@@ -103,5 +103,34 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin,
lod_ = new_lod;
}

Vector<size_t> expand_lod(Vector<size_t> level, Vector<size_t> starts,
Vector<size_t> scales, bool repeat) {
[Review — Contributor] Function names: "CamelCase"
(https://google.github.io/styleguide/cppguide.html#Function_Names).
Also, only seq_expand currently needs this function, so it could be moved into seq_expand_op.
[Author] Fixed.

  Vector<size_t> result;
  result.push_back(level[0]);
  size_t p = 0, start = 0, end = 0;
  if (!repeat) {
    for (size_t i = 0; i < scales.size(); ++i) {
      result.push_back(result.back() + scales[i] * (level[i + 1] - level[i]));
    }
  } else {
    for (size_t i = 0; i < scales.size(); ++i) {
      // Check the bound before indexing into level.
      while (p < level.size() && starts[i] != level[p]) {
        ++p;
      }
      start = p;
      while (p < level.size() && starts[i + 1] != level[p]) {
        ++p;
      }
      end = p + 1;
      for (size_t j = 0; j < scales[i]; ++j) {
        for (size_t index = start; index < end - 1; ++index) {
          result.push_back(result.back() + level[index + 1] - level[index]);
        }
      }
    }
  }
  return result;
}

} // namespace framework
} // namespace paddle
4 changes: 4 additions & 0 deletions paddle/framework/lod_tensor.h
@@ -122,5 +122,9 @@ class LoDTensor : public Tensor {
private:
LoD lod_;
};

Vector<size_t> expand_lod(Vector<size_t> level, Vector<size_t> starts,
Vector<size_t> scales, bool repeat);

} // namespace framework
} // namespace paddle
148 changes: 148 additions & 0 deletions paddle/operators/seq_expand_op.cc
@@ -0,0 +1,148 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/seq_expand_op.h"

namespace paddle {
namespace operators {

using framework::Tensor;

class SeqExpandOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SeqExpandOp should not be null.");
[Review — @Superjomn, Oct 25, 2017] Just PADDLE_ENFORCE_NOT_NULL(Input(X))? This message lacks
information; it tells nothing more than the enforce code itself. An enforce without a comment
is fine, or with a comment that really helps to find the reason for its failure.
[Author] Fixed.

    int repeat = ctx->Attrs().Get<int>("repeat");
    framework::DDim out_dim;
    if (repeat == 0) {
      PADDLE_ENFORCE(
          ctx->HasInput("Y"),
          "Input(Y) of SeqExpandOp should not be null while repeat == 0.");
      out_dim = ctx->GetInputDim("Y");
      ctx->ShareLoD("Y", "Out");
    } else {
      out_dim = ctx->GetInputDim("X");
      out_dim[0] = out_dim[0] * repeat;
    }
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of SeqExpandOp should not be null.");
    ctx->SetOutputDim("Out", out_dim);
[Review — Contributor] Should InferShape also set the LoD of the output LoDTensor? Here the LoD
is computed in the forward pass according to the attribute and the input LoDs. I'm not sure
whether InferShape needs to infer all the shape info (dimension, LoD). @reyoung @jacquesqiao @QiJune

  }
};

class SeqExpandOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SeqExpandOpMaker(framework::OpProto* proto,
                   framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput(
        "X",
        "The input('X') of seq_expand op. It can be LoDTensor or base Tensor.");
[Review — Contributor] Suggest: "(Tensor or LoDTensor) The input('X') of this operator can be a LoDTensor or a base Tensor."
[Author] Fixed.

AddInput(
"Y",
"The reference input('Y') of seq_expand op."
[Review — Contributor] Suggest: "(LoDTensor) The ..."
[Author] Fixed.

        "It must be a LoDTensor with k-level (k > 0). "
        "This reference input is essential if 'repeat' attribute is not "
        "configured. "
        "Input(X) will be expanded according to the LoD of input(Y) while repeat == 0.");
[Review — Contributor] "by" -> "according to the"
[Author] Fixed.

AddOutput("Out",
"The output of seq_expand op."
[Review — Contributor] Add a type for the output: "(LoDTensor) The ..."
[Author] Fixed.

              "The output is a (k+1)-level LoDTensor "
              "while input(X) is a k-level LoDTensor. "
              "(A base tensor is a 0-level LoDTensor.)");
    AddAttr<int>("repeat",
                 "(type: int; default: 0) "
                 "Repeating times of each element while expanding input(X). "
                 "It works while input(Y) is not configured.")
        .SetDefault(0);
    AddComment(R"DOC(
Expand a k-level LoDTensor to a (k+1)-level LoDTensor, according to the
LoD of input(Y) or the 'repeat' attribute.

Case 1:

Given a 2-level LoDTensor X:
    X.data = [1, 2, 3, 4]
    X.lod  = [[0, 3, 4], [0, 1, 3, 4]]
and
    repeat = 2
then we get a 3-level LoDTensor:
    Out.data = [1, 2, 3, 1, 2, 3, 4, 4]
    Out.lod  = [[0, 6, 8],
                [0, 3, 6, 7, 8],
                [0, 1, 3, 4, 6, 7, 8]]

Case 2:

Given a 2-level LoDTensor X:
    X.data = [1, 2, 3, 4]
    X.lod  = [[0, 3, 4], [0, 1, 3, 4]]
and
    Y.lod = [[0, 6, 8],
             [0, 3, 6, 7, 8],
             [0, 1, 3, 4, 6, 7, 8]]
then we get a 3-level LoDTensor:
    Out.data = [1, 2, 3, 1, 2, 3, 4, 4]
    Out.lod  = [[0, 6, 8],
                [0, 3, 6, 7, 8],
                [0, 1, 3, 4, 6, 7, 8]]

Case 3:

Given a 0-level LoDTensor (a base Tensor) X:
    X.data = [1, 2, 3, 4]
    X.lod  = NULL
and
    repeat = 2
then we get a 1-level LoDTensor:
    Out.data = [1, 1, 2, 2, 3, 3, 4, 4]
    Out.lod  = [[0, 2, 4, 6, 8]]
[Review — Contributor] These examples are good, but still hard to understand. Some more detail
is needed, since the changes to the LoD are a bit complex. For example, explain the repeating:
it takes one instance (maybe another word) as a unit.
[Author] Fixed.

)DOC");
}
};

class SeqExpandOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasInput("Out"), "Input(Out) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null");
    auto x_dims = ctx->GetInputDim("X");
    auto x_grad_name = framework::GradVarName("X");
    if (ctx->HasOutput(x_grad_name)) {
      ctx->SetOutputDim(x_grad_name, x_dims);
    }
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP(seq_expand, ops::SeqExpandOp, ops::SeqExpandOpMaker,
            seq_expand_grad, ops::SeqExpandOpGrad);
REGISTER_OP_CPU_KERNEL(seq_expand,
                       ops::SeqExpandKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
    seq_expand_grad,
    ops::SeqExpandGradKernel<paddle::platform::CPUPlace, float>);
23 changes: 23 additions & 0 deletions paddle/operators/seq_expand_op.cu
@@ -0,0 +1,23 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#define EIGEN_USE_GPU
#include "paddle/operators/seq_expand_op.h"

namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(seq_expand,
ops::SeqExpandKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
seq_expand_grad,
ops::SeqExpandGradKernel<paddle::platform::GPUPlace, float>);
146 changes: 146 additions & 0 deletions paddle/operators/seq_expand_op.h
@@ -0,0 +1,146 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/framework/op_registry.h"
#include "paddle/memory/memcpy.h"
#include "unsupported/Eigen/CXX11/Tensor"

namespace paddle {
namespace operators {

using LoDTensor = framework::LoDTensor;

template <typename Place, typename T>
class SeqExpandKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<LoDTensor>("X");
    auto* out = context.Output<LoDTensor>("Out");
    const T* x_data = x->data<T>();
    auto x_dims = x->dims();
    auto x_lod = x->lod();

    // A base tensor has no LoD; treat each row as a sequence of length 1.
    if (x_lod.size() == 0) {
      framework::Vector<size_t> level;
      for (int i = 0; i < x->dims()[0] + 1; ++i) {
        level.push_back(i);
      }
      x_lod.push_back(level);
    } else {
      x_lod.insert(x_lod.begin(), x_lod[0]);
    }

    size_t repeat = static_cast<size_t>(context.Attr<int>("repeat"));
    framework::Vector<size_t> scales;
    if (repeat != 0) {
      for (size_t i = 0; i < x_lod[0].size() - 1; ++i) {
        scales.push_back(repeat);
      }
      std::vector<int64_t> dims = framework::vectorize(x->dims());
      dims[0] = dims[0] * repeat;
      auto out_dims = framework::make_ddim(dims);
      out->Resize(out_dims);
    } else {
      // Derive the per-sequence scale from the ratio of Y's and X's
      // top-level sequence lengths.
      auto* y = context.Input<LoDTensor>("Y");
      auto y_lod = y->lod();
      for (size_t i = 0; i < y_lod[0].size() - 1; ++i) {
        scales.push_back((y_lod[0][i + 1] - y_lod[0][i]) /
                         (x_lod[0][i + 1] - x_lod[0][i]));
      }
      out->Resize(y->dims());
    }

    framework::LoD out_lod;
    auto level0 = framework::expand_lod(x_lod[0], x_lod[0], scales, false);
    out_lod.push_back(level0);
    for (size_t i = 1; i < x_lod.size(); ++i) {
      out_lod.push_back(
          framework::expand_lod(x_lod[i], x_lod[0], scales, true));
    }

    size_t element_len = framework::product(x_dims) / x_dims[0];
    T* out_data = out->mutable_data<T>(context.GetPlace());

    // Copy each top-level sequence of X into Out scales[i] times.
    auto place = context.GetPlace();
    size_t count = 0;
    if (platform::is_cpu_place(place)) {
      auto& cpu_place = boost::get<platform::CPUPlace>(place);
      for (size_t i = 0; i < scales.size(); ++i) {
        count = element_len * (x_lod[0][i + 1] - x_lod[0][i]);
        for (size_t j = 0; j < scales[i]; ++j) {
          memory::Copy(cpu_place, out_data, cpu_place, x_data,
                       sizeof(T) * count);
          out_data += count;
        }
        x_data += count;
      }
    } else {
#ifdef PADDLE_WITH_CUDA
      auto& gpu_place = boost::get<platform::GPUPlace>(place);
      auto stream = reinterpret_cast<const platform::CUDADeviceContext&>(
                        context.device_context())
                        .stream();
      for (size_t i = 0; i < scales.size(); ++i) {
        count = element_len * (x_lod[0][i + 1] - x_lod[0][i]);
        for (size_t j = 0; j < scales[i]; ++j) {
          memory::Copy(gpu_place, out_data, gpu_place, x_data,
                       sizeof(T) * count, stream);
          out_data += count;
        }
        x_data += count;
      }
#else
      PADDLE_THROW("Paddle is not compiled with GPU");
#endif
    }

    out->set_lod(out_lod);
  }
};

template <typename Place, typename T>
class SeqExpandGradKernel : public framework::OpKernel<T> {
[Review — Contributor] Add more comments to describe the process, because the code is long and
hard to understand.
[Author] Fixed.

 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* d_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
    auto* x = context.Input<LoDTensor>("X");
    auto* out = context.Input<LoDTensor>("Out");
    auto* d_x = context.Output<LoDTensor>(framework::GradVarName("X"));
    auto out_lod = out->lod();
    d_x->set_lod(x->lod());
    const T* d_out_data = d_out->data<T>();
    auto d_out_dims = d_out->dims();
    T* d_x_data = d_x->mutable_data<T>(context.GetPlace());
    size_t element_len = framework::product(d_out_dims) / d_out_dims[0];
[Review — Contributor] Replace line 71 with:
size_t element_len = d_out->numel() / d_out->dims()[0];
[Author] Fixed.

    for (size_t i = 0; i < out->NumElements(); ++i) {
      size_t ele_count = out_lod[0][i + 1] - out_lod[0][i];
      size_t repeat = out->NumElements(0, i);
      // View this sequence's gradient as (repeat, len) and sum over the
      // repeat axis to accumulate into the input gradient.
      Eigen::TensorMap<Eigen::Tensor<const T, 2>> d_out_t(
          d_out_data, static_cast<int>(repeat),
          static_cast<int>((ele_count * element_len) / repeat));
      Eigen::TensorMap<Eigen::Tensor<T, 1>> d_x_t(
          d_x_data, static_cast<int>((ele_count * element_len) / repeat));
      auto place = context.GetEigenDevice<Place>();
      d_x_t.device(place) = d_out_t.sum(Eigen::array<int, 1>({{0}}));
      d_out_data += (ele_count * element_len);
      d_x_data += ((ele_count * element_len) / repeat);
    }
  }
};

} // namespace operators
} // namespace paddle
10 changes: 5 additions & 5 deletions paddle/operators/sequence_concat_op.cc
@@ -68,20 +68,20 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
"The level should be less than the level number of inputs.")
.SetDefault(0);
AddComment(R"DOC(
The sequence_concat operator concatenates multiple LoDTensors.
It only supports a sequence (LoD tensor with level number 1)
or a nested sequence (LoD tensor with level number 2) as its input.
- Case1:
If the axis is other than 0 (here, axis is 1 and level is 1),
each input should have the same LoD information, and the LoD
information of the output stays the same as that of the inputs.

LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
LoD(x1) = {{0,2,4}, {0,1,2,3,4}}; Dims(x1) = (4,4,4)
LoD(Out) = {{0,2,4}, {0,1,2,3,4}}; Dims(Out) = (4,7,4)

- Case2:
If the axis is 0 (here, level is 0), the inputs are concatenated along
the time steps, and the LoD information of the output needs to be re-computed.

LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
@@ -94,7 +94,7 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
LoD(x1) = {{0,3,5}, {0,1,3,4,5}}; Dims(x1) = (5,3,4)
LoD(Out) = {{0,5,9}, {0,2,5,7,9}}; Dims(Out) = (9,3,4)

NOTE: The levels of all the inputs should be the same.
)DOC");
}