Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enrich ConvShift to support sequence data input #2133

Closed
Wants to merge 9 commits into the base branch from the contributor's branch.
8 changes: 5 additions & 3 deletions paddle/gserver/layers/ConvShiftLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@ void ConvShiftLayer::forward(PassType passType) {

MatrixPtr inV0 = getInputValue(0);
MatrixPtr inV1 = getInputValue(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

size_t batchSize = inV0->getHeight();
size_t dataDim = inV0->getWidth();

CHECK_EQ(batchSize, inV1->getHeight());
CHECK_EQ(dataDim, getSize());

{
Expand All @@ -86,7 +86,7 @@ void ConvShiftLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue();

REGISTER_TIMER_INFO("FwConvShiftTimer", getName().c_str());
outV->circularConv(*inV0, *inV1);
outV->circularConv(*inV0, *inV1, seqStartPosPtr, useGpu_);
}

void ConvShiftLayer::backward(const UpdateCallback& callback) {
Expand All @@ -95,11 +95,13 @@ void ConvShiftLayer::backward(const UpdateCallback& callback) {
MatrixPtr outG = getOutputGrad();
MatrixPtr inG0 = getInputGrad(0);
MatrixPtr inG1 = getInputGrad(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

REGISTER_TIMER_INFO("BwConvShiftTimer", getName().c_str());

if (inG0 && inG1) {
outG->circularConvDerivative(*outG, *inV0, *inV1, *inG0, *inG1);
outG->circularConvDerivative(
*outG, *inV0, *inV1, *inG0, *inG1, seqStartPosPtr, useGpu_);
} else {
CHECK(!inG0 || !inG1) << "Not supported";
}
Expand Down
31 changes: 28 additions & 3 deletions paddle/gserver/tests/test_LayerGrad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -910,18 +910,43 @@ TEST(Layer, SequenceReshapeLayer) {
}
}

TEST(Layer, ConvShiftLayer) {
// Gradient-checks ConvShiftLayer for both input modes: "non-seq" (one dense
// row per sample) and "seq" (variable-length sequences). For sequences,
// maxLen bounds each randomly drawn sequence length (lengths in [1, maxLen]).
void testConvShiftLayer(string trans_type, size_t maxLen = 0) {
TestConfig config;
config.layerConfig.set_type("conv_shift");
config.layerConfig.set_size(10);
size_t batch_size = 100;

// NOTE(review): this span is a GitHub diff capture; the next line is the
// pre-change (removed) input definition retained by the diff view.
config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
if (trans_type == "non-seq") {
config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
} else {
// Generate sequence data
vector<int> seqStartPositions(batch_size + 1, 0);
int seqLen = 0;
size_t pos = 0;  // running offset: total rows generated so far
for (size_t i = 0; i < batch_size; ++i) {
seqLen = uniformRandom(maxLen) + 1;  // length in [1, maxLen]
seqStartPositions[i] = pos;
pos += seqLen;
}
seqStartPositions[batch_size] = pos;  // sentinel: one-past-the-end offset

// Self-defined sequence input: pos rows of width 10, uniform noise.
MatrixPtr matValuePtr = Matrix::create(pos, 10, false, false);
matValuePtr->randomizeUniform();

config.inputDefs.push_back(
{INPUT_SELF_DEFINE_DATA, "layer_0", matValuePtr, seqStartPositions});
}
// Second input: convolution kernel of width 3 (odd, per circularConv's check).
config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();

// GPU is not supported yet, so only the CPU path is exercised.
// (pre-change call retained by the diff view:)
testLayerGrad(config, "conv_shift", 100, false, false);
testLayerGrad(config, "conv_shift", batch_size, false, false);
}

TEST(Layer, ConvShiftLayer) {
  // Run the gradient check once per input mode: dense batch first, then
  // variable-length sequences capped at length 5.
  const char* transTypes[] = {"non-seq", "seq"};
  const size_t maxLens[] = {0, 5};
  for (size_t i = 0; i < 2; ++i) {
    testConvShiftLayer(transTypes[i], maxLens[i]);
  }
}

TEST(Layer, PowerLayer) {
Expand Down
95 changes: 63 additions & 32 deletions paddle/math/Matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3881,71 +3881,102 @@ real CpuMatrix::getMax() {
return res;
}

// Pre-change definition header (removed lines retained by the diff view):
void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
size_t height = this->getHeight();
// Circular (wrap-around) 1-D convolution: accumulates in0 convolved with the
// odd-width kernel rows of in1 into this matrix. When seqStartPosPtr is
// given, the rows of each sequence are treated as one long concatenated row
// and convolved as a whole; each sequence uses one kernel row of in1.
void CpuMatrix::circularConv(Matrix& in0,
Matrix& in1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = this->getHeight();
size_t width0 = this->getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
// if sequence type, height1 should be sequence number
if (nullptr != seqStartPosPtr) {
numSeqs = seqStartPosPtr->getSize() - 1;
}

// (diff view: old and new CHECK lines are interleaved below)
CHECK_EQ(height, in0.getHeight());
CHECK_EQ(height0, in0.getHeight());
CHECK_EQ(width0, in0.getWidth());
CHECK_EQ(height, in1.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());

// Kernel width must be odd so it splits into equal left/right context.
CHECK_EQ(width1 % 2, 1U);

real* outV = this->getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();

const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
// NOTE(review): this is a CPU loop that dereferences startPosIntPtr below;
// getData(useGpu) with useGpu == true would hand back a device pointer —
// confirm callers on the CPU path always pass useGpu == false here.
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
// (pre-change loop, removed lines retained by the diff view:)
for (size_t x = 0; x < height;
++x, outV += width0, inV0 += width0, inV1 += width1) {
for (size_t i = 0; i < width0; ++i) { // each dimension of output
for (size_t j = 0; j < width1; ++j) {
// iterate over all dimensions of inV1
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
outV[i] += inV0[index] * inV1[j];
// row-first order: treat a sequence's multiple rows as one long row
for (size_t x = 0; x < numSeqs; ++x) {
// Flattened length of this sequence: width0 * (number of rows in it);
// stays width0 (one row) when no sequence info is given.
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
// conv a complete sequence
for (size_t i = 0; i < curSeqWidth; ++i) {
for (size_t j = 0; j < width1;
++j) { // iterate over convolution template
// Modular index implements the circular wrap within the sequence.
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(outV + i) += *(inV0 + index) * inV1[j];
}
}
// Advance to the next sequence's data and its kernel row.
outV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
}
}

// Backward pass of circularConv: scatters the output gradient outG into the
// data gradient inG0 and the kernel gradient inG1. Sequence handling mirrors
// the forward pass: each sequence's rows are treated as one concatenated row.
// (diff view: the old one-line parameter list and first statement are
// retained immediately below the new signature.)
void CpuMatrix::circularConvDerivative(
Matrix& outG, Matrix& in0, Matrix& in1, Matrix& inG0, Matrix& inG1) {
size_t height = in0.getHeight();
void CpuMatrix::circularConvDerivative(Matrix& outG,
Matrix& in0,
Matrix& in1,
Matrix& inG0,
Matrix& inG1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = in0.getHeight();
size_t width0 = in0.getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
if (nullptr != seqStartPosPtr) numSeqs = seqStartPosPtr->getSize() - 1;

// (diff view: old and new CHECK lines are interleaved below)
CHECK_EQ(height, in1.getHeight());
CHECK_EQ(height, inG0.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());
CHECK_EQ(height0, inG0.getHeight());
CHECK_EQ(width0, inG0.getWidth());
CHECK_EQ(height, inG1.getHeight());
CHECK_EQ(numSeqs, inG1.getHeight());
CHECK_EQ(width1, inG1.getWidth());
CHECK_EQ(height, outG.getHeight());
CHECK_EQ(height0, outG.getHeight());
CHECK_EQ(width0, outG.getWidth());

real* outGV = outG.getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();
real* inGV0 = inG0.getData();
real* inGV1 = inG1.getData();
const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
// NOTE(review): CPU loop below dereferences this pointer — confirm
// callers always pass useGpu == false so a host pointer is returned.
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
// (pre-change loop, removed lines retained by the diff view:)
for (size_t x = 0; x < height; ++x,
outGV += width0,
inV0 += width0,
inV1 += width1,
inGV0 += width0,
inGV1 += width1) {
for (size_t j = 0; j < width1; ++j) { // iterate over width1
for (size_t i = 0; i < width0; ++i) {
// sum over all dimensions of outG
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
inGV0[index] += outGV[i] * inV1[j];
inGV1[j] += outGV[i] * inV0[index];
for (size_t x = 0; x < numSeqs; ++x) {
// Flattened length of this sequence (width0 when no sequence info).
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
for (size_t j = 0; j < width1; ++j) { // iterate over convolution template
for (size_t i = 0; i < curSeqWidth; ++i) {
// Same circular index as the forward pass.
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(inGV0 + index) += *(outGV + i) * inV1[j];
inGV1[j] += *(outGV + i) * *(inV0 + index);
}
}
// Advance to the next sequence's gradients and kernel row.
outGV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
inGV0 += curSeqWidth;
inGV1 += width1;
}
}

Expand Down
18 changes: 14 additions & 4 deletions paddle/math/Matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -745,15 +745,20 @@ class Matrix : public BaseMatrix {
* b's index arithmetic is computed modulo M,
* c's index arithmetic is computed modulo N.
*/
// Pre-change declaration (removed line retained by the diff view):
virtual void circularConv(Matrix& b, Matrix& c) {
// Circular convolution of b with kernel c (implemented by CpuMatrix).
// seqStartPosPtr: optional per-sequence row-start offsets; useGpu selects
// which copy (host/device) of those offsets to read.
virtual void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

// Backward pass of circularConv (implemented by CpuMatrix); the extra
// sequence parameters mirror circularConv's.
virtual void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
// (pre-change final parameter retained by the diff view:)
Matrix& prevGrad2) {
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

Expand Down Expand Up @@ -1720,12 +1725,17 @@ class CpuMatrix : public Matrix {
IVector& label,
real alpha);

// Pre-change declaration (removed line retained by the diff view):
void circularConv(Matrix& b, Matrix& c);
// CPU overrides of the circular-convolution pair. The defaults keep old
// two-/five-argument call sites compiling unchanged.
// NOTE(review): default arguments on virtual functions are statically
// bound — calls through Matrix& use the base declaration (which has no
// defaults), so these defaults only apply to calls via CpuMatrix.
void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);
void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
// (pre-change final parameter retained by the diff view:)
Matrix& prevGrad2);
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);

void softmax(Matrix& output);
void sequenceSoftmax(Matrix& output, const IVector& index);
Expand Down