Enrich ConvShift to support sequence data input #2133

Closed
wants to merge 9 commits
8 changes: 5 additions & 3 deletions paddle/gserver/layers/ConvShiftLayer.cpp
@@ -71,11 +71,11 @@ void ConvShiftLayer::forward(PassType passType) {

MatrixPtr inV0 = getInputValue(0);
MatrixPtr inV1 = getInputValue(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

size_t batchSize = inV0->getHeight();
size_t dataDim = inV0->getWidth();

CHECK_EQ(batchSize, inV1->getHeight());
CHECK_EQ(dataDim, getSize());

{
@@ -86,7 +86,7 @@ void ConvShiftLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue();

REGISTER_TIMER_INFO("FwConvShiftTimer", getName().c_str());
outV->circularConv(*inV0, *inV1);
outV->circularConv(*inV0, *inV1, seqStartPosPtr, useGpu_);
}

void ConvShiftLayer::backward(const UpdateCallback& callback) {
@@ -95,11 +95,13 @@ void ConvShiftLayer::backward(const UpdateCallback& callback) {
MatrixPtr outG = getOutputGrad();
MatrixPtr inG0 = getInputGrad(0);
MatrixPtr inG1 = getInputGrad(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

REGISTER_TIMER_INFO("BwConvShiftTimer", getName().c_str());

if (inG0 && inG1) {
outG->circularConvDerivative(*outG, *inV0, *inV1, *inG0, *inG1);
outG->circularConvDerivative(
*outG, *inV0, *inV1, *inG0, *inG1, seqStartPosPtr, useGpu_);
} else {
CHECK(!inG0 || !inG1) << "Not supported";
}
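With these changes the layer forwards the first input's sequenceStartPositions down to Matrix::circularConv. A minimal sketch of what that call now expects, assuming the usual paddle::Matrix / paddle::ICpuGpuVector factories; the shapes and names below are illustrative, not taken from this PR:

#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"

using namespace paddle;  // NOLINT

void convShiftSequenceSketch() {
  const size_t dataDim = 10;     // layer size
  const size_t kernelWidth = 3;  // must be odd
  // Two sequences of lengths 3 and 2 -> 3 + 2 = 5 concatenated rows.
  ICpuGpuVectorPtr seqStartPos = ICpuGpuVector::create(3, /*useGpu=*/false);
  int* buf = seqStartPos->getMutableData(false);
  buf[0] = 0; buf[1] = 3; buf[2] = 5;

  MatrixPtr in0 = Matrix::create(5, dataDim, false, false);      // sequence data
  MatrixPtr in1 = Matrix::create(2, kernelWidth, false, false);  // one kernel row per sequence
  MatrixPtr out = Matrix::create(5, dataDim, false, false);
  in0->randomizeUniform();
  in1->randomizeUniform();
  out->zeroMem();

  // Non-null start positions: each sequence is flattened into one long row
  // and circularly convolved with its own kernel row.
  out->circularConv(*in0, *in1, seqStartPos, /*useGpu=*/false);
}

For a non-sequence input the same pointer is simply null, so the nullptr check in CpuMatrix::circularConv (further down in this diff) falls back to the old per-row behaviour.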
26 changes: 25 additions & 1 deletion paddle/gserver/tests/LayerGradUtil.cpp
@@ -387,17 +387,41 @@ void initDataLayer(TestConfig testConf,
data.value->sigmoid(*data.value);
data.grad->zeroMem();
break;
case INPUT_SEQUENCE_MNUM_DATA: {
// first calculate the total height (sum of sequence lengths)
sequenceStartPositions =
ICpuGpuVector::create(batchSize + 1, /*useGpu=*/false);
int seqLen = 0;
int* buf = sequenceStartPositions->getMutableData(false);
int64_t pos = 0;
for (size_t j = 0; j < batchSize; ++j) {
seqLen = uniformRandom(testConf.inputDefs[i].maxLen) + 1;
buf[j] = pos;
pos += seqLen;
}
buf[batchSize] = pos;
fillData(trans, layer->getSize(), pos);
data.value->randomizeUniform();
data.value->add(-0.5);
if (testLayerName != "prelu") {
data.value->sigmoid(*data.value);
}
Contributor:
Lines 406-408 can be removed; the prelu layer will not be used here.

Contributor Author:
done

data.grad->zeroMem();
break;
}
default:
LOG(FATAL) << " unknown inputType ";
return;
}
if (testConf.inputDefs[i].inputType == INPUT_SEQUENCE_DATA ||
testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA ||
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_LABEL ||
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA) {
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA ||
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MNUM_DATA) {
if (!sequenceStartPositions) {
generateSequenceStartPositions(batchSize, sequenceStartPositions);
}

data.sequenceStartPositions = sequenceStartPositions;
}
if (testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA) {
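The new INPUT_SEQUENCE_MNUM_DATA case reinterprets batchSize as the number of sequences: each sequence gets a random length in [1, maxLen], the running sums are written into sequenceStartPositions, and buf[batchSize] becomes the total row count passed to fillData. A hypothetical helper (not part of the PR) showing how such a buffer is meant to be read back:

#include <cstddef>
#include <vector>

// buf has numSeqs + 1 monotonically increasing entries, starting at 0.
std::vector<int> sequenceLengths(const int* buf, size_t numSeqs) {
  std::vector<int> lens(numSeqs);
  for (size_t i = 0; i < numSeqs; ++i) {
    lens[i] = buf[i + 1] - buf[i];  // length of the i-th sequence
  }
  return lens;  // the lengths sum to buf[numSeqs], the total number of rows
}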
18 changes: 17 additions & 1 deletion paddle/gserver/tests/LayerGradUtil.h
@@ -31,7 +31,8 @@ enum InputType {
INPUT_SEQUENCE_LABEL,
INPUT_SPARSE_NON_VALUE_DATA,
INPUT_SPARSE_FLOAT_VALUE_DATA,
INPUT_DENSE_DIM_DATA, // using sequence length to init dense data
INPUT_DENSE_DIM_DATA, // using sequence length to init dense data
INPUT_SEQUENCE_MNUM_DATA,  // regard batchSize as the number of sequences
};

struct ParaSparse {
@@ -62,6 +63,7 @@ struct InputDef {
string name;
size_t dim;
size_t paraSize;
size_t maxLen; // maximum length of sequence data
ParaSparse sparse;
bool isStatic;
std::vector<int> labelInitValue;
@@ -76,6 +78,20 @@ struct InputDef {
isStatic = false;
}

InputDef(InputType type,
string nameIn,
size_t dimIn,
size_t sizeIn,
size_t maxSeqLen) {
inputType = type;
name = nameIn;
dim = dimIn;
paraSize = sizeIn;
maxLen = maxSeqLen;
sparse = {""};
isStatic = false;
}

InputDef(InputType type,
string nameIn,
size_t dimIn,
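The added five-argument constructor is what the test below relies on; for example, a 10-dimensional sequence input with no parameters and at most 5 steps per sequence (the values used in test_LayerGrad.cpp) would be declared as:

InputDef seqInput(INPUT_SEQUENCE_MNUM_DATA, "layer_0", 10, /*paraSize=*/0, /*maxSeqLen=*/5);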
13 changes: 11 additions & 2 deletions paddle/gserver/tests/test_LayerGrad.cpp
@@ -902,12 +902,16 @@ TEST(Layer, SequenceReshapeLayer) {
}
}

TEST(Layer, ConvShiftLayer) {
void testConvShiftLayer(string trans_type, size_t maxLen = 0) {
TestConfig config;
config.layerConfig.set_type("conv_shift");
config.layerConfig.set_size(10);

config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
if (trans_type == "non-seq")
config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
else
config.inputDefs.push_back(
{INPUT_SEQUENCE_MNUM_DATA, "layer_0", 10, 0, maxLen});
config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();
@@ -916,6 +920,11 @@ TEST(Layer, ConvShiftLayer) {
testLayerGrad(config, "conv_shift", 100, false, false);
}

TEST(Layer, ConvShiftLayer) {
testConvShiftLayer("non-seq");
testConvShiftLayer("seq", 5);
}

TEST(Layer, PowerLayer) {
TestConfig config;
config.layerConfig.set_type("power");
95 changes: 63 additions & 32 deletions paddle/math/Matrix.cpp
@@ -3877,71 +3877,102 @@ real CpuMatrix::getMax() {
return res;
}

void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
size_t height = this->getHeight();
void CpuMatrix::circularConv(Matrix& in0,
Matrix& in1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = this->getHeight();
size_t width0 = this->getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
// for sequence input, in1's height should equal the number of sequences
if (nullptr != seqStartPosPtr) {
numSeqs = seqStartPosPtr->getSize() - 1;
}

CHECK_EQ(height, in0.getHeight());
CHECK_EQ(height0, in0.getHeight());
CHECK_EQ(width0, in0.getWidth());
CHECK_EQ(height, in1.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());

CHECK_EQ(width1 % 2, 1U);

real* outV = this->getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();

const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
for (size_t x = 0; x < height;
++x, outV += width0, inV0 += width0, inV1 += width1) {
for (size_t i = 0; i < width0; ++i) { // each dimension of output
for (size_t j = 0; j < width1; ++j) {
// iterate over all dimentions of inV1
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
outV[i] += inV0[index] * inV1[j];
// row-major order: treat each sequence's rows as one long row
for (size_t x = 0; x < numSeqs; ++x) {
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
// convolve one complete sequence
for (size_t i = 0; i < curSeqWidth; ++i) {
for (size_t j = 0; j < width1;
++j) { // iterate over convolution template
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(outV + i) += *(inV0 + index) * inV1[j];
}
}
outV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
}
}

void CpuMatrix::circularConvDerivative(
Matrix& outG, Matrix& in0, Matrix& in1, Matrix& inG0, Matrix& inG1) {
size_t height = in0.getHeight();
void CpuMatrix::circularConvDerivative(Matrix& outG,
Matrix& in0,
Matrix& in1,
Matrix& inG0,
Matrix& inG1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = in0.getHeight();
size_t width0 = in0.getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
if (nullptr != seqStartPosPtr) numSeqs = seqStartPosPtr->getSize() - 1;

CHECK_EQ(height, in1.getHeight());
CHECK_EQ(height, inG0.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());
CHECK_EQ(height0, inG0.getHeight());
CHECK_EQ(width0, inG0.getWidth());
CHECK_EQ(height, inG1.getHeight());
CHECK_EQ(numSeqs, inG1.getHeight());
CHECK_EQ(width1, inG1.getWidth());
CHECK_EQ(height, outG.getHeight());
CHECK_EQ(height0, outG.getHeight());
CHECK_EQ(width0, outG.getWidth());

real* outGV = outG.getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();
real* inGV0 = inG0.getData();
real* inGV1 = inG1.getData();
const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
for (size_t x = 0; x < height; ++x,
outGV += width0,
inV0 += width0,
inV1 += width1,
inGV0 += width0,
inGV1 += width1) {
for (size_t j = 0; j < width1; ++j) { // iterate over width1
for (size_t i = 0; i < width0; ++i) {
// such over all dimensions of outG
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
inGV0[index] += outGV[i] * inV1[j];
inGV1[j] += outGV[i] * inV0[index];
for (size_t x = 0; x < numSeqs; ++x) {
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
for (size_t j = 0; j < width1; ++j) { // iterate over convolution template
for (size_t i = 0; i < curSeqWidth; ++i) {
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(inGV0 + index) += *(outGV + i) * inV1[j];
inGV1[j] += *(outGV + i) * *(inV0 + index);
}
}
outGV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
inGV0 += curSeqWidth;
inGV1 += width1;
}
}

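To make the indexing concrete: with a non-null start-position vector, each sequence's rows are flattened into one ring of seqLen * width0 elements and convolved with that sequence's own kernel row. Below is a small self-contained sketch of the same scheme, written against plain arrays rather than paddle::Matrix; the function name and signature are illustrative only.

#include <cstddef>
#include <vector>

void circularConvSketch(const std::vector<float>& in0,     // concatenated sequence rows
                        const std::vector<float>& in1,     // numSeqs x width1 kernel rows
                        std::vector<float>& out,           // same layout as in0
                        const std::vector<int>& startPos,  // numSeqs + 1 entries
                        size_t width0,                     // row width (dataDim)
                        size_t width1) {                   // odd kernel width
  const int leftCtxLen = static_cast<int>((width1 - 1) / 2);
  const size_t numSeqs = startPos.size() - 1;
  size_t dataOff = 0;    // offset into in0 / out
  size_t kernelOff = 0;  // offset into in1
  for (size_t x = 0; x < numSeqs; ++x) {
    // One ring per sequence: seqLen rows treated as a single long row.
    const size_t curSeqWidth = width0 * (startPos[x + 1] - startPos[x]);
    for (size_t i = 0; i < curSeqWidth; ++i) {
      for (size_t j = 0; j < width1; ++j) {
        int index = (static_cast<int>(i + j) - leftCtxLen) %
                    static_cast<int>(curSeqWidth);
        if (index < 0) index += static_cast<int>(curSeqWidth);  // wrap around
        out[dataOff + i] += in0[dataOff + index] * in1[kernelOff + j];
      }
    }
    dataOff += curSeqWidth;
    kernelOff += width1;
  }
}

With startPos = {0, 3, 5}, width0 = 10 and width1 = 3, the first sequence is convolved as a ring of 30 elements and the second as a ring of 20, which matches what the new CPU loops above do.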
18 changes: 14 additions & 4 deletions paddle/math/Matrix.h
@@ -744,15 +744,20 @@ class Matrix : public BaseMatrix {
* b's index arithmetic is computed modulo M,
* c's index arithmetic is computed modulo N.
*/
virtual void circularConv(Matrix& b, Matrix& c) {
virtual void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

virtual void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2) {
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

@@ -1719,12 +1724,17 @@ class CpuMatrix : public Matrix {
IVector& label,
real alpha);

void circularConv(Matrix& b, Matrix& c);
void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);
void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2);
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);

void softmax(Matrix& output);
void sequenceSoftmax(Matrix& output, const IVector& index);
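One reading of the header change (not stated in the PR itself): the default arguments on the CpuMatrix declarations keep the old two-argument form compiling for code that holds a CpuMatrix directly, while callers that go through the Matrix base class, such as ConvShiftLayer above, now have to pass the start positions and the useGpu flag explicitly. A hypothetical pair of call sites:

#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"

void callSiteSketch(paddle::CpuMatrix& cpuOut,
                    paddle::Matrix& out,
                    paddle::Matrix& a,
                    paddle::Matrix& b,
                    const paddle::ICpuGpuVectorPtr& seqStartPos) {
  cpuOut.circularConv(a, b);                               // defaults fill in nullptr / false
  out.circularConv(a, b, seqStartPos, /*useGpu=*/false);   // base-class virtual: all four args
}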