From 8cd2222e49a1a4f07665efc3864c9a3f43f53941 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Mon, 15 May 2017 14:56:21 +0800
Subject: [PATCH] merge convolution logic into class Matrix

---
 paddle/gserver/layers/ConvShiftLayer.cpp | 146 ++---------------------
 paddle/math/Matrix.cpp                   |  95 ++++++++++-----
 paddle/math/Matrix.h                     |  18 ++-
 3 files changed, 90 insertions(+), 169 deletions(-)

diff --git a/paddle/gserver/layers/ConvShiftLayer.cpp b/paddle/gserver/layers/ConvShiftLayer.cpp
index a8b04a88267d7..e4dd7f0ee034b 100644
--- a/paddle/gserver/layers/ConvShiftLayer.cpp
+++ b/paddle/gserver/layers/ConvShiftLayer.cpp
@@ -52,9 +52,6 @@ class ConvShiftLayer : public Layer {
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback = nullptr) override;
-  bool isSeqType();
-  void circularConvSeq();
-  void circularConvSeqDerivative();
 };
 
 REGISTER_LAYER(conv_shift, ConvShiftLayer);
 
@@ -69,122 +66,12 @@ bool ConvShiftLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-bool ConvShiftLayer::isSeqType() {
-  const Argument& inLayer0 = getInput(0);
-  if (nullptr == inLayer0.sequenceStartPositions)
-    return false;
-  else
-    return true;
-}
-
-void ConvShiftLayer::circularConvSeq() {
-  const Argument& inLayer0 = getInput(0);
-  MatrixPtr in0 = inLayer0.value;
-  MatrixPtr in1 = getInputValue(1);
-  MatrixPtr out = getOutputValue();
-  const ICpuGpuVectorPtr& sequenceStartPositions =
-      inLayer0.sequenceStartPositions;
-
-  size_t width0 = in0->getWidth();
-  size_t numSeqs = sequenceStartPositions->getSize() - 1;
-  size_t height0 = in0->getHeight();
-  size_t width1 = in1->getWidth();
-  size_t height1 = in1->getHeight();
-
-  CHECK_EQ(numSeqs, height1);
-  CHECK_EQ(width0, out->getWidth());
-  CHECK_EQ(height0, out->getHeight());
-
-  CHECK_EQ(width1 % 2, 1U);
-
-  real* inV0 = in0->getData();
-  const int* startPosIntPtr = sequenceStartPositions->getData(false);
-  real* inV1 = in1->getData();
-  real* outV = out->getData();
-
-  int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < numSeqs - 1; x++) {
-    int curSeqLen = startPosIntPtr[x + 1];
-    size_t curSeqWidth = curSeqLen * width0;
-    for (size_t i = 0; i < curSeqWidth; i++) {
-      for (size_t j = 0; j < width1; ++j) {
-        int index = i + j - leftCtxLen;
-        index = (index + curSeqWidth) % curSeqWidth;
-        int outVRowOffset = i / width0;
-        int outVColOffset = i % width0;
-        int inV0RowOffset = index / width0;
-        int inV0ColOffset = index % width0;
-        (outV + outVRowOffset)[outVColOffset] +=
-            (inV0 + inV0RowOffset)[inV0ColOffset] * inV1[j];
-      }
-    }
-    outV += curSeqWidth;
-    inV0 += curSeqWidth;
-    inV1 += width1;
-  }
-}
-
-void ConvShiftLayer::circularConvSeqDerivative() {
-  const Argument& inLayer0 = getInput(0);
-  MatrixPtr in0 = inLayer0.value;
-  MatrixPtr in1 = getInputValue(1);
-  MatrixPtr inG0 = getInputGrad(0);
-  MatrixPtr inG1 = getInputGrad(1);
-  MatrixPtr outG = getOutputGrad();
-  const ICpuGpuVectorPtr& sequenceStartPositions =
-      inLayer0.sequenceStartPositions;
-
-  size_t height0 = in0->getHeight();
-  size_t height1 = in1->getHeight();
-  size_t numSeqs = sequenceStartPositions->getSize() - 1;
-  size_t width0 = in0->getWidth();
-  size_t width1 = in1->getWidth();
-
-  CHECK_EQ(height1, numSeqs);
-  CHECK_EQ(height0, inG0->getHeight());
-  CHECK_EQ(width0, inG0->getWidth());
-  CHECK_EQ(height1, inG1->getHeight());
-  CHECK_EQ(width1, inG1->getWidth());
-  CHECK_EQ(height0, outG->getHeight());
-  CHECK_EQ(width0, outG->getWidth());
-
-  const int* startPosIntPtr = sequenceStartPositions->getData(false);
-  real* outGV = outG->getData();
-  real* inV0 = in0->getData();
-  real* inV1 = in1->getData();
-  real* inGV0 = inG0->getData();
-  real* inGV1 = inG1->getData();
-
-  int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < numSeqs - 1; x++) {
-    int curSeqLen = startPosIntPtr[x + 1];
-    size_t curSeqWidth = curSeqLen * width0;
-    for (size_t j = 0; j < width1; j++) {
-      for (size_t i = 0; i < curSeqWidth; i++) {
-        int index = i + j - leftCtxLen;
-        index = (index + curSeqWidth) % curSeqWidth;
-        int inGV0RowOffset = index / width0;
-        int inGV0ColOffset = index % width0;
-        int outGVRowOffset = i / width0;
-        int outGVColOffset = i % width0;
-        (inGV0 + inGV0RowOffset)[inGV0ColOffset] +=
-            (outGV + outGVRowOffset)[outGVColOffset] * inV1[j];
-        inGV1[j] += (outGV + outGVRowOffset)[outGVColOffset] *
-                    (inGV0 + inGV0RowOffset)[inGV0ColOffset];
-      }
-    }
-    outGV += curSeqWidth;
-    inV0 += curSeqWidth;
-    inV1 += width1;
-    inGV0 += curSeqWidth;
-    inGV1 += width1;
-  }
-}
-
 void ConvShiftLayer::forward(PassType passType) {
   Layer::forward(passType);
 
   MatrixPtr inV0 = getInputValue(0);
+  MatrixPtr inV1 = getInputValue(1);
+  const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;
 
   size_t batchSize = inV0->getHeight();
   size_t dataDim = inV0->getWidth();
@@ -196,34 +83,27 @@ void ConvShiftLayer::forward(PassType passType) {
     resetOutput(batchSize, dataDim);
   }
 
+  MatrixPtr outV = getOutputValue();
+
   REGISTER_TIMER_INFO("FwConvShiftTimer", getName().c_str());
 
-  if (!isSeqType()) {
-    MatrixPtr inV1 = getInputValue(1);
-    CHECK_EQ(batchSize, inV1->getHeight());
-    MatrixPtr outV = getOutputValue();
-    outV->circularConv(*inV0, *inV1);
-  } else {
-    circularConvSeq();
-  }
+  outV->circularConv(*inV0, *inV1, seqStartPosPtr, useGpu_);
 }
 
 void ConvShiftLayer::backward(const UpdateCallback& callback) {
+  MatrixPtr inV0 = getInputValue(0);
+  MatrixPtr inV1 = getInputValue(1);
+  MatrixPtr outG = getOutputGrad();
   MatrixPtr inG0 = getInputGrad(0);
   MatrixPtr inG1 = getInputGrad(1);
+  const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;
 
   REGISTER_TIMER_INFO("BwConvShiftTimer", getName().c_str());
 
-  if (!(inG0 && inG1)) {
-    CHECK(!inG0 || !inG1) << "Not supported";
-  }
-
-  if (!isSeqType()) {
-    MatrixPtr inV0 = getInputValue(0);
-    MatrixPtr inV1 = getInputValue(1);
-    MatrixPtr outG = getOutputGrad();
-    outG->circularConvDerivative(*outG, *inV0, *inV1, *inG0, *inG1);
+  if (inG0 && inG1) {
+    outG->circularConvDerivative(
+        *outG, *inV0, *inV1, *inG0, *inG1, seqStartPosPtr, useGpu_);
   } else {
-    circularConvSeqDerivative();
+    CHECK(!inG0 || !inG1) << "Not supported";
   }
 }
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 6ac61be0bf1b7..bf282eb524e3d 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -3877,14 +3877,22 @@ real CpuMatrix::getMax() {
   return res;
 }
 
-void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
-  size_t height = this->getHeight();
+void CpuMatrix::circularConv(Matrix& in0,
+                             Matrix& in1,
+                             const ICpuGpuVectorPtr& seqStartPosPtr,
+                             bool useGpu) {
+  size_t height0 = this->getHeight();
   size_t width0 = this->getWidth();
   size_t width1 = in1.getWidth();
+  size_t numSeqs = height0;
+  // if sequence type, height1 should be sequence number
+  if (nullptr != seqStartPosPtr) {
+    numSeqs = seqStartPosPtr->getSize() - 1;
+  }
 
-  CHECK_EQ(height, in0.getHeight());
+  CHECK_EQ(height0, in0.getHeight());
   CHECK_EQ(width0, in0.getWidth());
-  CHECK_EQ(height, in1.getHeight());
+  CHECK_EQ(numSeqs, in1.getHeight());
 
   CHECK_EQ(width1 % 2, 1U);
 
@@ -3892,32 +3900,50 @@ void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
   real* inV0 = in0.getData();
   real* inV1 = in1.getData();
 
+  const int* startPosIntPtr = nullptr;
+  if (nullptr != seqStartPosPtr) {
+    startPosIntPtr = seqStartPosPtr->getData(useGpu);
+  }
+
   int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < height;
-       ++x, outV += width0, inV0 += width0, inV1 += width1) {
-    for (size_t i = 0; i < width0; ++i) {  // each dimension of output
-      for (size_t j = 0; j < width1; ++j) {
-        // iterate over all dimentions of inV1
-        int index = i + j - leftCtxLen;
-        index = (index + width0) % width0;
-        outV[i] += inV0[index] * inV1[j];
+  // row first order, treate multiple rows as a long row
+  for (size_t x = 0; x < numSeqs; ++x) {
+    size_t curSeqWidth = width0;
+    if (nullptr != startPosIntPtr)
+      curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
+    // conv a complete sequence
+    for (size_t i = 0; i < curSeqWidth; ++i) {
+      for (size_t j = 0; j < width1;
+           ++j) {  // iterate over convolution template
+        int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
+        *(outV + i) += *(inV0 + index) * inV1[j];
       }
     }
+    outV += curSeqWidth;
+    inV0 += curSeqWidth;
+    inV1 += width1;
   }
 }
 
-void CpuMatrix::circularConvDerivative(
-    Matrix& outG, Matrix& in0, Matrix& in1, Matrix& inG0, Matrix& inG1) {
-  size_t height = in0.getHeight();
+void CpuMatrix::circularConvDerivative(Matrix& outG,
+                                       Matrix& in0,
+                                       Matrix& in1,
+                                       Matrix& inG0,
+                                       Matrix& inG1,
+                                       const ICpuGpuVectorPtr& seqStartPosPtr,
+                                       bool useGpu) {
+  size_t height0 = in0.getHeight();
   size_t width0 = in0.getWidth();
   size_t width1 = in1.getWidth();
+  size_t numSeqs = height0;
+  if (nullptr != seqStartPosPtr) numSeqs = seqStartPosPtr->getSize() - 1;
 
-  CHECK_EQ(height, in1.getHeight());
-  CHECK_EQ(height, inG0.getHeight());
+  CHECK_EQ(numSeqs, in1.getHeight());
+  CHECK_EQ(height0, inG0.getHeight());
   CHECK_EQ(width0, inG0.getWidth());
-  CHECK_EQ(height, inG1.getHeight());
+  CHECK_EQ(numSeqs, inG1.getHeight());
   CHECK_EQ(width1, inG1.getWidth());
-  CHECK_EQ(height, outG.getHeight());
+  CHECK_EQ(height0, outG.getHeight());
   CHECK_EQ(width0, outG.getWidth());
 
   real* outGV = outG.getData();
@@ -3925,23 +3951,28 @@ void CpuMatrix::circularConvDerivative(
   real* inV1 = in1.getData();
   real* inGV0 = inG0.getData();
   real* inGV1 = inG1.getData();
+  const int* startPosIntPtr = nullptr;
+  if (nullptr != seqStartPosPtr) {
+    startPosIntPtr = seqStartPosPtr->getData(useGpu);
+  }
 
   int leftCtxLen = (width1 - 1) / 2;
-  for (size_t x = 0; x < height; ++x,
-                                 outGV += width0,
-                                 inV0 += width0,
-                                 inV1 += width1,
-                                 inGV0 += width0,
-                                 inGV1 += width1) {
-    for (size_t j = 0; j < width1; ++j) {  // iterate over width1
-      for (size_t i = 0; i < width0; ++i) {
-        // such over all dimensions of outG
-        int index = i + j - leftCtxLen;
-        index = (index + width0) % width0;
-        inGV0[index] += outGV[i] * inV1[j];
-        inGV1[j] += outGV[i] * inV0[index];
+  for (size_t x = 0; x < numSeqs; ++x) {
+    size_t curSeqWidth = width0;
+    if (nullptr != startPosIntPtr)
+      curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
+    for (size_t j = 0; j < width1; ++j) {  // iterate over convolution template
+      for (size_t i = 0; i < curSeqWidth; i++) {
+        int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
+        *(inGV0 + index) += *(outGV + i) * inV1[j];
+        inGV1[j] += *(outGV + i) * *(inV0 + index);
       }
     }
+    outGV += curSeqWidth;
+    inV0 += curSeqWidth;
+    inV1 += width1;
+    inGV0 += curSeqWidth;
+    inGV1 += width1;
   }
 }
 
diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index 3252adb19e4c2..2dcc04fb59feb 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -744,7 +744,10 @@
    * b's index arithmetic is computed modulo M,
    * c's index arithmetic is computed modulo N.
    */
-  virtual void circularConv(Matrix& b, Matrix& c) {
+  virtual void circularConv(Matrix& b,
+                            Matrix& c,
+                            const ICpuGpuVectorPtr& seqStartPosPtr,
+                            bool useGpu) {
     LOG(FATAL) << "Not implemented";
   }
 
@@ -752,7 +755,9 @@
                                       Matrix& prevOut1,
                                       Matrix& prevOut2,
                                       Matrix& prevGrad1,
-                                      Matrix& prevGrad2) {
+                                      Matrix& prevGrad2,
+                                      const ICpuGpuVectorPtr& seqStartPosPtr,
+                                      bool useGpu) {
     LOG(FATAL) << "Not implemented";
   }
 
@@ -1719,12 +1724,17 @@ class CpuMatrix : public Matrix {
                         IVector& label,
                         real alpha);
 
-  void circularConv(Matrix& b, Matrix& c);
+  void circularConv(Matrix& b,
+                    Matrix& c,
+                    const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
+                    bool useGpu = false);
   void circularConvDerivative(Matrix& output,
                               Matrix& prevOut1,
                               Matrix& prevOut2,
                               Matrix& prevGrad1,
-                              Matrix& prevGrad2);
+                              Matrix& prevGrad2,
+                              const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
+                              bool useGpu = false);
 
   void softmax(Matrix& output);
   void sequenceSoftmax(Matrix& output, const IVector& index);
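
For readers skimming the patch: the core change is that CpuMatrix::circularConv (and its derivative) now take an optional sequence-start vector. When it is present, all rows of one sequence are treated as a single long row of length seqLen * width0 and the odd-width kernel row is applied circularly modulo that length; when it is absent, numSeqs falls back to the matrix height and each row is convolved on its own, reproducing the old per-row behaviour. Below is a minimal standalone sketch of that forward logic in plain C++; the name circularConvRef, the std::vector containers, and the explicit seqStartPos argument are illustrative stand-ins for this note, not Paddle's Matrix / ICpuGpuVector API.

#include <cstddef>
#include <vector>

// Reference sketch (not Paddle API) of the sequence-aware circular convolution.
//   in0:         flattened (totalRows x width0) input, row-major.
//   in1:         flattened (numSeqs x width1) kernel rows, width1 must be odd.
//   seqStartPos: row offset of each sequence, size numSeqs + 1.
//   out:         same shape as in0; results are accumulated into it.
void circularConvRef(const std::vector<float>& in0, size_t width0,
                     const std::vector<float>& in1, size_t width1,
                     const std::vector<int>& seqStartPos,
                     std::vector<float>& out) {
  const size_t numSeqs = seqStartPos.size() - 1;
  const long leftCtxLen = (static_cast<long>(width1) - 1) / 2;
  const float* inV0 = in0.data();
  const float* inV1 = in1.data();
  float* outV = out.data();
  for (size_t x = 0; x < numSeqs; ++x) {
    // All rows of one sequence are treated as a single long row.
    const size_t curSeqWidth =
        static_cast<size_t>(seqStartPos[x + 1] - seqStartPos[x]) * width0;
    const long m = static_cast<long>(curSeqWidth);
    for (size_t i = 0; i < curSeqWidth; ++i) {
      for (size_t j = 0; j < width1; ++j) {
        // Circular index, taken modulo the length of the whole sequence.
        const long k = static_cast<long>(i + j) - leftCtxLen;
        const size_t index = static_cast<size_t>(((k % m) + m) % m);
        outV[i] += inV0[index] * inV1[j];
      }
    }
    // Advance to the next sequence and its kernel row.
    outV += curSeqWidth;
    inV0 += curSeqWidth;
    inV1 += width1;
  }
}

Passing seqStartPos = {0, 1, 2, ..., numRows} makes every row its own sequence, which corresponds to the non-sequence path of the patched CpuMatrix::circularConv.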