Skip to content

Commit

Permalink
merge convolution logic into class Matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
yangyaming committed May 15, 2017
1 parent 6adf4ac commit 8cd2222
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 169 deletions.
146 changes: 13 additions & 133 deletions paddle/gserver/layers/ConvShiftLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@ class ConvShiftLayer : public Layer {

void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
bool isSeqType();
void circularConvSeq();
void circularConvSeqDerivative();
};

REGISTER_LAYER(conv_shift, ConvShiftLayer);
Expand All @@ -69,122 +66,12 @@ bool ConvShiftLayer::init(const LayerMap& layerMap,
return true;
}

bool ConvShiftLayer::isSeqType() {
const Argument& inLayer0 = getInput(0);
if (nullptr == inLayer0.sequenceStartPositions)
return false;
else
return true;
}

// Forward pass of circular convolution for sequence-typed input.
//
// Each sequence in input 0 is flattened row-major into one long signal and
// circularly convolved with its own kernel row from input 1:
//   in0: (totalRows, width0) sequence data
//   in1: (numSeqs, width1)   one kernel per sequence, width1 must be odd
//   out: same shape as in0, result is ACCUMULATED into it
//
// Fixes over the previous version:
//  - per-sequence length is the difference of adjacent start positions,
//    not the raw end offset;
//  - the loop covers all numSeqs sequences (it used to drop the last one);
//  - output/input elements are addressed directly (the old row/col pointer
//    arithmetic collapsed to outV[row + col] instead of outV[row*width0+col]);
//  - index wrap-around is done in signed arithmetic so the left-context
//    underflow cannot wrap as unsigned before the modulo.
void ConvShiftLayer::circularConvSeq() {
  const Argument& inLayer0 = getInput(0);
  MatrixPtr in0 = inLayer0.value;
  MatrixPtr in1 = getInputValue(1);
  MatrixPtr out = getOutputValue();
  const ICpuGpuVectorPtr& sequenceStartPositions =
      inLayer0.sequenceStartPositions;

  size_t width0 = in0->getWidth();
  size_t numSeqs = sequenceStartPositions->getSize() - 1;
  size_t height0 = in0->getHeight();
  size_t width1 = in1->getWidth();
  size_t height1 = in1->getHeight();

  CHECK_EQ(numSeqs, height1);
  CHECK_EQ(width0, out->getWidth());
  CHECK_EQ(height0, out->getHeight());

  // An odd kernel width gives a symmetric left/right context window.
  CHECK_EQ(width1 % 2, 1U);

  real* inV0 = in0->getData();
  const int* startPosIntPtr = sequenceStartPositions->getData(false);
  real* inV1 = in1->getData();
  real* outV = out->getData();

  int leftCtxLen = (width1 - 1) / 2;
  for (size_t x = 0; x < numSeqs; ++x) {
    // Length of this sequence = difference of adjacent start positions.
    int curSeqLen = startPosIntPtr[x + 1] - startPosIntPtr[x];
    int curSeqWidth = curSeqLen * static_cast<int>(width0);
    for (int i = 0; i < curSeqWidth; ++i) {  // each element of the output
      for (int j = 0; j < static_cast<int>(width1); ++j) {  // kernel taps
        // Circular index into the flattened sequence; signed math keeps
        // (i + j - leftCtxLen) well-defined when it goes negative.
        int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
        outV[i] += inV0[index] * inV1[j];
      }
    }
    outV += curSeqWidth;
    inV0 += curSeqWidth;
    inV1 += width1;
  }
}

// Backward pass of circular convolution for sequence-typed input.
//
// Accumulates gradients w.r.t. both inputs:
//   inG0[index] += outG[i] * in1[j]   (gradient of the signal)
//   inG1[j]     += outG[i] * in0[index] (gradient of the kernel)
//
// Fixes over the previous version:
//  - per-sequence length is the difference of adjacent start positions,
//    not the raw end offset;
//  - the loop covers all numSeqs sequences (it used to drop the last one);
//  - elements are addressed directly (the old row/col pointer arithmetic
//    collapsed to ptr[row + col] instead of ptr[row*width0 + col]);
//  - the kernel gradient is computed from the input VALUE inV0, not from
//    the in-progress input gradient inGV0 (the old code read inGV0, which
//    made inG1 depend on accumulation order and on stale gradient state);
//  - index wrap-around is done in signed arithmetic.
void ConvShiftLayer::circularConvSeqDerivative() {
  const Argument& inLayer0 = getInput(0);
  MatrixPtr in0 = inLayer0.value;
  MatrixPtr in1 = getInputValue(1);
  MatrixPtr inG0 = getInputGrad(0);
  MatrixPtr inG1 = getInputGrad(1);
  MatrixPtr outG = getOutputGrad();
  const ICpuGpuVectorPtr& sequenceStartPositions =
      inLayer0.sequenceStartPositions;

  size_t height0 = in0->getHeight();
  size_t height1 = in1->getHeight();
  size_t numSeqs = sequenceStartPositions->getSize() - 1;
  size_t width0 = in0->getWidth();
  size_t width1 = in1->getWidth();

  CHECK_EQ(height1, numSeqs);
  CHECK_EQ(height0, inG0->getHeight());
  CHECK_EQ(width0, inG0->getWidth());
  CHECK_EQ(height1, inG1->getHeight());
  CHECK_EQ(width1, inG1->getWidth());
  CHECK_EQ(height0, outG->getHeight());
  CHECK_EQ(width0, outG->getWidth());

  const int* startPosIntPtr = sequenceStartPositions->getData(false);
  real* outGV = outG->getData();
  real* inV0 = in0->getData();
  real* inV1 = in1->getData();
  real* inGV0 = inG0->getData();
  real* inGV1 = inG1->getData();

  int leftCtxLen = (width1 - 1) / 2;
  for (size_t x = 0; x < numSeqs; ++x) {
    // Length of this sequence = difference of adjacent start positions.
    int curSeqLen = startPosIntPtr[x + 1] - startPosIntPtr[x];
    int curSeqWidth = curSeqLen * static_cast<int>(width0);
    for (int j = 0; j < static_cast<int>(width1); ++j) {  // kernel taps
      for (int i = 0; i < curSeqWidth; ++i) {  // each output-grad element
        // Circular index into the flattened sequence (signed wrap).
        int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
        inGV0[index] += outGV[i] * inV1[j];
        // Kernel gradient uses the input value, not the input gradient.
        inGV1[j] += outGV[i] * inV0[index];
      }
    }
    outGV += curSeqWidth;
    inV0 += curSeqWidth;
    inV1 += width1;
    inGV0 += curSeqWidth;
    inGV1 += width1;
  }
}

void ConvShiftLayer::forward(PassType passType) {
Layer::forward(passType);

MatrixPtr inV0 = getInputValue(0);
MatrixPtr inV1 = getInputValue(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

size_t batchSize = inV0->getHeight();
size_t dataDim = inV0->getWidth();
Expand All @@ -196,34 +83,27 @@ void ConvShiftLayer::forward(PassType passType) {
resetOutput(batchSize, dataDim);
}

MatrixPtr outV = getOutputValue();

REGISTER_TIMER_INFO("FwConvShiftTimer", getName().c_str());
if (!isSeqType()) {
MatrixPtr inV1 = getInputValue(1);
CHECK_EQ(batchSize, inV1->getHeight());
MatrixPtr outV = getOutputValue();
outV->circularConv(*inV0, *inV1);
} else {
circularConvSeq();
}
outV->circularConv(*inV0, *inV1, seqStartPosPtr, useGpu_);
}

void ConvShiftLayer::backward(const UpdateCallback& callback) {
MatrixPtr inV0 = getInputValue(0);
MatrixPtr inV1 = getInputValue(1);
MatrixPtr outG = getOutputGrad();
MatrixPtr inG0 = getInputGrad(0);
MatrixPtr inG1 = getInputGrad(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

REGISTER_TIMER_INFO("BwConvShiftTimer", getName().c_str());

if (!(inG0 && inG1)) {
CHECK(!inG0 || !inG1) << "Not supported";
}

if (!isSeqType()) {
MatrixPtr inV0 = getInputValue(0);
MatrixPtr inV1 = getInputValue(1);
MatrixPtr outG = getOutputGrad();
outG->circularConvDerivative(*outG, *inV0, *inV1, *inG0, *inG1);
if (inG0 && inG1) {
outG->circularConvDerivative(
*outG, *inV0, *inV1, *inG0, *inG1, seqStartPosPtr, useGpu_);
} else {
circularConvSeqDerivative();
CHECK(!inG0 || !inG1) << "Not supported";
}
}

Expand Down
95 changes: 63 additions & 32 deletions paddle/math/Matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3877,71 +3877,102 @@ real CpuMatrix::getMax() {
return res;
}

void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
size_t height = this->getHeight();
void CpuMatrix::circularConv(Matrix& in0,
Matrix& in1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = this->getHeight();
size_t width0 = this->getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
// if sequence type, height1 should be sequence number
if (nullptr != seqStartPosPtr) {
numSeqs = seqStartPosPtr->getSize() - 1;
}

CHECK_EQ(height, in0.getHeight());
CHECK_EQ(height0, in0.getHeight());
CHECK_EQ(width0, in0.getWidth());
CHECK_EQ(height, in1.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());

CHECK_EQ(width1 % 2, 1U);

real* outV = this->getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();

const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
for (size_t x = 0; x < height;
++x, outV += width0, inV0 += width0, inV1 += width1) {
for (size_t i = 0; i < width0; ++i) { // each dimension of output
for (size_t j = 0; j < width1; ++j) {
// iterate over all dimentions of inV1
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
outV[i] += inV0[index] * inV1[j];
// row first order, treate multiple rows as a long row
for (size_t x = 0; x < numSeqs; ++x) {
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
// conv a complete sequence
for (size_t i = 0; i < curSeqWidth; ++i) {
for (size_t j = 0; j < width1;
++j) { // iterate over convolution template
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(outV + i) += *(inV0 + index) * inV1[j];
}
}
outV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
}
}

void CpuMatrix::circularConvDerivative(
Matrix& outG, Matrix& in0, Matrix& in1, Matrix& inG0, Matrix& inG1) {
size_t height = in0.getHeight();
void CpuMatrix::circularConvDerivative(Matrix& outG,
Matrix& in0,
Matrix& in1,
Matrix& inG0,
Matrix& inG1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = in0.getHeight();
size_t width0 = in0.getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
if (nullptr != seqStartPosPtr) numSeqs = seqStartPosPtr->getSize() - 1;

CHECK_EQ(height, in1.getHeight());
CHECK_EQ(height, inG0.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());
CHECK_EQ(height0, inG0.getHeight());
CHECK_EQ(width0, inG0.getWidth());
CHECK_EQ(height, inG1.getHeight());
CHECK_EQ(numSeqs, inG1.getHeight());
CHECK_EQ(width1, inG1.getWidth());
CHECK_EQ(height, outG.getHeight());
CHECK_EQ(height0, outG.getHeight());
CHECK_EQ(width0, outG.getWidth());

real* outGV = outG.getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();
real* inGV0 = inG0.getData();
real* inGV1 = inG1.getData();
const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
for (size_t x = 0; x < height; ++x,
outGV += width0,
inV0 += width0,
inV1 += width1,
inGV0 += width0,
inGV1 += width1) {
for (size_t j = 0; j < width1; ++j) { // iterate over width1
for (size_t i = 0; i < width0; ++i) {
// such over all dimensions of outG
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
inGV0[index] += outGV[i] * inV1[j];
inGV1[j] += outGV[i] * inV0[index];
for (size_t x = 0; x < numSeqs; ++x) {
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
for (size_t j = 0; j < width1; ++j) { // iterate over convolution template
for (size_t i = 0; i < curSeqWidth; i++) {
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(inGV0 + index) += *(outGV + i) * inV1[j];
inGV1[j] += *(outGV + i) * *(inV0 + index);
}
}
outGV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
inGV0 += curSeqWidth;
inGV1 += width1;
}
}

Expand Down
18 changes: 14 additions & 4 deletions paddle/math/Matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -744,15 +744,20 @@ class Matrix : public BaseMatrix {
* b's index arithmetic is computed modulo M,
* c's index arithmetic is computed modulo N.
*/
virtual void circularConv(Matrix& b, Matrix& c) {
virtual void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

virtual void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2) {
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

Expand Down Expand Up @@ -1719,12 +1724,17 @@ class CpuMatrix : public Matrix {
IVector& label,
real alpha);

void circularConv(Matrix& b, Matrix& c);
void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);
void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2);
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);

void softmax(Matrix& output);
void sequenceSoftmax(Matrix& output, const IVector& index);
Expand Down

0 comments on commit 8cd2222

Please sign in to comment.