Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enrich ConvShift to support sequence data input #2133

Closed
Wants to merge 9 commits into the base branch from the contributor's branch.
8 changes: 5 additions & 3 deletions paddle/gserver/layers/ConvShiftLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@ void ConvShiftLayer::forward(PassType passType) {

MatrixPtr inV0 = getInputValue(0);
MatrixPtr inV1 = getInputValue(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

size_t batchSize = inV0->getHeight();
size_t dataDim = inV0->getWidth();

CHECK_EQ(batchSize, inV1->getHeight());
CHECK_EQ(dataDim, getSize());

{
Expand All @@ -86,7 +86,7 @@ void ConvShiftLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue();

REGISTER_TIMER_INFO("FwConvShiftTimer", getName().c_str());
outV->circularConv(*inV0, *inV1);
outV->circularConv(*inV0, *inV1, seqStartPosPtr, useGpu_);
}

void ConvShiftLayer::backward(const UpdateCallback& callback) {
Expand All @@ -95,11 +95,13 @@ void ConvShiftLayer::backward(const UpdateCallback& callback) {
MatrixPtr outG = getOutputGrad();
MatrixPtr inG0 = getInputGrad(0);
MatrixPtr inG1 = getInputGrad(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

REGISTER_TIMER_INFO("BwConvShiftTimer", getName().c_str());

if (inG0 && inG1) {
outG->circularConvDerivative(*outG, *inV0, *inV1, *inG0, *inG1);
outG->circularConvDerivative(
*outG, *inV0, *inV1, *inG0, *inG1, seqStartPosPtr, useGpu_);
} else {
CHECK(!inG0 || !inG1) << "Not supported";
}
Expand Down
31 changes: 28 additions & 3 deletions paddle/gserver/tests/test_LayerGrad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -910,18 +910,43 @@ TEST(Layer, SequenceReshapeLayer) {
}
}

TEST(Layer, ConvShiftLayer) {
// Gradient-checks ConvShiftLayer for both input modes: "non-seq" (one dense
// row per sample) and "seq" (variable-length sequences). For sequences,
// maxLen bounds each randomly drawn sequence length (lengths in [1, maxLen]).
void testConvShiftLayer(string trans_type, size_t maxLen = 0) {
TestConfig config;
config.layerConfig.set_type("conv_shift");
config.layerConfig.set_size(10);
size_t batch_size = 100;

// NOTE(review): this span is a GitHub diff capture; the next line is the
// pre-change (removed) input definition retained by the diff view.
config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
if (trans_type == "non-seq") {
config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
} else {
// Generate sequence data
vector<int> seqStartPositions(batch_size + 1, 0);
int seqLen = 0;
size_t pos = 0;  // running offset: total rows generated so far
for (size_t i = 0; i < batch_size; ++i) {
seqLen = uniformRandom(maxLen) + 1;  // length in [1, maxLen]
seqStartPositions[i] = pos;
pos += seqLen;
}
seqStartPositions[batch_size] = pos;  // sentinel: one-past-the-end offset

// Self-defined sequence input: pos rows of width 10, uniform noise.
MatrixPtr matValuePtr = Matrix::create(pos, 10, false, false);
matValuePtr->randomizeUniform();

config.inputDefs.push_back(
{INPUT_SELF_DEFINE_DATA, "layer_0", matValuePtr, seqStartPositions});
}
// Second input: convolution kernel of width 3 (odd, per circularConv's check).
config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();

// GPU is not supported yet, so only the CPU path is exercised.
// (pre-change call retained by the diff view:)
testLayerGrad(config, "conv_shift", 100, false, false);
testLayerGrad(config, "conv_shift", batch_size, false, false);
}

TEST(Layer, ConvShiftLayer) {
  // Run the gradient check once per input mode: dense batch first, then
  // variable-length sequences capped at length 5.
  const char* transTypes[] = {"non-seq", "seq"};
  const size_t maxLens[] = {0, 5};
  for (size_t i = 0; i < 2; ++i) {
    testConvShiftLayer(transTypes[i], maxLens[i]);
  }
}

TEST(Layer, PowerLayer) {
Expand Down
95 changes: 63 additions & 32 deletions paddle/math/Matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3881,71 +3881,102 @@ real CpuMatrix::getMax() {
return res;
}

// Pre-change definition header (removed lines retained by the diff view):
void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
size_t height = this->getHeight();
// Circular (wrap-around) 1-D convolution: accumulates in0 convolved with the
// odd-width kernel rows of in1 into this matrix. When seqStartPosPtr is
// given, the rows of each sequence are treated as one long concatenated row
// and convolved as a whole; each sequence uses one kernel row of in1.
void CpuMatrix::circularConv(Matrix& in0,
Matrix& in1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = this->getHeight();
size_t width0 = this->getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
// if sequence type, height1 should be sequence number
if (nullptr != seqStartPosPtr) {
numSeqs = seqStartPosPtr->getSize() - 1;
}

// (diff view: old and new CHECK lines are interleaved below)
CHECK_EQ(height, in0.getHeight());
CHECK_EQ(height0, in0.getHeight());
CHECK_EQ(width0, in0.getWidth());
CHECK_EQ(height, in1.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());

// Kernel width must be odd so it splits into equal left/right context.
CHECK_EQ(width1 % 2, 1U);

real* outV = this->getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();

const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
// NOTE(review): this is a CPU loop that dereferences startPosIntPtr below;
// getData(useGpu) with useGpu == true would hand back a device pointer —
// confirm callers on the CPU path always pass useGpu == false here.
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
// (pre-change loop, removed lines retained by the diff view:)
for (size_t x = 0; x < height;
++x, outV += width0, inV0 += width0, inV1 += width1) {
for (size_t i = 0; i < width0; ++i) { // each dimension of output
for (size_t j = 0; j < width1; ++j) {
// iterate over all dimensions of inV1
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
outV[i] += inV0[index] * inV1[j];
// row-first order: treat a sequence's multiple rows as one long row
for (size_t x = 0; x < numSeqs; ++x) {
// Flattened length of this sequence: width0 * (number of rows in it);
// stays width0 (one row) when no sequence info is given.
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
// conv a complete sequence
for (size_t i = 0; i < curSeqWidth; ++i) {
for (size_t j = 0; j < width1;
++j) { // iterate over convolution template
// Modular index implements the circular wrap within the sequence.
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(outV + i) += *(inV0 + index) * inV1[j];
}
}
// Advance to the next sequence's data and its kernel row.
outV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
}
}

// Backward pass of circularConv: scatters the output gradient outG into the
// data gradient inG0 and the kernel gradient inG1. Sequence handling mirrors
// the forward pass: each sequence's rows are treated as one concatenated row.
// (diff view: the old one-line parameter list and first statement are
// retained immediately below the new signature.)
void CpuMatrix::circularConvDerivative(
Matrix& outG, Matrix& in0, Matrix& in1, Matrix& inG0, Matrix& inG1) {
size_t height = in0.getHeight();
void CpuMatrix::circularConvDerivative(Matrix& outG,
Matrix& in0,
Matrix& in1,
Matrix& inG0,
Matrix& inG1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = in0.getHeight();
size_t width0 = in0.getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
if (nullptr != seqStartPosPtr) numSeqs = seqStartPosPtr->getSize() - 1;

// (diff view: old and new CHECK lines are interleaved below)
CHECK_EQ(height, in1.getHeight());
CHECK_EQ(height, inG0.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());
CHECK_EQ(height0, inG0.getHeight());
CHECK_EQ(width0, inG0.getWidth());
CHECK_EQ(height, inG1.getHeight());
CHECK_EQ(numSeqs, inG1.getHeight());
CHECK_EQ(width1, inG1.getWidth());
CHECK_EQ(height, outG.getHeight());
CHECK_EQ(height0, outG.getHeight());
CHECK_EQ(width0, outG.getWidth());

real* outGV = outG.getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();
real* inGV0 = inG0.getData();
real* inGV1 = inG1.getData();
const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
// NOTE(review): CPU loop below dereferences this pointer — confirm
// callers always pass useGpu == false so a host pointer is returned.
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
// (pre-change loop, removed lines retained by the diff view:)
for (size_t x = 0; x < height; ++x,
outGV += width0,
inV0 += width0,
inV1 += width1,
inGV0 += width0,
inGV1 += width1) {
for (size_t j = 0; j < width1; ++j) { // iterate over width1
for (size_t i = 0; i < width0; ++i) {
// sum over all dimensions of outG
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
inGV0[index] += outGV[i] * inV1[j];
inGV1[j] += outGV[i] * inV0[index];
for (size_t x = 0; x < numSeqs; ++x) {
// Flattened length of this sequence (width0 when no sequence info).
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
for (size_t j = 0; j < width1; ++j) { // iterate over convolution template
for (size_t i = 0; i < curSeqWidth; ++i) {
// Same circular index as the forward pass.
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(inGV0 + index) += *(outGV + i) * inV1[j];
inGV1[j] += *(outGV + i) * *(inV0 + index);
}
}
// Advance to the next sequence's gradients and kernel row.
outGV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
inGV0 += curSeqWidth;
inGV1 += width1;
}
}

Expand Down
18 changes: 14 additions & 4 deletions paddle/math/Matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -745,15 +745,20 @@ class Matrix : public BaseMatrix {
* b's index arithmetic is computed modulo M,
* c's index arithmetic is computed modulo N.
*/
// Pre-change declaration (removed line retained by the diff view):
virtual void circularConv(Matrix& b, Matrix& c) {
// Circular convolution of b with kernel c (implemented by CpuMatrix).
// seqStartPosPtr: optional per-sequence row-start offsets; useGpu selects
// which copy (host/device) of those offsets to read.
virtual void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

// Backward pass of circularConv (implemented by CpuMatrix); the extra
// sequence parameters mirror circularConv's.
virtual void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
// (pre-change final parameter retained by the diff view:)
Matrix& prevGrad2) {
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

Expand Down Expand Up @@ -1720,12 +1725,17 @@ class CpuMatrix : public Matrix {
IVector& label,
real alpha);

// Pre-change declaration (removed line retained by the diff view):
void circularConv(Matrix& b, Matrix& c);
// CPU overrides of the circular-convolution pair. The defaults keep old
// two-/five-argument call sites compiling unchanged.
// NOTE(review): default arguments on virtual functions are statically
// bound — calls through Matrix& use the base declaration (which has no
// defaults), so these defaults only apply to calls via CpuMatrix.
void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);
void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
// (pre-change final parameter retained by the diff view:)
Matrix& prevGrad2);
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);

void softmax(Matrix& output);
void sequenceSoftmax(Matrix& output, const IVector& index);
Expand Down