Enrich ConvShift to support sequence data input #2133

Closed
wants to merge 9 commits
8 changes: 5 additions & 3 deletions paddle/gserver/layers/ConvShiftLayer.cpp
@@ -71,11 +71,11 @@ void ConvShiftLayer::forward(PassType passType) {

MatrixPtr inV0 = getInputValue(0);
MatrixPtr inV1 = getInputValue(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

size_t batchSize = inV0->getHeight();
size_t dataDim = inV0->getWidth();

CHECK_EQ(batchSize, inV1->getHeight());
CHECK_EQ(dataDim, getSize());

{
@@ -86,7 +86,7 @@ void ConvShiftLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue();

REGISTER_TIMER_INFO("FwConvShiftTimer", getName().c_str());
outV->circularConv(*inV0, *inV1);
outV->circularConv(*inV0, *inV1, seqStartPosPtr, useGpu_);
}

void ConvShiftLayer::backward(const UpdateCallback& callback) {
@@ -95,11 +95,13 @@ void ConvShiftLayer::backward(const UpdateCallback& callback) {
MatrixPtr outG = getOutputGrad();
MatrixPtr inG0 = getInputGrad(0);
MatrixPtr inG1 = getInputGrad(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

REGISTER_TIMER_INFO("BwConvShiftTimer", getName().c_str());

if (inG0 && inG1) {
outG->circularConvDerivative(*outG, *inV0, *inV1, *inG0, *inG1);
outG->circularConvDerivative(
*outG, *inV0, *inV1, *inG0, *inG1, seqStartPosPtr, useGpu_);
} else {
CHECK(!inG0 || !inG1) << "Not supported";
}
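With these changes the layer forwards the first input's sequenceStartPositions down to Matrix::circularConv. A minimal sketch of what that call now expects, assuming the usual paddle::Matrix / paddle::ICpuGpuVector factories; the shapes and names below are illustrative, not taken from this PR:

#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"

using namespace paddle;  // NOLINT

void convShiftSequenceSketch() {
  const size_t dataDim = 10;     // layer size
  const size_t kernelWidth = 3;  // must be odd
  // Two sequences of lengths 3 and 2 -> 3 + 2 = 5 concatenated rows.
  ICpuGpuVectorPtr seqStartPos = ICpuGpuVector::create(3, /*useGpu=*/false);
  int* buf = seqStartPos->getMutableData(false);
  buf[0] = 0; buf[1] = 3; buf[2] = 5;

  MatrixPtr in0 = Matrix::create(5, dataDim, false, false);      // sequence data
  MatrixPtr in1 = Matrix::create(2, kernelWidth, false, false);  // one kernel row per sequence
  MatrixPtr out = Matrix::create(5, dataDim, false, false);
  in0->randomizeUniform();
  in1->randomizeUniform();
  out->zeroMem();

  // Non-null start positions: each sequence is flattened into one long row
  // and circularly convolved with its own kernel row.
  out->circularConv(*in0, *in1, seqStartPos, /*useGpu=*/false);
}

For a non-sequence input the same pointer is simply null, so the nullptr check in CpuMatrix::circularConv (further down in this diff) falls back to the old per-row behaviour.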
26 changes: 25 additions & 1 deletion paddle/gserver/tests/LayerGradUtil.cpp
@@ -387,17 +387,41 @@ void initDataLayer(TestConfig testConf,
data.value->sigmoid(*data.value);
data.grad->zeroMem();
break;
case INPUT_SEQUENCE_MNUM_DATA: {
// first calculate the total height (sum of sequence lengths)
sequenceStartPositions =
ICpuGpuVector::create(batchSize + 1, /*useGpu=*/false);
int seqLen = 0;
int* buf = sequenceStartPositions->getMutableData(false);
int64_t pos = 0;
for (size_t j = 0; j < batchSize; ++j) {
seqLen = uniformRandom(testConf.inputDefs[i].maxLen) + 1;
buf[j] = pos;
pos += seqLen;
}
buf[batchSize] = pos;
fillData(trans, layer->getSize(), pos);
data.value->randomizeUniform();
data.value->add(-0.5);
if (testLayerName != "prelu") {
data.value->sigmoid(*data.value);
}
Contributor:
Lines 406-408 can be removed; the prelu layer will not be used here.

Contributor Author:
done

data.grad->zeroMem();
break;
}
default:
LOG(FATAL) << " unknown inputType ";
return;
}
if (testConf.inputDefs[i].inputType == INPUT_SEQUENCE_DATA ||
testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA ||
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_LABEL ||
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA) {
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA ||
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MNUM_DATA) {
if (!sequenceStartPositions) {
generateSequenceStartPositions(batchSize, sequenceStartPositions);
}

data.sequenceStartPositions = sequenceStartPositions;
}
if (testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA) {
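The new INPUT_SEQUENCE_MNUM_DATA case reinterprets batchSize as the number of sequences: each sequence gets a random length in [1, maxLen], the running sums are written into sequenceStartPositions, and buf[batchSize] becomes the total row count passed to fillData. A hypothetical helper (not part of the PR) showing how such a buffer is meant to be read back:

#include <cstddef>
#include <vector>

// buf has numSeqs + 1 monotonically increasing entries, starting at 0.
std::vector<int> sequenceLengths(const int* buf, size_t numSeqs) {
  std::vector<int> lens(numSeqs);
  for (size_t i = 0; i < numSeqs; ++i) {
    lens[i] = buf[i + 1] - buf[i];  // length of the i-th sequence
  }
  return lens;  // the lengths sum to buf[numSeqs], the total number of rows
}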
18 changes: 17 additions & 1 deletion paddle/gserver/tests/LayerGradUtil.h
@@ -31,7 +31,8 @@ enum InputType {
INPUT_SEQUENCE_LABEL,
INPUT_SPARSE_NON_VALUE_DATA,
INPUT_SPARSE_FLOAT_VALUE_DATA,
INPUT_DENSE_DIM_DATA, // using sequence length to init dense data
INPUT_DENSE_DIM_DATA, // using sequence length to init dense data
INPUT_SEQUENCE_MNUM_DATA,  // regard batchSize as the number of sequences
};

struct ParaSparse {
@@ -62,6 +63,7 @@ struct InputDef {
string name;
size_t dim;
size_t paraSize;
size_t maxLen; // maximum length of sequence data
ParaSparse sparse;
bool isStatic;
std::vector<int> labelInitValue;
@@ -76,6 +78,20 @@ struct InputDef {
isStatic = false;
}

InputDef(InputType type,
string nameIn,
size_t dimIn,
size_t sizeIn,
size_t maxSeqLen) {
inputType = type;
name = nameIn;
dim = dimIn;
paraSize = sizeIn;
maxLen = maxSeqLen;
sparse = {""};
isStatic = false;
}

InputDef(InputType type,
string nameIn,
size_t dimIn,
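The added five-argument constructor is what the test below relies on; for example, a 10-dimensional sequence input with no parameters and at most 5 steps per sequence (the values used in test_LayerGrad.cpp) would be declared as:

InputDef seqInput(INPUT_SEQUENCE_MNUM_DATA, "layer_0", 10, /*paraSize=*/0, /*maxSeqLen=*/5);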
13 changes: 11 additions & 2 deletions paddle/gserver/tests/test_LayerGrad.cpp
@@ -902,12 +902,16 @@ TEST(Layer, SequenceReshapeLayer) {
}
}

TEST(Layer, ConvShiftLayer) {
void testConvShiftLayer(string trans_type, size_t maxLen = 0) {
TestConfig config;
config.layerConfig.set_type("conv_shift");
config.layerConfig.set_size(10);

config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
if (trans_type == "non-seq")
config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
else
config.inputDefs.push_back(
{INPUT_SEQUENCE_MNUM_DATA, "layer_0", 10, 0, maxLen});
config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();
@@ -916,6 +920,11 @@ TEST(Layer, ConvShiftLayer) {
testLayerGrad(config, "conv_shift", 100, false, false);
}

TEST(Layer, ConvShiftLayer) {
testConvShiftLayer("non-seq");
testConvShiftLayer("seq", 5);
}

TEST(Layer, PowerLayer) {
TestConfig config;
config.layerConfig.set_type("power");
95 changes: 63 additions & 32 deletions paddle/math/Matrix.cpp
@@ -3877,71 +3877,102 @@ real CpuMatrix::getMax() {
return res;
}

void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
size_t height = this->getHeight();
void CpuMatrix::circularConv(Matrix& in0,
Matrix& in1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = this->getHeight();
size_t width0 = this->getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
// for sequence input, in1's height should equal the number of sequences
if (nullptr != seqStartPosPtr) {
numSeqs = seqStartPosPtr->getSize() - 1;
}

CHECK_EQ(height, in0.getHeight());
CHECK_EQ(height0, in0.getHeight());
CHECK_EQ(width0, in0.getWidth());
CHECK_EQ(height, in1.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());

CHECK_EQ(width1 % 2, 1U);

real* outV = this->getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();

const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
for (size_t x = 0; x < height;
++x, outV += width0, inV0 += width0, inV1 += width1) {
for (size_t i = 0; i < width0; ++i) { // each dimension of output
for (size_t j = 0; j < width1; ++j) {
// iterate over all dimentions of inV1
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
outV[i] += inV0[index] * inV1[j];
// row-major order: treat each sequence's rows as one long row
for (size_t x = 0; x < numSeqs; ++x) {
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
// convolve one complete sequence
for (size_t i = 0; i < curSeqWidth; ++i) {
for (size_t j = 0; j < width1;
++j) { // iterate over convolution template
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(outV + i) += *(inV0 + index) * inV1[j];
}
}
outV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
}
}

void CpuMatrix::circularConvDerivative(
Matrix& outG, Matrix& in0, Matrix& in1, Matrix& inG0, Matrix& inG1) {
size_t height = in0.getHeight();
void CpuMatrix::circularConvDerivative(Matrix& outG,
Matrix& in0,
Matrix& in1,
Matrix& inG0,
Matrix& inG1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = in0.getHeight();
size_t width0 = in0.getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
if (nullptr != seqStartPosPtr) numSeqs = seqStartPosPtr->getSize() - 1;

CHECK_EQ(height, in1.getHeight());
CHECK_EQ(height, inG0.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());
CHECK_EQ(height0, inG0.getHeight());
CHECK_EQ(width0, inG0.getWidth());
CHECK_EQ(height, inG1.getHeight());
CHECK_EQ(numSeqs, inG1.getHeight());
CHECK_EQ(width1, inG1.getWidth());
CHECK_EQ(height, outG.getHeight());
CHECK_EQ(height0, outG.getHeight());
CHECK_EQ(width0, outG.getWidth());

real* outGV = outG.getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();
real* inGV0 = inG0.getData();
real* inGV1 = inG1.getData();
const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
for (size_t x = 0; x < height; ++x,
outGV += width0,
inV0 += width0,
inV1 += width1,
inGV0 += width0,
inGV1 += width1) {
for (size_t j = 0; j < width1; ++j) { // iterate over width1
for (size_t i = 0; i < width0; ++i) {
// such over all dimensions of outG
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
inGV0[index] += outGV[i] * inV1[j];
inGV1[j] += outGV[i] * inV0[index];
for (size_t x = 0; x < numSeqs; ++x) {
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
for (size_t j = 0; j < width1; ++j) { // iterate over convolution template
for (size_t i = 0; i < curSeqWidth; ++i) {
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(inGV0 + index) += *(outGV + i) * inV1[j];
inGV1[j] += *(outGV + i) * *(inV0 + index);
}
}
outGV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
inGV0 += curSeqWidth;
inGV1 += width1;
}
}

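To make the indexing concrete: with a non-null start-position vector, each sequence's rows are flattened into one ring of seqLen * width0 elements and convolved with that sequence's own kernel row. Below is a small self-contained sketch of the same scheme, written against plain arrays rather than paddle::Matrix; the function name and signature are illustrative only.

#include <cstddef>
#include <vector>

void circularConvSketch(const std::vector<float>& in0,     // concatenated sequence rows
                        const std::vector<float>& in1,     // numSeqs x width1 kernel rows
                        std::vector<float>& out,           // same layout as in0
                        const std::vector<int>& startPos,  // numSeqs + 1 entries
                        size_t width0,                     // row width (dataDim)
                        size_t width1) {                   // odd kernel width
  const int leftCtxLen = static_cast<int>((width1 - 1) / 2);
  const size_t numSeqs = startPos.size() - 1;
  size_t dataOff = 0;    // offset into in0 / out
  size_t kernelOff = 0;  // offset into in1
  for (size_t x = 0; x < numSeqs; ++x) {
    // One ring per sequence: seqLen rows treated as a single long row.
    const size_t curSeqWidth = width0 * (startPos[x + 1] - startPos[x]);
    for (size_t i = 0; i < curSeqWidth; ++i) {
      for (size_t j = 0; j < width1; ++j) {
        int index = (static_cast<int>(i + j) - leftCtxLen) %
                    static_cast<int>(curSeqWidth);
        if (index < 0) index += static_cast<int>(curSeqWidth);  // wrap around
        out[dataOff + i] += in0[dataOff + index] * in1[kernelOff + j];
      }
    }
    dataOff += curSeqWidth;
    kernelOff += width1;
  }
}

With startPos = {0, 3, 5}, width0 = 10 and width1 = 3, the first sequence is convolved as a ring of 30 elements and the second as a ring of 20, which matches what the new CPU loops above do.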
18 changes: 14 additions & 4 deletions paddle/math/Matrix.h
@@ -744,15 +744,20 @@ class Matrix : public BaseMatrix {
* b's index arithmetic is computed modulo M,
* c's index arithmetic is computed modulo N.
*/
virtual void circularConv(Matrix& b, Matrix& c) {
virtual void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

virtual void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2) {
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

@@ -1719,12 +1724,17 @@ class CpuMatrix : public Matrix {
IVector& label,
real alpha);

void circularConv(Matrix& b, Matrix& c);
void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);
void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2);
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);

void softmax(Matrix& output);
void sequenceSoftmax(Matrix& output, const IVector& index);
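One reading of the header change (not stated in the PR itself): the default arguments on the CpuMatrix declarations keep the old two-argument form compiling for code that holds a CpuMatrix directly, while callers that go through the Matrix base class, such as ConvShiftLayer above, now have to pass the start positions and the useGpu flag explicitly. A hypothetical pair of call sites:

#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"

void callSiteSketch(paddle::CpuMatrix& cpuOut,
                    paddle::Matrix& out,
                    paddle::Matrix& a,
                    paddle::Matrix& b,
                    const paddle::ICpuGpuVectorPtr& seqStartPos) {
  cpuOut.circularConv(a, b);                               // defaults fill in nullptr / false
  out.circularConv(a, b, seqStartPos, /*useGpu=*/false);   // base-class virtual: all four args
}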