Feature/error handling in paddle #1149

Merged (13 commits) on Jan 19, 2017
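This PR changes the activation interface to return a Status instead of void, so activations report problems through the return value rather than CHECK-failing. The Status type itself comes from paddle/utils/Status.h, which is not part of this diff; the sketch below is only an assumption about its shape, limited to the members the diff actually uses (default construction for success, construction from a message for failure, isOK(), what()).

```cpp
// Hypothetical sketch of a Status type consistent with how it is used below.
// The real definition lives in paddle/utils/Status.h.
#include <memory>
#include <string>

class Status {
public:
  Status() = default;                       // default state: success
  explicit Status(const std::string& msg)   // carrying a message: failure
      : msg_(std::make_shared<std::string>(msg)) {}

  bool isOK() const { return msg_ == nullptr; }
  const char* what() const { return msg_ ? msg_->c_str() : ""; }

private:
  std::shared_ptr<std::string> msg_;  // null when the call succeeded
};
```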
126 changes: 98 additions & 28 deletions paddle/gserver/activations/ActivationFunction.cpp
@@ -69,8 +69,14 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
class IdentityActivation : public ActivationFunction {
public:
static const std::string name;
void forward(Argument& act) { (void)act; }
void backward(Argument& act) { (void)act; }
Status forward(Argument& act) {
(void)act;
return Status();
}
Status backward(Argument& act) {
(void)act;
return Status();
}
const std::string& getName() const { return name; }
};
const std::string IdentityActivation::name = "";
@@ -86,8 +92,14 @@ static InitFunction __reg_activation__identity([] {
* \f]
*/
BEGIN_DEFINE_ACTIVATION(sigmoid)
void forward(Argument& act) { act.value->sigmoid(*act.value); }
void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
Status forward(Argument& act) {
act.value->sigmoid(*act.value);
return Status();
}
Status backward(Argument& act) {
act.grad->sigmoidDerivative(*act.value);
return Status();
}
END_DEFINE_ACTIVATION(sigmoid)

/**
@@ -103,9 +115,12 @@ MatrixPtr sftMaxDot_;
MatrixPtr one_;

public:
void forward(Argument& act) { act.value->softmax(*act.value); }
Status forward(Argument& act) {
act.value->softmax(*act.value);
return Status();
}

void backward(Argument& act) {
Status backward(Argument& act) {
MatrixPtr outputV = act.value;
MatrixPtr outputG = act.grad;

@@ -137,6 +152,7 @@ void backward(Argument& act) {

act.grad->softmaxDerivative(*act.value, *sftMaxSum_);
}
return Status();
}
END_DEFINE_ACTIVATION(softmax)

@@ -151,8 +167,11 @@ ACTIVATION_CLASS_NAME(softmax) softmax_;
Argument argument_;

public:
void forward(Argument& act) {
CHECK_EQ(act.value->getWidth(), 1UL);
Status forward(Argument& act) {
if (act.value->getWidth() != 1UL) {
return Status(
"Input width for each timestep of sequence softmax should be 1");
}

if (!argument_.value) {
argument_.value = Matrix::create(nullptr,
@@ -169,10 +188,14 @@ void forward(Argument& act) {

auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
act.value->sequenceSoftmax(*act.value, *starts);
return Status();
}

void backward(Argument& act) {
CHECK_EQ(act.grad->getWidth(), 1UL);
Status backward(Argument& act) {
if (act.value->getWidth() != 1UL) {
return Status(
"Input width for each timestep of sequence softmax should be 1");
}

size_t numSequences = act.getNumSequences();
const int* starts = act.sequenceStartPositions->getData(false);
@@ -186,6 +209,7 @@ void backward(Argument& act) {

softmax_.backward(argument_);
}
return Status();
}
END_DEFINE_ACTIVATION(sequence_softmax)

@@ -200,9 +224,15 @@ END_DEFINE_ACTIVATION(sequence_softmax)
* 0 otherwise.
*/
BEGIN_DEFINE_ACTIVATION(relu)
void forward(Argument& act) { act.value->relu(*act.value); }
Status forward(Argument& act) {
act.value->relu(*act.value);
return Status();
}

void backward(Argument& act) { act.grad->reluDerivative(*act.value); }
Status backward(Argument& act) {
act.grad->reluDerivative(*act.value);
return Status();
}
END_DEFINE_ACTIVATION(relu)

/**
@@ -219,9 +249,15 @@ END_DEFINE_ACTIVATION(relu)
* TODO(yuyang18): Remove magic number 24 or make it configurable.
*/
BEGIN_DEFINE_ACTIVATION(brelu)
void forward(Argument& act) { act.value->brelu(*act.value); }
Status forward(Argument& act) {
act.value->brelu(*act.value);
return Status();
}

void backward(Argument& act) { act.grad->breluDerivative(*act.value); }
Status backward(Argument& act) {
act.grad->breluDerivative(*act.value);
return Status();
}
END_DEFINE_ACTIVATION(brelu)

/**
@@ -231,9 +267,15 @@ END_DEFINE_ACTIVATION(brelu)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(tanh)
void forward(Argument& act) { act.value->tanh(*act.value); }
Status forward(Argument& act) {
act.value->tanh(*act.value);
return Status();
}

void backward(Argument& act) { act.grad->tanhDerivative(*act.value); }
Status backward(Argument& act) {
act.grad->tanhDerivative(*act.value);
return Status();
}
END_DEFINE_ACTIVATION(tanh)

/**
@@ -248,10 +290,14 @@ real a, b;

public:
ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
void forward(Argument& act) { act.value->scaledTanh(*act.value, a, b); }
Status forward(Argument& act) {
act.value->scaledTanh(*act.value, a, b);
return Status();
}

void backward(Argument& act) {
Status backward(Argument& act) {
act.grad->scaledTanhDerivative(*act.value, a, b);
return Status();
}
END_DEFINE_ACTIVATION(stanh)

@@ -262,9 +308,15 @@ END_DEFINE_ACTIVATION(stanh)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(softrelu)
void forward(Argument& act) { act.value->softrelu(*act.value); }
Status forward(Argument& act) {
act.value->softrelu(*act.value);
return Status();
}

void backward(Argument& act) { act.grad->softreluDerivative(*act.value); }
Status backward(Argument& act) {
act.grad->softreluDerivative(*act.value);
return Status();
}
END_DEFINE_ACTIVATION(softrelu)

/**
@@ -280,7 +332,7 @@ END_DEFINE_ACTIVATION(softrelu)
* 0 if z=0
*/
BEGIN_DEFINE_ACTIVATION(abs)
void forward(Argument& act) {
Status forward(Argument& act) {
SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in,
act.value->getHeight(),
@@ -290,9 +342,13 @@ void forward(Argument& act) {

act.in->copyFrom(*act.value);
act.value->abs2(*act.value);
return Status();
}

void backward(Argument& act) { act.grad->absDerivative(*act.in); }
Status backward(Argument& act) {
act.grad->absDerivative(*act.in);
return Status();
}
END_DEFINE_ACTIVATION(abs)

/**
@@ -302,7 +358,7 @@ END_DEFINE_ACTIVATION(abs)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(square)
void forward(Argument& act) {
Status forward(Argument& act) {
SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in,
act.value->getHeight(),
@@ -312,9 +368,13 @@ void forward(Argument& act) {

act.in->copyFrom(*act.value);
act.value->square2(*act.value);
return Status();
}

void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
Status backward(Argument& act) {
act.grad->squareDerivative(*act.in);
return Status();
}
END_DEFINE_ACTIVATION(square)

/**
@@ -324,9 +384,15 @@ END_DEFINE_ACTIVATION(square)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(exponential)
void forward(Argument& act) { act.value->exp2(*act.value); }
Status forward(Argument& act) {
act.value->exp2(*act.value);
return Status();
}

void backward(Argument& act) { act.grad->expDerivative(*act.value); }
Status backward(Argument& act) {
act.grad->expDerivative(*act.value);
return Status();
}
END_DEFINE_ACTIVATION(exponential)

/**
@@ -336,7 +402,7 @@ END_DEFINE_ACTIVATION(exponential)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(log)
void forward(Argument& act) {
Status forward(Argument& act) {
SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in,
act.value->getHeight(),
@@ -346,9 +412,13 @@ void forward(Argument& act) {

act.in->copyFrom(*act.value);
act.value->log2(*act.value);
return Status();
}

void backward(Argument& act) { act.grad->dotDiv(*act.grad, *act.in); }
Status backward(Argument& act) {
act.grad->dotDiv(*act.grad, *act.in);
return Status();
}
END_DEFINE_ACTIVATION(log)

ActivationFunction* ActivationFunction::create(const std::string& type) {
5 changes: 3 additions & 2 deletions paddle/gserver/activations/ActivationFunction.h
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/utils/Status.h"

namespace paddle {

@@ -48,7 +49,7 @@ class ActivationFunction {
*
* Usually, act is Layer::output_
*/
virtual void forward(Argument& act) = 0;
virtual Status forward(Argument& act) = 0;

/**
* @brief Backward propagation
@@ -57,7 +58,7 @@
* - Before calling backward(), act.grad = dE / dy, where E is the error/cost
* - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
*/
virtual void backward(Argument& act) = 0;
virtual Status backward(Argument& act) = 0;

virtual const std::string& getName() const = 0;
};
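For illustration only (not code from this PR): an activation written directly against the interface above now surfaces errors through the returned Status instead of CHECK-failing. The class name and error strings below are hypothetical; the real activations in this PR are generated via the BEGIN_DEFINE_ACTIVATION / END_DEFINE_ACTIVATION macros in ActivationFunction.cpp.

```cpp
// Hypothetical example implementing the new Status-returning interface.
class NoOpActivation : public ActivationFunction {
public:
  Status forward(Argument& act) override {
    if (!act.value) {
      return Status("activation called with a null value matrix");  // report, don't CHECK
    }
    return Status();  // success
  }

  Status backward(Argument& act) override {
    if (!act.grad) {
      return Status("activation called with a null gradient matrix");
    }
    return Status();
  }

  const std::string& getName() const override {
    static const std::string name = "noop";  // hypothetical name
    return name;
  }
};
```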
7 changes: 5 additions & 2 deletions paddle/gserver/layers/Layer.cpp
@@ -16,6 +16,7 @@ limitations under the License. */

#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Status.h"

#include "AddtoLayer.h"
#include "CRFLayer.h"
@@ -334,7 +335,8 @@ void Layer::showOutputStats() {

void Layer::forwardActivation() {
/* activation */
activation_->forward(output_);
auto status = activation_->forward(output_);
CHECK(status.isOK()) << status.what();

/* dropout */
if (config_.drop_rate() > 0) {
@@ -372,7 +374,8 @@ void Layer::backwardActivation() {
oGrad->dotMul(*oGrad, *dropOutMask_);
}

activation_->backward(output_);
auto status = activation_->backward(output_);
CHECK(status.isOK()) << status.what();
}

void Layer::forwardDropOut() {
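Layer::forwardActivation() and Layer::backwardActivation() above keep the old fail-fast behaviour by CHECK-ing the returned Status. As a sketch of the alternative this return type enables (not part of this diff; the helper name is made up), a caller could instead propagate the error upward:

```cpp
// Hypothetical helper: pass the activation's error to the caller instead of
// aborting the process via CHECK.
Status runActivationForward(ActivationFunction* activation, Argument& output) {
  Status status = activation->forward(output);
  if (!status.isOK()) {
    return status;  // let the caller decide how to handle the failure
  }
  return Status();
}
```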