
Commit

add leaky relu
antinucleon committed Sep 14, 2015
1 parent 5eb04f8 commit 006b188
Showing 25 changed files with 656 additions and 340 deletions.
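The leaky ReLU operator itself presumably lives in one of the changed files not shown in this excerpt; the hunks below are the accompanying clean-up of the existing activation, batch-norm, and concat operators. For orientation, a leaky ReLU passes positive inputs through unchanged and scales negative inputs by a small slope alpha, so the forward pass is f(x) = x for x > 0 and f(x) = alpha * x otherwise, with gradient 1 or alpha. A minimal standalone C++ sketch of that math (independent of the MXNet operator interface; the slope 0.25 is an arbitrary illustrative value, not necessarily the commit's default):

// leaky_relu_sketch.cc -- illustrative only; not the operator added by this commit.
#include <cstdio>
#include <vector>

// Forward: f(x) = x if x > 0, otherwise alpha * x.
float leaky_relu(float x, float alpha) {
  return x > 0.0f ? x : alpha * x;
}

// Gradient w.r.t. the input: 1 if x > 0, otherwise alpha.
float leaky_relu_grad(float x, float alpha) {
  return x > 0.0f ? 1.0f : alpha;
}

int main() {
  const float alpha = 0.25f;  // hypothetical slope, chosen only for illustration
  std::vector<float> xs = {-2.0f, -0.5f, 0.0f, 0.5f, 2.0f};
  for (float x : xs) {
    std::printf("x=% .2f  f(x)=% .4f  f'(x)=% .4f\n",
                x, leaky_relu(x, alpha), leaky_relu_grad(x, alpha));
  }
  return 0;
}
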
38 changes: 19 additions & 19 deletions src/operator/activation-inl.h
@@ -30,10 +30,10 @@ struct ActivationParam : public dmlc::Parameter<ActivationParam> {
  int act_type;
  DMLC_DECLARE_PARAMETER(ActivationParam) {
  DMLC_DECLARE_FIELD(act_type).set_default(kReLU)
- .add_enum("relu", kReLU)
- .add_enum("sigmoid", kSigmoid)
- .add_enum("tanh", kTanh)
- .describe("Activation function to be applied.");
+ .add_enum("relu", kReLU)
+ .add_enum("sigmoid", kSigmoid)
+ .add_enum("tanh", kTanh)
+ .describe("Activation function to be applied.");
  }
  };

@@ -91,11 +91,11 @@ class ActivationProp : public OperatorProperty {
  }

  bool InferShape(std::vector<TShape> *in_shape,
- std::vector<TShape> *out_shape,
- std::vector<TShape> *aux_shape) const override {
+ std::vector<TShape> *out_shape,
+ std::vector<TShape> *aux_shape) const override {
  using namespace mshadow;
  CHECK_EQ(in_shape->size(), 1) << "Input:[data]";
- const TShape &dshape = in_shape->at(0);
+ const TShape &dshape = in_shape->at(kData);
  if (dshape.ndim() == 0) return false;
  out_shape->clear();
  out_shape->push_back(dshape);
@@ -114,27 +114,27 @@ class ActivationProp : public OperatorProperty {

  // decalre dependency and inplace optimization options
  std::vector<int> DeclareBackwardDependency(
- const std::vector<int> &out_grad,
- const std::vector<int> &in_data,
- const std::vector<int> &out_data) const override {
- #if MXNET_USE_CUDNN == 1
+ const std::vector<int> &out_grad,
+ const std::vector<int> &in_data,
+ const std::vector<int> &out_data) const override {
+ #if MXNET_USE_CUDNN == 1
  return {out_grad[kOut], out_data[kOut], in_data[kData]};
- #else
+ #else
  return {out_grad[kOut], out_data[kOut]};
- #endif // MXNET_USE_CUDNN
+ #endif // MXNET_USE_CUDNN
  }

  std::vector<std::pair<int, void*> > BackwardInplaceOption(
- const std::vector<int> &out_grad,
- const std::vector<int> &in_data,
- const std::vector<int> &out_data,
- const std::vector<void*> &in_grad) const override {
+ const std::vector<int> &out_grad,
+ const std::vector<int> &in_data,
+ const std::vector<int> &out_data,
+ const std::vector<void*> &in_grad) const override {
  return {{out_grad[kOut], in_grad[kData]}};
  }

  std::vector<std::pair<int, void*> > ForwardInplaceOption(
- const std::vector<int> &in_data,
- const std::vector<void*> &out_data) const override {
+ const std::vector<int> &in_data,
+ const std::vector<void*> &out_data) const override {
  return {{in_data[kData], out_data[kOut]}};
  }
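
A note on the DeclareBackwardDependency hunk above: without cuDNN, these activations can compute the input gradient from the operator's output alone, since relu's slope can be read off the sign of the output and sigmoid and tanh have derivatives expressible in the output y (y * (1 - y) and 1 - y * y); the cuDNN path also consumes the original input, hence the extra in_data[kData] dependency. A small self-contained sketch of the output-only gradient forms, checked against a numerical derivative (illustrative code, not MXNet's):

#include <cmath>
#include <cstdio>

// Gradients written purely in terms of the activation output y = f(x),
// which is why the non-cuDNN backward pass only needs out_grad and out_data.
float relu_grad_from_output(float y)    { return y > 0.0f ? 1.0f : 0.0f; }
float sigmoid_grad_from_output(float y) { return y * (1.0f - y); }
float tanh_grad_from_output(float y)    { return 1.0f - y * y; }

float sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }

int main() {
  const float x = 0.7f, h = 1e-3f;
  // Central-difference derivatives as a cross-check.
  float num_sig  = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h);
  float num_tanh = (std::tanh(x + h) - std::tanh(x - h)) / (2 * h);
  std::printf("sigmoid: output-form %f, numeric %f\n",
              sigmoid_grad_from_output(sigmoid(x)), num_sig);
  std::printf("tanh:    output-form %f, numeric %f\n",
              tanh_grad_from_output(std::tanh(x)), num_tanh);
  std::printf("relu:    output-form %f (x > 0, so the slope is 1)\n",
              relu_grad_from_output(x > 0.0f ? x : 0.0f));
  return 0;
}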

9 changes: 6 additions & 3 deletions src/operator/activation.cc
@@ -12,9 +12,12 @@ namespace op {
  template<>
  Operator *CreateOp<cpu>(ActivationParam param) {
  switch (param.act_type) {
- case kReLU: return new ActivationOp<cpu, mshadow_op::relu, mshadow_op::relu_grad>();
- case kSigmoid: return new ActivationOp<cpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad>();
- case kTanh: return new ActivationOp<cpu, mshadow_op::tanh, mshadow_op::tanh_grad>();
+ case kReLU:
+ return new ActivationOp<cpu, mshadow_op::relu, mshadow_op::relu_grad>();
+ case kSigmoid:
+ return new ActivationOp<cpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad>();
+ case kTanh:
+ return new ActivationOp<cpu, mshadow_op::tanh, mshadow_op::tanh_grad>();
  default:
  LOG(FATAL) << "unknown activation type";
  return NULL;
15 changes: 9 additions & 6 deletions src/operator/activation.cu
@@ -14,18 +14,21 @@ namespace mxnet {
  namespace op {
  template<>
  Operator *CreateOp<gpu>(ActivationParam param) {
- #if MXNET_USE_CUDNN == 1
+ #if MXNET_USE_CUDNN == 1
  return new CuDNNActivationOp(param);
- #else
+ #else
  switch(param.act_type) {
- case kReLU: return new ActivationOp<gpu, mshadow_op::relu, mshadow_op::relu_grad>();
- case kSigmoid: return new ActivationOp<gpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad>();
- case kTanh: return new ActivationOp<gpu, mshadow_op::tanh, mshadow_op::tanh_grad>();
+ case kReLU:
+ return new ActivationOp<gpu, mshadow_op::relu, mshadow_op::relu_grad>();
+ case kSigmoid:
+ return new ActivationOp<gpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad>();
+ case kTanh:
+ return new ActivationOp<gpu, mshadow_op::tanh, mshadow_op::tanh_grad>();
  default:
  LOG(FATAL) << "unknown activation";
  return NULL;
  }
- #endif // MXNET_USE_CUDNN
+ #endif // MXNET_USE_CUDNN
  }
  } // op
  } // namespace mxnet
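
Both CreateOp specializations above follow the same pattern: a switch over the act_type enum selects a forward functor (and its gradient) that is baked into ActivationOp as template parameters, and the cuDNN build bypasses the switch entirely. A stripped-down sketch of that enum-to-functor dispatch in plain C++ (hypothetical names, no MXNet types, forward only):

#include <cmath>
#include <cstdio>

enum ActType { kReLUAct, kSigmoidAct, kTanhAct };  // hypothetical enum names

// Functor types analogous to mshadow_op::relu, mshadow_op::sigmoid, mshadow_op::tanh.
struct Relu    { static float Map(float x) { return x > 0.0f ? x : 0.0f; } };
struct Sigmoid { static float Map(float x) { return 1.0f / (1.0f + std::exp(-x)); } };
struct Tanh    { static float Map(float x) { return std::tanh(x); } };

struct Op { virtual float Forward(float x) const = 0; virtual ~Op() {} };

// The forward functor is a compile-time parameter, as in ActivationOp<xpu, F, G>.
template <typename ForwardOp>
struct ActOp : Op {
  float Forward(float x) const override { return ForwardOp::Map(x); }
};

Op *CreateOp(ActType t) {  // mirrors the switch in activation.cc / activation.cu
  switch (t) {
    case kReLUAct:    return new ActOp<Relu>();
    case kSigmoidAct: return new ActOp<Sigmoid>();
    case kTanhAct:    return new ActOp<Tanh>();
  }
  return nullptr;
}

int main() {
  Op *op = CreateOp(kSigmoidAct);
  std::printf("sigmoid(0.5) = %f\n", op->Forward(0.5f));
  delete op;
  return 0;
}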
53 changes: 29 additions & 24 deletions src/operator/batch_norm-inl.h
@@ -28,9 +28,9 @@ struct BatchNormParam : public dmlc::Parameter<BatchNormParam> {
  float momentum;
  DMLC_DECLARE_PARAMETER(BatchNormParam) {
  DMLC_DECLARE_FIELD(eps).set_default(1e-10f)
- .describe("Epsilon to prevent div 0");
+ .describe("Epsilon to prevent div 0");
  DMLC_DECLARE_FIELD(momentum).set_default(0.1f)
- .describe("Momentum for moving average");
+ .describe("Momentum for moving average");
  }
  };

@@ -89,18 +89,19 @@ class BatchNormOp : public Operator {
  Tensor<xpu, 1> var = out_data[kVar].get<xpu, 1, real_t>(s);
  Assign(mean, req[kMean], scale * sumall_except_dim<1>(data));
  Assign(var, req[kVar], scale * sumall_except_dim<1>(
- F<mshadow_op::square>(data - broadcast<1>(mean, data.shape_))));
+ F<mshadow_op::square>(data - broadcast<1>(mean, data.shape_))));
  Assign(out_no_affine, req[kOutNoAffine], (data - broadcast<1>(mean, data.shape_)) /
- F<mshadow_op::square_root>(broadcast<1>(var + param_.eps, data.shape_)));
+ F<mshadow_op::square_root>(broadcast<1>(var + param_.eps, data.shape_)));
  Assign(out, req[kOut], out_no_affine * broadcast<1>(slope, out.shape_) +
- broadcast<1>(bias, out.shape_));
+ broadcast<1>(bias, out.shape_));
  moving_mean = moving_mean * param_.momentum + mean * (1 - param_.momentum);
  moving_var = moving_var * param_.momentum + var * (1 - param_.momentum);
  } else {
  Assign(out, req[kOut], broadcast<1>(slope /
- F<mshadow_op::square_root>(moving_var + param_.eps), data.shape_) * data +
- broadcast<1>(bias - (slope * moving_mean) /
- F<mshadow_op::square_root>(moving_var + param_.eps), data.shape_));
+ F<mshadow_op::square_root>(moving_var + param_.eps),
+ data.shape_) * data +
+ broadcast<1>(bias - (slope * moving_mean) /
+ F<mshadow_op::square_root>(moving_var + param_.eps), data.shape_));
  }
  }
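
To unpack the training branch of the batch-norm forward pass above: per channel it computes the batch mean and biased variance, normalizes to x_hat = (x - mean) / sqrt(var + eps), applies the learned scale (slope/gamma) and shift (bias/beta), and folds the batch statistics into the moving averages with the configured momentum. A tiny single-channel sketch of that arithmetic (made-up data; eps and momentum match the defaults declared earlier in this file, gamma and beta are arbitrary):

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // One channel, four values in the batch (made-up numbers).
  std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f};
  const float eps = 1e-10f, momentum = 0.1f;   // defaults from BatchNormParam
  const float gamma = 2.0f, beta = 0.5f;       // hypothetical learned scale/shift
  const float scale = 1.0f / x.size();

  // Batch mean and biased variance, as in Assign(mean, ...) / Assign(var, ...).
  float mean = 0.0f, var = 0.0f;
  for (float v : x) mean += scale * v;
  for (float v : x) var += scale * (v - mean) * (v - mean);
  std::printf("mean=%f var=%f\n", mean, var);

  // x_hat (out_no_affine) and the affine output out = gamma * x_hat + beta.
  for (float v : x) {
    float x_hat = (v - mean) / std::sqrt(var + eps);
    std::printf("x=%.1f  x_hat=% .4f  out=% .4f\n", v, x_hat, gamma * x_hat + beta);
  }

  // Moving averages updated as: moving = moving * momentum + batch * (1 - momentum).
  float moving_mean = 0.0f, moving_var = 1.0f;  // assumed previous running statistics
  moving_mean = moving_mean * momentum + mean * (1 - momentum);
  moving_var  = moving_var  * momentum + var  * (1 - momentum);
  std::printf("moving_mean=%f moving_var=%f\n", moving_mean, moving_var);
  return 0;
}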

@@ -149,9 +150,10 @@ class BatchNormOp : public Operator {
  Tensor<xpu, 1> tmp = tmp_[2];
  // cal
  gvar = sumall_except_dim<1>((grad * broadcast<1>(slope, data.shape_)) *
- (data - broadcast<1>(mean, data.shape_)) *
- -0.5f *
- F<mshadow_op::power>(broadcast<1>(var + param_.eps, data.shape_), -1.5f));
+ (data - broadcast<1>(mean, data.shape_)) *
+ -0.5f *
+ F<mshadow_op::power>(broadcast<1>(var + param_.eps, data.shape_),
+ -1.5f));
  gmean = sumall_except_dim<1>(grad * broadcast<1>(slope, data.shape_));
  gmean *= -1.0f / F<mshadow_op::square_root>(var + param_.eps);
  tmp = scale * sumall_except_dim<1>(-2.0f * (data - broadcast<1>(mean, data.shape_)));
@@ -161,9 +163,10 @@ class BatchNormOp : public Operator {
  Assign(gslope, req[kGamma], sumall_except_dim<1>(grad * out_no_affine));
  Assign(gbias, req[kBeta], sumall_except_dim<1>(grad));
  Assign(grad_in, req[kData], (grad * broadcast<1>(slope, data.shape_)) *
- broadcast<1>(1.0f / F<mshadow_op::square_root>(var + param_.eps), data.shape_) +
- broadcast<1>(gvar, data.shape_) * scale * 2.0f * (data - broadcast<1>(mean, data.shape_)) +
- broadcast<1>(gmean, data.shape_) * scale);
+ broadcast<1>(1.0f / F<mshadow_op::square_root>(var + param_.eps), data.shape_) +
+ broadcast<1>(gvar, data.shape_) * scale * 2.0f * (data - broadcast<1>(mean,
+ data.shape_)) +
+ broadcast<1>(gmean, data.shape_) * scale);
  }

  private:
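
The backward pass split across the two hunks above is the standard batch-norm gradient written with mshadow broadcasts: dgamma is the sum of grad * x_hat, dbeta is the sum of grad, and the input gradient combines a direct term grad * gamma / sqrt(var + eps) with corrections through the variance gradient (gvar) and mean gradient (gmean). A compact single-channel version of the same arithmetic, offered as a sketch rather than the operator's actual code (the extra gmean correction through sum(x - mean) is omitted because that sum is zero by construction):

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Single channel, made-up input and upstream gradient dy.
  std::vector<float> x  = {1.0f, 2.0f, 3.0f, 4.0f};
  std::vector<float> dy = {0.1f, -0.2f, 0.3f, 0.05f};
  const float eps = 1e-10f, gamma = 2.0f;  // hypothetical learned scale
  const size_t n = x.size();
  const float scale = 1.0f / n;

  float mean = 0.0f, var = 0.0f;
  for (float v : x) mean += scale * v;
  for (float v : x) var += scale * (v - mean) * (v - mean);
  const float inv_std = 1.0f / std::sqrt(var + eps);

  // dgamma = sum(dy * x_hat), dbeta = sum(dy): gslope and gbias above.
  float dgamma = 0.0f, dbeta = 0.0f;
  for (size_t i = 0; i < n; ++i) {
    dgamma += dy[i] * (x[i] - mean) * inv_std;
    dbeta  += dy[i];
  }

  // gvar and gmean, matching the broadcast expressions in the hunks above.
  float gvar = 0.0f, gmean = 0.0f;
  for (size_t i = 0; i < n; ++i) {
    gvar  += dy[i] * gamma * (x[i] - mean) * -0.5f * std::pow(var + eps, -1.5f);
    gmean += dy[i] * gamma;
  }
  gmean *= -inv_std;

  // grad_in: direct term + variance term + mean term, each scaled as in Assign(grad_in, ...).
  std::printf("dgamma=%f dbeta=%f\n", dgamma, dbeta);
  for (size_t i = 0; i < n; ++i) {
    float dx = dy[i] * gamma * inv_std
             + gvar * scale * 2.0f * (x[i] - mean)
             + gmean * scale;
    std::printf("dx[%zu]=% .5f\n", i, dx);
  }
  return 0;
}
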
@@ -193,8 +196,8 @@ class BatchNormProp : public OperatorProperty {
  }

  bool InferShape(std::vector<TShape> *in_shape,
- std::vector<TShape> *out_shape,
- std::vector<TShape> *aux_shape) const override {
+ std::vector<TShape> *out_shape,
+ std::vector<TShape> *aux_shape) const override {
  using namespace mshadow;
  CHECK_EQ(in_shape->size(), 3) << "Input:[data, gamma, beta]";
  const TShape &dshape = in_shape->at(0);
@@ -223,19 +226,20 @@ class BatchNormProp : public OperatorProperty {
  }

  std::vector<int> DeclareBackwardDependency(
- const std::vector<int> &out_grad,
- const std::vector<int> &in_data,
- const std::vector<int> &out_data) const override {
+ const std::vector<int> &out_grad,
+ const std::vector<int> &in_data,
+ const std::vector<int> &out_data) const override {
  return {out_grad[kOut],
  out_data[kOut], out_data[kOutNoAffine], out_data[kMean], out_data[kVar],
- in_data[kData], in_data[kGamma], in_data[kBeta]};
+ in_data[kData], in_data[kGamma], in_data[kBeta]
+ };
  }

  std::vector<std::pair<int, void*> > BackwardInplaceOption(
- const std::vector<int> &out_grad,
- const std::vector<int> &in_data,
- const std::vector<int> &out_data,
- const std::vector<void*> &in_grad) const override {
+ const std::vector<int> &out_grad,
+ const std::vector<int> &in_data,
+ const std::vector<int> &out_data,
+ const std::vector<void*> &in_grad) const override {
  return {{out_grad[kOut], in_grad[kData]}};
  }

@@ -273,3 +277,4 @@ class BatchNormProp : public OperatorProperty {
  } // namespace op
  } // namespace mxnet
  #endif // MXNET_OPERATOR_BATCH_NORM_INL_H_

20 changes: 10 additions & 10 deletions src/operator/concat-inl.h
@@ -26,7 +26,7 @@ struct ConcatParam : public dmlc::Parameter<ConcatParam> {
  int num_args;
  DMLC_DECLARE_PARAMETER(ConcatParam) {
  DMLC_DECLARE_FIELD(num_args).set_range(1, 6)
- .describe("Number of inputs to be concated.");
+ .describe("Number of inputs to be concated.");
  }
  }; // struct ConcatParam

@@ -178,8 +178,8 @@ class ConcatProp : public OperatorProperty {
  }

  bool InferShape(std::vector<TShape> *in_shape,
- std::vector<TShape> *out_shape,
- std::vector<TShape> *aux_shape) const override {
+ std::vector<TShape> *out_shape,
+ std::vector<TShape> *aux_shape) const override {
  using namespace mshadow;
  CHECK_EQ(in_shape->size(), static_cast<size_t>(param_.num_args));
  TShape dshape = in_shape->at(kData0);
@@ -193,10 +193,10 @@ class ConcatProp : public OperatorProperty {
  dshape[1] += tmp[1];
  } else {
  CHECK_EQ(dshape[j], tmp[j])
- << "Incorrect shape[" << i << "]: "
- << tmp << ". "
- << "(first input shape: "
- << dshape << ")";
+ << "Incorrect shape[" << i << "]: "
+ << tmp << ". "
+ << "(first input shape: "
+ << dshape << ")";
  }
  }
  }
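
The loop in the hunk above accumulates the concatenation axis: inputs are joined along dimension 1 (the channel axis), so every other dimension must match the first input while dshape[1] is summed across inputs. A small sketch of the same check-and-accumulate shape inference on plain integer shape vectors (hypothetical shapes, not taken from the diff):

#include <cassert>
#include <cstdio>
#include <vector>

// Infer the output shape of a channel-wise (axis 1) concat, mirroring
// ConcatProp::InferShape: every dimension except 1 must match the first input.
std::vector<int> ConcatShape(const std::vector<std::vector<int>> &shapes) {
  std::vector<int> out = shapes[0];
  for (size_t i = 1; i < shapes.size(); ++i) {
    for (size_t j = 0; j < out.size(); ++j) {
      if (j == 1) {
        out[1] += shapes[i][1];          // accumulate the concat axis
      } else {
        assert(out[j] == shapes[i][j]);  // all other axes must agree
      }
    }
  }
  return out;
}

int main() {
  // Two NCHW inputs with 3 and 5 channels give 8 channels out.
  std::vector<std::vector<int>> shapes = {{2, 3, 4, 4}, {2, 5, 4, 4}};
  std::vector<int> out = ConcatShape(shapes);
  std::printf("out shape: %d x %d x %d x %d\n", out[0], out[1], out[2], out[3]);
  return 0;
}
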
@@ -216,9 +216,9 @@ class ConcatProp : public OperatorProperty {
  }

  std::vector<int> DeclareBackwardDependency(
- const std::vector<int> &out_grad,
- const std::vector<int> &in_data,
- const std::vector<int> &out_data) const override {
+ const std::vector<int> &out_grad,
+ const std::vector<int> &in_data,
+ const std::vector<int> &out_data) const override {
  return out_grad;
  }
