Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Make SoftmaxActivation stateless.
Browse files Browse the repository at this point in the history
  • Loading branch information
zheng-da committed Nov 28, 2017
1 parent 790887f commit d1d632b
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 87 deletions.
124 changes: 49 additions & 75 deletions src/operator/nn/softmax_activation-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,98 +61,72 @@ struct SoftmaxActivationParam : public dmlc::Parameter<SoftmaxActivationParam> {
}
};

/**
* \brief This is the implementation of softmax_activation operator.
* \tparam xpu The device that the op will be executed on.
*/
template<typename xpu>
class SoftmaxActivationOp {
public:
void Init(SoftmaxActivationParam p) {
this->param_ = p;
}

void Forward(const OpContext &ctx, const TBlob &in_data,
const OpReqType &req, const TBlob &out_data) {
using namespace mshadow;
using namespace mshadow::expr;
Stream<xpu> *s = ctx.get_stream<xpu>();
if (param_.mode == softmax_activation::kInstance) {
Tensor<xpu, 2> data = in_data.FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> out = out_data.FlatTo2D<xpu, real_t>(s);
Softmax(out, data);
} else {
CHECK_GE(in_data.ndim(), 3)
<< "Input need to have a least 3 dimensions when mode=channel";
int n = in_data.size(0);
int k = in_data.size(1);
Shape<3> s3 = Shape3(n, k, static_cast<int>(in_data.Size()/n/k));
Tensor<xpu, 3, real_t> data = in_data.get_with_shape<xpu, 3, real_t>(s3, s);
Tensor<xpu, 3, real_t> out = out_data.get_with_shape<xpu, 3, real_t>(s3, s);
Softmax(out, data);
}
}

void Backward(const OpContext &ctx, const TBlob &out_grad,
const TBlob &out_data, const OpReqType &req, const TBlob &in_grad) {
using namespace mshadow;
using namespace mshadow::expr;
// Use 3d tensor for both mode -> {instance, channel}. Get shapes
int total_size = in_grad.Size();
int batch_size = in_grad.shape_[0];
int channel_num = in_grad.shape_[1];
int rest_size = total_size / (batch_size * channel_num);
const Shape<3> data_shape = Shape3(batch_size, channel_num, rest_size);
// Get tensors
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 3> m_out_grad =
out_grad.get_with_shape<xpu, 3, real_t>(data_shape, s);
Tensor<xpu, 3> m_out_data =
out_data.get_with_shape<xpu, 3, real_t>(data_shape, s);
Tensor<xpu, 3> m_in_grad =
in_grad.get_with_shape<xpu, 3, real_t>(data_shape, s);
// get requested temp space
Tensor<xpu, 2> workspace = ctx.requested[softmax_activation::kTempSpace].get_space<xpu>(
Shape2(batch_size, rest_size), s);
workspace = reduce_with_axis<red::sum, false>(m_out_grad * m_out_data, 1);
Assign(m_in_grad, req,
m_out_data * (m_out_grad - broadcast_with_axis(workspace, 0, channel_num)));
}

private:
SoftmaxActivationParam param_;
}; // class SoftmaxActivationOp


/*!
 * \brief Stateless FCompute implementation of the softmax_activation forward pass.
 *        (The scraped diff had merged the removed and added lines, leaving a
 *        duplicate `req`/`reqs` parameter and the dead thread_local-op body;
 *        this is the clean stateless version.)
 * \param attrs   node attributes carrying the parsed SoftmaxActivationParam.
 * \param ctx     execution context providing the device stream.
 * \param inputs  one input blob: the data to apply softmax to.
 * \param reqs    write request for the single output.
 * \param outputs one output blob receiving softmax(inputs[0]).
 */
template<typename xpu>
void SoftmaxActivationCompute(const nnvm::NodeAttrs& attrs,
                              const OpContext& ctx,
                              const std::vector<TBlob>& inputs,
                              const std::vector<OpReqType>& reqs,
                              const std::vector<TBlob>& outputs) {
  using namespace mshadow;
  using namespace mshadow::expr;
  const SoftmaxActivationParam& param = nnvm::get<SoftmaxActivationParam>(attrs.parsed);
  CHECK_EQ(inputs.size(), 1U);
  CHECK_EQ(outputs.size(), 1U);

  const TBlob &in_data = inputs[softmax_activation::kData];
  const OpReqType &req = reqs[softmax_activation::kOut];  // NOTE(review): unused — Softmax writes out_data directly
  const TBlob &out_data = outputs[softmax_activation::kOut];
  Stream<xpu> *s = ctx.get_stream<xpu>();
  if (param.mode == softmax_activation::kInstance) {
    // mode=instance: flatten to (batch, rest) and normalize each row.
    Tensor<xpu, 2> data = in_data.FlatTo2D<xpu, real_t>(s);
    Tensor<xpu, 2> out = out_data.FlatTo2D<xpu, real_t>(s);
    Softmax(out, data);
  } else {
    // mode=channel: normalize along axis 1 of an (n, k, rest) view.
    CHECK_GE(in_data.ndim(), 3)
        << "Input needs to have at least 3 dimensions when mode=channel";
    int n = in_data.size(0);
    int k = in_data.size(1);
    Shape<3> s3 = Shape3(n, k, static_cast<int>(in_data.Size()/n/k));
    Tensor<xpu, 3, real_t> data = in_data.get_with_shape<xpu, 3, real_t>(s3, s);
    Tensor<xpu, 3, real_t> out = out_data.get_with_shape<xpu, 3, real_t>(s3, s);
    Softmax(out, data);
  }
}

/*!
 * \brief Stateless FCompute implementation of the softmax_activation backward pass:
 *        in_grad = out_data * (out_grad - sum_ch(out_grad * out_data)).
 *        (The scraped diff had merged the removed and added lines, leaving a
 *        duplicate `req`/`reqs` parameter, a stale req.size() check, the dead
 *        thread_local-op body, and an unused `param` local; all removed here.)
 * \param attrs   node attributes (parameters are not needed in backward).
 * \param ctx     execution context providing the stream and requested temp space.
 * \param inputs  two blobs: [out_grad, out_data] from the forward pass.
 * \param reqs    write request for the single output (in_grad).
 * \param outputs one blob: in_grad.
 */
template<typename xpu>
void SoftmaxActivationGradCompute(const nnvm::NodeAttrs& attrs,
                                  const OpContext& ctx,
                                  const std::vector<TBlob>& inputs,
                                  const std::vector<OpReqType>& reqs,
                                  const std::vector<TBlob>& outputs) {
  using namespace mshadow;
  using namespace mshadow::expr;
  CHECK_EQ(inputs.size(), 2U);
  CHECK_EQ(outputs.size(), 1);
  CHECK_EQ(reqs.size(), 1);

  const TBlob &out_grad = inputs[0];
  const TBlob &out_data = inputs[1];
  const OpReqType &req = reqs[0];
  const TBlob &in_grad = outputs[0];
  // Use 3d tensor for both mode -> {instance, channel}. Get shapes
  int total_size = in_grad.Size();
  int batch_size = in_grad.shape_[0];
  int channel_num = in_grad.shape_[1];
  int rest_size = total_size / (batch_size * channel_num);
  const Shape<3> data_shape = Shape3(batch_size, channel_num, rest_size);
  // Get tensors
  Stream<xpu> *s = ctx.get_stream<xpu>();
  Tensor<xpu, 3> m_out_grad =
      out_grad.get_with_shape<xpu, 3, real_t>(data_shape, s);
  Tensor<xpu, 3> m_out_data =
      out_data.get_with_shape<xpu, 3, real_t>(data_shape, s);
  Tensor<xpu, 3> m_in_grad =
      in_grad.get_with_shape<xpu, 3, real_t>(data_shape, s);
  // get requested temp space for the channel-axis sum of out_grad * out_data
  Tensor<xpu, 2> workspace = ctx.requested[softmax_activation::kTempSpace].get_space<xpu>(
      Shape2(batch_size, rest_size), s);
  workspace = reduce_with_axis<red::sum, false>(m_out_grad * m_out_data, 1);
  // Softmax Jacobian applied to the incoming gradient.
  Assign(m_in_grad, req,
         m_out_data * (m_out_grad - broadcast_with_axis(workspace, 0, channel_num)));
}

} // namespace op
Expand Down
14 changes: 2 additions & 12 deletions src/operator/nn/softmax_activation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
namespace mxnet {
namespace op {

#if MXNET_USE_CUDNN == 1
template<>
void SoftmaxActivationCompute<gpu>(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
Expand All @@ -42,15 +43,9 @@ void SoftmaxActivationCompute<gpu>(const nnvm::NodeAttrs& attrs,
CHECK_EQ(inputs.size(), 1U);
CHECK_EQ(outputs.size(), 1U);

#if MXNET_USE_CUDNN == 1
static thread_local CuDNNSoftmaxActivationOp op;
op.Init(param);
op.Forward(ctx, inputs[0], req[0], outputs[0]);
#else
static thread_local SoftmaxActivationOp<xpu> op;
op.Init(param);
op.Forward(ctx, inputs[0], req[0], outputs[0]);
#endif
}

template<>
Expand All @@ -64,16 +59,11 @@ void SoftmaxActivationGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
CHECK_EQ(outputs.size(), 1);
CHECK_EQ(req.size(), 1);

#if MXNET_USE_CUDNN == 1
static thread_local CuDNNSoftmaxActivationOp op;
op.Init(param);
op.Backward(ctx, inputs[0], inputs[1], req[0], outputs[0]);
#else
static thread_local SoftmaxActivationOp<xpu> op;
op.Init(param);
op.Backward(ctx, inputs[0], inputs[1], req[0], outputs[0]);
#endif
}
#endif

NNVM_REGISTER_OP(SoftmaxActivation)
.set_attr<FCompute>("FCompute<gpu>", SoftmaxActivationCompute<gpu>);
Expand Down

0 comments on commit d1d632b

Please sign in to comment.