From d1d632b78ace986a50bb69f230d83df204d0bdf0 Mon Sep 17 00:00:00 2001
From: Da Zheng
Date: Mon, 27 Nov 2017 23:54:18 +0000
Subject: [PATCH] Make SoftmaxActivation stateless.

---
 src/operator/nn/softmax_activation-inl.h | 124 +++++++++--------------
 src/operator/nn/softmax_activation.cu    |  14 +--
 2 files changed, 51 insertions(+), 87 deletions(-)

diff --git a/src/operator/nn/softmax_activation-inl.h b/src/operator/nn/softmax_activation-inl.h
index 5b91b6f79e98..b1d542e4068c 100644
--- a/src/operator/nn/softmax_activation-inl.h
+++ b/src/operator/nn/softmax_activation-inl.h
@@ -61,98 +61,72 @@ struct SoftmaxActivationParam : public dmlc::Parameter<SoftmaxActivationParam> {
   }
 };
 
-/**
- * \brief This is the implementation of softmax_activation operator.
- * \tparam xpu The device that the op will be executed on.
- */
-template<typename xpu>
-class SoftmaxActivationOp {
- public:
-  void Init(SoftmaxActivationParam p) {
-    this->param_ = p;
-  }
-
-  void Forward(const OpContext &ctx, const TBlob &in_data,
-               const OpReqType &req, const TBlob &out_data) {
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    if (param_.mode == softmax_activation::kInstance) {
-      Tensor<xpu, 2> data = in_data.FlatTo2D<xpu, real_t>(s);
-      Tensor<xpu, 2> out = out_data.FlatTo2D<xpu, real_t>(s);
-      Softmax(out, data);
-    } else {
-      CHECK_GE(in_data.ndim(), 3)
-          << "Input need to have a least 3 dimensions when mode=channel";
-      int n = in_data.size(0);
-      int k = in_data.size(1);
-      Shape<3> s3 = Shape3(n, k, static_cast<int>(in_data.Size()/n/k));
-      Tensor<xpu, 3, real_t> data = in_data.get_with_shape<xpu, 3, real_t>(s3, s);
-      Tensor<xpu, 3, real_t> out = out_data.get_with_shape<xpu, 3, real_t>(s3, s);
-      Softmax(out, data);
-    }
-  }
-
-  void Backward(const OpContext &ctx, const TBlob &out_grad,
-                const TBlob &out_data, const OpReqType &req, const TBlob &in_grad) {
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    // Use 3d tensor for both mode -> {instance, channel}. Get shapes
-    int total_size = in_grad.Size();
-    int batch_size = in_grad.shape_[0];
-    int channel_num = in_grad.shape_[1];
-    int rest_size = total_size / (batch_size * channel_num);
-    const Shape<3> data_shape = Shape3(batch_size, channel_num, rest_size);
-    // Get tensors
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    Tensor<xpu, 3> m_out_grad =
-        out_grad.get_with_shape<xpu, 3, real_t>(data_shape, s);
-    Tensor<xpu, 3> m_out_data =
-        out_data.get_with_shape<xpu, 3, real_t>(data_shape, s);
-    Tensor<xpu, 3> m_in_grad =
-        in_grad.get_with_shape<xpu, 3, real_t>(data_shape, s);
-    // get requested temp space
-    Tensor<xpu, 2> workspace = ctx.requested[softmax_activation::kTempSpace].get_space<xpu>(
-        Shape2(batch_size, rest_size), s);
-    workspace = reduce_with_axis<red::sum, false>(m_out_grad * m_out_data, 1);
-    Assign(m_in_grad, req,
-           m_out_data * (m_out_grad - broadcast_with_axis(workspace, 0, channel_num)));
-  }
-
- private:
-  SoftmaxActivationParam param_;
-};  // class SoftmaxActivationOp
-
-
 template<typename xpu>
 void SoftmaxActivationCompute(const nnvm::NodeAttrs& attrs,
                               const OpContext& ctx,
                               const std::vector<TBlob>& inputs,
-                              const std::vector<OpReqType>& req,
+                              const std::vector<OpReqType>& reqs,
                               const std::vector<TBlob>& outputs) {
+  using namespace mshadow;
+  using namespace mshadow::expr;
   const SoftmaxActivationParam& param = nnvm::get<SoftmaxActivationParam>(attrs.parsed);
   CHECK_EQ(inputs.size(), 1U);
   CHECK_EQ(outputs.size(), 1U);
-
-  static thread_local SoftmaxActivationOp<xpu> op;
-  op.Init(param);
-  op.Forward(ctx, inputs[0], req[0], outputs[0]);
+  const TBlob &in_data = inputs[softmax_activation::kData];
+  const OpReqType &req = reqs[softmax_activation::kOut];
+  const TBlob &out_data = outputs[softmax_activation::kOut];
+  Stream<xpu> *s = ctx.get_stream<xpu>();
+  if (param.mode == softmax_activation::kInstance) {
+    Tensor<xpu, 2> data = in_data.FlatTo2D<xpu, real_t>(s);
+    Tensor<xpu, 2> out = out_data.FlatTo2D<xpu, real_t>(s);
+    Softmax(out, data);
+  } else {
+    CHECK_GE(in_data.ndim(), 3)
+        << "Input need to have a least 3 dimensions when mode=channel";
+    int n = in_data.size(0);
+    int k = in_data.size(1);
+    Shape<3> s3 = Shape3(n, k, static_cast<int>(in_data.Size()/n/k));
+    Tensor<xpu, 3, real_t> data = in_data.get_with_shape<xpu, 3, real_t>(s3, s);
+    Tensor<xpu, 3, real_t> out = out_data.get_with_shape<xpu, 3, real_t>(s3, s);
+    Softmax(out, data);
+  }
 }
 
 template<typename xpu>
 void SoftmaxActivationGradCompute(const nnvm::NodeAttrs& attrs,
                                   const OpContext& ctx,
                                   const std::vector<TBlob>& inputs,
-                                  const std::vector<OpReqType>& req,
+                                  const std::vector<OpReqType>& reqs,
                                   const std::vector<TBlob>& outputs) {
-  const SoftmaxActivationParam& param = nnvm::get<SoftmaxActivationParam>(attrs.parsed);
+  using namespace mshadow;
+  using namespace mshadow::expr;
   CHECK_EQ(inputs.size(), 2U);
   CHECK_EQ(outputs.size(), 1);
-  CHECK_EQ(req.size(), 1);
-
-  static thread_local SoftmaxActivationOp<xpu> op;
-  op.Init(param);
-  op.Backward(ctx, inputs[0], inputs[1], req[0], outputs[0]);
+  CHECK_EQ(reqs.size(), 1);
+  const TBlob &out_grad = inputs[0];
+  const TBlob &out_data = inputs[1];
+  const OpReqType &req = reqs[0];
+  const TBlob &in_grad = outputs[0];
+  // Use 3d tensor for both mode -> {instance, channel}. Get shapes
+  int total_size = in_grad.Size();
+  int batch_size = in_grad.shape_[0];
+  int channel_num = in_grad.shape_[1];
+  int rest_size = total_size / (batch_size * channel_num);
+  const Shape<3> data_shape = Shape3(batch_size, channel_num, rest_size);
+  // Get tensors
+  Stream<xpu> *s = ctx.get_stream<xpu>();
+  Tensor<xpu, 3> m_out_grad =
+      out_grad.get_with_shape<xpu, 3, real_t>(data_shape, s);
+  Tensor<xpu, 3> m_out_data =
+      out_data.get_with_shape<xpu, 3, real_t>(data_shape, s);
+  Tensor<xpu, 3> m_in_grad =
+      in_grad.get_with_shape<xpu, 3, real_t>(data_shape, s);
+  // get requested temp space
+  Tensor<xpu, 2> workspace = ctx.requested[softmax_activation::kTempSpace].get_space<xpu>(
+      Shape2(batch_size, rest_size), s);
+  workspace = reduce_with_axis<red::sum, false>(m_out_grad * m_out_data, 1);
+  Assign(m_in_grad, req,
+         m_out_data * (m_out_grad - broadcast_with_axis(workspace, 0, channel_num)));
 }
 
 }  // namespace op
diff --git a/src/operator/nn/softmax_activation.cu b/src/operator/nn/softmax_activation.cu
index 8e6e787f8072..1cfe64f7e916 100644
--- a/src/operator/nn/softmax_activation.cu
+++ b/src/operator/nn/softmax_activation.cu
@@ -32,6 +32,7 @@ namespace mxnet {
 namespace op {
 
+#if MXNET_USE_CUDNN == 1
 template<>
 void SoftmaxActivationCompute<gpu>(const nnvm::NodeAttrs& attrs,
                                    const OpContext& ctx,
@@ -42,15 +43,9 @@ void SoftmaxActivationCompute<gpu>(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(inputs.size(), 1U);
   CHECK_EQ(outputs.size(), 1U);
 
-#if MXNET_USE_CUDNN == 1
   static thread_local CuDNNSoftmaxActivationOp op;
   op.Init(param);
   op.Forward(ctx, inputs[0], req[0], outputs[0]);
-#else
-  static thread_local SoftmaxActivationOp<gpu> op;
-  op.Init(param);
-  op.Forward(ctx, inputs[0], req[0], outputs[0]);
-#endif
 }
 
 template<>
@@ -64,16 +59,11 @@ void SoftmaxActivationGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(outputs.size(), 1);
   CHECK_EQ(req.size(), 1);
 
-#if MXNET_USE_CUDNN == 1
   static thread_local CuDNNSoftmaxActivationOp op;
   op.Init(param);
   op.Backward(ctx, inputs[0], inputs[1], req[0], outputs[0]);
-#else
-  static thread_local SoftmaxActivationOp<gpu> op;
-  op.Init(param);
-  op.Backward(ctx, inputs[0], inputs[1], req[0], outputs[0]);
-#endif
 }
+#endif
 
 NNVM_REGISTER_OP(SoftmaxActivation)
 .set_attr<FCompute>("FCompute<gpu>", SoftmaxActivationCompute<gpu>);
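--
Note (not part of the diff above): with the generic SoftmaxActivationOp<xpu> class removed, the CPU build relies on the stateless SoftmaxActivationCompute<cpu> / SoftmaxActivationGradCompute<cpu> templates being registered as FCompute functions in src/operator/nn/softmax_activation.cc, which this patch does not show. The following is only a minimal sketch of what that registration could look like, assuming the usual NNVM attribute keys and assuming the backward operator is named _backward_SoftmaxActivation; the real file also registers shape/type inference and other attributes.

// Hypothetical CPU-side registration sketch (softmax_activation.cc).
NNVM_REGISTER_OP(SoftmaxActivation)
.set_attr<FCompute>("FCompute<cpu>", SoftmaxActivationCompute<cpu>);

// The backward pass needs temp space for the reduce_with_axis workspace,
// so a kTempSpace resource request is attached alongside the compute function.
NNVM_REGISTER_OP(_backward_SoftmaxActivation)
.set_attr<FResourceRequest>("FResourceRequest", [](const nnvm::NodeAttrs& attrs) {
  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.set_attr<FCompute>("FCompute<cpu>", SoftmaxActivationGradCompute<cpu>);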