From 90288052157b35870ac48c0494e443d6168cfc0b Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sun, 23 Aug 2015 20:18:42 -0600 Subject: [PATCH 1/3] add convolution op --- Makefile | 6 +- include/mxnet/base.h | 2 +- src/operator/convolution-inl.h | 317 ++++++++++++++++++ src/operator/convolution.cc | 32 ++ src/operator/convolution.cu | 19 ++ src/operator/fully_connected-inl.h | 3 +- src/operator/pooling-inl.h | 1 - .../static_operator/convolution_op-inl.h | 270 --------------- src/operator/static_operator/dropout_op-inl.h | 94 ------ src/operator/static_operator/reshape_op-inl.h | 76 ----- 10 files changed, 374 insertions(+), 446 deletions(-) create mode 100644 src/operator/convolution-inl.h create mode 100644 src/operator/convolution.cc create mode 100644 src/operator/convolution.cu delete mode 100644 src/operator/static_operator/convolution_op-inl.h delete mode 100644 src/operator/static_operator/dropout_op-inl.h delete mode 100644 src/operator/static_operator/reshape_op-inl.h diff --git a/Makefile b/Makefile index e95ee067980f..6a01e38d4d06 100644 --- a/Makefile +++ b/Makefile @@ -64,14 +64,14 @@ endif #BIN = test/test_threaded_engine test/api_registry_test OBJ = narray_function_cpu.o # add threaded engine after it is done -OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o +OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a LIB_DEP = $(DMLC_CORE)/libdmlc.a ifeq ($(USE_CUDA), 1) - CUOBJ += narray_function_gpu.o fully_connected_gpu.o activation_gpu.o elementwise_sum_gpu.o pooling_gpu.o softmax_gpu.o + CUOBJ += narray_function_gpu.o fully_connected_gpu.o activation_gpu.o elementwise_sum_gpu.o pooling_gpu.o softmax_gpu.o 
convolution_gpu.o endif .PHONY: clean all test lint doc @@ -101,6 +101,8 @@ pooling_cpu.o: src/operator/pooling.cc pooling_gpu.o: src/operator/pooling.cu softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: src/operator/softmax.cu +convolution_cpu.o: src/operator/convolution.cc +convolution_gpu.o: src/operator/convolution.cu lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) diff --git a/include/mxnet/base.h b/include/mxnet/base.h index fe260e082148..0d3f81ea7605 100644 --- a/include/mxnet/base.h +++ b/include/mxnet/base.h @@ -6,6 +6,7 @@ #ifndef MXNET_BASE_H_ #define MXNET_BASE_H_ #include +#include #include #include @@ -45,6 +46,5 @@ typedef mshadow::default_real_t real_t; typedef mshadow::TShape TShape; /*! \brief storage container type */ typedef mshadow::TBlob TBlob; - } // namespace mxnet #endif // MXNET_BASE_H_ diff --git a/src/operator/convolution-inl.h b/src/operator/convolution-inl.h new file mode 100644 index 000000000000..e96d81023ef5 --- /dev/null +++ b/src/operator/convolution-inl.h @@ -0,0 +1,317 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file convolution-inl.h + * \brief + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_CONVOLUTION_INL_H_ +#define MXNET_OPERATOR_CONVOLUTION_INL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "./operator_common.h" + + +namespace mxnet { +namespace op { + +enum FullyConnectedOpInputs {kData, kWeight, kBias}; +enum FullyConnectedOpOutputs {kOut}; + +struct ConvolutionParam : public dmlc::Parameter { + TShape kernel; + TShape stride; + TShape pad; + int nb_filter; + int nb_group; + uint32_t nstep; + bool no_bias; + DMLC_DECLARE_PARAMETER(ConvolutionParam) { + int shape[] = {1, 1}; + DMLC_DECLARE_FIELD(kernel).describe("convolution kernel size: (y, x)"); + DMLC_DECLARE_FIELD(stride).describe("convolution stride: (y, x)") + .set_default(TShape(shape, shape + 2)); + shape[0] = shape[1] = 0; + DMLC_DECLARE_FIELD(pad).describe("pad for convolution: (y, x)") + .set_default(TShape(shape, shape + 2)); + DMLC_DECLARE_FIELD(nb_filter).describe("convolution filter(channel) number") + .set_range(1, 100000); + DMLC_DECLARE_FIELD(nb_group).set_default(1) + .describe("number of groups partition"); + DMLC_DECLARE_FIELD(nstep) + .describe("process n images once").set_default(2).set_range(1, 10000); + DMLC_DECLARE_FIELD(no_bias).set_default(false) + .describe("Whether to disable bias parameter."); + } +}; + +template +class ConvolutionOp : public Operator { + public: + explicit ConvolutionOp(ConvolutionParam p) { + this->param_ = p; + } + + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(req[kOut], kWriteTo); + size_t expected = param_.no_bias ? 
2 : 3; + CHECK_EQ(in_data.size(), expected); + CHECK_EQ(out_data.size(), 1); + // TODO(bing): check the BLAS Handle, be careful + Stream *s = ctx.get_stream(); + Tensor data = in_data[kData].get(s); + Tensor wmat = in_data[kWeight].get(s); + Tensor out = out_data[kOut].get(s); + this->InitTemp(data.shape_, out.shape_); + const index_t nbatch = data.size(0); + for (index_t i = 0; i < nbatch; i += param_.nstep) { + const index_t step = std::min(param_.nstep, nbatch - i); + temp_col_.Resize(mshadow::Shape2(shape_colunit_[0], + shape_colunit_[1] * step)); + temp_dst_.Resize(mshadow::Shape3(shape_dstunit_[0], + shape_dstunit_[1], + shape_dstunit_[2] * step)); + if (param_.pad[0] == 0 && param_.pad[1] == 0) { + temp_col_ = unpack_patch2col(data.Slice(i, i + step), + param_.kernel[0], + param_.kernel[1], + param_.stride[0]); + // TODO(bing): make mshadow support dual stride + } else { + temp_col_ = unpack_patch2col(pad(data.Slice(i, i + step), + param_.pad[0], param_.pad[1]), + param_.kernel[0], + param_.kernel[1], + param_.stride[0]); + // TODO(bing): make mshadow support dual stride + } + const index_t gstride = temp_col_.size(0) / param_.nb_group; + for (int gid = 0; gid < param_.nb_group; ++gid) { + mshadow::Tensor tmpc = temp_col_.Slice(gstride * gid, + gstride * (gid + 1)); + temp_dst_[gid] = dot(wmat[gid], tmpc); + } + out.Slice(i, i + step) = swapaxis<1, 0>(reshape(temp_dst_, + mshadow::Shape4(param_.nb_filter, + step, + out.size(2), + out.size(3)))); + } + if (!param_.no_bias) { + // add bias, broadcast bias to dim 1: channel + Tensor bias = in_data[kBias].get(s); + out += broadcast<1>(bias, out.shape_); + } + } + + virtual void Backward(const OpContext &ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad) { + using namespace mshadow; + using namespace mshadow::expr; + // TODO(bing): check the BLAS Handle, be careful + CHECK_EQ(out_grad.size(), 1); + size_t 
expected = param_.no_bias == 0 ? 3 : 2; + CHECK(in_data.size() == expected && in_grad.size() == expected); + CHECK_EQ(req.size(), expected); + // get data + Stream *s = ctx.get_stream(); + Tensor data = in_data[kData].get(s); + Tensor wmat = in_data[kWeight].get(s); + Tensor grad = out_grad[kOut].get(s); + Tensor gdata = in_grad[kData].get(s); + Tensor gwmat = in_grad[kWeight].get(s); + this->InitTemp(data.shape_, grad.shape_); + const index_t nbatch = data.size(0); + for (index_t i = 0; i < nbatch; i += param_.nstep) { + const index_t step = std::min(param_.nstep, nbatch - i); + temp_col_.Resize(mshadow::Shape2(shape_colunit_[0], \ + shape_colunit_[1] * step)); + temp_dst_.Resize(mshadow::Shape3(shape_dstunit_[0], \ + shape_dstunit_[1], shape_dstunit_[2] * step)); + temp_dst_ = reshape(swapaxis<1, 0>(grad.Slice(i, i + step)), temp_dst_.shape_); + if (param_.pad[0] == 0 && param_.pad[1] == 0) { + // TODO(bing): dual stride + temp_col_ = unpack_patch2col(data.Slice(i, i + step), \ + param_.kernel[0], \ + param_.kernel[1], \ + param_.stride[0]); + } else { + // TODO(bing): dual stride + temp_col_ = unpack_patch2col(pad(data.Slice(i, i + step), param_.pad[0], param_.pad[1]), \ + param_.kernel[0], \ + param_.kernel[1], \ + param_.stride[0]); + } + const index_t gstride = temp_col_.size(0) / param_.nb_group; + for (int gid = 0; gid < param_.nb_group; ++gid) { + mshadow::Tensor tmpc = temp_col_.Slice(gstride * gid, gstride * (gid + 1)); + gwmat[gid] += dot(temp_dst_[gid], tmpc.T()); + } + if (req[kData] == kWriteTo) { + for (int gid = 0; gid < param_.nb_group; ++gid) { + mshadow::Tensor tmpc = temp_col_.Slice(gstride * gid, gstride * (gid + 1)); + tmpc = dot(wmat[gid].T(), temp_dst_[gid]); + } + if (param_.pad[0] == 0 && param_.pad[1] == 0) { + gdata.Slice(i, i + step) = pack_col2patch(temp_col_, \ + data.Slice(i, i + step).shape_, \ + param_.kernel[0], \ + param_.kernel[1], \ + param_.stride[0]); + } else { + mshadow::Shape<4> pshape = data.Slice(i, i + step).shape_; + 
pshape[2] += 2 * param_.pad[0]; + pshape[3] += 2 * param_.pad[1]; + gdata.Slice(i, i + step) = crop(pack_col2patch(temp_col_, \ + pshape, \ + param_.kernel[0], \ + param_.kernel[1], \ + param_.stride[0]), \ + gdata[i][0].shape_); + } + } + } + if (!param_.no_bias) { + Tensor gbias = in_grad[kBias].get(s); + // Assign(gbias, req[kBias], sumall_except_dim<1>(grad); + gbias += sumall_except_dim<1>(grad); + } + } + + private: + // TODO(bing): use global resource allocator + inline void InitTemp(const mshadow::Shape<4> &ishape, + const mshadow::Shape<4> &oshape) { + const int ksize_y = param_.kernel[0]; + const int ksize_x = param_.kernel[1]; + shape_colunit_ = mshadow::Shape2(ishape[1] * ksize_y * ksize_x, + oshape[2] * oshape[3]); + shape_dstunit_ = mshadow::Shape3(param_.nb_group, + param_.nb_filter / param_.nb_group, + oshape[2] * oshape[3]); + int nop = (ishape[0] + param_.nstep - 1) / param_.nstep; + param_.nstep = (ishape[0] + nop - 1) / nop; + temp_col_.Resize(mshadow::Shape2(shape_colunit_[0], + shape_colunit_[1] * param_.nstep)); + temp_dst_.Resize(mshadow::Shape3(shape_dstunit_[0], + shape_dstunit_[1], + shape_dstunit_[2] * param_.nstep)); + } + + ConvolutionParam param_; + // TODO(bing): use global resource allocator + mshadow::TensorContainer temp_col_; + mshadow::TensorContainer temp_dst_; + mshadow::Shape<2> shape_colunit_; + mshadow::Shape<3> shape_dstunit_; +}; // class ConvolutionOp + +template +Operator* CreateOp(ConvolutionParam param); + +#if DMLC_USE_CXX11 +class ConvolutionProp : public OperatorProperty { + public: + virtual std::vector ListArguments() const { + if (!param_.no_bias) { + return {"data", "weight", "bias"}; + } else { + return {"data", "weight"}; + } + } + + virtual void Init(const std::vector >& kwargs) { + param_.Init(kwargs); + } + + virtual bool InferShape(std::vector *in_shape, + std::vector *out_shape) const { + using namespace mshadow; + if (!param_.no_bias) { + CHECK_EQ(in_shape->size(), 3) << "Input:[data, weight, bias]"; + 
} else { + CHECK_EQ(in_shape->size(), 2) << "Input:[data, weight]"; + } + const TShape &dshape = (*in_shape)[kData]; + if (dshape.ndim() == 0) return false; + CHECK_EQ(dshape.ndim(), 4) \ + << "Input data should be 4D in batch-nb_filter-y-x"; + SHAPE_ASSIGN_CHECK(*in_shape, \ + kWeight, \ + Shape3(param_.nb_group, \ + param_.nb_filter / param_.nb_group, \ + dshape[1] / param_.nb_group * param_.kernel[0] * param_.kernel[1])); + if (!param_.no_bias) { + SHAPE_ASSIGN_CHECK(*in_shape, kBias, Shape1(param_.nb_filter)); + } + out_shape->clear(); + out_shape->push_back(dshape); + const index_t ksize_y = static_cast(param_.kernel[0]); + const index_t ksize_x = static_cast(param_.kernel[1]); + const index_t kstride = static_cast(param_.stride[0]); + // TODO(bing) : support dual stride + CHECK_EQ(dshape[1] % param_.nb_group, 0) \ + << "input nb_filter must divide group size"; + CHECK_EQ(param_.nb_filter % param_.nb_group, 0) \ + << "output nb_filter must divide group size"; + CHECK_GE(param_.kernel.Size(), 0) \ + << "incorrect kernel size: " << param_.kernel; + CHECK_GE(param_.stride.Size(), 0) \ + << "incorrect stride size: " << param_.stride; + CHECK(ksize_x <= dshape[3] && ksize_y <= dshape[2]) + << "kernel size exceed input"; + (*out_shape)[kOut][1] = param_.nb_filter; + (*out_shape)[kOut][2] = (dshape[2] + 2 * param_.pad[0] - ksize_y) / kstride + 1; + (*out_shape)[kOut][3] = (dshape[3] + 2 * param_.pad[1] - ksize_x) / kstride + 1; + return true; + } + + virtual OperatorProperty* Copy() const { + auto ptr = new ConvolutionProp(); + ptr->param_ = param_; + return ptr; + } + + virtual std::string TypeString() const { + return "Convolution"; + } + + virtual std::vector DeclareBackwardDependency( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const { + return {out_grad[kOut], in_data[kData], in_data[kWeight]}; + } + + virtual std::vector > BackwardInplaceOption( + const std::vector &out_grad, + const std::vector &in_data, + const 
std::vector &out_data, + const std::vector &in_grad) const { + return {{in_data[kData], in_grad[kData]}}; + } + + Operator* CreateOperator(Context ctx) const; + + private: + ConvolutionParam param_; +}; // class ConvolutionProp +#endif // DMLC_USE_CXX11 +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_CONVOLUTION_INL_H_ diff --git a/src/operator/convolution.cc b/src/operator/convolution.cc new file mode 100644 index 000000000000..e78533d0a985 --- /dev/null +++ b/src/operator/convolution.cc @@ -0,0 +1,32 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file convolution.cc + * \brief + * \author Bing Xu +*/ + +#include "./convolution-inl.h" + +namespace mxnet { +namespace op { +template<> +Operator* CreateOp(ConvolutionParam param) { + return new ConvolutionOp(param); +} + +Operator* ConvolutionProp::CreateOperator(Context ctx) const { + DO_BIND_DISPATCH(CreateOp, param_); +} + +DMLC_REGISTER_PARAMETER(ConvolutionParam); + +MXNET_REGISTER_OP_PROPERTY(Convolution, ConvolutionProp) +.describe("Apply convolution to input then add a bias.") +.add_argument("data", "Symbol", "Input data to the ConvolutionOp.") +.add_argument("weight", "Symbol", "Weight matrix.") +.add_argument("bias", "Symbol", "Bias parameter.") +.add_arguments(ConvolutionParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet + diff --git a/src/operator/convolution.cu b/src/operator/convolution.cu new file mode 100644 index 000000000000..4f0a3ce78b45 --- /dev/null +++ b/src/operator/convolution.cu @@ -0,0 +1,19 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file convolution.cu + * \brief + * \author Bing Xu +*/ + +#include "./convolution-inl.h" + +namespace mxnet { +namespace op { +template<> +Operator* CreateOp(ConvolutionParam param) { + return new ConvolutionOp(param); +} + +} // namespace op +} // namespace mxnet + diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h index 8f4efa6f6b3f..bad16894b4a6 100644 --- a/src/operator/fully_connected-inl.h +++ b/src/operator/fully_connected-inl.h @@ -135,8 +135,7 @@ class FullyConnectedProp : public OperatorProperty { } else { CHECK_EQ(in_shape->size(), 2) << "Input:[data, weight]"; } - CHECK_GT(param_.num_hidden, 0); - const TShape &dshape = (*in_shape)[0]; + const TShape &dshape = (*in_shape)[kData]; // require data to be known if (dshape.ndim() == 0) return false; diff --git a/src/operator/pooling-inl.h b/src/operator/pooling-inl.h index 6ccae18abf22..359d070cdf11 100644 --- a/src/operator/pooling-inl.h +++ b/src/operator/pooling-inl.h @@ -51,7 +51,6 @@ class PoolingOp : public Operator { public: explicit PoolingOp(PoolingParam p) { this->param_ = p; - std::cout << param_.kernel << std::endl; } virtual void Forward(const OpContext &ctx, diff --git a/src/operator/static_operator/convolution_op-inl.h b/src/operator/static_operator/convolution_op-inl.h deleted file mode 100644 index fc9b3369f2a6..000000000000 --- a/src/operator/static_operator/convolution_op-inl.h +++ /dev/null @@ -1,270 +0,0 @@ -/*! 
- * Copyright (c) 2015 by Contributors - * \file convolution_op-inl.h - * \brief convolution op - * \author Bing Xu -*/ -#ifndef MXNET_OPERATOR_STATIC_OPERATOR_CONVOLUTION_OP_INL_H_ -#define MXNET_OPERATOR_STATIC_OPERATOR_CONVOLUTION_OP_INL_H_ - -#include -#include -#include -#include "./static_operator_common.h" -#include "./param.h" - -namespace mxnet { -namespace op { -template -class ConvolutionOp : public StaticOperator { - public: - virtual std::vector DescribeArgs() const { - ArgType ret[] = {kDataArg, kWeightArg, kBiasArg}; - if (param_.no_bias == 0) { - return std::vector(ret, ret + 3); - } else { - return std::vector(ret, ret + 2); - } - } - virtual void SetParam(const char *name, const char *val) { - param_.SetParam(name, val); - } - virtual void InferShape(std::vector *in_shape, - std::vector *out_shape) { - using namespace mshadow; - if (param_.no_bias == 0) { - CHECK_EQ(in_shape->size(), 3) << "Input:[data, weight, bias]"; - } else { - CHECK_EQ(in_shape->size(), 2) << "Input:[data, weight]"; - } - CHECK_GT(param_.num_channel, 0); - const TShape &dshape = (*in_shape)[0]; - CHECK_EQ(dshape.ndim(), 4) << \ - "Input data should be 4D in batch-channel-y-x"; - ShapeAssignCheck((*in_shape)[1], Shape4(param_.num_channel, - dshape[1], - param_.kernel_y, - param_.kernel_x)); - if (param_.no_bias == 0) { - ShapeAssignCheck((*in_shape)[2], Shape1(param_.num_channel)); - } - out_shape->clear(); - out_shape->push_back(dshape); - const index_t ksize_y = static_cast(param_.kernel_y); - const index_t ksize_x = static_cast(param_.kernel_x); - const index_t kstride = static_cast(param_.stride_y); - // todo : support dual stride - mshadow::Shape<4> ishape = in_shape->at(0).get<4>(); - CHECK_EQ(ishape[1] % param_.num_group, 0) << \ - "input channels must divide group size"; - CHECK_EQ(param_.num_channel % param_.num_group, 0) << \ - "output channels must divide group size"; - CHECK(ksize_y > 0 && ksize_x > 0) << \ - "incorrect kernel size"; - CHECK(ksize_x <= ishape[3] && 
ksize_y <= ishape[2]) << \ - "kernel size exceed input"; - (*out_shape)[0][1] = param_.num_channel; - (*out_shape)[0][2] = (ishape[2] + 2 * param_.pad_y - ksize_y) / kstride + 1; - (*out_shape)[0][3] = (ishape[3] + 2 * param_.pad_x - ksize_x) / kstride + 1; - } - virtual void Forward(Option opt, - RunContext ctx, - const std::vector &in_data, - const std::vector &out_data) { - using namespace mshadow; - using namespace mshadow::expr; - // TODO(bing): check the BLAS Handle, be careful - // maybe need blas handle from context - size_t expected = param_.no_bias == 0 ? 3 : 2; - CHECK_EQ(in_data.size(), expected); - CHECK_EQ(out_data.size(), 1); - // weight shape with group - TShape ws; - ShapeAssignCheck(ws, Shape3(param_.num_group, - param_.num_channel / param_.num_group, - param_.num_input_channel / param_.num_group * - param_.kernel_y * param_.kernel_x)); - Stream *s = static_cast *>(ctx.stream); - Tensor data = in_data[0].get(s); - Tensor wmat = in_data[1].get_with_shape(ws, s); - Tensor out = out_data[0].get(s); - this->InitTemp(data.shape_, out.shape_); - const index_t nbatch = data.size(0); - for (index_t i = 0; i < nbatch; i += nstep_) { - // resize, incase last batch is smaller - const index_t step = std::min(nstep_, nbatch - i); - temp_col_.Resize(mshadow::Shape2(shape_colunit_[0], - shape_colunit_[1] * step)); - temp_dst_.Resize(mshadow::Shape3(shape_dstunit_[0], - shape_dstunit_[1], - shape_dstunit_[2] * step)); - - if (param_.pad_x == 0 && param_.pad_y == 0) { - temp_col_ = unpack_patch2col(data.Slice(i, i+step), - param_.kernel_y, - param_.kernel_x, - param_.stride_y); - // TODO(bing): make mshadow support dual stride - } else { - temp_col_ = unpack_patch2col(pad(data.Slice(i, i+step), - param_.pad_y, param_.pad_x), - param_.kernel_y, - param_.kernel_x, - param_.stride_y); - // TODO(bing): make mshadow support dual stride - } - const index_t gstride = temp_col_.size(0) / param_.num_group; - for (int gid = 0; gid < param_.num_group; ++gid) { - 
mshadow::Tensor tmpc = temp_col_.Slice(gstride * gid, - gstride * (gid + 1)); - temp_dst_[gid] = dot(wmat[gid], tmpc); - } - out.Slice(i, i + step) = swapaxis<1, 0>(reshape(temp_dst_, - mshadow::Shape4(param_.num_channel, - step, - out.size(2), - out.size(3)))); - } - if (param_.no_bias == 0) { - // add bias, broadcast bias to dim 1: channel - Tensor bias = in_data[2].get(s); - out += broadcast<1>(bias, out.shape_); - } - } - virtual void Backward(RunContext ctx, - const std::vector &grad_next, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &out_grad, - const std::vector &req) { - using namespace mshadow; - using namespace mshadow::expr; - // TODO(bing): check the BLAS Handle, be careful - // maybe need blas handle from context - CHECK_EQ(grad_next.size(), 1); - size_t expected = param_.no_bias == 0 ? 3 : 2; - CHECK(in_data.size() == expected && out_grad.size() == expected); - CHECK_EQ(req.size(), expected); - TShape ws; - ShapeAssignCheck(ws, Shape3(param_.num_group, - param_.num_channel / param_.num_group, - param_.num_input_channel / param_.num_group * - param_.kernel_y * param_.kernel_x)); - Stream *s = static_cast *>(ctx.stream); - Tensor data = in_data[0].get(s); - Tensor wmat = in_data[1].get_with_shape(ws, s); - Tensor grad = grad_next[0].get(s); - Tensor gdata = out_grad[0].get(s); - Tensor gwmat = out_grad[0].get_with_shape(ws, s); - this->InitTemp(data.shape_, grad.shape_); - const index_t nbatch = data.size(0); - for (index_t i = 0; i < nbatch; i += nstep_) { - const index_t step = std::min(nstep_, nbatch-i); - temp_col_.Resize(mshadow::Shape2(shape_colunit_[0], - shape_colunit_[1] * step)); - temp_dst_.Resize(mshadow::Shape3(shape_dstunit_[0], - shape_dstunit_[1], - shape_dstunit_[2] * step)); - temp_dst_ = reshape(swapaxis<1, 0>(grad.Slice(i, i + step)), - temp_dst_.shape_); - if (param_.pad_x == 0 && param_.pad_y == 0) { - temp_col_ = unpack_patch2col(data.Slice(i, i + step), - param_.kernel_y, - param_.kernel_x, - 
param_.stride_y); - // TODO(bing): dual stride - } else { - temp_col_ = unpack_patch2col(pad(data.Slice(i, i + step), - param_.pad_y, param_.pad_x), - param_.kernel_y, - param_.kernel_x, - param_.stride_y); - // TODO(bing): dual stride - } - const index_t gstride = temp_col_.size(0) / param_.num_group; - for (int gid = 0; gid < param_.num_group; ++gid) { - mshadow::Tensor tmpc = temp_col_.Slice(gstride * gid, - gstride * (gid + 1)); - gwmat[gid] += dot(temp_dst_[gid], tmpc.T()); - } - if (req[0] != kNullOp) { - for (int gid = 0; gid < param_.num_group; ++gid) { - mshadow::Tensor tmpc = temp_col_.Slice(gstride * gid, - gstride * (gid+1)); - tmpc = dot(wmat[gid].T(), temp_dst_[gid]); - } - - if (param_.pad_x == 0 && param_.pad_y == 0) { - Tensor gdata_tmp = gdata.Slice(i, i + step); - Assign(gdata_tmp, - req[0], - pack_col2patch(temp_col_, - data.Slice(i, i + step).shape_, - param_.kernel_y, - param_.kernel_x, - param_.stride_y)); - // TODO(bing): dual stride - } else { - mshadow::Shape<4> pshape = data.Slice(i, i + step).shape_; - pshape[2] += 2 * param_.pad_y; pshape[3] += 2 * param_.pad_x; - Tensor gdata_tmp = gdata.Slice(i, i + step); - Assign(gdata_tmp, - req[0], - crop(pack_col2patch(temp_col_, - pshape, - param_.kernel_y, - param_.kernel_x, - param_.stride_y), - data[i][0].shape_)); - // TODO(bing): dual stride - } - } - } - if (param_.no_bias == 0) { - Tensor gbias = out_grad[2].get(s); - Assign(gbias, req[2], sumall_except_dim<1>(grad)); - } - } - - private: - /*! 
\brief Alloc temp space for pack/unpack */ - inline void InitTemp(mshadow::Shape<4> ishape, mshadow::Shape<4> oshape) { - const index_t ksize_y = static_cast(param_.kernel_y); - const index_t ksize_x = static_cast(param_.kernel_x); - // this is the unit size of each temp structure - shape_colunit_ = mshadow::Shape2(ishape[1] * ksize_y * ksize_x, - oshape[2] * oshape[3]); - shape_dstunit_ = mshadow::Shape3(param_.num_group, - param_.num_channel/param_.num_group, - oshape[2] * oshape[3]); - nstep_ = std::max(std::min((index_t)(param_.temp_col_max / - shape_colunit_.Size()), - ishape[0]), 1U); - // make nstep more balanced, - // nstep will use exactly same number of operations to finish, - index_t nop = (ishape[0]+nstep_-1) / nstep_; - nstep_ = (ishape[0] + nop - 1)/ nop; - CHECK_GT(nstep_, 0); - // helper structure - temp_col_.Resize(mshadow::Shape2(shape_colunit_[0], - shape_colunit_[1] * nstep_)); - temp_dst_.Resize(mshadow::Shape3(shape_dstunit_[0], - shape_dstunit_[1], - shape_dstunit_[2] * nstep_)); - } - /*! \brief parameters that potentially be useful */ - Param param_; - /*! \brief temporary data structure to store patches */ - mshadow::TensorContainer temp_col_; - /*! \brief temporary data structure to store results */ - mshadow::TensorContainer temp_dst_; - /*! \brief shape of column unit */ - mshadow::Shape<2> shape_colunit_; - /*! \brief shape of dst unit */ - mshadow::Shape<3> shape_dstunit_; - /*! \brief how many number of batches to be unpacked together */ - mshadow::index_t nstep_; -}; // class ConvolutionOp -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_STATIC_OPERATOR_CONVOLUTION_OP_INL_H_ diff --git a/src/operator/static_operator/dropout_op-inl.h b/src/operator/static_operator/dropout_op-inl.h deleted file mode 100644 index 23c9f6aab457..000000000000 --- a/src/operator/static_operator/dropout_op-inl.h +++ /dev/null @@ -1,94 +0,0 @@ -/*! 
- * Copyright (c) 2015 by Contributors - * \file dropout_op-inl.h - * \brief dropout operator - * \author Bing Xu -*/ -#ifndef MXNET_OPERATOR_STATIC_OPERATOR_DROPOUT_OP_INL_H_ -#define MXNET_OPERATOR_STATIC_OPERATOR_DROPOUT_OP_INL_H_ - -#include -#include -#include "./mshadow_op.h" - -namespace mxnet { -namespace op { -template -class DropoutOp : public StaticOperator { - public: - explicit DropoutOp(mshadow::Random *prnd) - : prnd_(prnd), mask_used_(false) {} - virtual int DescribeProperty() const { - return kForwardRequireRnd | kContainInteralState; - } - virtual void SetParam(const char *name, const char* val) { - if (!strcmp("threshold", name)) pkeep_ = \ - static_cast(1.0f - atof(val)); - CHECK_GT(pkeep_, 0) << "invalid dropout threshold"; - } - virtual void InferShape(std::vector *in_shape, - std::vector *out_shape) { - CHECK_EQ(in_shape->size(), 1) << "Input: [data]"; - out_shape->clear(); - out_shape->push_back((*in_shape)[0]); - } - virtual void Forward(Option opt, - RunContext ctx, - const std::vector &in_data, - const std::vector &out_data) { - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_data.size(), 1); - using namespace mshadow; - using namespace mshadow::expr; - Stream *s = static_cast *>(ctx.stream); - Tensor data = in_data[0].get(s); - Tensor out = out_data[0].get(s); - if (mask_.shape_!= out.shape_) { - mask_.Resize(out.shape_); - } - if (opt.is_train && pkeep_ != 1.0f) { - mask_ = F(prnd_->uniform(mask_.shape_), pkeep_) * \ - (1.0f / pkeep_); - out = data * mask_; - mask_used_ = true; - } else { - out = data; - mask_used_ = false; - } - } - virtual void Backward(RunContext ctx, - const std::vector &grad_next, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &out_grad, - const std::vector &req) { - CHECK_EQ(grad_next.size(), 1); - CHECK_EQ(out_grad.size(), 1); - CHECK_EQ(req.size(), 1); - using namespace mshadow; - using namespace mshadow::expr; - Stream *s = static_cast *>(ctx.stream); - Tensor grad = 
grad_next[0].get(s); - Tensor out = out_grad[0].get(s); - // mask won't be initialized in when - if (mask_used_) { - Assign(out, req[0], grad * mask_); - } else { - // avoid directly assign tensor to tensor - Assign(out, req[0], F(grad)); - } - } - - private: - /*! \brief random number generator */ - mshadow::Random *prnd_; - /*! \brief random mask */ - mshadow::TensorContainer mask_; - /*! \brief probability to keep */ - real_t pkeep_; - /*! \brief record whether mask is used in last forward */ - bool mask_used_; -}; // class DropoutOp -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_STATIC_OPERATOR_DROPOUT_OP_INL_H_ diff --git a/src/operator/static_operator/reshape_op-inl.h b/src/operator/static_operator/reshape_op-inl.h deleted file mode 100644 index ba966a62a29f..000000000000 --- a/src/operator/static_operator/reshape_op-inl.h +++ /dev/null @@ -1,76 +0,0 @@ -/*! - * Copyright (c) 2015 by Contributors - * \file reshape_op-inl.h - * \brief - * \author Bing Xu -*/ -#ifndef MXNET_OPERATOR_STATIC_OPERATOR_RESHAPE_OP_INL_H_ -#define MXNET_OPERATOR_STATIC_OPERATOR_RESHAPE_OP_INL_H_ - -#include -#include - -namespace mxnet { -namespace op { -template -class ReshapeOp : public StaticOperator { - public: - virtual void SetParam(const char *name, const char *val) { - if (!strcmp(name, "out_ch")) oshape_[1] = atoi(val); - if (!strcmp(name, "out_y")) oshape_[2] = atoi(val); - if (!strcmp(name, "out_x")) oshape_[3] = atoi(val); - } - virtual void InferShape(std::vector *in_shape, - std::vector *out_shape) { - CHECK_EQ(in_shape->size(), 1); - ishape_ = (*in_shape)[0].get<4>(); - oshape_[0] = ishape_[0]; - if (flatten) { - oshape_[1] = 1; - oshape_[2] = 1; - oshape_[3] = ishape_[1] * ishape_[2] * ishape_[3]; - } - CHECK_EQ(oshape_.Size(), ishape_.Size()) << "Incorrect new shape"; - TShape ts; - ts = oshape_; - out_shape->clear(); - out_shape->push_back(ts); - } - virtual void Forward(Option opt, - RunContext ctx, - const std::vector &in_data, - const 
std::vector &out_data) { - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_data.size(), 1); - using namespace mshadow; - using namespace mshadow::expr; - Stream *s = static_cast *>(ctx.stream); - Tensor data = in_data[0].get(s); - Tensor out = out_data[0].get(s); - out = reshape(data, oshape_); - } - virtual void Backward(RunContext ctx, - const std::vector &grad_next, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &out_grad, - const std::vector &req) { - CHECK_EQ(grad_next.size(), 1); - CHECK_EQ(out_grad.size(), 1); - CHECK_EQ(req.size(), 1); - using namespace mshadow; - using namespace mshadow::expr; - Stream *s = static_cast *>(ctx.stream); - Tensor grad = grad_next[0].get(s); - Tensor out = out_grad[0].get(s); - Assign(out, req[0], reshape(grad, ishape_)); - } - - private: - mshadow::Shape<4> oshape_; - mshadow::Shape<4> ishape_; -}; // class Operator - -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_STATIC_OPERATOR_RESHAPE_OP_INL_H_ From 4bd535f300d19ae930d2709b150dcc65b3b95fd1 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Mon, 24 Aug 2015 14:52:28 -0600 Subject: [PATCH 2/3] remove 2d requirement in fullc --- src/operator/fully_connected-inl.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h index bad16894b4a6..532113e56c6d 100644 --- a/src/operator/fully_connected-inl.h +++ b/src/operator/fully_connected-inl.h @@ -139,16 +139,9 @@ class FullyConnectedProp : public OperatorProperty { // require data to be known if (dshape.ndim() == 0) return false; - index_t num_input; - if (dshape.ndim() == 4) { - // TODO(bing) consider deprecate 4D input - CHECK(dshape[1] == 1 && dshape[2] == 1); - num_input = dshape[3]; - } else { - CHECK_EQ(dshape.ndim(), 2) - << "FullyConnecteded: Input data should be 2D in (batch, num_hidden)"; - num_input = dshape[1]; - } + index_t num_input = 0; + mshadow::Shape<2> ishape = 
dshape.FlatTo2D(); + num_input = ishape[1]; SHAPE_ASSIGN_CHECK(*in_shape, kWeight, Shape2(param_.num_hidden, num_input)); if (!param_.no_bias) { SHAPE_ASSIGN_CHECK(*in_shape, kBias, Shape1(param_.num_hidden)); From 1045c18162b8e37d87adf40a789838d99bb7dcb6 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Mon, 24 Aug 2015 18:25:45 -0600 Subject: [PATCH 3/3] conv is able to work --- Makefile | 6 +- python/test_mnist.py | 49 +++++++------- src/operator/flatten-inl.h | 101 +++++++++++++++++++++++++++++ src/operator/flatten.cc | 27 ++++++++ src/operator/flatten.cu | 19 ++++++ src/operator/fully_connected-inl.h | 1 + 6 files changed, 174 insertions(+), 29 deletions(-) create mode 100644 src/operator/flatten-inl.h create mode 100644 src/operator/flatten.cc create mode 100644 src/operator/flatten.cu diff --git a/Makefile b/Makefile index 6a01e38d4d06..2f2b14bee0a7 100644 --- a/Makefile +++ b/Makefile @@ -64,14 +64,14 @@ endif #BIN = test/test_threaded_engine test/api_registry_test OBJ = narray_function_cpu.o # add threaded engine after it is done -OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o +OBJCXX11 = flatten_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a LIB_DEP = $(DMLC_CORE)/libdmlc.a ifeq ($(USE_CUDA), 1) - CUOBJ += narray_function_gpu.o fully_connected_gpu.o activation_gpu.o elementwise_sum_gpu.o pooling_gpu.o softmax_gpu.o convolution_gpu.o + CUOBJ += flatten_gpu.o narray_function_gpu.o fully_connected_gpu.o activation_gpu.o elementwise_sum_gpu.o pooling_gpu.o softmax_gpu.o convolution_gpu.o endif .PHONY: clean all test lint doc @@ -103,6 +103,8 @@ softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: 
src/operator/softmax.cu convolution_cpu.o: src/operator/convolution.cc convolution_gpu.o: src/operator/convolution.cu +flatten_cpu.o: src/operator/flatten.cc +flatten_gpu.o: src/operator/flatten.cu lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) diff --git a/python/test_mnist.py b/python/test_mnist.py index 3a3ee85a8d3f..63153cbe7f19 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -3,28 +3,14 @@ import numpy as np import os, cPickle, gzip -def Softmax(x): - batch, nidden = x.shape - maxes = np.max(x, axis=1) - x -= maxes.reshape(batch, 1) - x = np.exp(x) - norm = np.sum(x, axis=1) - prob = x / norm.reshape((batch, 1)) - return prob - def CalAcc(out, label): pred = np.argmax(out, axis=1) return np.sum(pred == label) * 1.0 / out.shape[0] -def SetGradient(out_grad, label): - assert(out_grad.shape[0] == label.shape[0]) - for i in xrange(label.shape[0]): - k = label[i] - out_grad[i][k] -= 1.0 # load data class MNISTIter(object): - def __init__(self, which_set, batch_size=100): + def __init__(self, which_set, batch_size=100, flatten=True): if not os.path.exists('mnist.pkl.gz'): os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz") f = gzip.open('mnist.pkl.gz', 'rb') @@ -39,6 +25,7 @@ def __init__(self, which_set, batch_size=100): else: self.data = test_set[0] self.data = np.asarray(test_set[1]) + self.flatten = flatten self.batch_size = batch_size self.nbatch = self.data.shape[0] / batch_size assert(self.data.shape[0] % batch_size == 0) # I am lazy @@ -57,25 +44,34 @@ def Get(self): raise Exception("Iterator is at end") start = self.now_idx * self.batch_size end = (self.now_idx + 1) * self.batch_size - return (self.data[start:end, :], self.label[start:end]) + if self.flatten: + return (self.data[start:end, :], self.label[start:end]) + else: + return (self.data[start:end, :].reshape(batch_size, 1, 28, 28), + self.label[start:end]) # symbol net batch_size = 100 data = mx.symbol.Variable('data') -fc1 = 
mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=160) +fc1 = mx.symbol.Convolution(data = data, name='conv1', nb_filter=32, kernel=(7,7), stride=(2,2), nstep=10, no_bias=1) act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu") -fc2 = mx.symbol.FullyConnected(data = act1, name='fc2', num_hidden=10) -args_list = fc2.list_arguments() +mp = mx.symbol.Pooling(data = act1, name = 'mp', kernel=(2,2), stride=(2,2), pool_type='avg') +fl = mx.symbol.Flatten(data = mp, name="flatten") +fc2 = mx.symbol.FullyConnected(data = fl, name='fc2', num_hidden=10) +softmax = mx.symbol.Softmax(data = fc2, name = 'sm') +args_list = softmax.list_arguments() # infer shape -data_shape = (batch_size, 784) -arg_shapes, out_shapes = fc2.infer_shape(data=data_shape) +#data_shape = (batch_size, 784) + +data_shape = (batch_size, 1, 28, 28) +arg_shapes, out_shapes = softmax.infer_shape(data=data_shape) arg_narrays = [mx.narray.create(shape) for shape in arg_shapes] grad_narrays = [mx.narray.create(shape) for shape in arg_shapes] mom_narrays = [mx.narray.create(shape) for shape in arg_shapes] inputs = dict(zip(args_list, arg_narrays)) - +print zip(args_list, arg_shapes) np.random.seed(0) # set random weight for name, narray in inputs.items(): @@ -87,7 +83,7 @@ def Get(self): req = ['write_to' for i in range(len(arg_narrays))] # bind executer # TODO(bing): think of a better bind interface -executor = fc2.bind(mx.Context('cpu'), arg_narrays, grad_narrays, req) +executor = softmax.bind(mx.Context('cpu'), arg_narrays, grad_narrays, req) # update out_narray = executor.heads()[0] @@ -104,8 +100,8 @@ def Update(mom, grad, weight): block = zip(mom_narrays, grad_narrays, arg_narrays) -train = MNISTIter("train", batch_size) -valid = MNISTIter("valid", batch_size) +train = MNISTIter("train", batch_size, False) +valid = MNISTIter("valid", batch_size, False) for i in xrange(epoch): # train @@ -115,11 +111,10 @@ def Update(mom, grad, weight): while train.Next(): data, label = 
train.Get() inputs["data"].numpy[:] = data + inputs["sm_label"].numpy[:] = label executor.forward() - out_narray.numpy[:] = Softmax(out_narray.numpy) train_acc += CalAcc(out_narray.numpy, label) grad_narray.numpy[:] = out_narray.numpy - SetGradient(grad_narray.numpy, label) executor.backward([grad_narray]) for mom, grad, weight in block: diff --git a/src/operator/flatten-inl.h b/src/operator/flatten-inl.h new file mode 100644 index 000000000000..da4110296909 --- /dev/null +++ b/src/operator/flatten-inl.h @@ -0,0 +1,101 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file flatten-inl.h + * \brief + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_FLATTEN_INL_H_ +#define MXNET_OPERATOR_FLATTEN_INL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "./operator_common.h" + +namespace mxnet { +namespace op { + +enum FlattenOpInputs {kData}; +enum FlattenOpOutputs {kOut}; + +template +class FlattenOp : public Operator { + public: + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(in_data.size(), 1); + CHECK_EQ(req.size(), 1); + CHECK_EQ(out_data.size(), 1); + Stream *s = ctx.get_stream(); + Tensor data = in_data[kData].get(s); + Tensor out = out_data[kOut].get(s); + Assign(out, req[kOut], reshape(data, out.shape_)); + } + + virtual void Backward(const OpContext &ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad) { + using namespace mshadow; + using namespace mshadow::expr; + Stream *s = ctx.get_stream(); + Tensor grad_out = out_grad[kData].get(s); + Tensor grad_in = in_grad[kOut].get(s); + Assign(grad_in, req[kData], reshape(grad_out, grad_in.shape_)); + } +}; // class FlattenOp + +template +Operator* CreateOp(); + +#if DMLC_USE_CXX11 +class FlattenProp : public 
OperatorProperty { + public: + FlattenProp() {} + + virtual void Init(const std::vector >& kwargs) {} + + virtual std::string TypeString() const { + return "Flatten"; + } + + virtual bool InferShape(std::vector *in_shape, + std::vector *out_shape) const { + CHECK_EQ(in_shape->size(), 1) << "Input: [data]"; + const TShape &dshape = in_shape->at(kData); + if (dshape.ndim() == 0) return false; + out_shape->clear(); + out_shape->push_back(mshadow::Shape4(dshape[0], 1, 1, dshape[1] * dshape[2] * dshape[3])); + return true; + } + + virtual OperatorProperty* Copy() const { + auto ptr = new FlattenProp(); + return ptr; + } + + virtual std::vector DeclareBackwardDependency( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const { + return {out_grad[kOut]}; + } + + Operator* CreateOperator(Context ctx) const; +}; // class FlattenProp +#endif // DMLC_USE_CXX11 + +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_FLATTEN_INL_H_ diff --git a/src/operator/flatten.cc b/src/operator/flatten.cc new file mode 100644 index 000000000000..db156def8ca2 --- /dev/null +++ b/src/operator/flatten.cc @@ -0,0 +1,27 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file flatten.cc + * \brief + * \author Bing Xu +*/ + +#include "./flatten-inl.h" + + +namespace mxnet { +namespace op { +template<> +Operator *CreateOp() { + return new FlattenOp(); +} + +Operator* FlattenProp::CreateOperator(Context ctx) const { + DO_BIND_DISPATCH(CreateOp); +} + +MXNET_REGISTER_OP_PROPERTY(Flatten, FlattenProp) +.add_argument("data", "Symbol", "Input data to flatten.") +.describe("Flatten 4D input to form batch-1-1-feature format"); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/flatten.cu b/src/operator/flatten.cu new file mode 100644 index 000000000000..5bf9d47c5691 --- /dev/null +++ b/src/operator/flatten.cu @@ -0,0 +1,19 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file flatten.cu + * \brief + * \author Bing Xu +*/ + +#include "./flatten-inl.h" + + +namespace mxnet { +namespace op { +template<> + Operator *CreateOp() { + return new FlattenOp(); +} + +} // namespace op +} // namespace mxnet diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h index 532113e56c6d..ac5fd992cd82 100644 --- a/src/operator/fully_connected-inl.h +++ b/src/operator/fully_connected-inl.h @@ -60,6 +60,7 @@ class FullyConnectedOp : public Operator { CHECK_EQ(out_data.size(), 1); // TODO(bing): check the BLAS Handle, be careful // maybe need blas handle from context + // TODO(bing): judge shape to remove flatten op Stream *s = ctx.get_stream(); Tensor data = in_data[kData].FlatTo2D(s); Tensor wmat = in_data[kWeight].get(s);