Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
fix 2D case
Browse files Browse the repository at this point in the history
  • Loading branch information
antinucleon committed Sep 16, 2015
1 parent fbc713a commit 58d1553
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 52 deletions.
4 changes: 2 additions & 2 deletions example/mnist/mlp_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os, gzip
import pickle as pickle
import sys
sys.path.append("../../tests/python")
sys.path.append("../../tests/python/common/")
import get_data


Expand Down Expand Up @@ -48,7 +48,7 @@ def CalAcc(out, label):

# bind executor
# TODO(bing): think of a better bind interface
executor = softmax.bind(mx.Context('gpu'), arg_narrays, grad_narrays)
executor = softmax.bind(mx.gpu(), arg_narrays, grad_narrays)
# create gradient NArray
out_narray = executor.outputs[0]
grad_narray = mx.nd.zeros(out_narray.shape, ctx=mx.gpu())
Expand Down
15 changes: 6 additions & 9 deletions src/operator/convolution-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,9 @@ class ConvolutionOp : public Operator {
CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
<< "Must init CuBLAS handle in stream";
#endif
this->InitTemp(ctx, data.shape_, out.shape_);
const index_t nbatch = data.size(0);
Tensor<xpu, 1> workspace = ctx.requested[kTempSpace].get_space<xpu>(
Shape1(param_.workspace), s);
Shape1(this->InitTemp(data.shape_, out.shape_)), s);
for (index_t i = 0; i < nbatch; i += nstep_) {
const index_t step = std::min(nstep_, nbatch - i);
Tensor<xpu, 2> temp_col = Tensor<xpu, 2>(workspace.dptr_,
Expand Down Expand Up @@ -161,10 +160,9 @@ class ConvolutionOp : public Operator {
CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
<< "Must init CuBLAS handle in stream";
#endif
this->InitTemp(ctx, data.shape_, grad.shape_);
const index_t nbatch = data.size(0);
Tensor<xpu, 1> workspace = ctx.requested[kTempSpace].get_space<xpu>(
Shape1(param_.workspace), s);
Shape1(this->InitTemp(data.shape_, grad.shape_)), s);
for (index_t i = 0; i < nbatch; i += nstep_) {
const index_t step = std::min(nstep_, nbatch - i);
Tensor<xpu, 2> temp_col = Tensor<xpu, 2>(workspace.dptr_,
Expand Down Expand Up @@ -229,18 +227,16 @@ class ConvolutionOp : public Operator {
}

private:
// TODO(bing): use global resource allocator
inline void InitTemp(const OpContext &ctx,
const mshadow::Shape<4> &ishape,
const mshadow::Shape<4> &oshape) {
inline index_t InitTemp(const mshadow::Shape<4> &ishape,
const mshadow::Shape<4> &oshape) {
const int ksize_y = param_.kernel[0];
const int ksize_x = param_.kernel[1];
shape_colunit_ = mshadow::Shape2(ishape[1] * ksize_y * ksize_x,
oshape[2] * oshape[3]);
shape_dstunit_ = mshadow::Shape3(param_.num_group,
param_.num_filter / param_.num_group,
oshape[2] * oshape[3]);
const uint64_t workspace_size = param_.workspace * sizeof(real_t);
const uint64_t workspace_size = param_.workspace;
nstep_ = std::max(std::min(static_cast<index_t>(workspace_size / shape_colunit_.Size()),
ishape[0]), 1U);
int nop = (ishape[0] + nstep_ - 1) / nstep_;
Expand All @@ -253,6 +249,7 @@ class ConvolutionOp : public Operator {
CHECK_GE(param_.workspace, scol.Size() + sdst.Size())
<< "\nMinimum workspace size: " << scol.Size() + sdst.Size() << "\n"
<< "Given: " << param_.workspace;
return scol.Size() + sdst.Size();
}

ConvolutionParam param_;
Expand Down
65 changes: 37 additions & 28 deletions src/operator/cudnn_activation-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,30 @@ class CuDNNActivationOp : public Operator {
CHECK_EQ(in_data.size(), 1);
CHECK_EQ(out_data.size(), 1);
Stream<gpu> *s = ctx.get_stream<gpu>();
Tensor<gpu, 4> data = in_data[kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> out = out_data[kOut].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> data;
Tensor<gpu, 4> out;
if (in_data[kData].ndim() == 2) {
uint32_t ds[] = {in_data[kData].shape_[0], in_data[kData].shape_[1], 1, 1};
TShape dshape(ds, ds + 4);
data = in_data[kData].get_with_shape<gpu, 4, real_t>(dshape, s);
out = out_data[kOut].get_with_shape<gpu, 4, real_t>(dshape, s);
} else {
data = in_data[kData].get<gpu, 4, real_t>(s);
out = out_data[kOut].get<gpu, 4, real_t>(s);
}
float alpha = 1.0f;
float beta = 0.0f;
CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream<gpu>::OwnHandle);
if (!init_cudnn_) {
this->Init(s, in_data, out_data);
init_cudnn_ = true;
CHECK_EQ(cudnnCreateTensorDescriptor(&shape_desc_), CUDNN_STATUS_SUCCESS);
CHECK_EQ(cudnnSetTensor4dDescriptor(shape_desc_,
CUDNN_TENSOR_NCHW,
dtype_,
data.shape_[0],
data.shape_[1],
data.shape_[2],
data.shape_[3]), CUDNN_STATUS_SUCCESS);
}
CHECK_EQ(cudnnActivationForward(s->dnn_handle_,
mode_,
Expand Down Expand Up @@ -84,10 +101,23 @@ class CuDNNActivationOp : public Operator {
float alpha = 1.0f;
float beta = 0.0f;
Stream<gpu> *s = ctx.get_stream<gpu>();
Tensor<gpu, 4> grad = out_grad[kOut].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> data = in_data[kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> output_data = out_data[kOut].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> input_grad = in_grad[kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> grad;
Tensor<gpu, 4> data;
Tensor<gpu, 4> output_data;
Tensor<gpu, 4> input_grad;
if (in_data[kData].ndim() == 2) {
uint32_t ds[] = {in_data[kData].shape_[0], in_data[kData].shape_[1], 1, 1};
TShape dshape(ds, ds + 4);
data = in_data[kData].get_with_shape<gpu, 4, real_t>(dshape, s);
grad = out_grad[kOut].get_with_shape<gpu, 4, real_t>(dshape, s);
output_data = out_data[kOut].get_with_shape<gpu, 4, real_t>(dshape, s);
input_grad = in_grad[kData].get_with_shape<gpu, 4, real_t>(dshape, s);
} else {
data = in_data[kData].get<gpu, 4, real_t>(s);
output_data = out_data[kOut].get<gpu, 4, real_t>(s);
grad = out_grad[kOut].get<gpu, 4, real_t>(s);
input_grad = in_grad[kData].get<gpu, 4, real_t>(s);
}
CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream<gpu>::OwnHandle);
CHECK_EQ(cudnnActivationBackward(s->dnn_handle_,
mode_,
Expand All @@ -104,27 +134,6 @@ class CuDNNActivationOp : public Operator {
}

private:
// Lazily creates and configures the cuDNN 4-D tensor descriptor the first
// time the operator runs. Guarded by init_cudnn_ so the descriptor is built
// only once per operator instance.
// NOTE(review): only one descriptor (shape_desc_) is created, sized from the
// input shape — presumably it is reused for both input and output, which the
// shape equality check below makes safe; confirm against the call sites.
inline void Init(mshadow::Stream<gpu> *s,
const std::vector<TBlob> &in_data,
const std::vector<TBlob> &out_data) {
using namespace mshadow;
// Exactly one input blob and one output blob are expected.
CHECK_EQ(in_data.size(), 1);
CHECK_EQ(out_data.size(), 1);
if (!init_cudnn_) {
init_cudnn_ = true;
Tensor<gpu, 4> data = in_data[kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> out = out_data[kOut].get<gpu, 4, real_t>(s);
// Output must have the same shape as the input, since a single
// descriptor describes both tensors.
CHECK_EQ(data.shape_, out.shape_);
CHECK_EQ(cudnnCreateTensorDescriptor(&shape_desc_), CUDNN_STATUS_SUCCESS);
// Describe the tensor in NCHW layout using the input's four dimensions.
CHECK_EQ(cudnnSetTensor4dDescriptor(shape_desc_,
CUDNN_TENSOR_NCHW,
dtype_,
data.shape_[0],
data.shape_[1],
data.shape_[2],
data.shape_[3]), CUDNN_STATUS_SUCCESS);
}
}
bool init_cudnn_;
cudnnDataType_t dtype_;
cudnnActivationMode_t mode_;
Expand Down
18 changes: 10 additions & 8 deletions src/operator/cudnn_convolution-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ class CuDNNConvolutionOp : public Operator {
Init(s, in_data, out_data);
}
Tensor<gpu, 1> workspace = ctx.requested[kTempSpace].get_space<gpu>(
mshadow::Shape1(param_.workspace), s);
const size_t workspace_size = param_.workspace * sizeof(real_t);
mshadow::Shape1(workspace_), s);
CHECK_EQ(cudnnConvolutionForward(s->dnn_handle_,
&alpha,
in_desc_,
Expand All @@ -66,7 +65,7 @@ class CuDNNConvolutionOp : public Operator {
conv_desc_,
algo_,
workspace.dptr_,
workspace_size,
workspace_size_,
&beta,
out_desc_,
out.dptr_), CUDNN_STATUS_SUCCESS);
Expand Down Expand Up @@ -107,8 +106,7 @@ class CuDNNConvolutionOp : public Operator {
Tensor<gpu, 4> data = in_data[kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> gdata = in_grad[kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 1> workspace = ctx.requested[kTempSpace].get_space<gpu>(
mshadow::Shape1(param_.workspace), s);
const size_t workspace_size = param_.workspace * sizeof(real_t);
mshadow::Shape1(workspace_), s);
if (!param_.no_bias) {
Tensor<gpu, 1> gbias = in_grad[kBias].get<gpu, 1, real_t>(s);
CHECK_EQ(cudnnConvolutionBackwardBias(s->dnn_handle_,
Expand All @@ -128,7 +126,7 @@ class CuDNNConvolutionOp : public Operator {
conv_desc_,
back_algo_w_,
workspace.dptr_,
workspace_size,
workspace_size_,
&beta,
filter_desc_,
gwmat.dptr_), CUDNN_STATUS_SUCCESS);
Expand All @@ -141,7 +139,7 @@ class CuDNNConvolutionOp : public Operator {
conv_desc_,
back_algo_,
workspace.dptr_,
workspace_size,
workspace_size_,
&beta,
in_desc_,
gdata.dptr_), CUDNN_STATUS_SUCCESS);
Expand Down Expand Up @@ -253,13 +251,17 @@ class CuDNNConvolutionOp : public Operator {
algo_,
&workspace), CUDNN_STATUS_SUCCESS);
workspace = std::max(workspace, back_size);
CHECK_GE(param_.workspace * sizeof(real_t), workspace)
CHECK_GE(param_.workspace * sizeof(real_t), workspace + sizeof(real_t))
<< "\nMinimum workspace: " << workspace << "\n"
<< "Given: " << param_.workspace * sizeof(real_t);
workspace_ = workspace / sizeof(real_t) + 1;
workspace_size_ = workspace_ * sizeof(real_t);
}
}

bool init_cudnn_;
size_t workspace_;
size_t workspace_size_;
cudnnDataType_t dtype_;
cudnnTensorDescriptor_t in_desc_;
cudnnTensorDescriptor_t out_desc_;
Expand Down
14 changes: 9 additions & 5 deletions src/operator/cudnn_lrn-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ class CuDNNLocalResponseNormOp : public Operator {
Stream<gpu> *s = ctx.get_stream<gpu>();
Tensor<gpu, 4> data = in_data[kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> out = out_data[kOut].get<gpu, 4, real_t>(s);
if (!init_cudnn_) {
this->Init(s, in_data, out_data);
}
CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream<gpu>::OwnHandle);
CHECK_EQ(cudnnLRNCrossChannelForward(s->dnn_handle_,
lrn_desc_,
Expand Down Expand Up @@ -105,11 +108,12 @@ class CuDNNLocalResponseNormOp : public Operator {
double beta = param_.beta;
double lrn_k = param_.knorm;
CHECK_EQ(data.shape_, out.shape_);
CHECK_EQ(cudnnGetLRNDescriptor(lrn_desc_,
&lrn_n,
&alpha,
&beta,
&lrn_k), CUDNN_STATUS_SUCCESS);
CHECK_EQ(cudnnCreateLRNDescriptor(&lrn_desc_), CUDNN_STATUS_SUCCESS);
CHECK_EQ(cudnnSetLRNDescriptor(lrn_desc_,
lrn_n,
alpha,
beta,
lrn_k), CUDNN_STATUS_SUCCESS);
CHECK_EQ(cudnnCreateTensorDescriptor(&shape_desc_), CUDNN_STATUS_SUCCESS);
CHECK_EQ(cudnnSetTensor4dDescriptor(shape_desc_,
CUDNN_TENSOR_NCHW,
Expand Down

0 comments on commit 58d1553

Please sign in to comment.