Skip to content

Commit

Permalink
Improve workspace in convolution/deconvolution (apache#9904)
Browse files Browse the repository at this point in the history
* Improve workspace in convolution/deconvolution

Revise the description of the workspace parameter. Also, refine the workspace after the effective batch size is determined.

* fix lint

* no need to update workspace
  • Loading branch information
sxjscience authored and piiswrong committed Mar 1, 2018
1 parent dffb9b9 commit 8a05a46
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 11 deletions.
9 changes: 5 additions & 4 deletions src/operator/convolution_v1-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ struct ConvolutionV1Param : public dmlc::Parameter<ConvolutionV1Param> {
.describe("Number of group partitions. Equivalent to slicing input into num_group\n "
"partitions, apply convolution on each, then concatenate the results");
DMLC_DECLARE_FIELD(workspace).set_default(1024).set_range(0, 8192)
.describe("Maximum tmp workspace allowed for convolution (MB).");
.describe("Maximum temporary workspace allowed for convolution (MB)."
"This parameter determines the effective batch size of the convolution "
"kernel, which may be smaller than the given batch size. "
"Also, the workspace will be automatically enlarged to make sure that we can "
"run the kernel with batch_size=1");
DMLC_DECLARE_FIELD(no_bias).set_default(false)
.describe("Whether to disable bias parameter.");
DMLC_DECLARE_FIELD(cudnn_tune)
Expand Down Expand Up @@ -344,9 +348,6 @@ class ConvolutionV1Op : public Operator {
shape_dstunit_[1],
shape_dstunit_[2] * nstep_);
index_t required_size = scol.Size() + sdst.Size();
CHECK_GE(param_.workspace, required_size)
<< "\nMinimum workspace size: " << required_size * sizeof(DType) << " Bytes\n"
<< "Given: " << param_.workspace * sizeof(DType) << " Bytes";
return required_size;
}

Expand Down
6 changes: 5 additions & 1 deletion src/operator/nn/convolution-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,11 @@ struct ConvolutionParam : public dmlc::Parameter<ConvolutionParam> {
DMLC_DECLARE_FIELD(num_group).set_default(1)
.describe("Number of group partitions.");
DMLC_DECLARE_FIELD(workspace).set_default(1024).set_range(0, 8192)
.describe("Maximum temporary workspace allowed for convolution (MB).");
.describe("Maximum temporary workspace allowed (MB) in convolution."
"This parameter has two usages. When CUDNN is not used, it determines the "
"effective batch size of the convolution kernel. When CUDNN is used, it controls "
"the maximum temporary storage used for tuning the best CUDNN kernel when "
"`limited_workspace` strategy is used.");
DMLC_DECLARE_FIELD(no_bias).set_default(false)
.describe("Whether to disable bias parameter.");
DMLC_DECLARE_FIELD(cudnn_tune)
Expand Down
13 changes: 7 additions & 6 deletions src/operator/nn/deconvolution-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,11 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
DMLC_DECLARE_FIELD(num_group).set_default(1)
.describe("Number of groups partition.");
DMLC_DECLARE_FIELD(workspace).set_default(512).set_range(0, 8192)
.describe("Maximum temporal workspace allowed for deconvolution (MB).");
.describe("Maximum temporary workspace allowed (MB) in deconvolution."
"This parameter has two usages. When CUDNN is not used, it determines the "
"effective batch size of the deconvolution kernel. When CUDNN is used, "
"it controls the maximum temporary storage used for tuning "
"the best CUDNN kernel when `limited_workspace` strategy is used.");
DMLC_DECLARE_FIELD(no_bias).set_default(true)
.describe("Whether to disable bias parameter.");
DMLC_DECLARE_FIELD(cudnn_tune)
Expand Down Expand Up @@ -200,7 +204,7 @@ class DeconvolutionOp {
void Init(DeconvolutionParam p) {
this->param_ = p;
// convert MBytes first to Bytes and then to elements.
param_.workspace = (param_.workspace << 20) / sizeof(real_t);
param_.workspace = (param_.workspace << 20) / sizeof(DType);
}

void Forward(const OpContext &ctx,
Expand Down Expand Up @@ -451,7 +455,7 @@ class DeconvolutionOp {
shape_dstunit_ = mshadow::Shape3(param_.num_group,
oshape[1] / param_.num_group,
oshape[2] * oshape[3]);
// See convolution for workspace calculations
// See convolution for workspace calculations. nstep_ will be the effective batch size
nstep_ = std::max(
std::min(
static_cast<index_t>(
Expand All @@ -465,9 +469,6 @@ class DeconvolutionOp {
shape_dstunit_[1],
shape_dstunit_[2] * nstep_);
index_t required_size = scol.Size() + sdst.Size();
CHECK_GE(param_.workspace, required_size)
<< "\nMinimum workspace size: " << required_size * sizeof(DType) << " Bytes\n"
<< "Given: " << param_.workspace * sizeof(DType);
return required_size;
}

Expand Down

0 comments on commit 8a05a46

Please sign in to comment.