Improve workspace in convolution/deconvolution (apache#9904)

* Improve workspace in convolution/deconvolution

  Revise the description of the workspace parameter. Also, refine the workspace after the effective batch size is determined.
* fix lint
* no need to update workspace
rahul003 · Mar 1, 2018 · 8a05a46 · 8a05a46
1 parent dffb9b9
commit 8a05a46
Show file tree

Hide file tree

Showing 3 changed files with 17 additions and 11 deletions.
diff --git a/src/operator/convolution_v1-inl.h b/src/operator/convolution_v1-inl.h
@@ -76,7 +76,11 @@ struct ConvolutionV1Param : public dmlc::Parameter<ConvolutionV1Param> {
     .describe("Number of group partitions. Equivalent to slicing input into num_group\n    "
               "partitions, apply convolution on each, then concatenate the results");
     DMLC_DECLARE_FIELD(workspace).set_default(1024).set_range(0, 8192)
-    .describe("Maximum tmp workspace allowed for convolution (MB).");
+    .describe("Maximum temporary workspace allowed for convolution (MB)."
+              "This parameter determines the effective batch size of the convolution "
+              "kernel, which may be smaller than the given batch size. "
+              "Also, the workspace will be automatically enlarged to make sure that we can "
+              "run the kernel with batch_size=1");
     DMLC_DECLARE_FIELD(no_bias).set_default(false)
     .describe("Whether to disable bias parameter.");
     DMLC_DECLARE_FIELD(cudnn_tune)
@@ -344,9 +348,6 @@ class ConvolutionV1Op : public Operator {
                                              shape_dstunit_[1],
                                              shape_dstunit_[2] * nstep_);
     index_t required_size = scol.Size() + sdst.Size();
-    CHECK_GE(param_.workspace, required_size)
-      << "\nMinimum workspace size: " << required_size * sizeof(DType) << " Bytes\n"
-      << "Given: " << param_.workspace * sizeof(DType) << " Bytes";
     return required_size;
   }
 

diff --git a/src/operator/nn/convolution-inl.h b/src/operator/nn/convolution-inl.h
@@ -79,7 +79,11 @@ struct ConvolutionParam : public dmlc::Parameter<ConvolutionParam> {
     DMLC_DECLARE_FIELD(num_group).set_default(1)
     .describe("Number of group partitions.");
     DMLC_DECLARE_FIELD(workspace).set_default(1024).set_range(0, 8192)
-    .describe("Maximum temporary workspace allowed for convolution (MB).");
+    .describe("Maximum temporary workspace allowed (MB) in convolution."
+              "This parameter has two usages. When CUDNN is not used, it determines the "
+              "effective batch size of the convolution kernel. When CUDNN is used, it controls "
+              "the maximum temporary storage used for tuning the best CUDNN kernel when "
+              "`limited_workspace` strategy is used.");
     DMLC_DECLARE_FIELD(no_bias).set_default(false)
     .describe("Whether to disable bias parameter.");
     DMLC_DECLARE_FIELD(cudnn_tune)

diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h
@@ -90,7 +90,11 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
     DMLC_DECLARE_FIELD(num_group).set_default(1)
         .describe("Number of groups partition.");
     DMLC_DECLARE_FIELD(workspace).set_default(512).set_range(0, 8192)
-      .describe("Maximum temporal workspace allowed for deconvolution (MB).");
+        .describe("Maximum temporary workspace allowed (MB) in deconvolution."
+                  "This parameter has two usages. When CUDNN is not used, it determines the "
+                  "effective batch size of the deconvolution kernel. When CUDNN is used, "
+                  "it controls the maximum temporary storage used for tuning "
+                  "the best CUDNN kernel when `limited_workspace` strategy is used.");
     DMLC_DECLARE_FIELD(no_bias).set_default(true)
         .describe("Whether to disable bias parameter.");
     DMLC_DECLARE_FIELD(cudnn_tune)
@@ -200,7 +204,7 @@ class DeconvolutionOp {
   void Init(DeconvolutionParam p) {
     this->param_ = p;
     // convert MBytes first to Bytes and then to elements.
-    param_.workspace = (param_.workspace << 20) / sizeof(real_t);
+    param_.workspace = (param_.workspace << 20) / sizeof(DType);
   }
 
   void Forward(const OpContext &ctx,
@@ -451,7 +455,7 @@ class DeconvolutionOp {
     shape_dstunit_ = mshadow::Shape3(param_.num_group,
                                      oshape[1] / param_.num_group,
                                      oshape[2] * oshape[3]);
-    // See convolution for workspace calculations
+    // See convolution for workspace calculations. nstep_ will be the effective batch size
     nstep_ = std::max(
         std::min(
             static_cast<index_t>(
@@ -465,9 +469,6 @@ class DeconvolutionOp {
                                              shape_dstunit_[1],
                                              shape_dstunit_[2] * nstep_);
     index_t required_size = scol.Size() + sdst.Size();
-    CHECK_GE(param_.workspace, required_size)
-      << "\nMinimum workspace size: " << required_size * sizeof(DType) << " Bytes\n"
-      << "Given: " << param_.workspace * sizeof(DType);
     return required_size;
   }