Skip to content

Commit

Permalink
[MXNET-266] Fix cudnn_conv and cudnn_deconv deadlock (apache#10392)
Browse files (browse the repository at this point in the history)
* Fix deadlock of cudnn_conv wrapper

* Fix deconv deadlock

* Fix lint

* Revert "Fix lint"

This reverts commit 66f0936.

* Fix lint

* Fix indentation
  • Loading branch information
reminisce authored and zheng-da committed Jun 28, 2018
1 parent 6648405 commit 5b77ae9
Show file tree
Hide file tree
Showing 6 changed files with 455 additions and 462 deletions.
36 changes: 20 additions & 16 deletions src/operator/nn/convolution.cu
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ namespace op {

#if MXNET_USE_CUDNN == 1
template<typename DType>
static CuDNNConvolutionOp<DType> &GetCuDNNConvOp(const ConvolutionParam& param,
int forward_compute_type, int backward_compute_type,
const std::vector<TShape>& in_shape, const std::vector<TShape>& out_shape,
const Context& ctx) {
static CuDNNConvolutionOp<DType>& GetCuDNNConvOp(const ConvolutionParam& param,
int forward_compute_type,
int backward_compute_type,
const std::vector<TShape>& in_shape,
const std::vector<TShape>& out_shape,
const RunContext& rctx) {
#if DMLC_CXX11_THREAD_LOCAL
static thread_local std::unordered_map<ConvSignature,
std::shared_ptr<CuDNNConvolutionOp<DType> >,
Expand All @@ -62,7 +64,7 @@ static CuDNNConvolutionOp<DType> &GetCuDNNConvOp(const ConvolutionParam& param,
key.AddSign(backward_compute_type);
key.AddSign(in_shape);
key.AddSign(out_shape);
key.AddSign(ctx.dev_id);
key.AddSign(rctx.ctx.dev_id);

auto it = ops.find(key);
if (it == ops.end()) {
Expand All @@ -72,17 +74,18 @@ static CuDNNConvolutionOp<DType> &GetCuDNNConvOp(const ConvolutionParam& param,
CHECK(ins_ret.second);
it = ins_ret.first;
it->second->Init(param, forward_compute_type, backward_compute_type, in_shape,
out_shape, ctx);
out_shape, rctx);
}
return *it->second;
}
#endif

template<>
void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,
const OpContext& ctx, const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
const ConvolutionParam& param = nnvm::get<ConvolutionParam>(attrs.parsed);
int dtype = inputs[conv::kData].type_flag_;

Expand Down Expand Up @@ -120,7 +123,7 @@ void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,
op.Init(param);
op.Forward(ctx, inputs, req, outputs);
} else if (!CuDNNConvolutionOp<DType>::Supports(param,
compute_type, compute_type, ctx.run_ctx.ctx)) {
compute_type, compute_type, ctx.run_ctx.ctx.dev_id)) {
LOG(WARNING) << "This convolution is not supported by cudnn, MXNET convolution is applied.";
ConvolutionOp<gpu, DType> op;
op.Init(param);
Expand All @@ -131,7 +134,7 @@ void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,
for (size_t i = 0; i < in_shape.size(); i++)
in_shape[i] = inputs[i].shape_;
CuDNNConvolutionOp<DType> &op = GetCuDNNConvOp<DType>(param,
compute_type, compute_type, in_shape, out_shape, ctx.run_ctx.ctx);
compute_type, compute_type, in_shape, out_shape, ctx.run_ctx);
op.Forward(ctx, inputs, req, outputs);
}
})
Expand All @@ -146,9 +149,10 @@ void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,

template<>
void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
const OpContext& ctx, const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
const ConvolutionParam& param = nnvm::get<ConvolutionParam>(attrs.parsed);
std::vector<TBlob> in_data(inputs.begin() + 1, inputs.end());
const TBlob &out_grad = inputs[0];
Expand Down Expand Up @@ -190,7 +194,7 @@ void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
op.Init(param);
op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad);
} else if (!CuDNNConvolutionOp<DType>::Supports(param,
compute_type, compute_type, ctx.run_ctx.ctx)) {
compute_type, compute_type, ctx.run_ctx.ctx.dev_id)) {
LOG(WARNING) << "This convolution is not supported by cudnn, MXNET convolution is applied.";
ConvolutionOp<gpu, DType> op;
op.Init(param);
Expand All @@ -202,7 +206,7 @@ void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
for (size_t i = 0; i < in_shape.size(); i++)
in_shape[i] = in_data[i].shape_;
CuDNNConvolutionOp<DType> &op = GetCuDNNConvOp<DType>(param,
compute_type, compute_type, in_shape, out_shape, ctx.run_ctx.ctx);
compute_type, compute_type, in_shape, out_shape, ctx.run_ctx);
op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad);
}
})
Expand Down
Loading

0 comments on commit 5b77ae9

Please sign in to comment.