This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[BUGFIX] Reenable fwd conv engine 5 on test_group_conv2d_16c (#21104)
* Reenable fwd conv engine 5 on test_group_conv2d_16c

* Test conv plan fix devel

* Fix clang format

* More clang format fixes

* switch to using std::vector::data() (see the sketch below)
DickJC123 authored Aug 4, 2022
1 parent 97e25cf commit 9975ab4
Showing 5 changed files with 201 additions and 37 deletions.
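
A note on the last commit-message bullet: &v[0] on a std::vector v is undefined behavior when the vector is empty, because operator[] requires a valid element, whereas v.data() is well-defined for any size and may simply return a non-dereferenceable pointer when size() == 0. A zero-count cuDNN attribute is exactly that situation. A minimal standalone sketch of the distinction (not MXNet code; consume is a hypothetical stand-in for a count-plus-pointer C API such as cudnnBackendSetAttribute):

#include <cstddef>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for a (count, pointer) C API like cudnnBackendSetAttribute.
static void consume(std::size_t n, const int* p) {
  for (std::size_t i = 0; i < n; ++i)
    std::printf("%d\n", p[i]);
}

int main() {
  std::vector<int> empty;
  // consume(empty.size(), &empty[0]);  // UB: operator[] on an empty vector
  consume(empty.size(), empty.data());  // OK: data() is valid even when empty
  std::vector<int> vals{1, 2, 3};
  consume(vals.size(), vals.data());    // same pointer as &vals[0] when non-empty
  return 0;
}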
2 changes: 1 addition & 1 deletion src/common/cuda/cudnn_cxx.cc
@@ -63,7 +63,7 @@ void SetAttr(const Descriptor& desc,
   std::vector<cudnnBackendDescriptor_t> raw(val.size());
   std::transform(val.begin(), val.end(), raw.begin(), [](const Descriptor& d) { return d.get(); });
   CUDNN_CALL(cudnnBackendSetAttribute(
-      desc.get(), name, CUDNN_TYPE_BACKEND_DESCRIPTOR, raw.size(), &raw[0]));
+      desc.get(), name, CUDNN_TYPE_BACKEND_DESCRIPTOR, raw.size(), raw.data()));
 }

 Descriptor GetAttr(const Descriptor& desc,
4 changes: 2 additions & 2 deletions src/common/cuda/cudnn_cxx.h
@@ -162,14 +162,14 @@ void SetAttr(const Descriptor& desc, cudnnBackendAttributeName_t name, T val) {

 template <typename T>
 void SetAttr(const Descriptor& desc, cudnnBackendAttributeName_t name, const std::vector<T>& val) {
-  CUDNN_CALL(cudnnBackendSetAttribute(desc.get(), name, AttrType<T>::type, val.size(), &val[0]));
+  CUDNN_CALL(cudnnBackendSetAttribute(desc.get(), name, AttrType<T>::type, val.size(), val.data()));
 }

 template <typename T, size_t N>
 void SetAttr(const Descriptor& desc,
              cudnnBackendAttributeName_t name,
              const std::array<T, N>& val) {
-  CUDNN_CALL(cudnnBackendSetAttribute(desc.get(), name, AttrType<T>::type, val.size(), &val[0]));
+  CUDNN_CALL(cudnnBackendSetAttribute(desc.get(), name, AttrType<T>::type, val.size(), val.data()));
 }

 inline void SetAttrs(const Descriptor& desc) {}
148 changes: 125 additions & 23 deletions src/operator/cudnn_ops.cc
@@ -241,6 +241,20 @@ Descriptor MakeConvFwdOp(const Descriptor& conv,
   return ret;
 }

+Descriptor Conv::MakeConvFwdOp(const OpContext& ctx,
+                               const Param& param,
+                               const TBlob& x,
+                               const TBlob& w,
+                               const TBlob& y) {
+  auto dtype = static_cast<mshadow::TypeFlag>(x.type_flag_);
+  auto conv = MakeConvDesc(param, dtype);
+  auto li = GetLayoutInfo(static_cast<mshadow::LayoutFlag>(param.layout.value()));
+  auto x_desc = MakeTensorDesc(ID_X, x, li, true, false);
+  auto w_desc = MakeTensorDesc(ID_W, w, li, true, false);
+  auto y_desc = MakeTensorDesc(ID_Y, y, li, true, false);
+  return cudnn::MakeConvFwdOp(conv, x_desc, w_desc, y_desc, param.add_to);
+}
+
 Descriptor MakeConvDgradOp(const Descriptor& conv,
                            const Descriptor& w,
                            const Descriptor& dy,
@@ -272,6 +286,20 @@ Descriptor MakeConvDgradOp(
   return ret;
 }

+Descriptor ConvDgrad::MakeConvDgradOp(const OpContext& ctx,
+                                      const Param& param,
+                                      const TBlob& w,
+                                      const TBlob& dy,
+                                      const TBlob& dx) {
+  auto dtype = static_cast<mshadow::TypeFlag>(w.type_flag_);
+  auto conv = MakeConvDesc(param, dtype);
+  auto li = GetLayoutInfo(static_cast<mshadow::LayoutFlag>(param.layout.value()));
+  auto w_desc = MakeTensorDesc(ID_W, w, li, true, false);
+  auto dy_desc = MakeTensorDesc(ID_DY, dy, li, true, false);
+  auto dx_desc = MakeTensorDesc(ID_DX, dx, li, true, false);
+  return cudnn::MakeConvDgradOp(conv, w_desc, dy_desc, dx_desc, param.add_to);
+}
+
 Descriptor MakeConvWgradOp(const Descriptor& conv,
                            const Descriptor& x,
                            const Descriptor& dy,
@@ -303,6 +331,20 @@ Descriptor MakeConvWgradOp(const Descriptor& conv,
   return ret;
 }

+Descriptor ConvWgrad::MakeConvWgradOp(const OpContext& ctx,
+                                      const Param& param,
+                                      const TBlob& x,
+                                      const TBlob& dy,
+                                      const TBlob& dw) {
+  auto dtype = static_cast<mshadow::TypeFlag>(x.type_flag_);
+  auto conv = MakeConvDesc(param, dtype);
+  auto li = GetLayoutInfo(static_cast<mshadow::LayoutFlag>(param.layout.value()));
+  auto x_desc = MakeTensorDesc(ID_X, x, li, true, false);
+  auto dy_desc = MakeTensorDesc(ID_DY, dy, li, true, false);
+  auto dw_desc = MakeTensorDesc(ID_DW, dw, li, true, false);
+  return cudnn::MakeConvWgradOp(conv, x_desc, dy_desc, dw_desc, param.add_to);
+}
+
 Descriptor MakeOpGraph(cudnnHandle_t handle, const std::vector<Descriptor>& ops) {
   return MakeFinalized(CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR,
                        CUDNN_ATTR_OPERATIONGRAPH_HANDLE,
@@ -311,6 +353,44 @@ Descriptor MakeOpGraph(cudnnHandle_t handle, const std::vector<Descriptor>& ops)
                        ops);
 }

+Descriptor MakeOpGraph(cudnnHandle_t handle, Descriptor op) {
+  std::vector<Descriptor> ops;
+  ops.push_back(std::move(op));
+  return MakeOpGraph(handle, ops);
+}
+
+Descriptor ClonePlan(cudnnHandle_t handle, Descriptor op_graph, const Descriptor& plan) {
+  auto cfg =
+      GetAttr(plan, CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG, CUDNN_BACKEND_ENGINECFG_DESCRIPTOR);
+  auto engine = GetAttr(cfg, CUDNN_ATTR_ENGINECFG_ENGINE, CUDNN_BACKEND_ENGINE_DESCRIPTOR);
+  auto engine_idx = GetAttr<int64_t>(engine, CUDNN_ATTR_ENGINE_GLOBAL_INDEX);
+
+  auto choices = GetSomeAttrs(CUDNN_KNOB_TYPE_COUNTS,
+                              cfg,
+                              CUDNN_ATTR_ENGINECFG_KNOB_CHOICES,
+                              CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR);
+
+  auto cloned_engine = MakeFinalized(CUDNN_BACKEND_ENGINE_DESCRIPTOR,
+                                     CUDNN_ATTR_ENGINE_GLOBAL_INDEX,
+                                     engine_idx,
+                                     CUDNN_ATTR_ENGINE_OPERATION_GRAPH,
+                                     op_graph);
+
+  auto cloned_cfg = MakeFinalized(CUDNN_BACKEND_ENGINECFG_DESCRIPTOR,
+                                  CUDNN_ATTR_ENGINECFG_ENGINE,
+                                  cloned_engine,
+                                  CUDNN_ATTR_ENGINECFG_KNOB_CHOICES,
+                                  choices);
+
+  auto cloned_plan = cudnn_cxx::Make(CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR,
+                                     CUDNN_ATTR_EXECUTION_PLAN_HANDLE,
+                                     handle,
+                                     CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG,
+                                     cloned_cfg);
+  CUDNN_CALL(cudnnBackendFinalize(cloned_plan.get()));
+  return cloned_plan;
+}
+
 ConvParam::ConvParam(const ConvolutionParam& p, bool add_to)
     : kernel(p.kernel),
       stride(p.stride),
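
The ClonePlan helper added above is the heart of this change: rather than repeating the engine search, it reads the winning engine's global index and knob choices back out of an already-finalized plan, then rebuilds an engine, engine config, and execution plan around a freshly built op graph, possibly on a different handle. For reference, a hedged sketch of what the cudnn_cxx::GetAttr wrappers do underneath, using the raw cuDNN v8+ backend API (error handling omitted; EngineIndexOf is a hypothetical helper, not part of this commit):

#include <cudnn.h>

// Hypothetical helper: recover the engine's global index from a finalized
// execution plan. cudnn_cxx::GetAttr wraps this same pattern.
int64_t EngineIndexOf(cudnnBackendDescriptor_t plan) {
  cudnnBackendDescriptor_t cfg = nullptr;
  cudnnBackendDescriptor_t engine = nullptr;
  int64_t count = 0;
  int64_t engine_idx = -1;
  cudnnBackendCreateDescriptor(CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, &cfg);
  cudnnBackendGetAttribute(plan, CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG,
                           CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &count, &cfg);
  cudnnBackendCreateDescriptor(CUDNN_BACKEND_ENGINE_DESCRIPTOR, &engine);
  cudnnBackendGetAttribute(cfg, CUDNN_ATTR_ENGINECFG_ENGINE,
                           CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &count, &engine);
  cudnnBackendGetAttribute(engine, CUDNN_ATTR_ENGINE_GLOBAL_INDEX,
                           CUDNN_TYPE_INT64, 1, &count, &engine_idx);
  cudnnBackendDestroyDescriptor(engine);
  cudnnBackendDestroyDescriptor(cfg);
  return engine_idx;
}

Because the cloned plan pins CUDNN_ATTR_ENGINE_GLOBAL_INDEX and the knob choices, finalizing it deterministically reproduces the original engine configuration instead of re-running selection.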
@@ -476,9 +556,7 @@ Descriptor SelectPlan(const OpContext& ctx,
                       int64_t out_size,
                       const std::string& excl_engines_var) {
   auto s = ctx.get_stream<gpu>();
-  std::vector<Descriptor> ops;
-  ops.push_back(std::move(op));
-  auto op_graph = MakeOpGraph(s->dnn_handle_, ops);
+  auto op_graph = MakeOpGraph(s->dnn_handle_, std::move(op));

   int verbose = dmlc::GetEnv("MXNET_CUDNN_ALGO_VERBOSE_LEVEL", 0);
   if (verbose > 0)
@@ -592,12 +670,7 @@ cudnn_cxx::Descriptor Conv::Make(const OpContext& ctx,
                                 const TBlob& x,
                                 const TBlob& w,
                                 const TBlob& y) {
-  auto conv = MakeConvDesc(param, static_cast<mshadow::TypeFlag>(x.type_flag_));
-  auto li = GetLayoutInfo(static_cast<mshadow::LayoutFlag>(param.layout.value()));
-  auto x_desc = MakeTensorDesc(ID_X, x, li, true, false);
-  auto w_desc = MakeTensorDesc(ID_W, w, li, true, false);
-  auto y_desc = MakeTensorDesc(ID_Y, y, li, true, false);
-  auto conv_fwd = MakeConvFwdOp(conv, x_desc, w_desc, y_desc, param.add_to);
+  auto conv_fwd = MakeConvFwdOp(ctx, param, x, w, y);

   auto make_op_str = [&param, &x]() {
     std::ostringstream ss;
@@ -619,6 +692,19 @@ cudnn_cxx::Descriptor Conv::Make(const OpContext& ctx,
                     "MXNET_CUDNN_DISABLED_CONV_FWD_ENGINES");
 }

+cudnn_cxx::Descriptor Conv::Clone(const cudnn_cxx::Descriptor& plan,
+                                  const OpContext& ctx,
+                                  const Param& param,
+                                  const TBlob& x,
+                                  const TBlob& w,
+                                  const TBlob& y) {
+  auto conv_fwd = MakeConvFwdOp(ctx, param, x, w, y);
+  auto handle = ctx.get_stream<gpu>()->dnn_handle_;
+  auto op_graph = MakeOpGraph(handle, std::move(conv_fwd));
+  auto cloned_plan = ClonePlan(handle, std::move(op_graph), plan);
+  return cloned_plan;
+}
+
 void Conv::Exec(const cudnn_cxx::Descriptor& plan,
                 const OpContext& ctx,
                 const TBlob& x,
@@ -645,12 +731,7 @@ cudnn_cxx::Descriptor ConvDgrad::Make(const OpContext& ctx,
                                      const TBlob& w,
                                      const TBlob& dy,
                                      const TBlob& dx) {
-  auto conv = MakeConvDesc(param, static_cast<mshadow::TypeFlag>(w.type_flag_));
-  auto li = GetLayoutInfo(static_cast<mshadow::LayoutFlag>(param.layout.value()));
-  auto w_desc = MakeTensorDesc(ID_W, w, li, true, false);
-  auto dy_desc = MakeTensorDesc(ID_DY, dy, li, true, false);
-  auto dx_desc = MakeTensorDesc(ID_DX, dx, li, true, false);
-  auto dgrad = MakeConvDgradOp(conv, w_desc, dy_desc, dx_desc, param.add_to);
+  auto conv_dgrad = MakeConvDgradOp(ctx, param, w, dy, dx);

   auto make_op_str = [&param, &dx]() {
     std::ostringstream ss;
@@ -663,7 +744,7 @@ cudnn_cxx::Descriptor ConvDgrad::Make(const OpContext& ctx,

   return SelectPlan(ctx,
                     param,
-                    std::move(dgrad),
+                    std::move(conv_dgrad),
                     kMaxDgradFallbacks,
                     make_op_str,
                     ids,
@@ -672,6 +753,19 @@ cudnn_cxx::Descriptor ConvDgrad::Make(const OpContext& ctx,
                     "MXNET_CUDNN_DISABLED_CONV_DGRAD_ENGINES");
 }

+cudnn_cxx::Descriptor ConvDgrad::Clone(const cudnn_cxx::Descriptor& plan,
+                                       const OpContext& ctx,
+                                       const Param& param,
+                                       const TBlob& w,
+                                       const TBlob& dy,
+                                       const TBlob& dx) {
+  auto conv_dgrad = MakeConvDgradOp(ctx, param, w, dy, dx);
+  auto handle = ctx.get_stream<gpu>()->dnn_handle_;
+  auto op_graph = MakeOpGraph(handle, std::move(conv_dgrad));
+  auto cloned_plan = ClonePlan(handle, std::move(op_graph), plan);
+  return cloned_plan;
+}
+
 void ConvDgrad::Exec(const cudnn_cxx::Descriptor& plan,
                      const OpContext& ctx,
                      const TBlob& w,
@@ -698,12 +792,7 @@ cudnn_cxx::Descriptor ConvWgrad::Make(const OpContext& ctx,
                                      const TBlob& x,
                                      const TBlob& dy,
                                      const TBlob& dw) {
-  auto conv = MakeConvDesc(param, static_cast<mshadow::TypeFlag>(x.type_flag_));
-  auto li = GetLayoutInfo(static_cast<mshadow::LayoutFlag>(param.layout.value()));
-  auto x_desc = MakeTensorDesc(ID_X, x, li, true, false);
-  auto dy_desc = MakeTensorDesc(ID_DY, dy, li, true, false);
-  auto dw_desc = MakeTensorDesc(ID_DW, dw, li, true, false);
-  auto wgrad = MakeConvWgradOp(conv, x_desc, dy_desc, dw_desc, param.add_to);
+  auto conv_wgrad = MakeConvWgradOp(ctx, param, x, dy, dw);

   auto make_op_str = [&param, &x]() {
     std::ostringstream ss;
@@ -716,7 +805,7 @@ cudnn_cxx::Descriptor ConvWgrad::Make(const OpContext& ctx,

   return SelectPlan(ctx,
                     param,
-                    std::move(wgrad),
+                    std::move(conv_wgrad),
                     kMaxWgradFallbacks,
                     make_op_str,
                     ids,
@@ -725,6 +814,19 @@ cudnn_cxx::Descriptor ConvWgrad::Make(const OpContext& ctx,
                     "MXNET_CUDNN_DISABLED_CONV_WGRAD_ENGINES");
 }

+cudnn_cxx::Descriptor ConvWgrad::Clone(const cudnn_cxx::Descriptor& plan,
+                                       const OpContext& ctx,
+                                       const Param& param,
+                                       const TBlob& x,
+                                       const TBlob& dy,
+                                       const TBlob& dw) {
+  auto conv_wgrad = MakeConvWgradOp(ctx, param, x, dy, dw);
+  auto handle = ctx.get_stream<gpu>()->dnn_handle_;
+  auto op_graph = MakeOpGraph(handle, std::move(conv_wgrad));
+  auto cloned_plan = ClonePlan(handle, std::move(op_graph), plan);
+  return cloned_plan;
+}
+
 void ConvWgrad::Exec(const cudnn_cxx::Descriptor& plan,
                      const OpContext& ctx,
                      const TBlob& x,
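
Taken together: Make performs the expensive engine selection once, and the new Clone entry points stamp out an equivalent plan for the same convolution on another handle or op graph without repeating that search. A hedged usage sketch; the ctx_a/ctx_b, param, and x/w/y TBlob variables are hypothetical caller state, not code from this commit:

// Hypothetical caller: reuse a selected plan rather than re-selecting.
auto plan = Conv::Make(ctx_a, param, x, w, y);          // full engine search
Conv::Exec(plan, ctx_a, x, w, y);                       // run on the first stream
auto clone = Conv::Clone(plan, ctx_b, param, x, w, y);  // reuse the engine choice
Conv::Exec(clone, ctx_b, x, w, y);                      // run on a second handle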
(Diffs for the remaining two changed files did not load and are not shown.)