Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Environment variables to use native add-bias and bias-grad
Browse files Browse the repository at this point in the history
  • Loading branch information
Vladimir Cherepanov committed Dec 8, 2021
1 parent 9d1df0f commit 464a07e
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
6 changes: 4 additions & 2 deletions src/operator/nn/convolution.cu
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,
CHECK_EQ(inputs[conv::kBias].shape_.ndim(), 1);
auto layout = static_cast<mshadow::LayoutFlag>(param.layout.value());
auto li = cudnn::GetLayoutInfo(layout);
- if (!cudnn::LegacyAddBias(ctx, li, outputs[conv::kOut], inputs[conv::kBias])) {
+ if (dmlc::GetEnv("MXNET_NATIVE_ADD_BIAS", li.channel_last) ||
+ !cudnn::LegacyAddBias(ctx, li, outputs[conv::kOut], inputs[conv::kBias])) {
int k = inputs[conv::kBias].shape_.Size();
auto b = inputs[conv::kBias].reshape(cudnn::ExpandChannelDims(layout, k));
BinaryBroadcastRTCCompute{"add"}( // NOLINT(whitespace/braces)
Expand Down Expand Up @@ -142,7 +143,8 @@ void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
if (ok && !param.no_bias && req[conv::kBias] != kNullOp) {
auto li = cudnn::GetLayoutInfo(static_cast<mshadow::LayoutFlag>(param.layout.value()));
auto add_to = req[conv::kBias] == kAddTo;
- if (!cudnn::LegacyBiasGrad(ctx, li, add_to, outputs[conv::kBias], inputs[0])) {
+ if (dmlc::GetEnv("MXNET_NATIVE_BIAS_GRAD", li.channel_last) ||
+ !cudnn::LegacyBiasGrad(ctx, li, add_to, outputs[conv::kBias], inputs[0])) {
if (li.channel_last) {
// This kernel should be faster.
auto y_grad = FlattenAs2DHead<gpu, DType>(inputs[0], ctx);
Expand Down
6 changes: 4 additions & 2 deletions src/operator/nn/deconvolution.cu
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ void DeconvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,
CHECK_EQ(inputs[deconv::kBias].shape_.ndim(), 1);
auto layout = static_cast<mshadow::LayoutFlag>(param.layout.value());
auto li = cudnn::GetLayoutInfo(layout);
- if (!cudnn::LegacyAddBias(ctx, li, outputs[deconv::kOut], inputs[deconv::kBias])) {
+ if (dmlc::GetEnv("MXNET_NATIVE_ADD_BIAS", li.channel_last) ||
+ !cudnn::LegacyAddBias(ctx, li, outputs[deconv::kOut], inputs[deconv::kBias])) {
int k = inputs[deconv::kBias].shape_.Size();
auto b = inputs[deconv::kBias].reshape(cudnn::ExpandChannelDims(layout, k));
BinaryBroadcastRTCCompute{"add"}( // NOLINT(whitespace/braces)
Expand Down Expand Up @@ -120,7 +121,8 @@ void DeconvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
if (ok && !param.no_bias && req[deconv::kBias] != kNullOp) {
auto li = cudnn::GetLayoutInfo(static_cast<mshadow::LayoutFlag>(param.layout.value()));
auto add_to = req[conv::kBias] == kAddTo;
- if (!cudnn::LegacyBiasGrad(ctx, li, add_to, outputs[deconv::kBias], inputs[0])) {
+ if (dmlc::GetEnv("MXNET_NATIVE_BIAS_GRAD", li.channel_last) ||
+ !cudnn::LegacyBiasGrad(ctx, li, add_to, outputs[deconv::kBias], inputs[0])) {
if (li.channel_last) {
// This kernel should be faster.
auto y_grad = FlattenAs2DHead<gpu, DType>(inputs[0], ctx);
Expand Down

0 comments on commit 464a07e

Please sign in to comment.