Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ROCM] fix depthwise conv in ROCM, test=develop #32170

Merged
merged 1 commit into from
Apr 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion paddle/fluid/operators/conv_cudnn_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1363,7 +1363,14 @@ REGISTER_OP_KERNEL(
conv2d_grad_grad, CUDNN, plat::CUDAPlace,
paddle::operators::CUDNNConvDoubleGradOpKernel<float>,
paddle::operators::CUDNNConvDoubleGradOpKernel<plat::float16>);

// ROCM has a thread limit in depthwise_conv.cu, which will result in accuracy issues.
// Use depthwise_conv2d in MIOPEN to resolve this issue
REGISTER_OP_KERNEL(depthwise_conv2d, CUDNN, plat::CUDAPlace,
paddle::operators::CUDNNConvOpKernel<float>,
paddle::operators::CUDNNConvOpKernel<plat::float16>);
REGISTER_OP_KERNEL(depthwise_conv2d_grad, CUDNN, plat::CUDAPlace,
paddle::operators::CUDNNConvGradOpKernel<float>,
paddle::operators::CUDNNConvGradOpKernel<plat::float16>);
REGISTER_OP_CUDA_KERNEL(
depthwise_conv2d_grad_grad,
paddle::operators::CUDNNConvDoubleGradOpKernel<float>,
Expand Down
3 changes: 1 addition & 2 deletions paddle/fluid/operators/math/depthwise_conv.cu
Original file line number Diff line number Diff line change
Expand Up @@ -919,11 +919,10 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T,
batch_size * output_channels * output_height * output_width;
#ifdef __HIPCC__
int block_size = 256;
int grid_size = std::min((nums_output + block_size - 1) / block_size, 256);
#else
int block_size = 512;
int grid_size = (nums_output + block_size - 1) / block_size;
#endif
int grid_size = (nums_output + block_size - 1) / block_size;

#define check_case(c_filter_multiplier, c_stride, c_filter) \
if (c_filter_multiplier == 0 || \
Expand Down
4 changes: 4 additions & 0 deletions python/paddle/fluid/layers/nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -1524,6 +1524,10 @@ def conv2d(input,
not use_cudnn):
l_type = 'depthwise_conv2d'

if (num_channels == groups and num_filters % num_channels == 0 and
core.is_compiled_with_rocm()):
l_type = 'depthwise_conv2d'

helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()

Expand Down
11 changes: 11 additions & 0 deletions python/paddle/fluid/tests/unittests/test_conv2d_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -1248,6 +1248,17 @@ def init_paddings(self):
create_test_cudnn_channel_last_class(TestWithGroup_AsyPadding)
create_test_cudnn_channel_last_class(TestWithDilation_AsyPadding)

# ------------ depthwise conv2d in MIOPEN ---------
if core.is_compiled_with_rocm():
create_test_cudnn_padding_SAME_class(TestDepthwiseConv_AsyPadding)
create_test_cudnn_padding_SAME_class(
TestDepthwiseConvWithDilation_AsyPadding)
create_test_padding_VALID_class(TestDepthwiseConv_AsyPadding)
create_test_padding_VALID_class(TestDepthwiseConvWithDilation_AsyPadding)
create_test_cudnn_channel_last_class(TestDepthwiseConv_AsyPadding)
create_test_cudnn_channel_last_class(
TestDepthwiseConvWithDilation2_AsyPadding)

create_test_cudnn_channel_last_fp16_class(
TestConv2DOp_AsyPadding, grad_check=False)
create_test_cudnn_channel_last_fp16_class(
Expand Down
9 changes: 8 additions & 1 deletion python/paddle/nn/functional/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import numpy as np
from ...device import get_cudnn_version
from ...fluid.framework import Variable, in_dygraph_mode
from ...fluid import core, dygraph_utils
from ...fluid import core, dygraph_utils, get_flags
from ...fluid.layers import nn, utils
from ...fluid.data_feeder import check_variable_and_dtype
from ...fluid.param_attr import ParamAttr
Expand Down Expand Up @@ -551,6 +551,13 @@ def conv2d(x,
if (num_channels == groups and num_channels != 1 and
num_filters % num_channels == 0):
l_type = 'depthwise_conv2d'
if core.is_compiled_with_rocm():
use_cudnn = True
else:
use_cudnn = False

if (core.is_compiled_with_cuda() and get_flags("FLAGS_conv2d_disable_cudnn")
["FLAGS_conv2d_disable_cudnn"]):
use_cudnn = False

return _conv_nd(x, weight, bias, stride, padding, padding_algorithm,
Expand Down
11 changes: 7 additions & 4 deletions python/paddle/nn/layer/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,13 @@ def _get_default_param_initializer():
in_channels != 1 and
out_channels % in_channels == 0):
self._op_type = 'depthwise_conv2d'
if core.is_compiled_with_rocm():
self._use_cudnn = True
else:
self._use_cudnn = False

if (core.is_compiled_with_cuda() and get_flags(
"FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]):
self._use_cudnn = False

def extra_repr(self):
Expand Down Expand Up @@ -645,10 +652,6 @@ def __init__(self,
bias_attr=bias_attr,
data_format=data_format)

if (core.is_compiled_with_cuda() and get_flags(
"FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]):
self._use_cudnn = False

def forward(self, x):
if self._padding_mode != 'zeros':
x = F.pad(x,
Expand Down