[v1.x][BUGFIX] Impose a plain format for concat's output when oneDNN would use padding (#19735)

* Impose a plain format for concat's output when mkldnn would choose a padded one

* Remove reference to temp and add missing comments in convolution

* Fix imposing a plain format and move definition to the .cc file

* Use the const-reference-to-a-temporary lifetime-extension feature

* Fix other comments related to memory planning

* Add test for concat with blocked input

* Evaluate the occurrence of padding in a separate function.
PawelGlomski-Intel authored Jan 26, 2021
1 parent 195f5aa commit 49b26b6
Showing 5 changed files with 81 additions and 18 deletions.
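For background on why the fix is needed: a blocked oneDNN layout rounds padded dimensions up to the block size, so its buffer can be larger than the plain tensor that MXNet's memory planning allocated. Below is a minimal standalone sketch, not part of this commit, that makes the size difference visible through the same memory::desc API the diff relies on; the mkldnn.hpp include and a build against oneDNN v1.x are assumptions.

#include <iostream>
#include <mkldnn.hpp>  // assumed oneDNN v1.x compatibility header, as bundled with MXNet 1.x

int main() {
  using namespace mkldnn;
  memory::dims shape = {1, 20, 64, 64};  // 20 channels, not a multiple of the 16-channel block

  // Plain NCHW layout: buffer size equals the logical tensor size (1*20*64*64*4 bytes).
  memory::desc plain_md(shape, memory::data_type::f32, memory::format_tag::nchw);

  // Blocked nChw16c layout: channels are padded from 20 up to 32 (1*32*64*64*4 bytes).
  memory::desc blocked_md(shape, memory::data_type::f32, memory::format_tag::nChw16c);

  std::cout << "plain:   " << plain_md.get_size() << " bytes\n";
  std::cout << "blocked: " << blocked_md.get_size() << " bytes\n";
  return 0;
}

When concat's destination ends up in such a padded layout, get_size() no longer matches the planned output buffer, which is exactly the condition the new IsUsingPadding() check below guards against.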
7 changes: 2 additions & 5 deletions src/operator/nn/mkldnn/mkldnn_concat-inl.h
@@ -40,12 +40,9 @@ class MKLDNNConcatFwd {
  public:
   mkldnn::concat::primitive_desc fwd_pd;
 
-  MKLDNNConcatFwd(int concat_dim, const std::vector<mkldnn::memory::desc> &data_md)
-      : fwd_pd(concat_dim, data_md, CpuEngine::Get()->get_engine()) {
-    fwd_ = std::make_shared<mkldnn::concat>(fwd_pd);
-  }
+  MKLDNNConcatFwd(int concat_dim, const std::vector<mkldnn::memory::desc> &data_md);
 
-  const mkldnn::concat &GetFwd() const;
+  const mkldnn::concat &GetFwd() const { return *fwd_; }
 
  private:
   std::shared_ptr<mkldnn::concat> fwd_;
27 changes: 26 additions & 1 deletion src/operator/nn/mkldnn/mkldnn_concat.cc
@@ -29,7 +29,32 @@
 namespace mxnet {
 namespace op {
 
-const mkldnn::concat &MKLDNNConcatFwd::GetFwd() const { return *fwd_; }
+static inline bool IsUsingPadding(const mkldnn::memory::desc &dst_md) {
+  // make sure a blocked format is used (at least one dimension is blocked)
+  bool is_blocked_format = dst_md.data.format_kind == mkldnn_blocked &&
+                           dst_md.data.format_desc.blocking.inner_nblks > 0;
+  return is_blocked_format && !std::equal(dst_md.data.dims, dst_md.data.dims + dst_md.data.ndims,
+                                          dst_md.data.padded_dims);
+}
+
+MKLDNNConcatFwd::MKLDNNConcatFwd(int concat_dim, const std::vector<mkldnn::memory::desc> &data_md)
+    : fwd_pd(concat_dim, data_md, CpuEngine::Get()->get_engine()) {
+  // MKL-DNN introduced padded formats since 0.15 which require more memory
+  // compared to the actual size of the tensor. Currently, MKL-DNN operators
+  // still reuse memory from memory planning, so here we need to select a
+  // format that has the expected memory size requirements (a plain format)
+
+  // When fwd_pd uses padding, impose a plain format
+  const auto &dst_md = fwd_pd.dst_desc();
+  if (IsUsingPadding(dst_md)) {
+    auto plain_dst_tag = static_cast<mkldnn::memory::format_tag>(
+        GetDefaultFormat(dst_md.data.ndims));
+    auto plain_dst_md = mkldnn::memory::desc(dst_md.dims(), dst_md.data_type(), plain_dst_tag);
+    fwd_pd = mkldnn::concat::primitive_desc(plain_dst_md, concat_dim, data_md,
+                                            CpuEngine::Get()->get_engine());
+  }
+  fwd_ = std::make_shared<mkldnn::concat>(fwd_pd);
+}
 
 void MKLDNNConcatForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
                          const std::vector<NDArray> &in_data,
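GetDefaultFormat() used in the new constructor above is an existing MXNet helper that is not part of this diff. The following is a hypothetical stand-in sketching the mapping it is relied on to perform here: return the plain, row-major tag for a given rank, which the constructor then casts to mkldnn::memory::format_tag.

// Hypothetical stand-in for GetDefaultFormat(ndims) as used above; the real
// helper lives elsewhere in MXNet's oneDNN glue code. It just picks the plain
// (dense, row-major) tag for the tensor's rank so no dimension gets padded.
static mkldnn::memory::format_tag PlainFormatTag(int ndims) {
  switch (ndims) {
    case 1: return mkldnn::memory::format_tag::a;
    case 2: return mkldnn::memory::format_tag::ab;
    case 3: return mkldnn::memory::format_tag::abc;
    case 4: return mkldnn::memory::format_tag::abcd;   // NCHW
    case 5: return mkldnn::memory::format_tag::abcde;  // NCDHW
    default: return mkldnn::memory::format_tag::undef;
  }
}

Rebuilding fwd_pd with a plain destination descriptor this way trades a possibly faster blocked concat kernel for an output whose size matches the buffer that memory planning already reserved.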
12 changes: 12 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_convolution.cc
@@ -116,6 +116,10 @@ std::shared_ptr<mkldnn::convolution_forward::primitive_desc> GetConvFwdImpl(
                          &attr](const mkldnn::convolution_forward::desc &desc) {
     auto engine = CpuEngine::Get()->get_engine();
     try {
+      // MKL-DNN introduced padded formats since 0.15 which require more memory
+      // compared to the actual size of the tensor. Currently, MKL-DNN operators
+      // still reuse memory from memory planning, so here we need to select a
+      // suboptimal kernel for computation that has the expected memory size requirements
       auto conv_pd =
           std::make_shared<mkldnn::convolution_forward::primitive_desc>(desc, attr, engine);
       while (conv_pd->dst_desc().get_size() != GetArraySize(output) ||
@@ -216,6 +220,10 @@ static std::shared_ptr<mkldnn::convolution_backward_data::primitive_desc> GetCon
                          &fwd_pd](const mkldnn::convolution_backward_data::desc &desc) {
     auto engine = CpuEngine::Get()->get_engine();
     try {
+      // MKL-DNN introduced padded formats since 0.15 which require more memory
+      // compared to the actual size of the tensor. Currently, MKL-DNN operators
+      // still reuse memory from memory planning, so here we need to select a
+      // suboptimal kernel for computation that has the expected memory size requirements
       auto conv_pd =
           std::make_shared<mkldnn::convolution_backward_data::primitive_desc>(desc, engine, fwd_pd);
       while (conv_pd->diff_dst_desc().get_size() != GetArraySize(output) ||
@@ -299,6 +307,10 @@ static std::shared_ptr<mkldnn::convolution_backward_weights::primitive_desc> Get
                          &fwd_pd](const mkldnn::convolution_backward_weights::desc &desc) {
     auto engine = CpuEngine::Get()->get_engine();
     try {
+      // MKL-DNN introduced padded formats since 0.15 which require more memory
+      // compared to the actual size of the tensor. Currently, MKL-DNN operators
+      // still reuse memory from memory planning, so here we need to select a
+      // suboptimal kernel for computation that has the expected memory size requirements
       auto conv_pd = std::make_shared<mkldnn::convolution_backward_weights::primitive_desc>(
           desc, engine, fwd_pd);
       while (conv_pd->diff_dst_desc().get_size() != GetArraySize(output) ||
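The comment added three times above (and reworded in the deconvolution file below) refers to the kernel-selection loop that follows it, which is partly collapsed in this view: the code keeps advancing through oneDNN's implementation list until the primitive's descriptors fit the buffers MXNet already planned. Here is a hedged, self-contained sketch of that pattern; the function name and the planned_dst_bytes parameter are illustrative, and only dst_desc(), get_size() and next_impl() from the oneDNN v1.x primitive_desc API are assumed.

#include <stdexcept>
#include <mkldnn.hpp>  // assumed oneDNN v1.x compatibility header

// Illustrative helper, not MXNet code: step through the implementations of an
// already-created convolution primitive_desc until its destination descriptor
// needs exactly the number of bytes that memory planning reserved for the output.
inline void PickImplMatchingPlannedSize(mkldnn::convolution_forward::primitive_desc *conv_pd,
                                        size_t planned_dst_bytes) {
  while (conv_pd->dst_desc().get_size() != planned_dst_bytes) {
    // next_impl() switches to the next (typically slower) implementation and
    // returns false once no further implementation is available.
    if (!conv_pd->next_impl()) {
      throw std::runtime_error("no oneDNN implementation matches the planned output size");
    }
  }
}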
21 changes: 9 additions & 12 deletions src/operator/nn/mkldnn/mkldnn_deconvolution.cc
@@ -53,10 +53,9 @@ std::shared_ptr<mkldnn::convolution_forward::primitive_desc> GetDeconvBwd_(
     const mkldnn::engine &engine, const mkldnn::memory::dims &strides,
     const mkldnn::memory::dims &padding, const mkldnn::memory::dims &dilates) {
   // MKL-DNN introduced padded formats since 0.15 which require more memory
-  // for computation compared with the actual tensor size. Currently, MKL-DNN
-  // operators are still reusing those memory from memory planning and the
-  // memory size may smaller than what MKL-DNN kernels require. So here we need
-  // select suboptimal kernel for computation according to tensor sizes.
+  // compared to the actual size of the tensor. Currently, MKL-DNN operators
+  // still reuse memory from memory planning, so here we need to select a
+  // suboptimal kernel for computation that has the expected memory size requirements
   if (!has_bias) {
     mkldnn::convolution_forward::desc desc(
         mkldnn::prop_kind::forward_training,
@@ -117,10 +116,9 @@ GetDeconvFwdImpl(const DeconvolutionParam &param, const NDArray &data,
       std::make_shared<mkldnn::convolution_backward_data::primitive_desc>(
           desc, engine, *bwd_pd);
   // MKL-DNN introduced padded formats since 0.15 which require more memory
-  // for computation compared with the actual tensor size. Currently, MKL-DNN
-  // operators are still reusing those memory from memory planning and the
-  // memory size may smaller than what MKL-DNN kernels require. So here we need
-  // select suboptimal kernel for computation according to tensor sizes.
+  // compared to the actual size of the tensor. Currently, MKL-DNN operators
+  // still reuse memory from memory planning, so here we need to select a
+  // suboptimal kernel for computation that has the expected memory size requirements
   while (deconv_pd->diff_dst_desc().get_size() != GetMemDescSize(data_md) ||
          deconv_pd->diff_src_desc().get_size() != GetMemDescSize(out_md) ||
          deconv_pd->weights_desc().get_size() != GetMemDescSize(weight_md)) {
@@ -176,10 +174,9 @@ GetDeconvBwdWeightsImpl(
   dilate[1] = param.dilate[1] - 1;
 
   // MKL-DNN introduced padded formats since 0.15 which require more memory
-  // for computation compared with the actual tensor size. Currently, MKL-DNN
-  // operators are still reusing those memory from memory planning and the
-  // memory size may smaller than what MKL-DNN kernels require. So here we need
-  // select suboptimal kernel for computation according to tensor sizes.
+  // compared to the actual size of the tensor. Currently, MKL-DNN operators
+  // still reuse memory from memory planning, so here we need to select a
+  // suboptimal kernel for computation that has the expected memory size requirements
   if (!has_bias) {
     mkldnn::convolution_backward_weights::desc desc(
         mkldnn::algorithm::convolution_direct, out_md, weight_md, data_md,
32 changes: 32 additions & 0 deletions tests/python/mkl/test_mkldnn.py
@@ -692,6 +692,38 @@ def check_concat_training(stype):
     for stype in stypes:
         check_concat_training(stype)
 
+
+@with_seed()
+def test_concat_blocked():
+    ctx = mx.cpu()
+    axis = 1
+    filters = 32  # must be a power of 2 and >= 16
+    kernel = (3, 3)
+    for in_dim_size in range(1, 17):  # check cases with and without padding
+        in_shape = (1, in_dim_size, 64, 64)
+        in_data = mx.nd.random.uniform(-1, 1, in_shape, ctx=ctx)
+        conv_weights = mx.nd.random.uniform(-1, 1, (filters, in_shape[1], kernel[0], kernel[1]), ctx=ctx)
+
+        def calc_output_of_layer(layer):
+            ex = layer.simple_bind(ctx, x=in_shape)
+            in_data.copyto(ex.arg_arrays[0])
+            conv_weights.copyto(ex.arg_arrays[1])
+            return ex.forward()[0].asnumpy()
+
+        x = mx.sym.Variable('x')
+        w = mx.sym.Variable('w')
+        # convolution, so a blocked format is selected
+        conv = mx.sym.Convolution(data=x, weight=w, num_filter=filters, kernel=kernel, pad=(1, 1), no_bias=True)
+        conc = mx.sym.concat(conv, x, dim=axis)
+
+        # first calculate the output of the convolution to determine ref_out
+        conv_out = calc_output_of_layer(conv)
+        ref_out = np.concatenate((conv_out, in_data.asnumpy()), axis=axis)
+
+        out = calc_output_of_layer(conc)
+        assert_almost_equal(out, ref_out)
+
+
 @with_seed()
 def test_elemwise_add():
     def ref_add(a, b):
