From 36128ecbe37bb2667f15d132e6b0c07eec6a8eac Mon Sep 17 00:00:00 2001 From: Piotr Paturej Date: Mon, 6 Jun 2022 10:53:47 +0200 Subject: [PATCH 01/17] Piotrek's changes for pad3d --- .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 78 +++++++++++ paddle/fluid/operators/pad3d_op.cc | 28 +++- .../unittests/mkldnn/test_pad3d_mkldnn_op.py | 126 ++++++++++++++++++ 3 files changed, 227 insertions(+), 5 deletions(-) create mode 100644 paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc create mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc new file mode 100644 index 0000000000000..07d26bd223dc9 --- /dev/null +++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc @@ -0,0 +1,78 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/utils.h" +#include "paddle/fluid/platform/mkldnn_reuse.h" + +namespace paddle { +namespace operators { + +using paddle::framework::Tensor; + +template +class Pad3dMKLDNNKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + this->RunKernel(ctx); + } + + void RunKernel(const framework::ExecutionContext& ctx) const { + const auto& dev_ctx = + ctx.template device_context(); + + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + + std::vector paddings = ctx.Attr>("paddings"); + + const T& pad_value = static_cast(ctx.Attr("value")); + const std::string& mode = ctx.Attr("mode"); + const std::string& data_format = ctx.Attr("data_format"); + + auto src_tz = phi::vectorize(input->dims()); + auto dst_tz = phi::vectorize(output->dims()); + + auto paddle_dt = framework::TransToProtoVarType(input->dtype()); + dnnl::memory::data_type onednn_dt = framework::ToMKLDNNDataType(paddle_dt); + + auto dims = phi::vectorize(output->dims()); + + } +}; +template +class Pad3dGradMKLDNNKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + this->RunKernel(ctx); + } + + void RunKernel(const framework::ExecutionContext& ctx) const { + const auto& dev_ctx = + ctx.template device_context(); + const auto& onednn_engine = dev_ctx.GetEngine(); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_KERNEL(pad3d, MKLDNN, paddle::platform::CPUPlace, + ops::Pad3dMKLDNNKernel, + ops::Pad3dMKLDNNKernel, + ops::Pad3dMKLDNNKernel, + ops::Pad3dMKLDNNKernel); + +REGISTER_OP_KERNEL(pad3d_grad, MKLDNN, paddle::platform::CPUPlace, + ops::Pad3dGradMKLDNNKernel, + ops::Pad3dGradMKLDNNKernel); diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index b7a638d7ce930..c6a241d0dca92 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -34,8 +34,15 @@ class Pad3dOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); + auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, ctx.GetPlace(), + framework::DataLayout::kMKLDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -78,6 +85,10 @@ class Pad3dOpMaker : public framework::OpProtoAndCheckerMaker { "An optional string from: \"NDHWC\", \"NCDHW\". " "Defaults to \"NDHWC\". Specify the data format of the input data.") .SetDefault("NCDHW"); + AddAttr( + "use_mkldnn", + "(bool, default false) Indicates if MKL-DNN kernel will be used") + .SetDefault(false); AddComment(R"DOC( Pad3d Operator. Pad 3-d images according to 'paddings' and 'mode'. @@ -153,9 +164,16 @@ class Pad3dOpGrad : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - ctx.GetPlace()); + auto input_data_type = OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")); +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, ctx.GetPlace(), + framework::DataLayout::kMKLDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py new file mode 100644 index 0000000000000..a8688db7f2213 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py @@ -0,0 +1,126 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +from termios import N_PPP # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from paddle.fluid.tests.unittests.op_test import OpTest +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import paddle.fluid.core as core + +from paddle.fluid import Program, program_guard, Executor, default_main_program + + +class TestPad3dOneDNNOp(OpTest): + def setUp(self): + paddle.enable_static() + self.value = 0.0 + self.initTestCase() + self.op_type = "pad3d" + self.python_api = paddle.nn.functional.pad + self.inputs = {'X': np.random.random(self.shape).astype("float32")} + self.attrs = {'use_mkldnn': True} + if self.variable_paddings: + self.attrs['paddings'] = [] + self.inputs['Paddings'] = np.array( + self.paddings).flatten().astype("int32") + else: + self.attrs['paddings'] = np.array( + self.paddings).flatten().astype("int32") + self.attrs['value'] = self.value + self.attrs['mode'] = self.mode + self.attrs['data_format'] = self.data_format + if self.data_format == "NCDHW": + paddings = [ + (0, 0), + (0, 0), + (self.paddings[4], self.paddings[5]), + (self.paddings[2], self.paddings[3]), + (self.paddings[0], self.paddings[1]), + ] + else: + paddings = [ + (0, 0), + (self.paddings[4], self.paddings[5]), + (self.paddings[2], self.paddings[3]), + (self.paddings[0], self.paddings[1]), + (0, 0), + ] + if self.mode == "constant": + out = np.pad(self.inputs['X'], + paddings, + mode=self.mode, + constant_values=self.value) + elif self.mode == "reflect": + out = np.pad(self.inputs['X'], paddings, mode=self.mode) + elif self.mode == "replicate": + out = np.pad(self.inputs['X'], paddings, mode="edge") + elif self.mode == "circular": + out = np.pad(self.inputs['X'], paddings, mode="wrap") + self.outputs = {'Out': out} + + def test_check_output(self): + import sys + np.set_printoptions(threshold=sys.maxsize) + print(self.inputs["X"].shape) + print(self.outputs["Out"].shape) + # print("\n\n\n") + # print("inputs", self.inputs["X"]) + # print("\n\n\n") + # print("outputs", self.outputs["Out"]) + # print("\n\n\n") + self.check_output() + + # def test_check_grad_normal(self): + # self.check_grad(['X'], 'Out') + + def initTestCase(self): + self.shape = (2, 3, 4, 5, 6) + self.paddings = [0, 0, 0, 0, 0, 0] + self.mode = "constant" + self.data_format = "NCDHW" + self.pad_value = 0.0 + self.variable_paddings = False + + +class TestCase1(TestPad3dOneDNNOp): + def initTestCase(self): + self.shape = (2, 3, 4, 5, 6) + self.paddings = [0, 1, 2, 3, 4, 5] + self.mode = "constant" + self.data_format = "NCDHW" + self.value = 1.0 + self.variable_paddings = False + + + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() From 4e8ae0340a65eb871801a374ebb832c93d47d7ba Mon Sep 17 00:00:00 2001 From: jakpiase Date: Mon, 27 Jun 2022 15:41:31 +0200 Subject: [PATCH 02/17] my changes --- .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 17 ----------------- paddle/fluid/operators/pad3d_op.cc | 14 +++++++------- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc index 07d26bd223dc9..f0502d55a7a51 100644 --- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc @@ -50,19 +50,6 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { } }; -template -class Pad3dGradMKLDNNKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - this->RunKernel(ctx); - } - - void RunKernel(const framework::ExecutionContext& ctx) const { - const auto& dev_ctx = - ctx.template device_context(); - const auto& onednn_engine = dev_ctx.GetEngine(); - } -}; } // namespace operators } // namespace paddle @@ -72,7 +59,3 @@ REGISTER_OP_KERNEL(pad3d, MKLDNN, paddle::platform::CPUPlace, ops::Pad3dMKLDNNKernel, ops::Pad3dMKLDNNKernel, ops::Pad3dMKLDNNKernel); - -REGISTER_OP_KERNEL(pad3d_grad, MKLDNN, paddle::platform::CPUPlace, - ops::Pad3dGradMKLDNNKernel, - ops::Pad3dGradMKLDNNKernel); diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index c6a241d0dca92..6632b82e4757d 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -35,13 +35,13 @@ class Pad3dOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); -#ifdef PADDLE_WITH_MKLDNN - if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { - return framework::OpKernelType(input_data_type, ctx.GetPlace(), - framework::DataLayout::kMKLDNN, - framework::LibraryType::kMKLDNN); - } -#endif +// #ifdef PADDLE_WITH_MKLDNN +// if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { +// return framework::OpKernelType(input_data_type, ctx.GetPlace(), +// framework::DataLayout::kMKLDNN, +// framework::LibraryType::kMKLDNN); +// } +// #endif return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; From 78c45ee77dbdb7695b4bbfa028667187b8851cf9 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Mon, 27 Jun 2022 21:56:52 +0200 Subject: [PATCH 03/17] first version of pad3d, single copy, unnecessary reads --- .../mkldnn/fill_constant_mkldnn_op.cc | 33 +--------- .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 61 +++++++++++++++---- paddle/fluid/operators/pad3d_op.cc | 15 ++--- paddle/fluid/platform/mkldnn_reuse.h | 29 +++++++++ 4 files changed, 89 insertions(+), 49 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc index 615f43bb32c0f..a72ddaa6511e7 100644 --- a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc @@ -20,35 +20,6 @@ namespace operators { using framework::Tensor; -template -class FillConstantMKLDNNHandler - : public platform::MKLDNNHandlerNoCachingT { - public: - FillConstantMKLDNNHandler(Tensor* out, - dnnl::engine engine, - platform::Place cpu_place) - : platform::MKLDNNHandlerNoCachingT(engine, cpu_place) { - const auto src0_md = - dnnl::memory::desc({out->numel(), sizeof(T)}, - platform::MKLDNNGetDataType(), - dnnl::memory::format_tag::ab); - - dnnl::primitive_attr attrs; - attrs.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f}); - - this->AcquireForwardPrimitiveDescriptor( - attrs, dnnl::algorithm::binary_add, src0_md, src1_md, src0_md); - } - - static const dnnl::memory::desc src1_md; -}; - -template -const dnnl::memory::desc FillConstantMKLDNNHandler::src1_md( - {1, sizeof(T)}, - platform::MKLDNNGetDataType(), - dnnl::memory::format_tag::ab); - template class FillConstantMKLDNNKernel : public framework::OpKernel { public: @@ -67,10 +38,10 @@ class FillConstantMKLDNNKernel : public framework::OpKernel { auto shape = GetShape(ctx); out->Resize(shape); - FillConstantMKLDNNHandler handler(out, dnnl_engine, ctx.GetPlace()); + platform::FillConstantMKLDNNHandler handler(out, dnnl_engine, ctx.GetPlace()); dnnl::memory constant_value_memory = - dnnl::memory(FillConstantMKLDNNHandler::src1_md, + dnnl::memory(platform::FillConstantMKLDNNHandler::src1_md, dnnl_engine, reinterpret_cast(&fill_value)); diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc index f0502d55a7a51..dccc238eafc81 100644 --- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc @@ -30,24 +30,63 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { void RunKernel(const framework::ExecutionContext& ctx) const { const auto& dev_ctx = ctx.template device_context(); + const auto& onednn_engine = dev_ctx.GetEngine(); - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); - std::vector paddings = ctx.Attr>("paddings"); + std::vector paddings(ctx.Attr>("paddings")); - const T& pad_value = static_cast(ctx.Attr("value")); - const std::string& mode = ctx.Attr("mode"); - const std::string& data_format = ctx.Attr("data_format"); + T pad_value = static_cast(ctx.Attr("value")); - auto src_tz = phi::vectorize(input->dims()); - auto dst_tz = phi::vectorize(output->dims()); + auto x_tz = phi::vectorize(x->dims()); + auto out_tz = phi::vectorize(out->dims()); - auto paddle_dt = framework::TransToProtoVarType(input->dtype()); - dnnl::memory::data_type onednn_dt = framework::ToMKLDNNDataType(paddle_dt); + auto paddle_dtype = framework::TransToProtoVarType(x->dtype()); - auto dims = phi::vectorize(output->dims()); + platform::FillConstantMKLDNNHandler handler(out, onednn_engine, ctx.GetPlace()); + dnnl::memory constant_value_memory = + dnnl::memory(platform::FillConstantMKLDNNHandler::src1_md, + onednn_engine, + reinterpret_cast(&pad_value)); + + auto src0_memory_p = handler.AcquireDstMemory(out); + auto fill_constant_p = handler.AcquireForwardPrimitive(); + + auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + fill_constant_p->execute(astream, + {{DNNL_ARG_SRC_0, *src0_memory_p}, + {DNNL_ARG_SRC_1, constant_value_memory}, + {DNNL_ARG_DST, *src0_memory_p}}); + astream.wait(); + + // fill_constant handler flattens memory, so we have to revert it now + const dnnl::memory::desc real_out_md(out_tz, platform::MKLDNNGetDataType(), platform::GetPlainMKLDNNFormat(out_tz.size())); + + platform::ReorderMKLDNNHandler reorder_handler( + x_tz, + paddle_dtype, + framework::ToMKLDNNDataType(paddle_dtype), + onednn_engine); + + auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(x->mem_desc(), platform::to_void_cast(x->data())); + + auto reorder_dst_memory_p = std::make_shared(real_out_md, onednn_engine, out->data()); + + std::vector offsets(5, 0); // NCDHW + for(int i=0; i<3; ++i) { + offsets[4-i] = paddings[2*i]; + } + + auto slice_mem_p = reorder_handler.AcquireSubmemory(x_tz, offsets, reorder_dst_memory_p); + + auto reorder_p = + reorder_handler.AcquireReorder(slice_mem_p, reorder_src_memory_p); + reorder_p->execute(astream, *reorder_src_memory_p, *slice_mem_p); + astream.wait(); + + out->set_mem_desc(real_out_md); } }; } // namespace operators diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index 27df6ae1aecb8..a09dc5c7817d0 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -35,13 +35,14 @@ class Pad3dOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); -// #ifdef PADDLE_WITH_MKLDNN -// if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { -// return framework::OpKernelType(input_data_type, ctx.GetPlace(), -// framework::DataLayout::kMKLDNN, -// framework::LibraryType::kMKLDNN); -// } -// #endif +#ifdef PADDLE_WITH_MKLDNN + // currently only constant mode and non-blocked layouts are supported for oneDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type) && ctx.Attr("mode") == "constant" && ctx.Input("X")->mem_desc().data.format_desc.blocking.inner_nblks == 0) { + return framework::OpKernelType(input_data_type, ctx.GetPlace(), + framework::DataLayout::kMKLDNN, + framework::LibraryType::kMKLDNN); + } +#endif return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 05ebedf611a4b..c34a4a069f14f 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -912,6 +912,35 @@ class MatMulV2MKLDNNHandler } }; +template +class FillConstantMKLDNNHandler + : public platform::MKLDNNHandlerNoCachingT { + public: + FillConstantMKLDNNHandler(Tensor* out, + dnnl::engine engine, + platform::Place cpu_place) + : platform::MKLDNNHandlerNoCachingT(engine, cpu_place) { + const auto src0_md = + dnnl::memory::desc({out->numel(), sizeof(T)}, + platform::MKLDNNGetDataType(), + dnnl::memory::format_tag::ab); + + dnnl::primitive_attr attrs; + attrs.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f}); + + this->AcquireForwardPrimitiveDescriptor( + attrs, dnnl::algorithm::binary_add, src0_md, src1_md, src0_md); + } + + static const dnnl::memory::desc src1_md; +}; + +template +const dnnl::memory::desc FillConstantMKLDNNHandler::src1_md( + {1, sizeof(T)}, + platform::MKLDNNGetDataType(), + dnnl::memory::format_tag::ab); + template class ActivationMKLDNNHandler : public MKLDNNHandlerNoCachingT Date: Wed, 29 Jun 2022 00:10:41 +0200 Subject: [PATCH 04/17] optimized pad3d kernel --- .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 127 ++++++++++++++---- 1 file changed, 101 insertions(+), 26 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc index dccc238eafc81..eb0437513c619 100644 --- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc @@ -15,11 +15,48 @@ limitations under the License. */ #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/mkldnn_reuse.h" +#define PAD3D_SIZE 6 + namespace paddle { namespace operators { using paddle::framework::Tensor; + +/* +Pad3D is done by using up to 7 reorders. Following example is done +on 2D example for simplicity, but it is straightforward to extend it to 3D case. + +Let us consider following example: + + N C H W L R T B +X dims = (1, 1, 3, 3), paddings = (1, 2, 3, 4) in order Left, Right, Top, Bottom + +We have to copy the X tensor into Out tensor, but except from that we have to fill the rest of the memory with additional padding. +To avoid looping through the whole Out memory two times, only these parts of Out memory that won't store X's memory are filled with pad value. +That behavior is achieved by using oneDNN's submemory descriptors which allows us to set offsets for each dimension and skip some parts of the memory. +For 2D case up to 5 reorders will be used in Pad3D kernel(if padding=0 reorder is skipped). +In the following example i'th number means, that this part of memory was filled by i'th reorder. 4'th reorder is copying X memory into Out memory. +i&j means that both i'th and j'th reorder will set the padding at that location: + + INDEX + | 0 1 2 3 4 5 + |_______________________ + 0 |0&2 2 2 2 1&2 1&2 + 1 |0&2 2 2 2 1&2 1&2 +I 2 |0&2 2 2 2 1&2 1&2 +N 3 | 0 4 4 4 1 1 +D 4 | 0 4 4 4 1 1 +E 5 | 0 4 4 4 1 1 +X 6 |0&3 3 3 3 1&3 1&3 + 7 |0&3 3 3 3 1&3 1&3 + 8 |0&3 3 3 3 1&3 1&3 + 9 |0&3 3 3 3 1&3 1&3 + +Since oneDNN's reorder cannot set the pad value to the border memory, we have to prefill Out's memory and use it as a temporary buffer, which later is copied +into the rest of Out's memory. At the end last reorder is done which is copying X memory into Out memory. + +*/ template class Pad3dMKLDNNKernel : public framework::OpKernel { public: @@ -31,6 +68,7 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); + auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); @@ -44,26 +82,6 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { auto paddle_dtype = framework::TransToProtoVarType(x->dtype()); - platform::FillConstantMKLDNNHandler handler(out, onednn_engine, ctx.GetPlace()); - - dnnl::memory constant_value_memory = - dnnl::memory(platform::FillConstantMKLDNNHandler::src1_md, - onednn_engine, - reinterpret_cast(&pad_value)); - - auto src0_memory_p = handler.AcquireDstMemory(out); - auto fill_constant_p = handler.AcquireForwardPrimitive(); - - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); - fill_constant_p->execute(astream, - {{DNNL_ARG_SRC_0, *src0_memory_p}, - {DNNL_ARG_SRC_1, constant_value_memory}, - {DNNL_ARG_DST, *src0_memory_p}}); - astream.wait(); - - // fill_constant handler flattens memory, so we have to revert it now - const dnnl::memory::desc real_out_md(out_tz, platform::MKLDNNGetDataType(), platform::GetPlainMKLDNNFormat(out_tz.size())); - platform::ReorderMKLDNNHandler reorder_handler( x_tz, paddle_dtype, @@ -71,8 +89,25 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(x->mem_desc(), platform::to_void_cast(x->data())); + auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(out, out_tz, platform::GetPlainMKLDNNFormat(5), ctx.GetPlace()); + + T* out_ptr = out->data(); + std::fill(out_ptr, out_ptr+CalculatePrefillElems(out_tz, paddings), pad_value); + + // paddings are in order: left, right, top, bottom, front, back + for(int i = 0; i < 6; ++i) { + if(paddings[i] != 0) { + std::vector offsets(5, 0); + std::vector chunk_tz(out_tz.begin(), out_tz.end()); - auto reorder_dst_memory_p = std::make_shared(real_out_md, onednn_engine, out->data()); + chunk_tz[4 - i / 2] = paddings[i]; + if (i % 2 == 1) { + offsets[4 - i / 2] = paddings[i - 1] + x_tz[4 - i / 2]; + } + + FillPartOfPadding(paddle_dtype, onednn_engine, out_ptr, reorder_dst_memory_p, chunk_tz, offsets); + } + } std::vector offsets(5, 0); // NCDHW for(int i=0; i<3; ++i) { @@ -86,7 +121,50 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { reorder_p->execute(astream, *reorder_src_memory_p, *slice_mem_p); astream.wait(); - out->set_mem_desc(real_out_md); + out->set_mem_desc(reorder_dst_memory_p->get_desc()); + } + + int64_t CalculatePrefillElems(const std::vector& out_tz, const std::vector& paddings) const { + int64_t max_elems = 0; + + int64_t independent_dims = out_tz[0] * out_tz[1]; + + for(int i = 0; i < 3; ++i) { + int64_t elems = std::max(paddings[2*i], paddings[2*i+1]); + for(int j = 0; j < 3; ++j) { + if(j != i) { + elems *= out_tz[4 - j]; + } + } + + if(max_elems < elems) { + max_elems = elems; + } + } + return independent_dims * max_elems; + } + + void FillPartOfPadding(framework::proto::VarType::Type paddle_dtype, + const dnnl::engine& onednn_engine, + T* prefilled_mem_ptr, + const std::shared_ptr&out_mem_p, + std::vector& chunk_tz, + const std::vector& offsets) const { + auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + + dnnl::memory::desc prefilled_mem_desc(chunk_tz, platform::MKLDNNGetDataType(), platform::GetPlainMKLDNNFormat(5)); + auto prefilled_mem_p = std::make_shared(prefilled_mem_desc, onednn_engine, prefilled_mem_ptr); + + platform::ReorderMKLDNNHandler reorder_handler( + chunk_tz, + paddle_dtype, + framework::ToMKLDNNDataType(paddle_dtype), + onednn_engine); + + auto out_slice_mem_p = reorder_handler.AcquireSubmemory(chunk_tz, offsets, out_mem_p); + auto reorder_p = + reorder_handler.AcquireReorder(out_slice_mem_p, prefilled_mem_p); + reorder_p->execute(astream, *prefilled_mem_p, *out_slice_mem_p); } }; } // namespace operators @@ -94,7 +172,4 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_KERNEL(pad3d, MKLDNN, paddle::platform::CPUPlace, - ops::Pad3dMKLDNNKernel, - ops::Pad3dMKLDNNKernel, - ops::Pad3dMKLDNNKernel, - ops::Pad3dMKLDNNKernel); + ops::Pad3dMKLDNNKernel); From e0410f286cc235c122706b4147aa25cccda7e482 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Wed, 29 Jun 2022 00:11:21 +0200 Subject: [PATCH 05/17] test upadte --- .../unittests/mkldnn/test_pad3d_mkldnn_op.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py index a8688db7f2213..a93edc9baaf9b 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py @@ -41,7 +41,7 @@ class TestPad3dOneDNNOp(OpTest): def setUp(self): paddle.enable_static() - self.value = 0.0 + self.value = 1.0 self.initTestCase() self.op_type = "pad3d" self.python_api = paddle.nn.functional.pad @@ -102,25 +102,14 @@ def test_check_output(self): # self.check_grad(['X'], 'Out') def initTestCase(self): - self.shape = (2, 3, 4, 5, 6) - self.paddings = [0, 0, 0, 0, 0, 0] + self.shape = (6, 2, 3, 4, 5) + self.paddings = [2, 3, 4, 5, 0, 0] self.mode = "constant" self.data_format = "NCDHW" - self.pad_value = 0.0 + self.pad_value = 1.0 self.variable_paddings = False -class TestCase1(TestPad3dOneDNNOp): - def initTestCase(self): - self.shape = (2, 3, 4, 5, 6) - self.paddings = [0, 1, 2, 3, 4, 5] - self.mode = "constant" - self.data_format = "NCDHW" - self.value = 1.0 - self.variable_paddings = False - - - if __name__ == '__main__': paddle.enable_static() unittest.main() From 84e30fe1f3c780c6dab1c9233e11ba15f1151bbb Mon Sep 17 00:00:00 2001 From: jakpiase Date: Wed, 29 Jun 2022 17:58:56 +0200 Subject: [PATCH 06/17] removed magic numbers --- .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 38 +++++++++++-------- paddle/fluid/operators/pad3d_op.cc | 27 +++++++++---- 2 files changed, 43 insertions(+), 22 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc index eb0437513c619..b9c072ecc45ef 100644 --- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc @@ -72,13 +72,18 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); - std::vector paddings(ctx.Attr>("paddings")); - T pad_value = static_cast(ctx.Attr("value")); - auto x_tz = phi::vectorize(x->dims()); - auto out_tz = phi::vectorize(out->dims()); + std::vector x_tz = phi::vectorize(x->dims()); + // due to the need of supporting NDHWC, inferring out shape + // must be done inside the kernel + std::vector out_tz(x_tz); + + for(int i = 0; i < paddings.size() / 2; ++i) { + out_tz[out_tz.size() - 1 - i] += paddings[2 * i] + paddings[2 * i + 1]; + } + out->Resize(phi::make_ddim(out_tz)); auto paddle_dtype = framework::TransToProtoVarType(x->dtype()); @@ -89,29 +94,32 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(x->mem_desc(), platform::to_void_cast(x->data())); - auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(out, out_tz, platform::GetPlainMKLDNNFormat(5), ctx.GetPlace()); + auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(out, out_tz, platform::GetPlainMKLDNNFormat(out_tz.size()), ctx.GetPlace()); + // to avoid allocating new temporary memory, Out's memory is used as a tmp + // buffer for storing a contignuous memory consisting of pad_value, which + // later is used as a SRC for reorders that are filling Out with padding T* out_ptr = out->data(); std::fill(out_ptr, out_ptr+CalculatePrefillElems(out_tz, paddings), pad_value); // paddings are in order: left, right, top, bottom, front, back - for(int i = 0; i < 6; ++i) { + for(int i = 0; i < paddings.size(); ++i) { if(paddings[i] != 0) { - std::vector offsets(5, 0); + std::vector offsets(out_tz.size(), 0); std::vector chunk_tz(out_tz.begin(), out_tz.end()); - chunk_tz[4 - i / 2] = paddings[i]; + chunk_tz[out_tz.size() - 1 - i / 2] = paddings[i]; if (i % 2 == 1) { - offsets[4 - i / 2] = paddings[i - 1] + x_tz[4 - i / 2]; + offsets[out_tz.size() - 1 - i / 2] = paddings[i - 1] + x_tz[out_tz.size() - 1 - i / 2]; } FillPartOfPadding(paddle_dtype, onednn_engine, out_ptr, reorder_dst_memory_p, chunk_tz, offsets); } } - std::vector offsets(5, 0); // NCDHW - for(int i=0; i<3; ++i) { - offsets[4-i] = paddings[2*i]; + std::vector offsets(out_tz.size(), 0); + for(int i=0; i { int64_t independent_dims = out_tz[0] * out_tz[1]; - for(int i = 0; i < 3; ++i) { + for(int i = 0; i < paddings.size() / 2; ++i) { int64_t elems = std::max(paddings[2*i], paddings[2*i+1]); - for(int j = 0; j < 3; ++j) { + for(int j = 0; j < paddings.size() / 2; ++j) { if(j != i) { - elems *= out_tz[4 - j]; + elems *= out_tz[out_tz.size() - 1 - j]; } } diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index a09dc5c7817d0..db1cd8400d9bf 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -45,6 +45,25 @@ class Pad3dOp : public framework::OperatorWithKernel { #endif return framework::OpKernelType(input_data_type, ctx.GetPlace()); } + +framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, + const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const { +#ifdef PADDLE_WITH_MKLDNN + if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && + (tensor.layout() != framework::DataLayout::kMKLDNN)) { + auto attrs = Attrs(); + auto ar = paddle::framework::AttrReader(attrs); + const std::string data_format = ar.Get("data_format"); + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), + framework::StringToDataLayout(data_format)); + } +#endif + return framework::OpKernelType( + expected_kernel_type.data_type_, tensor.place(), tensor.layout()); +} }; class Pad3dOpMaker : public framework::OpProtoAndCheckerMaker { @@ -169,13 +188,7 @@ class Pad3dOpGrad : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { auto input_data_type = OperatorWithKernel::IndicateVarDataType( ctx, framework::GradVarName("Out")); -#ifdef PADDLE_WITH_MKLDNN - if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { - return framework::OpKernelType(input_data_type, ctx.GetPlace(), - framework::DataLayout::kMKLDNN, - framework::LibraryType::kMKLDNN); - } -#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; From 62e3dc6168ca452eb94dab29c5b83bb4643a01f7 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Thu, 30 Jun 2022 18:42:41 +0200 Subject: [PATCH 07/17] added support for pad2d --- .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 158 +++++++++++------- paddle/fluid/operators/pad2d_op.cc | 39 ++++- paddle/fluid/operators/pad3d_op.cc | 44 ++--- .../ir/inference/test_mkldnn_pad2d_op.py | 74 ++++++++ .../ir/inference/test_mkldnn_pad3d_op.py | 75 +++++++++ .../unittests/mkldnn/test_pad3d_mkldnn_op.py | 115 ------------- 6 files changed, 308 insertions(+), 197 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py delete mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc index b9c072ecc45ef..05bb3830e5fc6 100644 --- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc @@ -14,37 +14,37 @@ limitations under the License. */ #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/mkldnn_reuse.h" - -#define PAD3D_SIZE 6 - namespace paddle { namespace operators { -using paddle::framework::Tensor; - +using framework::Tensor; /* Pad3D is done by using up to 7 reorders. Following example is done -on 2D example for simplicity, but it is straightforward to extend it to 3D case. +on 2D data for simplicity, but it is straightforward to extend it to 3D case. Let us consider following example: N C H W L R T B -X dims = (1, 1, 3, 3), paddings = (1, 2, 3, 4) in order Left, Right, Top, Bottom - -We have to copy the X tensor into Out tensor, but except from that we have to fill the rest of the memory with additional padding. -To avoid looping through the whole Out memory two times, only these parts of Out memory that won't store X's memory are filled with pad value. -That behavior is achieved by using oneDNN's submemory descriptors which allows us to set offsets for each dimension and skip some parts of the memory. -For 2D case up to 5 reorders will be used in Pad3D kernel(if padding=0 reorder is skipped). -In the following example i'th number means, that this part of memory was filled by i'th reorder. 4'th reorder is copying X memory into Out memory. -i&j means that both i'th and j'th reorder will set the padding at that location: - - INDEX +X_dims = (1, 1, 3, 3), paddings = (1, 2, 3, 4) in order Left, Right, Top, Bottom + +We have to copy the X tensor into Out tensor, but except from that we have to +fill the rest of the memory with an additional padding. To avoid looping through +the whole Out memory two times, only these parts of Out memory that won't store +X's memory are filled with pad value. That behavior is achieved by using +oneDNN's submemory descriptors which allows us to set offsets for each dimension +and skip some parts of the memory. For 2D case up to 5 reorders will be used in +Pad3D kernel(if padding=0 reorder is skipped). In the following example i'th +number means, that this part of memory was filled by i'th reorder. 4'th reorder +is copying X memory into Out memory. i&j means that both i'th and j'th reorder +will set the padding at that location: + + INDEX | 0 1 2 3 4 5 |_______________________ 0 |0&2 2 2 2 1&2 1&2 1 |0&2 2 2 2 1&2 1&2 -I 2 |0&2 2 2 2 1&2 1&2 +I 2 |0&2 2 2 2 1&2 1&2 N 3 | 0 4 4 4 1 1 D 4 | 0 4 4 4 1 1 E 5 | 0 4 4 4 1 1 @@ -53,12 +53,14 @@ X 6 |0&3 3 3 3 1&3 1&3 8 |0&3 3 3 3 1&3 1&3 9 |0&3 3 3 3 1&3 1&3 -Since oneDNN's reorder cannot set the pad value to the border memory, we have to prefill Out's memory and use it as a temporary buffer, which later is copied -into the rest of Out's memory. At the end last reorder is done which is copying X memory into Out memory. +Since oneDNN's reorder cannot set the pad value to the memory by itself, we have +to prefill Out's memory and use it as a temporary buffer, which later is copied +into the rest of Out's memory. At the end last reorder is done which copies X +memory into Out memory. */ template -class Pad3dMKLDNNKernel : public framework::OpKernel { +class PadMKLDNNKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { this->RunKernel(ctx); @@ -73,14 +75,23 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); std::vector paddings(ctx.Attr>("paddings")); - T pad_value = static_cast(ctx.Attr("value")); + // pad2d has paddings in order top, bottom, left, right, so we need + // to swap some of them to unify paddings between pad2d and pad3d + if (ctx.Type() == "pad2d") { + std::swap(paddings[0], paddings[2]); + std::swap(paddings[1], paddings[3]); + } + + const std::string pad_attr_name = + ctx.Type() == "pad3d" ? "value" : "pad_value"; + T pad_value = static_cast(ctx.Attr(pad_attr_name)); std::vector x_tz = phi::vectorize(x->dims()); // due to the need of supporting NDHWC, inferring out shape // must be done inside the kernel std::vector out_tz(x_tz); - for(int i = 0; i < paddings.size() / 2; ++i) { + for (int i = 0; i < paddings.size() / 2; ++i) { out_tz[out_tz.size() - 1 - i] += paddings[2 * i] + paddings[2 * i + 1]; } out->Resize(phi::make_ddim(out_tz)); @@ -88,41 +99,56 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { auto paddle_dtype = framework::TransToProtoVarType(x->dtype()); platform::ReorderMKLDNNHandler reorder_handler( - x_tz, - paddle_dtype, - framework::ToMKLDNNDataType(paddle_dtype), - onednn_engine); - - auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(x->mem_desc(), platform::to_void_cast(x->data())); - auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(out, out_tz, platform::GetPlainMKLDNNFormat(out_tz.size()), ctx.GetPlace()); + x_tz, + paddle_dtype, + framework::ToMKLDNNDataType(paddle_dtype), + onednn_engine); + + auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( + x->mem_desc(), platform::to_void_cast(x->data())); + auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory( + out, + out_tz, + platform::GetPlainMKLDNNFormat(out_tz.size()), + ctx.GetPlace()); // to avoid allocating new temporary memory, Out's memory is used as a tmp // buffer for storing a contignuous memory consisting of pad_value, which // later is used as a SRC for reorders that are filling Out with padding T* out_ptr = out->data(); - std::fill(out_ptr, out_ptr+CalculatePrefillElems(out_tz, paddings), pad_value); + std::fill(out_ptr, + out_ptr + CalculateNumOfPrefillElems(out_tz, paddings), + pad_value); // paddings are in order: left, right, top, bottom, front, back - for(int i = 0; i < paddings.size(); ++i) { - if(paddings[i] != 0) { + for (int i = 0; i < paddings.size(); ++i) { + if (paddings[i] != 0) { std::vector offsets(out_tz.size(), 0); std::vector chunk_tz(out_tz.begin(), out_tz.end()); chunk_tz[out_tz.size() - 1 - i / 2] = paddings[i]; if (i % 2 == 1) { - offsets[out_tz.size() - 1 - i / 2] = paddings[i - 1] + x_tz[out_tz.size() - 1 - i / 2]; + offsets[out_tz.size() - 1 - i / 2] = + paddings[i - 1] + x_tz[out_tz.size() - 1 - i / 2]; } - FillPartOfPadding(paddle_dtype, onednn_engine, out_ptr, reorder_dst_memory_p, chunk_tz, offsets); + FillPartOfPadding(paddle_dtype, + onednn_engine, + out_ptr, + reorder_dst_memory_p, + chunk_tz, + offsets); } } - - std::vector offsets(out_tz.size(), 0); - for(int i=0; i offsets(out_tz.size(), 0); + for (int i = 0; i < paddings.size() / 2; ++i) { + offsets[out_tz.size() - 1 - i] = paddings[2 * i]; } - - auto slice_mem_p = reorder_handler.AcquireSubmemory(x_tz, offsets, reorder_dst_memory_p); + + auto slice_mem_p = + reorder_handler.AcquireSubmemory(x_tz, offsets, reorder_dst_memory_p); auto reorder_p = reorder_handler.AcquireReorder(slice_mem_p, reorder_src_memory_p); @@ -132,20 +158,20 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { out->set_mem_desc(reorder_dst_memory_p->get_desc()); } - int64_t CalculatePrefillElems(const std::vector& out_tz, const std::vector& paddings) const { + int64_t CalculateNumOfPrefillElems(const std::vector& out_tz, + const std::vector& paddings) const { int64_t max_elems = 0; - int64_t independent_dims = out_tz[0] * out_tz[1]; - for(int i = 0; i < paddings.size() / 2; ++i) { - int64_t elems = std::max(paddings[2*i], paddings[2*i+1]); - for(int j = 0; j < paddings.size() / 2; ++j) { - if(j != i) { + for (int i = 0; i < paddings.size() / 2; ++i) { + int64_t elems = std::max(paddings[2 * i], paddings[2 * i + 1]); + for (int j = 0; j < paddings.size() / 2; ++j) { + if (j != i) { elems *= out_tz[out_tz.size() - 1 - j]; } } - if(max_elems < elems) { + if (max_elems < elems) { max_elems = elems; } } @@ -155,29 +181,37 @@ class Pad3dMKLDNNKernel : public framework::OpKernel { void FillPartOfPadding(framework::proto::VarType::Type paddle_dtype, const dnnl::engine& onednn_engine, T* prefilled_mem_ptr, - const std::shared_ptr&out_mem_p, - std::vector& chunk_tz, + const std::shared_ptr& out_mem_p, + const std::vector& chunk_tz, const std::vector& offsets) const { auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); - dnnl::memory::desc prefilled_mem_desc(chunk_tz, platform::MKLDNNGetDataType(), platform::GetPlainMKLDNNFormat(5)); - auto prefilled_mem_p = std::make_shared(prefilled_mem_desc, onednn_engine, prefilled_mem_ptr); + dnnl::memory::desc prefilled_mem_desc( + chunk_tz, + platform::MKLDNNGetDataType(), + platform::GetPlainMKLDNNFormat(chunk_tz.size())); + dnnl::memory prefilled_mem( + prefilled_mem_desc, onednn_engine, prefilled_mem_ptr); - platform::ReorderMKLDNNHandler reorder_handler( - chunk_tz, - paddle_dtype, - framework::ToMKLDNNDataType(paddle_dtype), - onednn_engine); + dnnl::memory::desc out_slice_md = + out_mem_p->get_desc().submemory_desc(chunk_tz, {offsets}); + dnnl::memory out_slice_mem( + out_slice_md, onednn_engine, out_mem_p->get_data_handle()); - auto out_slice_mem_p = reorder_handler.AcquireSubmemory(chunk_tz, offsets, out_mem_p); - auto reorder_p = - reorder_handler.AcquireReorder(out_slice_mem_p, prefilled_mem_p); - reorder_p->execute(astream, *prefilled_mem_p, *out_slice_mem_p); + auto reorder_p = dnnl::reorder(prefilled_mem, out_slice_mem); + reorder_p.execute(astream, prefilled_mem, out_slice_mem); } }; } // namespace operators } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_KERNEL(pad3d, MKLDNN, paddle::platform::CPUPlace, - ops::Pad3dMKLDNNKernel); +REGISTER_OP_KERNEL(pad3d, + MKLDNN, + paddle::platform::CPUPlace, + ops::PadMKLDNNKernel); + +REGISTER_OP_KERNEL(pad2d, + MKLDNN, + paddle::platform::CPUPlace, + ops::PadMKLDNNKernel); diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index 72073ed3067c3..e7f0c6507bf70 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -699,8 +699,41 @@ class Pad2dOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { + auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); +#ifdef PADDLE_WITH_MKLDNN + // only constant mode and non-blocked layouts are supported for oneDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type) && + ctx.Attr("mode") == "constant" && + ctx.Input("X") + ->mem_desc() + .data.format_desc.blocking.inner_nblks == 0) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + framework::DataLayout::kMKLDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); + } + + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, + const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const { +#ifdef PADDLE_WITH_MKLDNN + if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && + (tensor.layout() != framework::DataLayout::kMKLDNN)) { + auto attrs = Attrs(); + auto ar = paddle::framework::AttrReader(attrs); + const std::string data_format = ar.Get("data_format"); + return framework::OpKernelType( + expected_kernel_type.data_type_, + tensor.place(), + framework::StringToDataLayout(data_format)); + } +#endif return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); + expected_kernel_type.data_type_, tensor.place(), tensor.layout()); } }; @@ -740,6 +773,10 @@ class Pad2dOpMaker : public framework::OpProtoAndCheckerMaker { "An optional string from: \"NHWC\", \"NCHW\". " "Defaults to \"NHWC\". Specify the data format of the input data.") .SetDefault("NCHW"); + AddAttr( + "use_mkldnn", + "(bool, default false) Indicates if MKL-DNN kernel will be used") + .SetDefault(false); AddComment(R"DOC( Pad2d Operator. Pad 2-d images according to 'paddings' and 'mode'. diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index db1cd8400d9bf..ffbe5ad073aca 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -36,9 +36,14 @@ class Pad3dOp : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); #ifdef PADDLE_WITH_MKLDNN - // currently only constant mode and non-blocked layouts are supported for oneDNN - if (this->CanMKLDNNBeUsed(ctx, input_data_type) && ctx.Attr("mode") == "constant" && ctx.Input("X")->mem_desc().data.format_desc.blocking.inner_nblks == 0) { - return framework::OpKernelType(input_data_type, ctx.GetPlace(), + // only constant mode and non-blocked layouts are supported for oneDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type) && + ctx.Attr("mode") == "constant" && + ctx.Input("X") + ->mem_desc() + .data.format_desc.blocking.inner_nblks == 0) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), framework::DataLayout::kMKLDNN, framework::LibraryType::kMKLDNN); } @@ -46,24 +51,25 @@ class Pad3dOp : public framework::OperatorWithKernel { return framework::OpKernelType(input_data_type, ctx.GetPlace()); } -framework::OpKernelType GetKernelTypeForVar( - const std::string& var_name, - const Tensor& tensor, - const framework::OpKernelType& expected_kernel_type) const { + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, + const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN - if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && - (tensor.layout() != framework::DataLayout::kMKLDNN)) { - auto attrs = Attrs(); - auto ar = paddle::framework::AttrReader(attrs); - const std::string data_format = ar.Get("data_format"); - return framework::OpKernelType(expected_kernel_type.data_type_, - tensor.place(), - framework::StringToDataLayout(data_format)); - } + if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && + (tensor.layout() != framework::DataLayout::kMKLDNN)) { + auto attrs = Attrs(); + auto ar = paddle::framework::AttrReader(attrs); + const std::string data_format = ar.Get("data_format"); + return framework::OpKernelType( + expected_kernel_type.data_type_, + tensor.place(), + framework::StringToDataLayout(data_format)); + } #endif - return framework::OpKernelType( - expected_kernel_type.data_type_, tensor.place(), tensor.layout()); -} + return framework::OpKernelType( + expected_kernel_type.data_type_, tensor.place(), tensor.layout()); + } }; class Pad3dOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py new file mode 100644 index 0000000000000..7cd221e239781 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py @@ -0,0 +1,74 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from auto_scan_test import MkldnnAutoScanTest +from program_config import TensorConfig, ProgramConfig, OpConfig +import numpy as np +from functools import partial +import unittest +from hypothesis import given, reproduce_failure +import hypothesis.strategies as st + + +class TestOneDNNPad2DOp(MkldnnAutoScanTest): + + def is_program_valid(self, program_config: ProgramConfig) -> bool: + # if mode is channel, and in_shape is 1 rank + if len(program_config.inputs['input_data'].shape + ) == 1 and program_config.ops[0].attrs['mode'] == 'channel': + return False + return True + + def sample_program_configs(self, *args, **kwargs): + + def generate_input(*args, **kwargs): + return np.random.random(kwargs['in_shape']).astype(np.float32) + + pad3d_op = OpConfig(type="pad2d", + inputs={"X": ["input_data"]}, + outputs={"Out": ["output_data"]}, + attrs={ + "mode": "constant", + "data_format": kwargs['data_format'], + "paddings": kwargs['paddings'], + }) + + program_config = ProgramConfig( + ops=[pad3d_op], + weights={}, + inputs={ + "input_data": + TensorConfig(data_gen=partial(generate_input, *args, **kwargs)), + }, + outputs=["output_data"]) + + yield program_config + + def sample_predictor_configs(self, program_config): + config = self.create_inference_config(use_mkldnn=True) + yield config, (1e-5, 1e-5) + + @given(data_format=st.sampled_from(['NCHW', 'NHWC']), + in_shape=st.lists(st.integers(min_value=1, max_value=10), + min_size=4, + max_size=4), + paddings=st.lists(st.integers(min_value=0, max_value=3), + min_size=4, + max_size=4)) + def test(self, *args, **kwargs): + self.run_test(quant=False, *args, **kwargs) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py new file mode 100644 index 0000000000000..11df7c41dfbf5 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py @@ -0,0 +1,75 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from auto_scan_test import MkldnnAutoScanTest +from program_config import TensorConfig, ProgramConfig, OpConfig +import numpy as np +from functools import partial +import unittest +from hypothesis import given, reproduce_failure +import hypothesis.strategies as st + + +@reproduce_failure('6.45.0', b'AAEAAAAAAAAAAAAAAQ==') +class TestOneDNNPad3DOp(MkldnnAutoScanTest): + + def is_program_valid(self, program_config: ProgramConfig) -> bool: + # if mode is channel, and in_shape is 1 rank + if len(program_config.inputs['input_data'].shape + ) == 1 and program_config.ops[0].attrs['mode'] == 'channel': + return False + return True + + def sample_program_configs(self, *args, **kwargs): + + def generate_input(*args, **kwargs): + return np.random.random(kwargs['in_shape']).astype(np.float32) + + pad3d_op = OpConfig(type="pad3d", + inputs={"X": ["input_data"]}, + outputs={"Out": ["output_data"]}, + attrs={ + "mode": "constant", + "data_format": kwargs['data_format'], + "paddings": kwargs['paddings'], + }) + + program_config = ProgramConfig( + ops=[pad3d_op], + weights={}, + inputs={ + "input_data": + TensorConfig(data_gen=partial(generate_input, *args, **kwargs)), + }, + outputs=["output_data"]) + + yield program_config + + def sample_predictor_configs(self, program_config): + config = self.create_inference_config(use_mkldnn=True) + yield config, (1e-5, 1e-5) + + @given(data_format=st.sampled_from(['NCDHW', 'NDHWC']), + in_shape=st.lists(st.integers(min_value=1, max_value=10), + min_size=5, + max_size=5), + paddings=st.lists(st.integers(min_value=0, max_value=3), + min_size=6, + max_size=6)) + def test(self, *args, **kwargs): + self.run_test(quant=False, *args, **kwargs) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py deleted file mode 100644 index a93edc9baaf9b..0000000000000 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function -from termios import N_PPP # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -import numpy as np -from paddle.fluid.tests.unittests.op_test import OpTest -import paddle -import paddle.nn as nn -import paddle.nn.functional as F -import paddle.fluid.core as core - -from paddle.fluid import Program, program_guard, Executor, default_main_program - - -class TestPad3dOneDNNOp(OpTest): - def setUp(self): - paddle.enable_static() - self.value = 1.0 - self.initTestCase() - self.op_type = "pad3d" - self.python_api = paddle.nn.functional.pad - self.inputs = {'X': np.random.random(self.shape).astype("float32")} - self.attrs = {'use_mkldnn': True} - if self.variable_paddings: - self.attrs['paddings'] = [] - self.inputs['Paddings'] = np.array( - self.paddings).flatten().astype("int32") - else: - self.attrs['paddings'] = np.array( - self.paddings).flatten().astype("int32") - self.attrs['value'] = self.value - self.attrs['mode'] = self.mode - self.attrs['data_format'] = self.data_format - if self.data_format == "NCDHW": - paddings = [ - (0, 0), - (0, 0), - (self.paddings[4], self.paddings[5]), - (self.paddings[2], self.paddings[3]), - (self.paddings[0], self.paddings[1]), - ] - else: - paddings = [ - (0, 0), - (self.paddings[4], self.paddings[5]), - (self.paddings[2], self.paddings[3]), - (self.paddings[0], self.paddings[1]), - (0, 0), - ] - if self.mode == "constant": - out = np.pad(self.inputs['X'], - paddings, - mode=self.mode, - constant_values=self.value) - elif self.mode == "reflect": - out = np.pad(self.inputs['X'], paddings, mode=self.mode) - elif self.mode == "replicate": - out = np.pad(self.inputs['X'], paddings, mode="edge") - elif self.mode == "circular": - out = np.pad(self.inputs['X'], paddings, mode="wrap") - self.outputs = {'Out': out} - - def test_check_output(self): - import sys - np.set_printoptions(threshold=sys.maxsize) - print(self.inputs["X"].shape) - print(self.outputs["Out"].shape) - # print("\n\n\n") - # print("inputs", self.inputs["X"]) - # print("\n\n\n") - # print("outputs", self.outputs["Out"]) - # print("\n\n\n") - self.check_output() - - # def test_check_grad_normal(self): - # self.check_grad(['X'], 'Out') - - def initTestCase(self): - self.shape = (6, 2, 3, 4, 5) - self.paddings = [2, 3, 4, 5, 0, 0] - self.mode = "constant" - self.data_format = "NCDHW" - self.pad_value = 1.0 - self.variable_paddings = False - - -if __name__ == '__main__': - paddle.enable_static() - unittest.main() From f54cd0cbb1ca7818ddd610196b94b1e065e28847 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Thu, 30 Jun 2022 18:43:55 +0200 Subject: [PATCH 08/17] reverted two files --- .../mkldnn/fill_constant_mkldnn_op.cc | 33 +++++++++++++++++-- paddle/fluid/platform/mkldnn_reuse.h | 29 ---------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc index a72ddaa6511e7..615f43bb32c0f 100644 --- a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc @@ -20,6 +20,35 @@ namespace operators { using framework::Tensor; +template +class FillConstantMKLDNNHandler + : public platform::MKLDNNHandlerNoCachingT { + public: + FillConstantMKLDNNHandler(Tensor* out, + dnnl::engine engine, + platform::Place cpu_place) + : platform::MKLDNNHandlerNoCachingT(engine, cpu_place) { + const auto src0_md = + dnnl::memory::desc({out->numel(), sizeof(T)}, + platform::MKLDNNGetDataType(), + dnnl::memory::format_tag::ab); + + dnnl::primitive_attr attrs; + attrs.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f}); + + this->AcquireForwardPrimitiveDescriptor( + attrs, dnnl::algorithm::binary_add, src0_md, src1_md, src0_md); + } + + static const dnnl::memory::desc src1_md; +}; + +template +const dnnl::memory::desc FillConstantMKLDNNHandler::src1_md( + {1, sizeof(T)}, + platform::MKLDNNGetDataType(), + dnnl::memory::format_tag::ab); + template class FillConstantMKLDNNKernel : public framework::OpKernel { public: @@ -38,10 +67,10 @@ class FillConstantMKLDNNKernel : public framework::OpKernel { auto shape = GetShape(ctx); out->Resize(shape); - platform::FillConstantMKLDNNHandler handler(out, dnnl_engine, ctx.GetPlace()); + FillConstantMKLDNNHandler handler(out, dnnl_engine, ctx.GetPlace()); dnnl::memory constant_value_memory = - dnnl::memory(platform::FillConstantMKLDNNHandler::src1_md, + dnnl::memory(FillConstantMKLDNNHandler::src1_md, dnnl_engine, reinterpret_cast(&fill_value)); diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index c34a4a069f14f..05ebedf611a4b 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -912,35 +912,6 @@ class MatMulV2MKLDNNHandler } }; -template -class FillConstantMKLDNNHandler - : public platform::MKLDNNHandlerNoCachingT { - public: - FillConstantMKLDNNHandler(Tensor* out, - dnnl::engine engine, - platform::Place cpu_place) - : platform::MKLDNNHandlerNoCachingT(engine, cpu_place) { - const auto src0_md = - dnnl::memory::desc({out->numel(), sizeof(T)}, - platform::MKLDNNGetDataType(), - dnnl::memory::format_tag::ab); - - dnnl::primitive_attr attrs; - attrs.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f}); - - this->AcquireForwardPrimitiveDescriptor( - attrs, dnnl::algorithm::binary_add, src0_md, src1_md, src0_md); - } - - static const dnnl::memory::desc src1_md; -}; - -template -const dnnl::memory::desc FillConstantMKLDNNHandler::src1_md( - {1, sizeof(T)}, - platform::MKLDNNGetDataType(), - dnnl::memory::format_tag::ab); - template class ActivationMKLDNNHandler : public MKLDNNHandlerNoCachingT Date: Thu, 30 Jun 2022 18:47:50 +0200 Subject: [PATCH 09/17] reverted one old change --- paddle/fluid/operators/pad3d_op.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index ffbe5ad073aca..a0da0fecb7e9a 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -192,10 +192,9 @@ class Pad3dOpGrad : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - auto input_data_type = OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")); - - return framework::OpKernelType(input_data_type, ctx.GetPlace()); + return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")), + ctx.GetPlace()); } }; From 9897681ad829ce66914767ca4a5853c4642d52c7 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Thu, 30 Jun 2022 19:34:19 +0200 Subject: [PATCH 10/17] added support for Paddings tensor --- .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 6 ++++++ .../ir/inference/test_mkldnn_pad3d_op.py | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc index 05bb3830e5fc6..173bbe70994eb 100644 --- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc @@ -74,7 +74,13 @@ class PadMKLDNNKernel : public framework::OpKernel { auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); + auto* paddings_tensor = ctx.Input("Paddings"); std::vector paddings(ctx.Attr>("paddings")); + if (paddings_tensor) { + std::copy(paddings_tensor->data(), + paddings_tensor->data() + paddings_tensor->numel(), + paddings.data()); + } // pad2d has paddings in order top, bottom, left, right, so we need // to swap some of them to unify paddings between pad2d and pad3d if (ctx.Type() == "pad2d") { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py index 11df7c41dfbf5..cdd319ca307aa 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py @@ -21,23 +21,21 @@ import hypothesis.strategies as st -@reproduce_failure('6.45.0', b'AAEAAAAAAAAAAAAAAQ==') class TestOneDNNPad3DOp(MkldnnAutoScanTest): - def is_program_valid(self, program_config: ProgramConfig) -> bool: - # if mode is channel, and in_shape is 1 rank - if len(program_config.inputs['input_data'].shape - ) == 1 and program_config.ops[0].attrs['mode'] == 'channel': - return False - return True - def sample_program_configs(self, *args, **kwargs): def generate_input(*args, **kwargs): return np.random.random(kwargs['in_shape']).astype(np.float32) + def generate_paddings(): + return np.random.randint(0, 4, size=(6)).astype(np.int32) + pad3d_op = OpConfig(type="pad3d", - inputs={"X": ["input_data"]}, + inputs={ + "X": ["input_data"], + "Paddings": ["paddings_data"] + }, outputs={"Out": ["output_data"]}, attrs={ "mode": "constant", @@ -51,6 +49,8 @@ def generate_input(*args, **kwargs): inputs={ "input_data": TensorConfig(data_gen=partial(generate_input, *args, **kwargs)), + "paddings_data": + TensorConfig(data_gen=generate_paddings) }, outputs=["output_data"]) @@ -61,6 +61,7 @@ def sample_predictor_configs(self, program_config): yield config, (1e-5, 1e-5) @given(data_format=st.sampled_from(['NCDHW', 'NDHWC']), + use_paddings_tensor=st.sampled_from([True, False]), in_shape=st.lists(st.integers(min_value=1, max_value=10), min_size=5, max_size=5), From fa618f272cb907581a2f5bfa20ade336dd847ee9 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Thu, 30 Jun 2022 19:55:33 +0200 Subject: [PATCH 11/17] CI fix --- paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc index 173bbe70994eb..03e3faf4e753d 100644 --- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc @@ -97,7 +97,7 @@ class PadMKLDNNKernel : public framework::OpKernel { // must be done inside the kernel std::vector out_tz(x_tz); - for (int i = 0; i < paddings.size() / 2; ++i) { + for (size_t i = 0; i < paddings.size() / 2; ++i) { out_tz[out_tz.size() - 1 - i] += paddings[2 * i] + paddings[2 * i + 1]; } out->Resize(phi::make_ddim(out_tz)); @@ -127,7 +127,7 @@ class PadMKLDNNKernel : public framework::OpKernel { pad_value); // paddings are in order: left, right, top, bottom, front, back - for (int i = 0; i < paddings.size(); ++i) { + for (size_t i = 0; i < paddings.size(); ++i) { if (paddings[i] != 0) { std::vector offsets(out_tz.size(), 0); std::vector chunk_tz(out_tz.begin(), out_tz.end()); @@ -149,7 +149,7 @@ class PadMKLDNNKernel : public framework::OpKernel { astream.wait(); std::vector offsets(out_tz.size(), 0); - for (int i = 0; i < paddings.size() / 2; ++i) { + for (size_t i = 0; i < paddings.size() / 2; ++i) { offsets[out_tz.size() - 1 - i] = paddings[2 * i]; } @@ -169,9 +169,9 @@ class PadMKLDNNKernel : public framework::OpKernel { int64_t max_elems = 0; int64_t independent_dims = out_tz[0] * out_tz[1]; - for (int i = 0; i < paddings.size() / 2; ++i) { + for (size_t i = 0; i < paddings.size() / 2; ++i) { int64_t elems = std::max(paddings[2 * i], paddings[2 * i + 1]); - for (int j = 0; j < paddings.size() / 2; ++j) { + for (size_t j = 0; j < paddings.size() / 2; ++j) { if (j != i) { elems *= out_tz[out_tz.size() - 1 - j]; } From d3aca15f5a7a5481b01b0fcee6e706c12aa5a987 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Fri, 1 Jul 2022 00:36:46 +0200 Subject: [PATCH 12/17] CI fix --- paddle/fluid/operators/pad3d_op.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index a0da0fecb7e9a..e4b32b3d7a76e 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -111,10 +111,10 @@ class Pad3dOpMaker : public framework::OpProtoAndCheckerMaker { "An optional string from: \"NDHWC\", \"NCDHW\". " "Defaults to \"NDHWC\". Specify the data format of the input data.") .SetDefault("NCDHW"); - AddAttr( - "use_mkldnn", - "(bool, default false) Indicates if MKL-DNN kernel will be used") - .SetDefault(false); + AddAttr("use_mkldnn", + "(bool, default false) Only used in mkldnn kernel") + .SetDefault(false) + .AsExtra(); AddComment(R"DOC( Pad3d Operator. Pad 3-d images according to 'paddings' and 'mode'. From 52534a49949d69e720a358a51d6db23718cfd43f Mon Sep 17 00:00:00 2001 From: jakpiase Date: Fri, 1 Jul 2022 19:28:32 +0200 Subject: [PATCH 13/17] fixed timeout of tests --- .../ir/inference/test_mkldnn_pad2d_op.py | 17 ++++------------- .../ir/inference/test_mkldnn_pad3d_op.py | 10 ++++------ 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py index 7cd221e239781..5a81451febf39 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py @@ -23,13 +23,6 @@ class TestOneDNNPad2DOp(MkldnnAutoScanTest): - def is_program_valid(self, program_config: ProgramConfig) -> bool: - # if mode is channel, and in_shape is 1 rank - if len(program_config.inputs['input_data'].shape - ) == 1 and program_config.ops[0].attrs['mode'] == 'channel': - return False - return True - def sample_program_configs(self, *args, **kwargs): def generate_input(*args, **kwargs): @@ -60,12 +53,10 @@ def sample_predictor_configs(self, program_config): yield config, (1e-5, 1e-5) @given(data_format=st.sampled_from(['NCHW', 'NHWC']), - in_shape=st.lists(st.integers(min_value=1, max_value=10), - min_size=4, - max_size=4), - paddings=st.lists(st.integers(min_value=0, max_value=3), - min_size=4, - max_size=4)) + in_shape=st.sampled_from([[2, 3, 4, 5], [1, 4, 1, 3], [4, 3, 2, 1], + [1, 1, 1, 1]]), + paddings=st.sampled_from([[0, 0, 0, 0], [1, 2, 0, 1], [2, 5, 11, 3], + [0, 5, 0, 1]])) def test(self, *args, **kwargs): self.run_test(quant=False, *args, **kwargs) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py index cdd319ca307aa..acc7fa1e30e2d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py @@ -62,12 +62,10 @@ def sample_predictor_configs(self, program_config): @given(data_format=st.sampled_from(['NCDHW', 'NDHWC']), use_paddings_tensor=st.sampled_from([True, False]), - in_shape=st.lists(st.integers(min_value=1, max_value=10), - min_size=5, - max_size=5), - paddings=st.lists(st.integers(min_value=0, max_value=3), - min_size=6, - max_size=6)) + in_shape=st.sampled_from([[2, 3, 4, 5, 6], [1, 4, 1, 3, 2], + [4, 3, 2, 1, 1], [1, 1, 1, 1, 1]]), + paddings=st.sampled_from([[0, 0, 0, 0, 0, 0], [1, 2, 0, 1, 2, 1], + [2, 5, 11, 3, 4, 3], [0, 5, 0, 1, 0, 2]])) def test(self, *args, **kwargs): self.run_test(quant=False, *args, **kwargs) From 24eff5e96c21cac6635c4d1589b1409a854e9140 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Mon, 4 Jul 2022 15:33:03 +0200 Subject: [PATCH 14/17] fixed typo --- paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc index 03e3faf4e753d..e7a528c452b8d 100644 --- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc @@ -119,7 +119,7 @@ class PadMKLDNNKernel : public framework::OpKernel { ctx.GetPlace()); // to avoid allocating new temporary memory, Out's memory is used as a tmp - // buffer for storing a contignuous memory consisting of pad_value, which + // buffer for storing a contiguous memory consisting of pad_value, which // later is used as a SRC for reorders that are filling Out with padding T* out_ptr = out->data(); std::fill(out_ptr, From 469106115c49682b25038a666fd71bd4a10fb66b Mon Sep 17 00:00:00 2001 From: jakpiase Date: Tue, 5 Jul 2022 17:14:26 +0200 Subject: [PATCH 15/17] changes to GetKernelTypeForVar --- paddle/fluid/operators/pad2d_op.cc | 14 ++++++-------- paddle/fluid/operators/pad3d_op.cc | 14 ++++++-------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index e7f0c6507bf70..de45a2ff811cd 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -722,14 +722,12 @@ class Pad2dOp : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && - (tensor.layout() != framework::DataLayout::kMKLDNN)) { - auto attrs = Attrs(); - auto ar = paddle::framework::AttrReader(attrs); - const std::string data_format = ar.Get("data_format"); - return framework::OpKernelType( - expected_kernel_type.data_type_, - tensor.place(), - framework::StringToDataLayout(data_format)); + (tensor.layout() != framework::DataLayout::kMKLDNN) && + paddle::platform::MKLDNNDeviceContext::tls() + .get_cur_paddle_data_layout() == framework::DataLayout::kNHWC) { + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), + framework::DataLayout::kNHWC); } #endif return framework::OpKernelType( diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index e4b32b3d7a76e..7d4f4826cae88 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -57,14 +57,12 @@ class Pad3dOp : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && - (tensor.layout() != framework::DataLayout::kMKLDNN)) { - auto attrs = Attrs(); - auto ar = paddle::framework::AttrReader(attrs); - const std::string data_format = ar.Get("data_format"); - return framework::OpKernelType( - expected_kernel_type.data_type_, - tensor.place(), - framework::StringToDataLayout(data_format)); + (tensor.layout() != framework::DataLayout::kMKLDNN) && + paddle::platform::MKLDNNDeviceContext::tls() + .get_cur_paddle_data_layout() == framework::DataLayout::kNHWC) { + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), + framework::DataLayout::kNHWC); } #endif return framework::OpKernelType( From 84893ee69d5f16f52110e0a6b133497a442b2245 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Wed, 6 Jul 2022 15:54:16 +0200 Subject: [PATCH 16/17] Revert "changes to GetKernelTypeForVar" This reverts commit 469106115c49682b25038a666fd71bd4a10fb66b. --- paddle/fluid/operators/pad2d_op.cc | 14 ++++++++------ paddle/fluid/operators/pad3d_op.cc | 14 ++++++++------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index de45a2ff811cd..e7f0c6507bf70 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -722,12 +722,14 @@ class Pad2dOp : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && - (tensor.layout() != framework::DataLayout::kMKLDNN) && - paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout() == framework::DataLayout::kNHWC) { - return framework::OpKernelType(expected_kernel_type.data_type_, - tensor.place(), - framework::DataLayout::kNHWC); + (tensor.layout() != framework::DataLayout::kMKLDNN)) { + auto attrs = Attrs(); + auto ar = paddle::framework::AttrReader(attrs); + const std::string data_format = ar.Get("data_format"); + return framework::OpKernelType( + expected_kernel_type.data_type_, + tensor.place(), + framework::StringToDataLayout(data_format)); } #endif return framework::OpKernelType( diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index 7d4f4826cae88..e4b32b3d7a76e 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -57,12 +57,14 @@ class Pad3dOp : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && - (tensor.layout() != framework::DataLayout::kMKLDNN) && - paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout() == framework::DataLayout::kNHWC) { - return framework::OpKernelType(expected_kernel_type.data_type_, - tensor.place(), - framework::DataLayout::kNHWC); + (tensor.layout() != framework::DataLayout::kMKLDNN)) { + auto attrs = Attrs(); + auto ar = paddle::framework::AttrReader(attrs); + const std::string data_format = ar.Get("data_format"); + return framework::OpKernelType( + expected_kernel_type.data_type_, + tensor.place(), + framework::StringToDataLayout(data_format)); } #endif return framework::OpKernelType( From add743fc44570874a99f582f446db8c429dbbcd7 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Thu, 7 Jul 2022 17:03:46 +0200 Subject: [PATCH 17/17] added AsExtra() to pad2d --- paddle/fluid/operators/pad2d_op.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index e7f0c6507bf70..a0ff40cddbf47 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -773,10 +773,10 @@ class Pad2dOpMaker : public framework::OpProtoAndCheckerMaker { "An optional string from: \"NHWC\", \"NCHW\". " "Defaults to \"NHWC\". Specify the data format of the input data.") .SetDefault("NCHW"); - AddAttr( - "use_mkldnn", - "(bool, default false) Indicates if MKL-DNN kernel will be used") - .SetDefault(false); + AddAttr("use_mkldnn", + "(bool, default false) Only used in mkldnn kernel") + .SetDefault(false) + .AsExtra(); AddComment(R"DOC( Pad2d Operator. Pad 2-d images according to 'paddings' and 'mode'.