From 36128ecbe37bb2667f15d132e6b0c07eec6a8eac Mon Sep 17 00:00:00 2001
From: Piotr Paturej <piotr.paturej@intel.com>
Date: Mon, 6 Jun 2022 10:53:47 +0200
Subject: [PATCH 01/17] Piotrek's changes for pad3d

---
 .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc |  78 +++++++++++
 paddle/fluid/operators/pad3d_op.cc            |  28 +++-
 .../unittests/mkldnn/test_pad3d_mkldnn_op.py  | 126 ++++++++++++++++++
 3 files changed, 227 insertions(+), 5 deletions(-)
 create mode 100644 paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
 create mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py
diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
new file mode 100644
index 0000000000000..07d26bd223dc9
--- /dev/null
+++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
@@ -0,0 +1,78 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/utils.h"
+#include "paddle/fluid/platform/mkldnn_reuse.h"
+
+namespace paddle {
+namespace operators {
+
+using paddle::framework::Tensor;
+
+template <typename T>
+class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    this->RunKernel(ctx);
+  }
+
+  void RunKernel(const framework::ExecutionContext& ctx) const {
+    const auto& dev_ctx =
+        ctx.template device_context<platform::MKLDNNDeviceContext>();
+
+    auto* input = ctx.Input<Tensor>("X");
+    auto* output = ctx.Output<Tensor>("Out");
+
+    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
+
+    const T& pad_value = static_cast<T>(ctx.Attr<float>("value"));
+    const std::string& mode = ctx.Attr<std::string>("mode");
+    const std::string& data_format = ctx.Attr<std::string>("data_format");
+
+    auto src_tz = phi::vectorize(input->dims());
+    auto dst_tz = phi::vectorize(output->dims());
+
+    auto paddle_dt = framework::TransToProtoVarType(input->dtype());
+    dnnl::memory::data_type onednn_dt = framework::ToMKLDNNDataType(paddle_dt);
+
+    auto dims = phi::vectorize(output->dims());
+
+  }
+};
+template <typename T>
+class Pad3dGradMKLDNNKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    this->RunKernel(ctx);
+  }
+
+  void RunKernel(const framework::ExecutionContext& ctx) const {
+    const auto& dev_ctx =
+        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& onednn_engine = dev_ctx.GetEngine();
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_KERNEL(pad3d, MKLDNN, paddle::platform::CPUPlace,
+                   ops::Pad3dMKLDNNKernel<float>,
+                   ops::Pad3dMKLDNNKernel<int8_t>,
+                   ops::Pad3dMKLDNNKernel<uint8_t>,
+                   ops::Pad3dMKLDNNKernel<paddle::platform::bfloat16>);
+
+REGISTER_OP_KERNEL(pad3d_grad, MKLDNN, paddle::platform::CPUPlace,
+                   ops::Pad3dGradMKLDNNKernel<float>,
+                   ops::Pad3dGradMKLDNNKernel<paddle::platform::bfloat16>);
diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc
index b7a638d7ce930..c6a241d0dca92 100644
--- a/paddle/fluid/operators/pad3d_op.cc
+++ b/paddle/fluid/operators/pad3d_op.cc
@@ -34,8 +34,15 @@ class Pad3dOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    return framework::OpKernelType(
-        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
+    auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
+#ifdef PADDLE_WITH_MKLDNN
+    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };
 
@@ -78,6 +85,10 @@ class Pad3dOpMaker : public framework::OpProtoAndCheckerMaker {
         "An optional string from: \"NDHWC\", \"NCDHW\". "
         "Defaults to \"NDHWC\". Specify the data format of the input data.")
         .SetDefault("NCDHW");
+    AddAttr<bool>(
+        "use_mkldnn",
+        "(bool, default false) Indicates if MKL-DNN kernel will be used")
+        .SetDefault(false);
     AddComment(R"DOC(
 Pad3d Operator.
 Pad 3-d images according to 'paddings' and 'mode'. 
@@ -153,9 +164,16 @@ class Pad3dOpGrad : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
-                                       ctx, framework::GradVarName("Out")),
-                                   ctx.GetPlace());
+    auto input_data_type = OperatorWithKernel::IndicateVarDataType(
+        ctx, framework::GradVarName("Out"));
+#ifdef PADDLE_WITH_MKLDNN
+    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };
 
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py
new file mode 100644
index 0000000000000..a8688db7f2213
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py
@@ -0,0 +1,126 @@
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+from termios import N_PPP  #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+from paddle.fluid.tests.unittests.op_test import OpTest
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+import paddle.fluid.core as core
+
+from paddle.fluid import Program, program_guard, Executor, default_main_program
+
+
+class TestPad3dOneDNNOp(OpTest):
+    def setUp(self):
+        paddle.enable_static()
+        self.value = 0.0
+        self.initTestCase()
+        self.op_type = "pad3d"
+        self.python_api = paddle.nn.functional.pad
+        self.inputs = {'X': np.random.random(self.shape).astype("float32")}
+        self.attrs = {'use_mkldnn': True}
+        if self.variable_paddings:
+            self.attrs['paddings'] = []
+            self.inputs['Paddings'] = np.array(
+                self.paddings).flatten().astype("int32")
+        else:
+            self.attrs['paddings'] = np.array(
+                self.paddings).flatten().astype("int32")
+        self.attrs['value'] = self.value
+        self.attrs['mode'] = self.mode
+        self.attrs['data_format'] = self.data_format
+        if self.data_format == "NCDHW":
+            paddings = [
+                (0, 0),
+                (0, 0),
+                (self.paddings[4], self.paddings[5]),
+                (self.paddings[2], self.paddings[3]),
+                (self.paddings[0], self.paddings[1]),
+            ]
+        else:
+            paddings = [
+                (0, 0),
+                (self.paddings[4], self.paddings[5]),
+                (self.paddings[2], self.paddings[3]),
+                (self.paddings[0], self.paddings[1]),
+                (0, 0),
+            ]
+        if self.mode == "constant":
+            out = np.pad(self.inputs['X'],
+                         paddings,
+                         mode=self.mode,
+                         constant_values=self.value)
+        elif self.mode == "reflect":
+            out = np.pad(self.inputs['X'], paddings, mode=self.mode)
+        elif self.mode == "replicate":
+            out = np.pad(self.inputs['X'], paddings, mode="edge")
+        elif self.mode == "circular":
+            out = np.pad(self.inputs['X'], paddings, mode="wrap")
+        self.outputs = {'Out': out}
+
+    def test_check_output(self):
+        import sys
+        np.set_printoptions(threshold=sys.maxsize)
+        print(self.inputs["X"].shape)
+        print(self.outputs["Out"].shape)
+        # print("\n\n\n")
+        # print("inputs", self.inputs["X"])
+        # print("\n\n\n")
+        # print("outputs", self.outputs["Out"])
+        # print("\n\n\n")
+        self.check_output()
+
+    # def test_check_grad_normal(self):
+    #     self.check_grad(['X'], 'Out')
+
+    def initTestCase(self):
+        self.shape = (2, 3, 4, 5, 6)
+        self.paddings = [0, 0, 0, 0, 0, 0]
+        self.mode = "constant"
+        self.data_format = "NCDHW"
+        self.pad_value = 0.0
+        self.variable_paddings = False
+
+
+class TestCase1(TestPad3dOneDNNOp):
+    def initTestCase(self):
+        self.shape = (2, 3, 4, 5, 6)
+        self.paddings = [0, 1, 2, 3, 4, 5]
+        self.mode = "constant"
+        self.data_format = "NCDHW"
+        self.value = 1.0
+        self.variable_paddings = False
+
+
+
+if __name__ == '__main__':
+    paddle.enable_static()
+    unittest.main()

From 4e8ae0340a65eb871801a374ebb832c93d47d7ba Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Mon, 27 Jun 2022 15:41:31 +0200
Subject: [PATCH 02/17] my changes

---
 .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc   | 17 -----------------
 paddle/fluid/operators/pad3d_op.cc              | 14 +++++++-------
 2 files changed, 7 insertions(+), 24 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
index 07d26bd223dc9..f0502d55a7a51 100644
--- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
@@ -50,19 +50,6 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
 
   }
 };
-template <typename T>
-class Pad3dGradMKLDNNKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    this->RunKernel(ctx);
-  }
-
-  void RunKernel(const framework::ExecutionContext& ctx) const {
-    const auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
-    const auto& onednn_engine = dev_ctx.GetEngine();
-  }
-};
 }  // namespace operators
 }  // namespace paddle
 
@@ -72,7 +59,3 @@ REGISTER_OP_KERNEL(pad3d, MKLDNN, paddle::platform::CPUPlace,
                    ops::Pad3dMKLDNNKernel<int8_t>,
                    ops::Pad3dMKLDNNKernel<uint8_t>,
                    ops::Pad3dMKLDNNKernel<paddle::platform::bfloat16>);
-
-REGISTER_OP_KERNEL(pad3d_grad, MKLDNN, paddle::platform::CPUPlace,
-                   ops::Pad3dGradMKLDNNKernel<float>,
-                   ops::Pad3dGradMKLDNNKernel<paddle::platform::bfloat16>);
diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc
index c6a241d0dca92..6632b82e4757d 100644
--- a/paddle/fluid/operators/pad3d_op.cc
+++ b/paddle/fluid/operators/pad3d_op.cc
@@ -35,13 +35,13 @@ class Pad3dOp : public framework::OperatorWithKernel {
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
-#ifdef PADDLE_WITH_MKLDNN
-    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
-      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
-                                     framework::DataLayout::kMKLDNN,
-                                     framework::LibraryType::kMKLDNN);
-    }
-#endif
+// #ifdef PADDLE_WITH_MKLDNN
+//     if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
+//       return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+//                                      framework::DataLayout::kMKLDNN,
+//                                      framework::LibraryType::kMKLDNN);
+//     }
+// #endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };

From 78c45ee77dbdb7695b4bbfa028667187b8851cf9 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Mon, 27 Jun 2022 21:56:52 +0200
Subject: [PATCH 03/17] first version of pad3d, single copy, unnecessary reads

---
 .../mkldnn/fill_constant_mkldnn_op.cc         | 33 +---------
 .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 61 +++++++++++++++----
 paddle/fluid/operators/pad3d_op.cc            | 15 ++---
 paddle/fluid/platform/mkldnn_reuse.h          | 29 +++++++++
 4 files changed, 89 insertions(+), 49 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc
index 615f43bb32c0f..a72ddaa6511e7 100644
--- a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc
@@ -20,35 +20,6 @@ namespace operators {
 
 using framework::Tensor;
 
-template <typename T>
-class FillConstantMKLDNNHandler
-    : public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
- public:
-  FillConstantMKLDNNHandler(Tensor* out,
-                            dnnl::engine engine,
-                            platform::Place cpu_place)
-      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(engine, cpu_place) {
-    const auto src0_md =
-        dnnl::memory::desc({out->numel(), sizeof(T)},
-                           platform::MKLDNNGetDataType<uint8_t>(),
-                           dnnl::memory::format_tag::ab);
-
-    dnnl::primitive_attr attrs;
-    attrs.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f});
-
-    this->AcquireForwardPrimitiveDescriptor(
-        attrs, dnnl::algorithm::binary_add, src0_md, src1_md, src0_md);
-  }
-
-  static const dnnl::memory::desc src1_md;
-};
-
-template <typename T>
-const dnnl::memory::desc FillConstantMKLDNNHandler<T>::src1_md(
-    {1, sizeof(T)},
-    platform::MKLDNNGetDataType<uint8_t>(),
-    dnnl::memory::format_tag::ab);
-
 template <typename T>
 class FillConstantMKLDNNKernel : public framework::OpKernel<T> {
  public:
@@ -67,10 +38,10 @@ class FillConstantMKLDNNKernel : public framework::OpKernel<T> {
     auto shape = GetShape(ctx);
     out->Resize(shape);
 
-    FillConstantMKLDNNHandler<T> handler(out, dnnl_engine, ctx.GetPlace());
+    platform::FillConstantMKLDNNHandler<T> handler(out, dnnl_engine, ctx.GetPlace());
 
     dnnl::memory constant_value_memory =
-        dnnl::memory(FillConstantMKLDNNHandler<T>::src1_md,
+        dnnl::memory(platform::FillConstantMKLDNNHandler<T>::src1_md,
                      dnnl_engine,
                      reinterpret_cast<uint8_t*>(&fill_value));
 
diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
index f0502d55a7a51..dccc238eafc81 100644
--- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
@@ -30,24 +30,63 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
   void RunKernel(const framework::ExecutionContext& ctx) const {
     const auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& onednn_engine = dev_ctx.GetEngine();
 
-    auto* input = ctx.Input<Tensor>("X");
-    auto* output = ctx.Output<Tensor>("Out");
+    auto* x = ctx.Input<Tensor>("X");
+    auto* out = ctx.Output<Tensor>("Out");
 
-    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
+    std::vector<int> paddings(ctx.Attr<std::vector<int>>("paddings"));
 
-    const T& pad_value = static_cast<T>(ctx.Attr<float>("value"));
-    const std::string& mode = ctx.Attr<std::string>("mode");
-    const std::string& data_format = ctx.Attr<std::string>("data_format");
+    T pad_value = static_cast<T>(ctx.Attr<float>("value"));
 
-    auto src_tz = phi::vectorize(input->dims());
-    auto dst_tz = phi::vectorize(output->dims());
+    auto x_tz = phi::vectorize(x->dims());
+    auto out_tz = phi::vectorize(out->dims());
 
-    auto paddle_dt = framework::TransToProtoVarType(input->dtype());
-    dnnl::memory::data_type onednn_dt = framework::ToMKLDNNDataType(paddle_dt);
+    auto paddle_dtype = framework::TransToProtoVarType(x->dtype());
 
-    auto dims = phi::vectorize(output->dims());
+    platform::FillConstantMKLDNNHandler<T> handler(out, onednn_engine, ctx.GetPlace());
 
+    dnnl::memory constant_value_memory =
+        dnnl::memory(platform::FillConstantMKLDNNHandler<T>::src1_md,
+                     onednn_engine,
+                     reinterpret_cast<uint8_t*>(&pad_value));
+
+    auto src0_memory_p = handler.AcquireDstMemory(out);
+    auto fill_constant_p = handler.AcquireForwardPrimitive();
+
+    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    fill_constant_p->execute(astream,
+                             {{DNNL_ARG_SRC_0, *src0_memory_p},
+                              {DNNL_ARG_SRC_1, constant_value_memory},
+                              {DNNL_ARG_DST, *src0_memory_p}});
+    astream.wait();
+
+    // fill_constant handler flattens memory, so we have to revert it now
+    const dnnl::memory::desc real_out_md(out_tz, platform::MKLDNNGetDataType<T>(), platform::GetPlainMKLDNNFormat(out_tz.size()));
+
+    platform::ReorderMKLDNNHandler reorder_handler(
+      x_tz,
+      paddle_dtype,
+      framework::ToMKLDNNDataType(paddle_dtype),
+      onednn_engine);
+
+    auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(x->mem_desc(), platform::to_void_cast(x->data<T>()));
+
+    auto reorder_dst_memory_p = std::make_shared<dnnl::memory>(real_out_md, onednn_engine, out->data<T>());
+    
+    std::vector<int64_t> offsets(5, 0); // NCDHW     
+    for(int i=0; i<3; ++i) {
+      offsets[4-i] = paddings[2*i];
+    }
+    
+    auto slice_mem_p = reorder_handler.AcquireSubmemory(x_tz, offsets, reorder_dst_memory_p);
+
+    auto reorder_p =
+        reorder_handler.AcquireReorder(slice_mem_p, reorder_src_memory_p);
+    reorder_p->execute(astream, *reorder_src_memory_p, *slice_mem_p);
+    astream.wait();
+
+    out->set_mem_desc(real_out_md);
   }
 };
 }  // namespace operators
diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc
index 27df6ae1aecb8..a09dc5c7817d0 100644
--- a/paddle/fluid/operators/pad3d_op.cc
+++ b/paddle/fluid/operators/pad3d_op.cc
@@ -35,13 +35,14 @@ class Pad3dOp : public framework::OperatorWithKernel {
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
-// #ifdef PADDLE_WITH_MKLDNN
-//     if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
-//       return framework::OpKernelType(input_data_type, ctx.GetPlace(),
-//                                      framework::DataLayout::kMKLDNN,
-//                                      framework::LibraryType::kMKLDNN);
-//     }
-// #endif
+#ifdef PADDLE_WITH_MKLDNN
+    // currently only constant mode and non-blocked layouts are supported for oneDNN
+    if (this->CanMKLDNNBeUsed(ctx, input_data_type) && ctx.Attr<std::string>("mode") == "constant" && ctx.Input<Tensor>("X")->mem_desc().data.format_desc.blocking.inner_nblks == 0) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index 05ebedf611a4b..c34a4a069f14f 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -912,6 +912,35 @@ class MatMulV2MKLDNNHandler
   }
 };
 
+template <typename T>
+class FillConstantMKLDNNHandler
+    : public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
+ public:
+  FillConstantMKLDNNHandler(Tensor* out,
+                            dnnl::engine engine,
+                            platform::Place cpu_place)
+      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(engine, cpu_place) {
+    const auto src0_md =
+        dnnl::memory::desc({out->numel(), sizeof(T)},
+                           platform::MKLDNNGetDataType<uint8_t>(),
+                           dnnl::memory::format_tag::ab);
+
+    dnnl::primitive_attr attrs;
+    attrs.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f});
+
+    this->AcquireForwardPrimitiveDescriptor(
+        attrs, dnnl::algorithm::binary_add, src0_md, src1_md, src0_md);
+  }
+
+  static const dnnl::memory::desc src1_md;
+};
+
+template <typename T>
+const dnnl::memory::desc FillConstantMKLDNNHandler<T>::src1_md(
+    {1, sizeof(T)},
+    platform::MKLDNNGetDataType<uint8_t>(),
+    dnnl::memory::format_tag::ab);
+
 template <typename T>
 class ActivationMKLDNNHandler
     : public MKLDNNHandlerNoCachingT<T,

From ce02a268a2a4c13b21f3078f3aa13e7fdd001fab Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Wed, 29 Jun 2022 00:10:41 +0200
Subject: [PATCH 04/17] optimized pad3d kernel

---
 .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 127 ++++++++++++++----
 1 file changed, 101 insertions(+), 26 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
index dccc238eafc81..eb0437513c619 100644
--- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
@@ -15,11 +15,48 @@ limitations under the License. */
 #include "paddle/fluid/operators/utils.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
 
+#define PAD3D_SIZE 6
+
 namespace paddle {
 namespace operators {
 
 using paddle::framework::Tensor;
 
+
+/*
+Pad3D is done by using up to 7 reorders. Following example is done
+on 2D example for simplicity, but it is straightforward to extend it to 3D case.
+
+Let us consider following example:
+
+          N  C  H  W               L  R  T  B
+X dims = (1, 1, 3, 3), paddings = (1, 2, 3, 4) in order Left, Right, Top, Bottom
+
+We have to copy the X tensor into Out tensor, but except from that we have to fill the rest of the memory with additional padding.
+To avoid looping through the whole Out memory two times, only these parts of Out memory that won't store X's memory are filled with pad value.
+That behavior is achieved by using oneDNN's submemory descriptors which allows us to set offsets for each dimension and skip some parts of the memory.
+For 2D case up to 5 reorders will be used in Pad3D kernel(if padding=0 reorder is skipped). 
+In the following example i'th number means, that this part of memory was filled by i'th reorder. 4'th reorder is copying X memory into Out memory.
+i&j means that both i'th and j'th reorder will set the padding at that location:
+
+              INDEX
+     | 0   1   2   3   4   5
+     |_______________________
+   0 |0&2  2   2   2  1&2 1&2
+   1 |0&2  2   2   2  1&2 1&2
+I  2 |0&2  2   2   2  1&2 1&2  
+N  3 | 0   4   4   4   1   1
+D  4 | 0   4   4   4   1   1
+E  5 | 0   4   4   4   1   1
+X  6 |0&3  3   3   3  1&3 1&3
+   7 |0&3  3   3   3  1&3 1&3
+   8 |0&3  3   3   3  1&3 1&3
+   9 |0&3  3   3   3  1&3 1&3
+
+Since oneDNN's reorder cannot set the pad value to the border memory, we have to prefill Out's memory and use it as a temporary buffer, which later is copied
+into the rest of Out's memory. At the end last reorder is done which is copying X memory into Out memory.
+
+*/
 template <typename T>
 class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
  public:
@@ -31,6 +68,7 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
     const auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& onednn_engine = dev_ctx.GetEngine();
+    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
 
     auto* x = ctx.Input<Tensor>("X");
     auto* out = ctx.Output<Tensor>("Out");
@@ -44,26 +82,6 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
 
     auto paddle_dtype = framework::TransToProtoVarType(x->dtype());
 
-    platform::FillConstantMKLDNNHandler<T> handler(out, onednn_engine, ctx.GetPlace());
-
-    dnnl::memory constant_value_memory =
-        dnnl::memory(platform::FillConstantMKLDNNHandler<T>::src1_md,
-                     onednn_engine,
-                     reinterpret_cast<uint8_t*>(&pad_value));
-
-    auto src0_memory_p = handler.AcquireDstMemory(out);
-    auto fill_constant_p = handler.AcquireForwardPrimitive();
-
-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
-    fill_constant_p->execute(astream,
-                             {{DNNL_ARG_SRC_0, *src0_memory_p},
-                              {DNNL_ARG_SRC_1, constant_value_memory},
-                              {DNNL_ARG_DST, *src0_memory_p}});
-    astream.wait();
-
-    // fill_constant handler flattens memory, so we have to revert it now
-    const dnnl::memory::desc real_out_md(out_tz, platform::MKLDNNGetDataType<T>(), platform::GetPlainMKLDNNFormat(out_tz.size()));
-
     platform::ReorderMKLDNNHandler reorder_handler(
       x_tz,
       paddle_dtype,
@@ -71,8 +89,25 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
       onednn_engine);
 
     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(x->mem_desc(), platform::to_void_cast(x->data<T>()));
+    auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(out, out_tz, platform::GetPlainMKLDNNFormat(5), ctx.GetPlace());
+
+    T* out_ptr = out->data<T>();
+    std::fill(out_ptr, out_ptr+CalculatePrefillElems(out_tz, paddings), pad_value);
+
+    // paddings are in order: left, right, top, bottom, front, back
+    for(int i = 0; i < 6; ++i) {
+      if(paddings[i] != 0) {
+        std::vector<int64_t> offsets(5, 0);
+        std::vector<int64_t> chunk_tz(out_tz.begin(), out_tz.end());
 
-    auto reorder_dst_memory_p = std::make_shared<dnnl::memory>(real_out_md, onednn_engine, out->data<T>());
+        chunk_tz[4 - i / 2] = paddings[i];
+        if (i % 2 == 1) {
+          offsets[4 - i / 2] = paddings[i - 1] + x_tz[4 - i / 2];
+        }
+
+        FillPartOfPadding(paddle_dtype, onednn_engine, out_ptr, reorder_dst_memory_p, chunk_tz, offsets);
+      }
+    }
     
     std::vector<int64_t> offsets(5, 0); // NCDHW     
     for(int i=0; i<3; ++i) {
@@ -86,7 +121,50 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
     reorder_p->execute(astream, *reorder_src_memory_p, *slice_mem_p);
     astream.wait();
 
-    out->set_mem_desc(real_out_md);
+    out->set_mem_desc(reorder_dst_memory_p->get_desc());
+  }
+
+  int64_t CalculatePrefillElems(const std::vector<int64_t>& out_tz, const std::vector<int>& paddings) const {
+    int64_t max_elems = 0;
+
+    int64_t independent_dims = out_tz[0] * out_tz[1];
+
+    for(int i = 0; i < 3; ++i) {
+      int64_t elems = std::max(paddings[2*i], paddings[2*i+1]);
+      for(int j = 0; j < 3; ++j) {
+        if(j != i) {
+          elems *= out_tz[4 - j];
+        }
+      }
+
+      if(max_elems < elems) {
+        max_elems = elems;
+      }
+    }
+    return independent_dims * max_elems;
+  }
+
+  void FillPartOfPadding(framework::proto::VarType::Type paddle_dtype,
+                         const dnnl::engine& onednn_engine,
+                         T* prefilled_mem_ptr,
+                         const std::shared_ptr<dnnl::memory>&out_mem_p,
+                         std::vector<int64_t>& chunk_tz,
+                         const std::vector<int64_t>& offsets) const {
+    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+
+    dnnl::memory::desc prefilled_mem_desc(chunk_tz, platform::MKLDNNGetDataType<T>(), platform::GetPlainMKLDNNFormat(5));
+    auto prefilled_mem_p = std::make_shared<dnnl::memory>(prefilled_mem_desc, onednn_engine, prefilled_mem_ptr);
+
+    platform::ReorderMKLDNNHandler reorder_handler(
+      chunk_tz,
+      paddle_dtype,
+      framework::ToMKLDNNDataType(paddle_dtype),
+      onednn_engine);
+
+    auto out_slice_mem_p = reorder_handler.AcquireSubmemory(chunk_tz, offsets, out_mem_p);
+    auto reorder_p =
+        reorder_handler.AcquireReorder(out_slice_mem_p, prefilled_mem_p);
+    reorder_p->execute(astream, *prefilled_mem_p, *out_slice_mem_p);
   }
 };
 }  // namespace operators
@@ -94,7 +172,4 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
 
 namespace ops = paddle::operators;
 REGISTER_OP_KERNEL(pad3d, MKLDNN, paddle::platform::CPUPlace,
-                   ops::Pad3dMKLDNNKernel<float>,
-                   ops::Pad3dMKLDNNKernel<int8_t>,
-                   ops::Pad3dMKLDNNKernel<uint8_t>,
-                   ops::Pad3dMKLDNNKernel<paddle::platform::bfloat16>);
+                   ops::Pad3dMKLDNNKernel<float>);

From e0410f286cc235c122706b4147aa25cccda7e482 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Wed, 29 Jun 2022 00:11:21 +0200
Subject: [PATCH 05/17] test update

---
 .../unittests/mkldnn/test_pad3d_mkldnn_op.py  | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py
index a8688db7f2213..a93edc9baaf9b 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py
@@ -41,7 +41,7 @@
 class TestPad3dOneDNNOp(OpTest):
     def setUp(self):
         paddle.enable_static()
-        self.value = 0.0
+        self.value = 1.0
         self.initTestCase()
         self.op_type = "pad3d"
         self.python_api = paddle.nn.functional.pad
@@ -102,25 +102,14 @@ def test_check_output(self):
     #     self.check_grad(['X'], 'Out')
 
     def initTestCase(self):
-        self.shape = (2, 3, 4, 5, 6)
-        self.paddings = [0, 0, 0, 0, 0, 0]
+        self.shape = (6, 2, 3, 4, 5)
+        self.paddings = [2, 3, 4, 5, 0, 0]
         self.mode = "constant"
         self.data_format = "NCDHW"
-        self.pad_value = 0.0
+        self.pad_value = 1.0
         self.variable_paddings = False
 
 
-class TestCase1(TestPad3dOneDNNOp):
-    def initTestCase(self):
-        self.shape = (2, 3, 4, 5, 6)
-        self.paddings = [0, 1, 2, 3, 4, 5]
-        self.mode = "constant"
-        self.data_format = "NCDHW"
-        self.value = 1.0
-        self.variable_paddings = False
-
-
-
 if __name__ == '__main__':
     paddle.enable_static()
     unittest.main()

From 84e30fe1f3c780c6dab1c9233e11ba15f1151bbb Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Wed, 29 Jun 2022 17:58:56 +0200
Subject: [PATCH 06/17] removed magic numbers

---
 .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 38 +++++++++++--------
 paddle/fluid/operators/pad3d_op.cc            | 27 +++++++++----
 2 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
index eb0437513c619..b9c072ecc45ef 100644
--- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
@@ -72,13 +72,18 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
 
     auto* x = ctx.Input<Tensor>("X");
     auto* out = ctx.Output<Tensor>("Out");
-
     std::vector<int> paddings(ctx.Attr<std::vector<int>>("paddings"));
-
     T pad_value = static_cast<T>(ctx.Attr<float>("value"));
 
-    auto x_tz = phi::vectorize(x->dims());
-    auto out_tz = phi::vectorize(out->dims());
+    std::vector<int64_t> x_tz = phi::vectorize(x->dims());
+    // due to the need of supporting NDHWC, inferring out shape
+    // must be done inside the kernel
+    std::vector<int64_t> out_tz(x_tz);
+
+    for(int i = 0; i < paddings.size() / 2; ++i) {
+      out_tz[out_tz.size() - 1 - i] += paddings[2 * i] + paddings[2 * i + 1];
+    }
+    out->Resize(phi::make_ddim(out_tz));
 
     auto paddle_dtype = framework::TransToProtoVarType(x->dtype());
 
@@ -89,29 +94,32 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
       onednn_engine);
 
     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(x->mem_desc(), platform::to_void_cast(x->data<T>()));
-    auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(out, out_tz, platform::GetPlainMKLDNNFormat(5), ctx.GetPlace());
+    auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(out, out_tz, platform::GetPlainMKLDNNFormat(out_tz.size()), ctx.GetPlace());
 
+    // to avoid allocating new temporary memory, Out's memory is used as a tmp
+    // buffer for storing a contignuous memory consisting of pad_value, which
+    // later is used as a SRC for reorders that are filling Out with padding
     T* out_ptr = out->data<T>();
     std::fill(out_ptr, out_ptr+CalculatePrefillElems(out_tz, paddings), pad_value);
 
     // paddings are in order: left, right, top, bottom, front, back
-    for(int i = 0; i < 6; ++i) {
+    for(int i = 0; i < paddings.size(); ++i) {
       if(paddings[i] != 0) {
-        std::vector<int64_t> offsets(5, 0);
+        std::vector<int64_t> offsets(out_tz.size(), 0);
         std::vector<int64_t> chunk_tz(out_tz.begin(), out_tz.end());
 
-        chunk_tz[4 - i / 2] = paddings[i];
+        chunk_tz[out_tz.size() - 1 - i / 2] = paddings[i];
         if (i % 2 == 1) {
-          offsets[4 - i / 2] = paddings[i - 1] + x_tz[4 - i / 2];
+          offsets[out_tz.size() - 1 - i / 2] = paddings[i - 1] + x_tz[out_tz.size() - 1 - i / 2];
         }
 
         FillPartOfPadding(paddle_dtype, onednn_engine, out_ptr, reorder_dst_memory_p, chunk_tz, offsets);
       }
     }
     
-    std::vector<int64_t> offsets(5, 0); // NCDHW     
-    for(int i=0; i<3; ++i) {
-      offsets[4-i] = paddings[2*i];
+    std::vector<int64_t> offsets(out_tz.size(), 0); 
+    for(int i=0; i<paddings.size() / 2; ++i) {
+      offsets[out_tz.size() - 1 -i] = paddings[2*i];
     }
     
     auto slice_mem_p = reorder_handler.AcquireSubmemory(x_tz, offsets, reorder_dst_memory_p);
@@ -129,11 +137,11 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
 
     int64_t independent_dims = out_tz[0] * out_tz[1];
 
-    for(int i = 0; i < 3; ++i) {
+    for(int i = 0; i < paddings.size() / 2; ++i) {
       int64_t elems = std::max(paddings[2*i], paddings[2*i+1]);
-      for(int j = 0; j < 3; ++j) {
+      for(int j = 0; j < paddings.size() / 2; ++j) {
         if(j != i) {
-          elems *= out_tz[4 - j];
+          elems *= out_tz[out_tz.size() - 1 - j];
         }
       }
 
diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc
index a09dc5c7817d0..db1cd8400d9bf 100644
--- a/paddle/fluid/operators/pad3d_op.cc
+++ b/paddle/fluid/operators/pad3d_op.cc
@@ -45,6 +45,25 @@ class Pad3dOp : public framework::OperatorWithKernel {
 #endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
+
+framework::OpKernelType GetKernelTypeForVar(
+    const std::string& var_name,
+    const Tensor& tensor,
+    const framework::OpKernelType& expected_kernel_type) const {
+#ifdef PADDLE_WITH_MKLDNN
+  if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
+      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
+    auto attrs = Attrs();
+    auto ar = paddle::framework::AttrReader(attrs);
+    const std::string data_format = ar.Get<std::string>("data_format");
+    return framework::OpKernelType(expected_kernel_type.data_type_,
+                                   tensor.place(),
+                                   framework::StringToDataLayout(data_format));
+  }
+#endif
+  return framework::OpKernelType(
+      expected_kernel_type.data_type_, tensor.place(), tensor.layout());
+}
 };
 
 class Pad3dOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -169,13 +188,7 @@ class Pad3dOpGrad : public framework::OperatorWithKernel {
       const framework::ExecutionContext& ctx) const override {
     auto input_data_type = OperatorWithKernel::IndicateVarDataType(
         ctx, framework::GradVarName("Out"));
-#ifdef PADDLE_WITH_MKLDNN
-    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
-      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
-                                     framework::DataLayout::kMKLDNN,
-                                     framework::LibraryType::kMKLDNN);
-    }
-#endif
+
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };

From 62e3dc6168ca452eb94dab29c5b83bb4643a01f7 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Thu, 30 Jun 2022 18:42:41 +0200
Subject: [PATCH 07/17] added support for pad2d

---
 .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 158 +++++++++++-------
 paddle/fluid/operators/pad2d_op.cc            |  39 ++++-
 paddle/fluid/operators/pad3d_op.cc            |  44 ++---
 .../ir/inference/test_mkldnn_pad2d_op.py      |  74 ++++++++
 .../ir/inference/test_mkldnn_pad3d_op.py      |  75 +++++++++
 .../unittests/mkldnn/test_pad3d_mkldnn_op.py  | 115 -------------
 6 files changed, 308 insertions(+), 197 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py
 delete mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py

diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
index b9c072ecc45ef..05bb3830e5fc6 100644
--- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
@@ -14,37 +14,37 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/utils.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
-
-#define PAD3D_SIZE 6
-
 namespace paddle {
 namespace operators {
 
-using paddle::framework::Tensor;
-
+using framework::Tensor;
 
 /*
 Pad3D is done by using up to 7 reorders. Following example is done
-on 2D example for simplicity, but it is straightforward to extend it to 3D case.
+on 2D data for simplicity, but it is straightforward to extend it to 3D case.
 
 Let us consider following example:
 
           N  C  H  W               L  R  T  B
-X dims = (1, 1, 3, 3), paddings = (1, 2, 3, 4) in order Left, Right, Top, Bottom
-
-We have to copy the X tensor into Out tensor, but except from that we have to fill the rest of the memory with additional padding.
-To avoid looping through the whole Out memory two times, only these parts of Out memory that won't store X's memory are filled with pad value.
-That behavior is achieved by using oneDNN's submemory descriptors which allows us to set offsets for each dimension and skip some parts of the memory.
-For 2D case up to 5 reorders will be used in Pad3D kernel(if padding=0 reorder is skipped). 
-In the following example i'th number means, that this part of memory was filled by i'th reorder. 4'th reorder is copying X memory into Out memory.
-i&j means that both i'th and j'th reorder will set the padding at that location:
-
-              INDEX
+X_dims = (1, 1, 3, 3), paddings = (1, 2, 3, 4) in order Left, Right, Top, Bottom
+
+We have to copy the X tensor into the Out tensor and, apart from that, fill the
+rest of the memory with padding. To avoid looping through the whole Out memory
+twice, only those parts of Out memory that won't store X's data are filled with
+the pad value. That behavior is achieved by using oneDNN's submemory
+descriptors, which allow us to set offsets for each dimension and skip some
+parts of the memory. For the 2D case up to 5 reorders are used in the Pad3D
+kernel (a reorder is skipped when its padding equals 0). In the following
+example the number i means that this part of memory was filled by the i-th
+reorder. The 4th reorder copies X memory into Out memory. i&j means that both
+the i-th and j-th reorders set the padding at that location:
+
+               INDEX
      | 0   1   2   3   4   5
      |_______________________
    0 |0&2  2   2   2  1&2 1&2
    1 |0&2  2   2   2  1&2 1&2
-I  2 |0&2  2   2   2  1&2 1&2  
+I  2 |0&2  2   2   2  1&2 1&2
 N  3 | 0   4   4   4   1   1
 D  4 | 0   4   4   4   1   1
 E  5 | 0   4   4   4   1   1
@@ -53,12 +53,14 @@ X  6 |0&3  3   3   3  1&3 1&3
    8 |0&3  3   3   3  1&3 1&3
    9 |0&3  3   3   3  1&3 1&3
 
-Since oneDNN's reorder cannot set the pad value to the border memory, we have to prefill Out's memory and use it as a temporary buffer, which later is copied
-into the rest of Out's memory. At the end last reorder is done which is copying X memory into Out memory.
+Since oneDNN's reorder cannot write the pad value into memory by itself, we have
+to prefill Out's memory and use it as a temporary buffer, which is later copied
+into the rest of Out's memory. At the end, a final reorder copies X memory into
+Out memory.
 
 */
 template <typename T>
-class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
+class PadMKLDNNKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     this->RunKernel(ctx);
@@ -73,14 +75,23 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
     auto* x = ctx.Input<Tensor>("X");
     auto* out = ctx.Output<Tensor>("Out");
     std::vector<int> paddings(ctx.Attr<std::vector<int>>("paddings"));
-    T pad_value = static_cast<T>(ctx.Attr<float>("value"));
+    // pad2d has paddings in order top, bottom, left, right, so we need
+    // to swap some of them to unify paddings between pad2d and pad3d
+    if (ctx.Type() == "pad2d") {
+      std::swap(paddings[0], paddings[2]);
+      std::swap(paddings[1], paddings[3]);
+    }
+
+    const std::string pad_attr_name =
+        ctx.Type() == "pad3d" ? "value" : "pad_value";
+    T pad_value = static_cast<T>(ctx.Attr<float>(pad_attr_name));
 
     std::vector<int64_t> x_tz = phi::vectorize(x->dims());
     // due to the need of supporting NDHWC, inferring out shape
     // must be done inside the kernel
     std::vector<int64_t> out_tz(x_tz);
 
-    for(int i = 0; i < paddings.size() / 2; ++i) {
+    for (int i = 0; i < paddings.size() / 2; ++i) {
       out_tz[out_tz.size() - 1 - i] += paddings[2 * i] + paddings[2 * i + 1];
     }
     out->Resize(phi::make_ddim(out_tz));
@@ -88,41 +99,56 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
     auto paddle_dtype = framework::TransToProtoVarType(x->dtype());
 
     platform::ReorderMKLDNNHandler reorder_handler(
-      x_tz,
-      paddle_dtype,
-      framework::ToMKLDNNDataType(paddle_dtype),
-      onednn_engine);
-
-    auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(x->mem_desc(), platform::to_void_cast(x->data<T>()));
-    auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(out, out_tz, platform::GetPlainMKLDNNFormat(out_tz.size()), ctx.GetPlace());
+        x_tz,
+        paddle_dtype,
+        framework::ToMKLDNNDataType(paddle_dtype),
+        onednn_engine);
+
+    auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
+        x->mem_desc(), platform::to_void_cast(x->data<T>()));
+    auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
+        out,
+        out_tz,
+        platform::GetPlainMKLDNNFormat(out_tz.size()),
+        ctx.GetPlace());
 
     // to avoid allocating new temporary memory, Out's memory is used as a tmp
     // buffer for storing a contignuous memory consisting of pad_value, which
     // later is used as a SRC for reorders that are filling Out with padding
     T* out_ptr = out->data<T>();
-    std::fill(out_ptr, out_ptr+CalculatePrefillElems(out_tz, paddings), pad_value);
+    std::fill(out_ptr,
+              out_ptr + CalculateNumOfPrefillElems(out_tz, paddings),
+              pad_value);
 
     // paddings are in order: left, right, top, bottom, front, back
-    for(int i = 0; i < paddings.size(); ++i) {
-      if(paddings[i] != 0) {
+    for (int i = 0; i < paddings.size(); ++i) {
+      if (paddings[i] != 0) {
         std::vector<int64_t> offsets(out_tz.size(), 0);
         std::vector<int64_t> chunk_tz(out_tz.begin(), out_tz.end());
 
         chunk_tz[out_tz.size() - 1 - i / 2] = paddings[i];
         if (i % 2 == 1) {
-          offsets[out_tz.size() - 1 - i / 2] = paddings[i - 1] + x_tz[out_tz.size() - 1 - i / 2];
+          offsets[out_tz.size() - 1 - i / 2] =
+              paddings[i - 1] + x_tz[out_tz.size() - 1 - i / 2];
         }
 
-        FillPartOfPadding(paddle_dtype, onednn_engine, out_ptr, reorder_dst_memory_p, chunk_tz, offsets);
+        FillPartOfPadding(paddle_dtype,
+                          onednn_engine,
+                          out_ptr,
+                          reorder_dst_memory_p,
+                          chunk_tz,
+                          offsets);
       }
     }
-    
-    std::vector<int64_t> offsets(out_tz.size(), 0); 
-    for(int i=0; i<paddings.size() / 2; ++i) {
-      offsets[out_tz.size() - 1 -i] = paddings[2*i];
+    astream.wait();
+
+    std::vector<int64_t> offsets(out_tz.size(), 0);
+    for (int i = 0; i < paddings.size() / 2; ++i) {
+      offsets[out_tz.size() - 1 - i] = paddings[2 * i];
     }
-    
-    auto slice_mem_p = reorder_handler.AcquireSubmemory(x_tz, offsets, reorder_dst_memory_p);
+
+    auto slice_mem_p =
+        reorder_handler.AcquireSubmemory(x_tz, offsets, reorder_dst_memory_p);
 
     auto reorder_p =
         reorder_handler.AcquireReorder(slice_mem_p, reorder_src_memory_p);
@@ -132,20 +158,20 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
     out->set_mem_desc(reorder_dst_memory_p->get_desc());
   }
 
-  int64_t CalculatePrefillElems(const std::vector<int64_t>& out_tz, const std::vector<int>& paddings) const {
+  int64_t CalculateNumOfPrefillElems(const std::vector<int64_t>& out_tz,
+                                     const std::vector<int>& paddings) const {
     int64_t max_elems = 0;
-
     int64_t independent_dims = out_tz[0] * out_tz[1];
 
-    for(int i = 0; i < paddings.size() / 2; ++i) {
-      int64_t elems = std::max(paddings[2*i], paddings[2*i+1]);
-      for(int j = 0; j < paddings.size() / 2; ++j) {
-        if(j != i) {
+    for (int i = 0; i < paddings.size() / 2; ++i) {
+      int64_t elems = std::max(paddings[2 * i], paddings[2 * i + 1]);
+      for (int j = 0; j < paddings.size() / 2; ++j) {
+        if (j != i) {
           elems *= out_tz[out_tz.size() - 1 - j];
         }
       }
 
-      if(max_elems < elems) {
+      if (max_elems < elems) {
         max_elems = elems;
       }
     }
@@ -155,29 +181,37 @@ class Pad3dMKLDNNKernel : public framework::OpKernel<T> {
   void FillPartOfPadding(framework::proto::VarType::Type paddle_dtype,
                          const dnnl::engine& onednn_engine,
                          T* prefilled_mem_ptr,
-                         const std::shared_ptr<dnnl::memory>&out_mem_p,
-                         std::vector<int64_t>& chunk_tz,
+                         const std::shared_ptr<dnnl::memory>& out_mem_p,
+                         const std::vector<int64_t>& chunk_tz,
                          const std::vector<int64_t>& offsets) const {
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
 
-    dnnl::memory::desc prefilled_mem_desc(chunk_tz, platform::MKLDNNGetDataType<T>(), platform::GetPlainMKLDNNFormat(5));
-    auto prefilled_mem_p = std::make_shared<dnnl::memory>(prefilled_mem_desc, onednn_engine, prefilled_mem_ptr);
+    dnnl::memory::desc prefilled_mem_desc(
+        chunk_tz,
+        platform::MKLDNNGetDataType<T>(),
+        platform::GetPlainMKLDNNFormat(chunk_tz.size()));
+    dnnl::memory prefilled_mem(
+        prefilled_mem_desc, onednn_engine, prefilled_mem_ptr);
 
-    platform::ReorderMKLDNNHandler reorder_handler(
-      chunk_tz,
-      paddle_dtype,
-      framework::ToMKLDNNDataType(paddle_dtype),
-      onednn_engine);
+    dnnl::memory::desc out_slice_md =
+        out_mem_p->get_desc().submemory_desc(chunk_tz, {offsets});
+    dnnl::memory out_slice_mem(
+        out_slice_md, onednn_engine, out_mem_p->get_data_handle());
 
-    auto out_slice_mem_p = reorder_handler.AcquireSubmemory(chunk_tz, offsets, out_mem_p);
-    auto reorder_p =
-        reorder_handler.AcquireReorder(out_slice_mem_p, prefilled_mem_p);
-    reorder_p->execute(astream, *prefilled_mem_p, *out_slice_mem_p);
+    auto reorder_p = dnnl::reorder(prefilled_mem, out_slice_mem);
+    reorder_p.execute(astream, prefilled_mem, out_slice_mem);
   }
 };
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_KERNEL(pad3d, MKLDNN, paddle::platform::CPUPlace,
-                   ops::Pad3dMKLDNNKernel<float>);
+REGISTER_OP_KERNEL(pad3d,
+                   MKLDNN,
+                   paddle::platform::CPUPlace,
+                   ops::PadMKLDNNKernel<float>);
+
+REGISTER_OP_KERNEL(pad2d,
+                   MKLDNN,
+                   paddle::platform::CPUPlace,
+                   ops::PadMKLDNNKernel<float>);
diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc
index 72073ed3067c3..e7f0c6507bf70 100644
--- a/paddle/fluid/operators/pad2d_op.cc
+++ b/paddle/fluid/operators/pad2d_op.cc
@@ -699,8 +699,41 @@ class Pad2dOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
+    auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
+#ifdef PADDLE_WITH_MKLDNN
+    // only constant mode and non-blocked layouts are supported for oneDNN
+    if (this->CanMKLDNNBeUsed(ctx, input_data_type) &&
+        ctx.Attr<std::string>("mode") == "constant" &&
+        ctx.Input<Tensor>("X")
+                ->mem_desc()
+                .data.format_desc.blocking.inner_nblks == 0) {
+      return framework::OpKernelType(input_data_type,
+                                     ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
+  }
+
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string& var_name,
+      const Tensor& tensor,
+      const framework::OpKernelType& expected_kernel_type) const {
+#ifdef PADDLE_WITH_MKLDNN
+    if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
+        (tensor.layout() != framework::DataLayout::kMKLDNN)) {
+      auto attrs = Attrs();
+      auto ar = paddle::framework::AttrReader(attrs);
+      const std::string data_format = ar.Get<std::string>("data_format");
+      return framework::OpKernelType(
+          expected_kernel_type.data_type_,
+          tensor.place(),
+          framework::StringToDataLayout(data_format));
+    }
+#endif
     return framework::OpKernelType(
-        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
+        expected_kernel_type.data_type_, tensor.place(), tensor.layout());
   }
 };
 
@@ -740,6 +773,10 @@ class Pad2dOpMaker : public framework::OpProtoAndCheckerMaker {
         "An optional string from: \"NHWC\", \"NCHW\". "
         "Defaults to \"NHWC\". Specify the data format of the input data.")
         .SetDefault("NCHW");
+    AddAttr<bool>(
+        "use_mkldnn",
+        "(bool, default false) Indicates if MKL-DNN kernel will be used")
+        .SetDefault(false);
     AddComment(R"DOC(
 Pad2d Operator.
 Pad 2-d images according to 'paddings' and 'mode'. 
diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc
index db1cd8400d9bf..ffbe5ad073aca 100644
--- a/paddle/fluid/operators/pad3d_op.cc
+++ b/paddle/fluid/operators/pad3d_op.cc
@@ -36,9 +36,14 @@ class Pad3dOp : public framework::OperatorWithKernel {
       const framework::ExecutionContext& ctx) const override {
     auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
 #ifdef PADDLE_WITH_MKLDNN
-    // currently only constant mode and non-blocked layouts are supported for oneDNN
-    if (this->CanMKLDNNBeUsed(ctx, input_data_type) && ctx.Attr<std::string>("mode") == "constant" && ctx.Input<Tensor>("X")->mem_desc().data.format_desc.blocking.inner_nblks == 0) {
-      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+    // only constant mode and non-blocked layouts are supported for oneDNN
+    if (this->CanMKLDNNBeUsed(ctx, input_data_type) &&
+        ctx.Attr<std::string>("mode") == "constant" &&
+        ctx.Input<Tensor>("X")
+                ->mem_desc()
+                .data.format_desc.blocking.inner_nblks == 0) {
+      return framework::OpKernelType(input_data_type,
+                                     ctx.GetPlace(),
                                      framework::DataLayout::kMKLDNN,
                                      framework::LibraryType::kMKLDNN);
     }
@@ -46,24 +51,25 @@ class Pad3dOp : public framework::OperatorWithKernel {
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 
-framework::OpKernelType GetKernelTypeForVar(
-    const std::string& var_name,
-    const Tensor& tensor,
-    const framework::OpKernelType& expected_kernel_type) const {
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string& var_name,
+      const Tensor& tensor,
+      const framework::OpKernelType& expected_kernel_type) const {
 #ifdef PADDLE_WITH_MKLDNN
-  if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
-      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
-    auto attrs = Attrs();
-    auto ar = paddle::framework::AttrReader(attrs);
-    const std::string data_format = ar.Get<std::string>("data_format");
-    return framework::OpKernelType(expected_kernel_type.data_type_,
-                                   tensor.place(),
-                                   framework::StringToDataLayout(data_format));
-  }
+    if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
+        (tensor.layout() != framework::DataLayout::kMKLDNN)) {
+      auto attrs = Attrs();
+      auto ar = paddle::framework::AttrReader(attrs);
+      const std::string data_format = ar.Get<std::string>("data_format");
+      return framework::OpKernelType(
+          expected_kernel_type.data_type_,
+          tensor.place(),
+          framework::StringToDataLayout(data_format));
+    }
 #endif
-  return framework::OpKernelType(
-      expected_kernel_type.data_type_, tensor.place(), tensor.layout());
-}
+    return framework::OpKernelType(
+        expected_kernel_type.data_type_, tensor.place(), tensor.layout());
+  }
 };
 
 class Pad3dOpMaker : public framework::OpProtoAndCheckerMaker {
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py
new file mode 100644
index 0000000000000..7cd221e239781
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from auto_scan_test import MkldnnAutoScanTest
+from program_config import TensorConfig, ProgramConfig, OpConfig
+import numpy as np
+from functools import partial
+import unittest
+from hypothesis import given, reproduce_failure
+import hypothesis.strategies as st
+
+
+class TestOneDNNPad2DOp(MkldnnAutoScanTest):
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        # if mode is channel, and in_shape is 1 rank
+        if len(program_config.inputs['input_data'].shape
+               ) == 1 and program_config.ops[0].attrs['mode'] == 'channel':
+            return False
+        return True
+
+    def sample_program_configs(self, *args, **kwargs):
+
+        def generate_input(*args, **kwargs):
+            return np.random.random(kwargs['in_shape']).astype(np.float32)
+
+        pad3d_op = OpConfig(type="pad2d",
+                            inputs={"X": ["input_data"]},
+                            outputs={"Out": ["output_data"]},
+                            attrs={
+                                "mode": "constant",
+                                "data_format": kwargs['data_format'],
+                                "paddings": kwargs['paddings'],
+                            })
+
+        program_config = ProgramConfig(
+            ops=[pad3d_op],
+            weights={},
+            inputs={
+                "input_data":
+                TensorConfig(data_gen=partial(generate_input, *args, **kwargs)),
+            },
+            outputs=["output_data"])
+
+        yield program_config
+
+    def sample_predictor_configs(self, program_config):
+        config = self.create_inference_config(use_mkldnn=True)
+        yield config, (1e-5, 1e-5)
+
+    @given(data_format=st.sampled_from(['NCHW', 'NHWC']),
+           in_shape=st.lists(st.integers(min_value=1, max_value=10),
+                             min_size=4,
+                             max_size=4),
+           paddings=st.lists(st.integers(min_value=0, max_value=3),
+                             min_size=4,
+                             max_size=4))
+    def test(self, *args, **kwargs):
+        self.run_test(quant=False, *args, **kwargs)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py
new file mode 100644
index 0000000000000..11df7c41dfbf5
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from auto_scan_test import MkldnnAutoScanTest
+from program_config import TensorConfig, ProgramConfig, OpConfig
+import numpy as np
+from functools import partial
+import unittest
+from hypothesis import given, reproduce_failure
+import hypothesis.strategies as st
+
+
+@reproduce_failure('6.45.0', b'AAEAAAAAAAAAAAAAAQ==')
+class TestOneDNNPad3DOp(MkldnnAutoScanTest):
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        # if mode is channel, and in_shape is 1 rank
+        if len(program_config.inputs['input_data'].shape
+               ) == 1 and program_config.ops[0].attrs['mode'] == 'channel':
+            return False
+        return True
+
+    def sample_program_configs(self, *args, **kwargs):
+
+        def generate_input(*args, **kwargs):
+            return np.random.random(kwargs['in_shape']).astype(np.float32)
+
+        pad3d_op = OpConfig(type="pad3d",
+                            inputs={"X": ["input_data"]},
+                            outputs={"Out": ["output_data"]},
+                            attrs={
+                                "mode": "constant",
+                                "data_format": kwargs['data_format'],
+                                "paddings": kwargs['paddings'],
+                            })
+
+        program_config = ProgramConfig(
+            ops=[pad3d_op],
+            weights={},
+            inputs={
+                "input_data":
+                TensorConfig(data_gen=partial(generate_input, *args, **kwargs)),
+            },
+            outputs=["output_data"])
+
+        yield program_config
+
+    def sample_predictor_configs(self, program_config):
+        config = self.create_inference_config(use_mkldnn=True)
+        yield config, (1e-5, 1e-5)
+
+    @given(data_format=st.sampled_from(['NCDHW', 'NDHWC']),
+           in_shape=st.lists(st.integers(min_value=1, max_value=10),
+                             min_size=5,
+                             max_size=5),
+           paddings=st.lists(st.integers(min_value=0, max_value=3),
+                             min_size=6,
+                             max_size=6))
+    def test(self, *args, **kwargs):
+        self.run_test(quant=False, *args, **kwargs)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py
deleted file mode 100644
index a93edc9baaf9b..0000000000000
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_pad3d_mkldnn_op.py
+++ /dev/null
@@ -1,115 +0,0 @@
-#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-from termios import N_PPP  #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-import numpy as np
-from paddle.fluid.tests.unittests.op_test import OpTest
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-import paddle.fluid.core as core
-
-from paddle.fluid import Program, program_guard, Executor, default_main_program
-
-
-class TestPad3dOneDNNOp(OpTest):
-    def setUp(self):
-        paddle.enable_static()
-        self.value = 1.0
-        self.initTestCase()
-        self.op_type = "pad3d"
-        self.python_api = paddle.nn.functional.pad
-        self.inputs = {'X': np.random.random(self.shape).astype("float32")}
-        self.attrs = {'use_mkldnn': True}
-        if self.variable_paddings:
-            self.attrs['paddings'] = []
-            self.inputs['Paddings'] = np.array(
-                self.paddings).flatten().astype("int32")
-        else:
-            self.attrs['paddings'] = np.array(
-                self.paddings).flatten().astype("int32")
-        self.attrs['value'] = self.value
-        self.attrs['mode'] = self.mode
-        self.attrs['data_format'] = self.data_format
-        if self.data_format == "NCDHW":
-            paddings = [
-                (0, 0),
-                (0, 0),
-                (self.paddings[4], self.paddings[5]),
-                (self.paddings[2], self.paddings[3]),
-                (self.paddings[0], self.paddings[1]),
-            ]
-        else:
-            paddings = [
-                (0, 0),
-                (self.paddings[4], self.paddings[5]),
-                (self.paddings[2], self.paddings[3]),
-                (self.paddings[0], self.paddings[1]),
-                (0, 0),
-            ]
-        if self.mode == "constant":
-            out = np.pad(self.inputs['X'],
-                         paddings,
-                         mode=self.mode,
-                         constant_values=self.value)
-        elif self.mode == "reflect":
-            out = np.pad(self.inputs['X'], paddings, mode=self.mode)
-        elif self.mode == "replicate":
-            out = np.pad(self.inputs['X'], paddings, mode="edge")
-        elif self.mode == "circular":
-            out = np.pad(self.inputs['X'], paddings, mode="wrap")
-        self.outputs = {'Out': out}
-
-    def test_check_output(self):
-        import sys
-        np.set_printoptions(threshold=sys.maxsize)
-        print(self.inputs["X"].shape)
-        print(self.outputs["Out"].shape)
-        # print("\n\n\n")
-        # print("inputs", self.inputs["X"])
-        # print("\n\n\n")
-        # print("outputs", self.outputs["Out"])
-        # print("\n\n\n")
-        self.check_output()
-
-    # def test_check_grad_normal(self):
-    #     self.check_grad(['X'], 'Out')
-
-    def initTestCase(self):
-        self.shape = (6, 2, 3, 4, 5)
-        self.paddings = [2, 3, 4, 5, 0, 0]
-        self.mode = "constant"
-        self.data_format = "NCDHW"
-        self.pad_value = 1.0
-        self.variable_paddings = False
-
-
-if __name__ == '__main__':
-    paddle.enable_static()
-    unittest.main()

From f54cd0cbb1ca7818ddd610196b94b1e065e28847 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Thu, 30 Jun 2022 18:43:55 +0200
Subject: [PATCH 08/17] reverted two files

---
 .../mkldnn/fill_constant_mkldnn_op.cc         | 33 +++++++++++++++++--
 paddle/fluid/platform/mkldnn_reuse.h          | 29 ----------------
 2 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc
index a72ddaa6511e7..615f43bb32c0f 100644
--- a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc
@@ -20,6 +20,35 @@ namespace operators {
 
 using framework::Tensor;
 
+template <typename T>
+class FillConstantMKLDNNHandler
+    : public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
+ public:
+  FillConstantMKLDNNHandler(Tensor* out,
+                            dnnl::engine engine,
+                            platform::Place cpu_place)
+      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(engine, cpu_place) {
+    const auto src0_md =
+        dnnl::memory::desc({out->numel(), sizeof(T)},
+                           platform::MKLDNNGetDataType<uint8_t>(),
+                           dnnl::memory::format_tag::ab);
+
+    dnnl::primitive_attr attrs;
+    attrs.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f});
+
+    this->AcquireForwardPrimitiveDescriptor(
+        attrs, dnnl::algorithm::binary_add, src0_md, src1_md, src0_md);
+  }
+
+  static const dnnl::memory::desc src1_md;
+};
+
+template <typename T>
+const dnnl::memory::desc FillConstantMKLDNNHandler<T>::src1_md(
+    {1, sizeof(T)},
+    platform::MKLDNNGetDataType<uint8_t>(),
+    dnnl::memory::format_tag::ab);
+
 template <typename T>
 class FillConstantMKLDNNKernel : public framework::OpKernel<T> {
  public:
@@ -38,10 +67,10 @@ class FillConstantMKLDNNKernel : public framework::OpKernel<T> {
     auto shape = GetShape(ctx);
     out->Resize(shape);
 
-    platform::FillConstantMKLDNNHandler<T> handler(out, dnnl_engine, ctx.GetPlace());
+    FillConstantMKLDNNHandler<T> handler(out, dnnl_engine, ctx.GetPlace());
 
     dnnl::memory constant_value_memory =
-        dnnl::memory(platform::FillConstantMKLDNNHandler<T>::src1_md,
+        dnnl::memory(FillConstantMKLDNNHandler<T>::src1_md,
                      dnnl_engine,
                      reinterpret_cast<uint8_t*>(&fill_value));
 
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index c34a4a069f14f..05ebedf611a4b 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -912,35 +912,6 @@ class MatMulV2MKLDNNHandler
   }
 };
 
-template <typename T>
-class FillConstantMKLDNNHandler
-    : public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
- public:
-  FillConstantMKLDNNHandler(Tensor* out,
-                            dnnl::engine engine,
-                            platform::Place cpu_place)
-      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(engine, cpu_place) {
-    const auto src0_md =
-        dnnl::memory::desc({out->numel(), sizeof(T)},
-                           platform::MKLDNNGetDataType<uint8_t>(),
-                           dnnl::memory::format_tag::ab);
-
-    dnnl::primitive_attr attrs;
-    attrs.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f});
-
-    this->AcquireForwardPrimitiveDescriptor(
-        attrs, dnnl::algorithm::binary_add, src0_md, src1_md, src0_md);
-  }
-
-  static const dnnl::memory::desc src1_md;
-};
-
-template <typename T>
-const dnnl::memory::desc FillConstantMKLDNNHandler<T>::src1_md(
-    {1, sizeof(T)},
-    platform::MKLDNNGetDataType<uint8_t>(),
-    dnnl::memory::format_tag::ab);
-
 template <typename T>
 class ActivationMKLDNNHandler
     : public MKLDNNHandlerNoCachingT<T,

From 776bba63fd09178cc2ad85ef7c6d7ac8461c6fe1 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Thu, 30 Jun 2022 18:47:50 +0200
Subject: [PATCH 09/17] reverted one old change

---
 paddle/fluid/operators/pad3d_op.cc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc
index ffbe5ad073aca..a0da0fecb7e9a 100644
--- a/paddle/fluid/operators/pad3d_op.cc
+++ b/paddle/fluid/operators/pad3d_op.cc
@@ -192,10 +192,9 @@ class Pad3dOpGrad : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    auto input_data_type = OperatorWithKernel::IndicateVarDataType(
-        ctx, framework::GradVarName("Out"));
-
-    return framework::OpKernelType(input_data_type, ctx.GetPlace());
+    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
+                                       ctx, framework::GradVarName("Out")),
+                                   ctx.GetPlace());
   }
 };
 

From 9897681ad829ce66914767ca4a5853c4642d52c7 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Thu, 30 Jun 2022 19:34:19 +0200
Subject: [PATCH 10/17] added support for Paddings tensor

---
 .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc |  6 ++++++
 .../ir/inference/test_mkldnn_pad3d_op.py      | 19 ++++++++++---------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
index 05bb3830e5fc6..173bbe70994eb 100644
--- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
@@ -74,7 +74,13 @@ class PadMKLDNNKernel : public framework::OpKernel<T> {
 
     auto* x = ctx.Input<Tensor>("X");
     auto* out = ctx.Output<Tensor>("Out");
+    auto* paddings_tensor = ctx.Input<Tensor>("Paddings");
     std::vector<int> paddings(ctx.Attr<std::vector<int>>("paddings"));
+    if (paddings_tensor) {
+      // assign() resizes first: the attr may hold fewer ints than the tensor
+      const int* paddings_data = paddings_tensor->data<int>();
+      paddings.assign(paddings_data, paddings_data + paddings_tensor->numel());
+    }
     // pad2d has paddings in order top, bottom, left, right, so we need
     // to swap some of them to unify paddings between pad2d and pad3d
     if (ctx.Type() == "pad2d") {
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py
index 11df7c41dfbf5..cdd319ca307aa 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py
@@ -21,23 +21,21 @@
 import hypothesis.strategies as st
 
 
-@reproduce_failure('6.45.0', b'AAEAAAAAAAAAAAAAAQ==')
 class TestOneDNNPad3DOp(MkldnnAutoScanTest):
 
-    def is_program_valid(self, program_config: ProgramConfig) -> bool:
-        # if mode is channel, and in_shape is 1 rank
-        if len(program_config.inputs['input_data'].shape
-               ) == 1 and program_config.ops[0].attrs['mode'] == 'channel':
-            return False
-        return True
-
     def sample_program_configs(self, *args, **kwargs):
 
         def generate_input(*args, **kwargs):
             return np.random.random(kwargs['in_shape']).astype(np.float32)
 
+        def generate_paddings():
+            return np.random.randint(0, 4, size=(6)).astype(np.int32)
+
         pad3d_op = OpConfig(type="pad3d",
-                            inputs={"X": ["input_data"]},
+                            inputs={
+                                "X": ["input_data"],
+                                "Paddings": ["paddings_data"]
+                            },
                             outputs={"Out": ["output_data"]},
                             attrs={
                                 "mode": "constant",
@@ -51,6 +49,8 @@ def generate_input(*args, **kwargs):
             inputs={
                 "input_data":
                 TensorConfig(data_gen=partial(generate_input, *args, **kwargs)),
+                "paddings_data":
+                TensorConfig(data_gen=generate_paddings)
             },
             outputs=["output_data"])
 
@@ -61,6 +61,7 @@ def sample_predictor_configs(self, program_config):
         yield config, (1e-5, 1e-5)
 
     @given(data_format=st.sampled_from(['NCDHW', 'NDHWC']),
+           use_paddings_tensor=st.sampled_from([True, False]),
            in_shape=st.lists(st.integers(min_value=1, max_value=10),
                              min_size=5,
                              max_size=5),

From fa618f272cb907581a2f5bfa20ade336dd847ee9 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Thu, 30 Jun 2022 19:55:33 +0200
Subject: [PATCH 11/17] CI fix: use size_t loop indices in pad oneDNN kernel

---
 paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
index 173bbe70994eb..03e3faf4e753d 100644
--- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
@@ -97,7 +97,7 @@ class PadMKLDNNKernel : public framework::OpKernel<T> {
     // must be done inside the kernel
     std::vector<int64_t> out_tz(x_tz);
 
-    for (int i = 0; i < paddings.size() / 2; ++i) {
+    for (size_t i = 0; i < paddings.size() / 2; ++i) {
       out_tz[out_tz.size() - 1 - i] += paddings[2 * i] + paddings[2 * i + 1];
     }
     out->Resize(phi::make_ddim(out_tz));
@@ -127,7 +127,7 @@ class PadMKLDNNKernel : public framework::OpKernel<T> {
               pad_value);
 
     // paddings are in order: left, right, top, bottom, front, back
-    for (int i = 0; i < paddings.size(); ++i) {
+    for (size_t i = 0; i < paddings.size(); ++i) {
       if (paddings[i] != 0) {
         std::vector<int64_t> offsets(out_tz.size(), 0);
         std::vector<int64_t> chunk_tz(out_tz.begin(), out_tz.end());
@@ -149,7 +149,7 @@ class PadMKLDNNKernel : public framework::OpKernel<T> {
     astream.wait();
 
     std::vector<int64_t> offsets(out_tz.size(), 0);
-    for (int i = 0; i < paddings.size() / 2; ++i) {
+    for (size_t i = 0; i < paddings.size() / 2; ++i) {
       offsets[out_tz.size() - 1 - i] = paddings[2 * i];
     }
 
@@ -169,9 +169,9 @@ class PadMKLDNNKernel : public framework::OpKernel<T> {
     int64_t max_elems = 0;
     int64_t independent_dims = out_tz[0] * out_tz[1];
 
-    for (int i = 0; i < paddings.size() / 2; ++i) {
+    for (size_t i = 0; i < paddings.size() / 2; ++i) {
       int64_t elems = std::max(paddings[2 * i], paddings[2 * i + 1]);
-      for (int j = 0; j < paddings.size() / 2; ++j) {
+      for (size_t j = 0; j < paddings.size() / 2; ++j) {
         if (j != i) {
           elems *= out_tz[out_tz.size() - 1 - j];
         }

From d3aca15f5a7a5481b01b0fcee6e706c12aa5a987 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Fri, 1 Jul 2022 00:36:46 +0200
Subject: [PATCH 12/17] CI fix: mark pad3d use_mkldnn attribute AsExtra

---
 paddle/fluid/operators/pad3d_op.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc
index a0da0fecb7e9a..e4b32b3d7a76e 100644
--- a/paddle/fluid/operators/pad3d_op.cc
+++ b/paddle/fluid/operators/pad3d_op.cc
@@ -111,10 +111,10 @@ class Pad3dOpMaker : public framework::OpProtoAndCheckerMaker {
         "An optional string from: \"NDHWC\", \"NCDHW\". "
         "Defaults to \"NDHWC\". Specify the data format of the input data.")
         .SetDefault("NCDHW");
-    AddAttr<bool>(
-        "use_mkldnn",
-        "(bool, default false) Indicates if MKL-DNN kernel will be used")
-        .SetDefault(false);
+    AddAttr<bool>("use_mkldnn",
+                  "(bool, default false) Only used in mkldnn kernel")
+        .SetDefault(false)
+        .AsExtra();
     AddComment(R"DOC(
 Pad3d Operator.
 Pad 3-d images according to 'paddings' and 'mode'. 

From 52534a49949d69e720a358a51d6db23718cfd43f Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Fri, 1 Jul 2022 19:28:32 +0200
Subject: [PATCH 13/17] fixed timeout of tests

---
 .../ir/inference/test_mkldnn_pad2d_op.py        | 17 ++++-------------
 .../ir/inference/test_mkldnn_pad3d_op.py        | 10 ++++------
 2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py
index 7cd221e239781..5a81451febf39 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad2d_op.py
@@ -23,13 +23,6 @@
 
 class TestOneDNNPad2DOp(MkldnnAutoScanTest):
 
-    def is_program_valid(self, program_config: ProgramConfig) -> bool:
-        # if mode is channel, and in_shape is 1 rank
-        if len(program_config.inputs['input_data'].shape
-               ) == 1 and program_config.ops[0].attrs['mode'] == 'channel':
-            return False
-        return True
-
     def sample_program_configs(self, *args, **kwargs):
 
         def generate_input(*args, **kwargs):
@@ -60,12 +53,10 @@ def sample_predictor_configs(self, program_config):
         yield config, (1e-5, 1e-5)
 
     @given(data_format=st.sampled_from(['NCHW', 'NHWC']),
-           in_shape=st.lists(st.integers(min_value=1, max_value=10),
-                             min_size=4,
-                             max_size=4),
-           paddings=st.lists(st.integers(min_value=0, max_value=3),
-                             min_size=4,
-                             max_size=4))
+           in_shape=st.sampled_from([[2, 3, 4, 5], [1, 4, 1, 3], [4, 3, 2, 1],
+                                     [1, 1, 1, 1]]),
+           paddings=st.sampled_from([[0, 0, 0, 0], [1, 2, 0, 1], [2, 5, 11, 3],
+                                     [0, 5, 0, 1]]))
     def test(self, *args, **kwargs):
         self.run_test(quant=False, *args, **kwargs)
 
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py
index cdd319ca307aa..acc7fa1e30e2d 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_pad3d_op.py
@@ -62,12 +62,10 @@ def sample_predictor_configs(self, program_config):
 
     @given(data_format=st.sampled_from(['NCDHW', 'NDHWC']),
            use_paddings_tensor=st.sampled_from([True, False]),
-           in_shape=st.lists(st.integers(min_value=1, max_value=10),
-                             min_size=5,
-                             max_size=5),
-           paddings=st.lists(st.integers(min_value=0, max_value=3),
-                             min_size=6,
-                             max_size=6))
+           in_shape=st.sampled_from([[2, 3, 4, 5, 6], [1, 4, 1, 3, 2],
+                                     [4, 3, 2, 1, 1], [1, 1, 1, 1, 1]]),
+           paddings=st.sampled_from([[0, 0, 0, 0, 0, 0], [1, 2, 0, 1, 2, 1],
+                                     [2, 5, 11, 3, 4, 3], [0, 5, 0, 1, 0, 2]]))
     def test(self, *args, **kwargs):
         self.run_test(quant=False, *args, **kwargs)
 

From 24eff5e96c21cac6635c4d1589b1409a854e9140 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Mon, 4 Jul 2022 15:33:03 +0200
Subject: [PATCH 14/17] fixed typo

---
 paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
index 03e3faf4e753d..e7a528c452b8d 100644
--- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc
@@ -119,7 +119,7 @@ class PadMKLDNNKernel : public framework::OpKernel<T> {
         ctx.GetPlace());
 
     // to avoid allocating new temporary memory, Out's memory is used as a tmp
-    // buffer for storing a contignuous memory consisting of pad_value, which
+    // buffer for storing a contiguous memory consisting of pad_value, which
     // later is used as a SRC for reorders that are filling Out with padding
     T* out_ptr = out->data<T>();
     std::fill(out_ptr,

From 469106115c49682b25038a666fd71bd4a10fb66b Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Tue, 5 Jul 2022 17:14:26 +0200
Subject: [PATCH 15/17] changes to GetKernelTypeForVar

---
 paddle/fluid/operators/pad2d_op.cc | 14 ++++++--------
 paddle/fluid/operators/pad3d_op.cc | 14 ++++++--------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc
index e7f0c6507bf70..de45a2ff811cd 100644
--- a/paddle/fluid/operators/pad2d_op.cc
+++ b/paddle/fluid/operators/pad2d_op.cc
@@ -722,14 +722,12 @@ class Pad2dOp : public framework::OperatorWithKernel {
       const framework::OpKernelType& expected_kernel_type) const {
 #ifdef PADDLE_WITH_MKLDNN
     if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
-        (tensor.layout() != framework::DataLayout::kMKLDNN)) {
-      auto attrs = Attrs();
-      auto ar = paddle::framework::AttrReader(attrs);
-      const std::string data_format = ar.Get<std::string>("data_format");
-      return framework::OpKernelType(
-          expected_kernel_type.data_type_,
-          tensor.place(),
-          framework::StringToDataLayout(data_format));
+        (tensor.layout() != framework::DataLayout::kMKLDNN) &&
+        paddle::platform::MKLDNNDeviceContext::tls()
+                .get_cur_paddle_data_layout() == framework::DataLayout::kNHWC) {
+      return framework::OpKernelType(expected_kernel_type.data_type_,
+                                     tensor.place(),
+                                     framework::DataLayout::kNHWC);
     }
 #endif
     return framework::OpKernelType(
diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc
index e4b32b3d7a76e..7d4f4826cae88 100644
--- a/paddle/fluid/operators/pad3d_op.cc
+++ b/paddle/fluid/operators/pad3d_op.cc
@@ -57,14 +57,12 @@ class Pad3dOp : public framework::OperatorWithKernel {
       const framework::OpKernelType& expected_kernel_type) const {
 #ifdef PADDLE_WITH_MKLDNN
     if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
-        (tensor.layout() != framework::DataLayout::kMKLDNN)) {
-      auto attrs = Attrs();
-      auto ar = paddle::framework::AttrReader(attrs);
-      const std::string data_format = ar.Get<std::string>("data_format");
-      return framework::OpKernelType(
-          expected_kernel_type.data_type_,
-          tensor.place(),
-          framework::StringToDataLayout(data_format));
+        (tensor.layout() != framework::DataLayout::kMKLDNN) &&
+        paddle::platform::MKLDNNDeviceContext::tls()
+                .get_cur_paddle_data_layout() == framework::DataLayout::kNHWC) {
+      return framework::OpKernelType(expected_kernel_type.data_type_,
+                                     tensor.place(),
+                                     framework::DataLayout::kNHWC);
     }
 #endif
     return framework::OpKernelType(

From 84893ee69d5f16f52110e0a6b133497a442b2245 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Wed, 6 Jul 2022 15:54:16 +0200
Subject: [PATCH 16/17] Revert "changes to GetKernelTypeForVar"

This reverts commit 469106115c49682b25038a666fd71bd4a10fb66b.
---
 paddle/fluid/operators/pad2d_op.cc | 14 ++++++++------
 paddle/fluid/operators/pad3d_op.cc | 14 ++++++++------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc
index de45a2ff811cd..e7f0c6507bf70 100644
--- a/paddle/fluid/operators/pad2d_op.cc
+++ b/paddle/fluid/operators/pad2d_op.cc
@@ -722,12 +722,14 @@ class Pad2dOp : public framework::OperatorWithKernel {
       const framework::OpKernelType& expected_kernel_type) const {
 #ifdef PADDLE_WITH_MKLDNN
     if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
-        (tensor.layout() != framework::DataLayout::kMKLDNN) &&
-        paddle::platform::MKLDNNDeviceContext::tls()
-                .get_cur_paddle_data_layout() == framework::DataLayout::kNHWC) {
-      return framework::OpKernelType(expected_kernel_type.data_type_,
-                                     tensor.place(),
-                                     framework::DataLayout::kNHWC);
+        (tensor.layout() != framework::DataLayout::kMKLDNN)) {
+      auto attrs = Attrs();
+      auto ar = paddle::framework::AttrReader(attrs);
+      const std::string data_format = ar.Get<std::string>("data_format");
+      return framework::OpKernelType(
+          expected_kernel_type.data_type_,
+          tensor.place(),
+          framework::StringToDataLayout(data_format));
     }
 #endif
     return framework::OpKernelType(
diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc
index 7d4f4826cae88..e4b32b3d7a76e 100644
--- a/paddle/fluid/operators/pad3d_op.cc
+++ b/paddle/fluid/operators/pad3d_op.cc
@@ -57,12 +57,14 @@ class Pad3dOp : public framework::OperatorWithKernel {
       const framework::OpKernelType& expected_kernel_type) const {
 #ifdef PADDLE_WITH_MKLDNN
     if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
-        (tensor.layout() != framework::DataLayout::kMKLDNN) &&
-        paddle::platform::MKLDNNDeviceContext::tls()
-                .get_cur_paddle_data_layout() == framework::DataLayout::kNHWC) {
-      return framework::OpKernelType(expected_kernel_type.data_type_,
-                                     tensor.place(),
-                                     framework::DataLayout::kNHWC);
+        (tensor.layout() != framework::DataLayout::kMKLDNN)) {
+      auto attrs = Attrs();
+      auto ar = paddle::framework::AttrReader(attrs);
+      const std::string data_format = ar.Get<std::string>("data_format");
+      return framework::OpKernelType(
+          expected_kernel_type.data_type_,
+          tensor.place(),
+          framework::StringToDataLayout(data_format));
     }
 #endif
     return framework::OpKernelType(

From add743fc44570874a99f582f446db8c429dbbcd7 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Thu, 7 Jul 2022 17:03:46 +0200
Subject: [PATCH 17/17] added AsExtra() to pad2d

---
 paddle/fluid/operators/pad2d_op.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc
index e7f0c6507bf70..a0ff40cddbf47 100644
--- a/paddle/fluid/operators/pad2d_op.cc
+++ b/paddle/fluid/operators/pad2d_op.cc
@@ -773,10 +773,10 @@ class Pad2dOpMaker : public framework::OpProtoAndCheckerMaker {
         "An optional string from: \"NHWC\", \"NCHW\". "
         "Defaults to \"NHWC\". Specify the data format of the input data.")
         .SetDefault("NCHW");
-    AddAttr<bool>(
-        "use_mkldnn",
-        "(bool, default false) Indicates if MKL-DNN kernel will be used")
-        .SetDefault(false);
+    AddAttr<bool>("use_mkldnn",
+                  "(bool, default false) Only used in mkldnn kernel")
+        .SetDefault(false)
+        .AsExtra();
     AddComment(R"DOC(
 Pad2d Operator.
 Pad 2-d images according to 'paddings' and 'mode'.