From 868b7833783851ac68deb31bccde57c3c25850d1 Mon Sep 17 00:00:00 2001
From: Tomasz Socha <tomasz.socha@intel.com>
Date: Thu, 2 Sep 2021 14:46:52 +0200
Subject: [PATCH] [ONNX] QLinearConvolution (#7210)

---
 .../core/include/ngraph/op/fake_quantize.hpp  |   6 +
 ngraph/core/src/op/fake_quantize.cpp          |  79 +++++++++
 ngraph/frontend/onnx/frontend/src/op/conv.cpp |  21 +--
 ngraph/frontend/onnx/frontend/src/op/conv.hpp |   3 +
 .../frontend/src/op/dequantize_linear.cpp     |  43 ++---
 .../frontend/src/op/dequantize_linear.hpp     |  13 +-
 .../onnx/frontend/src/op/qlinear_conv.cpp     |  68 ++++++++
 .../onnx/frontend/src/op/qlinear_conv.hpp     |  31 ++++
 .../onnx/frontend/src/op/quantize_linear.cpp  |  31 +++-
 .../onnx/frontend/src/op/quantize_linear.hpp  |   6 +
 .../frontend/onnx/frontend/src/ops_bridge.cpp |  10 +-
 ngraph/test/files/onnx/qlinearconv3d/x.bin    | Bin 64 -> 0 bytes
 ngraph/test/files/onnx/qlinearconv3d/y.bin    | Bin 64 -> 0 bytes
 .../quant_conv_linear_onnx_example.prototxt   | 152 ++++++++++++++++++
 ngraph/test/onnx/onnx_import_quant.in.cpp     | 116 +++++++++++--
 ngraph/test/runtime/ie/unit_test.manifest     |   9 +-
 .../runtime/interpreter/evaluates_map.cpp     |  23 ---
 .../runtime/interpreter/unit_test.manifest    |   3 -
 runtime/bindings/python/tests/__init__.py     |   6 -
 .../tests/test_ngraph/test_ops_fused.py       |   3 +-
 .../python/tests/test_onnx/test_backend.py    |  25 +--
 21 files changed, 533 insertions(+), 115 deletions(-)
 create mode 100644 ngraph/frontend/onnx/frontend/src/op/qlinear_conv.cpp
 create mode 100644 ngraph/frontend/onnx/frontend/src/op/qlinear_conv.hpp
 delete mode 100644 ngraph/test/files/onnx/qlinearconv3d/x.bin
 delete mode 100644 ngraph/test/files/onnx/qlinearconv3d/y.bin
 create mode 100644 ngraph/test/models/onnx/quantization/quant_conv_linear_onnx_example.prototxt
diff --git a/ngraph/core/include/ngraph/op/fake_quantize.hpp b/ngraph/core/include/ngraph/op/fake_quantize.hpp
index 08a53dca2fdcb9..00c970bf8fd415 100644
--- a/ngraph/core/include/ngraph/op/fake_quantize.hpp
+++ b/ngraph/core/include/ngraph/op/fake_quantize.hpp
@@ -67,6 +67,12 @@ class NGRAPH_API FakeQuantize : public ngraph::op::Op {
         m_auto_broadcast = auto_broadcast;
     }
 
+    bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
+    bool has_evaluate() const override;
+    bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override {
+        return false;  // always returns false without reading either argument
+    }
+
 private:
     std::size_t m_levels;
     AutoBroadcastSpec m_auto_broadcast = op::AutoBroadcastType::NUMPY;
diff --git a/ngraph/core/src/op/fake_quantize.cpp b/ngraph/core/src/op/fake_quantize.cpp
index 645fcc92b97a18..324b87b651fd3f 100644
--- a/ngraph/core/src/op/fake_quantize.cpp
+++ b/ngraph/core/src/op/fake_quantize.cpp
@@ -11,7 +11,9 @@
 #include "ngraph/op/constant.hpp"
 #include "ngraph/op/convert.hpp"
 #include "ngraph/op/select.hpp"
+#include "ngraph/runtime/reference/fake_quantize.hpp"
 #include "ngraph/shape.hpp"
+#include "ngraph/type/element_type.hpp"
 
 using namespace std;
 using namespace ngraph;
@@ -73,3 +75,80 @@ shared_ptr<Node> op::FakeQuantize::clone_with_new_inputs(const OutputVector& new
                                      m_levels,
                                      m_auto_broadcast);
 }
+
+namespace fakequantizeop {  // host-tensor evaluation helpers for op::FakeQuantize
+template <element::Type_t ET>
+bool evaluate(const HostTensorPtr& arg0,  // arg0..arg4: the op's five inputs, in input order
+              const HostTensorPtr& arg1,
+              const HostTensorPtr& arg2,
+              const HostTensorPtr& arg3,
+              const HostTensorPtr& arg4,
+              const HostTensorPtr& out,
+              const ngraph::op::FakeQuantize* parent) {  // parent supplies shapes, levels and broadcast spec
+    NGRAPH_OP_SCOPE(v0_FakeQuantize_evaluate);
+    using T = typename element_type_traits<ET>::value_type;
+    runtime::reference::fake_quantize<T>(arg0->get_data_ptr<const T>(),  // every input buffer is read as T
+                                         arg1->get_data_ptr<const T>(),
+                                         arg2->get_data_ptr<const T>(),
+                                         arg3->get_data_ptr<const T>(),
+                                         arg4->get_data_ptr<const T>(),
+                                         out->get_data_ptr<T>(),
+                                         parent->get_input_shape(0),  // shapes are taken from the op's inputs
+                                         parent->get_input_shape(1),
+                                         parent->get_input_shape(2),
+                                         parent->get_input_shape(3),
+                                         parent->get_input_shape(4),
+                                         parent->get_levels(),
+                                         parent->get_auto_broadcast());
+    return true;
+}
+
+bool evaluate_fakequantize(const HostTensorPtr& arg0,  // selects the typed kernel from arg0's element type
+                           const HostTensorPtr& arg1,
+                           const HostTensorPtr& arg2,
+                           const HostTensorPtr& arg3,
+                           const HostTensorPtr& arg4,
+                           const HostTensorPtr& out,
+                           const ngraph::op::FakeQuantize* parent) {
+    bool rc = true;
+    switch (arg0->get_element_type()) {
+        NGRAPH_TYPE_CASE(evaluate_fakequantize, i32, arg0, arg1, arg2, arg3, arg4, out, parent);
+        NGRAPH_TYPE_CASE(evaluate_fakequantize, i64, arg0, arg1, arg2, arg3, arg4, out, parent);
+        NGRAPH_TYPE_CASE(evaluate_fakequantize, u32, arg0, arg1, arg2, arg3, arg4, out, parent);
+        NGRAPH_TYPE_CASE(evaluate_fakequantize, u64, arg0, arg1, arg2, arg3, arg4, out, parent);
+        NGRAPH_TYPE_CASE(evaluate_fakequantize, f16, arg0, arg1, arg2, arg3, arg4, out, parent);
+        NGRAPH_TYPE_CASE(evaluate_fakequantize, f32, arg0, arg1, arg2, arg3, arg4, out, parent);
+    default:  // element type not listed above: report that nothing was evaluated
+        rc = false;
+        break;
+    }
+    return rc;
+}
+}  // namespace fakequantizeop
+
+bool ngraph::op::FakeQuantize::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
+    NGRAPH_OP_SCOPE(v0_FakeQuantize_evaluate);
+    return fakequantizeop::evaluate_fakequantize(inputs[0],  // NOTE(review): vector sizes are not validated here
+                                                 inputs[1],
+                                                 inputs[2],
+                                                 inputs[3],
+                                                 inputs[4],
+                                                 outputs[0],
+                                                 this);
+}
+
+bool ngraph::op::FakeQuantize::has_evaluate() const {
+    NGRAPH_OP_SCOPE(v0_FakeQuantize_has_evaluate);
+    switch (get_input_element_type(0)) {  // same six types that evaluate_fakequantize dispatches on
+    case ngraph::element::i32:
+    case ngraph::element::i64:
+    case ngraph::element::u32:
+    case ngraph::element::u64:
+    case ngraph::element::f16:
+    case ngraph::element::f32:
+        return true;
+    default:
+        break;
+    }
+    return false;
+}
diff --git a/ngraph/frontend/onnx/frontend/src/op/conv.cpp b/ngraph/frontend/onnx/frontend/src/op/conv.cpp
index 5c280c3769acf1..230ae916bc78b5 100644
--- a/ngraph/frontend/onnx/frontend/src/op/conv.cpp
+++ b/ngraph/frontend/onnx/frontend/src/op/conv.cpp
@@ -13,6 +13,7 @@
 #include "ngraph/builder/reshape.hpp"
 #include "ngraph/op/group_conv.hpp"
 #include "ngraph/op/util/attr_types.hpp"
+#include "onnx_import/core/null_node.hpp"
 #include "utils/convpool.hpp"
 #include "utils/reshape.hpp"
 
@@ -20,7 +21,7 @@ namespace ngraph {
 namespace onnx_import {
 namespace op {
 namespace set_1 {
-namespace {
+namespace detail {
 std::shared_ptr<ngraph::op::Op> make_ng_convolution(const Output<ngraph::Node>& data,
                                                     const Output<ngraph::Node>& filters,
                                                     const ngraph::Strides& strides,
@@ -57,14 +58,13 @@ std::shared_ptr<ngraph::Node> add_bias(const Output<ngraph::Node>& ng_conv, cons
     return {
         std::make_shared<default_opset::Add>(ng_conv, reshape::reshape_channel_shaped_node_to_nchw(bias, conv_rank))};
 }
-}  // namespace
 
-OutputVector conv(const Node& node) {
+OutputVector conv(const Node& node,
+                  Output<ngraph::Node> data,
+                  Output<ngraph::Node> filters,
+                  Output<ngraph::Node> bias) {
     // in the current implementation we assume that the data input rank is static
     // and only the 'batch' dimension can be dynamic
-    const OutputVector& inputs = node.get_ng_inputs();
-    const auto data = inputs.at(0);
-    const auto filters = inputs.at(1);
     const auto groups = node.get_attribute_value<int64_t>("group", 1);
 
     NGRAPH_CHECK(data.get_partial_shape().rank().is_static(), "The input data tensor's rank has to be known (static)");
@@ -80,10 +80,9 @@ OutputVector conv(const Node& node) {
         make_ng_convolution(data, filters, strides, dilations, padding_below, padding_above, groups, auto_pad_type);
 
     // no bias param
-    if (inputs.size() < 3) {
+    if (ngraph::op::is_null(bias)) {
         return {conv_node};
     } else {
-        const auto& bias = inputs.at(2);
         const auto& bias_ps = bias.get_partial_shape();
 
         NGRAPH_CHECK(bias_ps.rank().is_static() && bias_ps.rank().get_length() == 1,
@@ -92,7 +91,11 @@ OutputVector conv(const Node& node) {
         return {add_bias(conv_node, bias)};
     }
 }
-
+}  // namespace detail
+OutputVector conv(const Node& node) {
+    const OutputVector& inputs = node.get_ng_inputs();  // data, filters and an optional bias; at() throws if too few
+    return detail::conv(node, inputs.at(0), inputs.at(1), inputs.size() < 3 ? std::make_shared<NullNode>() : inputs[2]);
+}
 }  // namespace set_1
 
 }  // namespace op
diff --git a/ngraph/frontend/onnx/frontend/src/op/conv.hpp b/ngraph/frontend/onnx/frontend/src/op/conv.hpp
index 065902af22a338..759a75110cb12f 100644
--- a/ngraph/frontend/onnx/frontend/src/op/conv.hpp
+++ b/ngraph/frontend/onnx/frontend/src/op/conv.hpp
@@ -11,6 +11,9 @@ namespace ngraph {
 namespace onnx_import {
 namespace op {
 namespace set_1 {
+namespace detail {
+OutputVector conv(const Node& node, Output<ngraph::Node> data, Output<ngraph::Node> filters, Output<ngraph::Node> bias);
+}
 /// \brief Performs ONNX Conv operation.
 ///
 /// \param node   The ONNX node object representing this operation.
diff --git a/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.cpp b/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.cpp
index bfa26cc64ada7d..ecdd5261503556 100644
--- a/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.cpp
+++ b/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.cpp
@@ -19,7 +19,7 @@
 namespace ngraph {
 namespace onnx_import {
 namespace op {
-namespace {
+namespace detail {
 Output<ngraph::Node> get_zero_point(const OutputVector& inputs) {
     if (inputs.size() == 3 && !ngraph::op::is_null(inputs[2])) {
         auto zero_point = inputs[2];
@@ -33,7 +33,7 @@ Output<ngraph::Node> get_zero_point(const OutputVector& inputs) {
         return default_opset::Constant::create(element::f32, Shape{}, {0});
     }
 }
-}  // namespace
+}  // namespace detail
 namespace set_1 {
 OutputVector dequantize_linear(const Node& node) {
     const OutputVector inputs{node.get_ng_inputs()};
@@ -44,7 +44,7 @@ OutputVector dequantize_linear(const Node& node) {
 
     const auto x = inputs[0];
     const auto scale = inputs[1];
-    const auto zero_point = get_zero_point(inputs);
+    const auto zero_point = detail::get_zero_point(inputs);
 
     common::validate_scalar_input("Dequantization scale", scale.get_node_shared_ptr(), {element::f32});
     common::validate_scalar_input("Zero point", zero_point.get_node_shared_ptr());
@@ -58,7 +58,7 @@ OutputVector dequantize_linear(const Node& node) {
 }  // namespace set_1
 
 namespace set_13 {
-namespace {
+namespace detail {
 void validate_scale(const Output<ngraph::Node> scale, const Output<ngraph::Node> x, const int64_t axis) {
     const auto& scale_shape = scale.get_partial_shape();
     NGRAPH_CHECK(scale_shape.rank().get_length() == 0 || scale_shape.rank().get_length() == 1,
@@ -129,25 +129,16 @@ std::shared_ptr<ngraph::Node> reshape_input(const Output<ngraph::Node> input,
 
     return std::make_shared<default_opset::Reshape>(input, target_shape, true);
 }
-}  // namespace
-
-OutputVector dequantize_linear(const Node& node) {
-    const OutputVector inputs{node.get_ng_inputs()};
-
-    NGRAPH_CHECK(2 <= inputs.size() && inputs.size() <= 3,
-                 "The DequantizeLinear op expects 2 required and one optional "
-                 "input. Got: ",
-                 inputs.size());
-
-    const auto x = inputs[0];
-    auto scale = inputs[1];
-    auto zero_point = get_zero_point(inputs);
 
+OutputVector dequantize_linear(Output<ngraph::Node> x,
+                               Output<ngraph::Node> scale,
+                               Output<ngraph::Node> zero_point,
+                               int64_t axis,
+                               Node node) {
     const auto x_shape = x.get_partial_shape();
 
     NGRAPH_CHECK(x_shape.rank().is_static(), "Rank of the input data tensor has to be known (static).");
 
-    int64_t axis{node.get_attribute_value<int64_t>("axis", 1)};
     axis = ngraph::normalize_axis(node.get_description(), axis, x_shape.rank());
 
     validate_scale(scale, x, axis);
@@ -163,6 +154,22 @@ OutputVector dequantize_linear(const Node& node) {
         std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Subtract>(converted_x, zero_point),
                                                   scale)};
 }
+}  // namespace detail
+
+OutputVector dequantize_linear(const Node& node) {
+    const OutputVector inputs{node.get_ng_inputs()};
+
+    NGRAPH_CHECK(2 <= inputs.size() && inputs.size() <= 3,
+                 "The DequantizeLinear op expects 2 required and one optional "
+                 "input. Got: ",
+                 inputs.size());
+    const auto x = inputs[0];
+    auto scale = inputs[1];
+    auto zero_point = op::detail::get_zero_point(inputs);
+
+    // axis handling (normalization, scale validation) lives in detail::dequantize_linear; "axis" defaults to 1
+    return detail::dequantize_linear(x, scale, zero_point, node.get_attribute_value<int64_t>("axis", 1), node);
+}
 }  // namespace set_13
 }  // namespace op
 }  // namespace onnx_import
diff --git a/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.hpp b/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.hpp
index d1329a247f8ceb..be36d796a506c1 100644
--- a/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.hpp
+++ b/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.hpp
@@ -10,14 +10,25 @@
 namespace ngraph {
 namespace onnx_import {
 namespace op {
+namespace detail {
+Output<ngraph::Node> get_zero_point(const OutputVector& inputs);
+}
+
 namespace set_1 {
 OutputVector dequantize_linear(const Node& node);
 
 }  // namespace set_1
 
 namespace set_13 {
-OutputVector dequantize_linear(const Node& node);
+namespace detail {
+OutputVector dequantize_linear(Output<ngraph::Node> x,
+                               Output<ngraph::Node> scale,
+                               Output<ngraph::Node> zero_point,
+                               int64_t axis,
+                               Node node);
 }
+OutputVector dequantize_linear(const Node& node);
+}  // namespace set_13
 
 }  // namespace op
 
diff --git a/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.cpp b/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.cpp
new file mode 100644
index 00000000000000..3ee14d345e9e94
--- /dev/null
+++ b/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.cpp
@@ -0,0 +1,68 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
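+// ONNX QLinearConv importer (registered for opset 1 in ops_bridge.cpp):
+// dequantizes x and w, runs the regular Conv importer, then quantizes the result with y_scale/y_zero_point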
+
+#include "op/qlinear_conv.hpp"
+
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include "conv.hpp"
+#include "dequantize_linear.hpp"
+#include "exceptions.hpp"
+#include "ngraph/opsets/opset6.hpp"
+#include "onnx_import/core/null_node.hpp"
+#include "quantize_linear.hpp"
+
+namespace ngraph {
+namespace onnx_import {
+namespace op {
+namespace set_1 {
+OutputVector qlinear_conv(const Node& node) {
+    const OutputVector& inputs = node.get_ng_inputs();
+
+    auto x = inputs.at(0);  // at() throws std::out_of_range when one of the 8 required inputs is missing
+    auto x_scale = inputs.at(1);
+    auto x_zero_point = inputs.at(2);
+    auto w = inputs.at(3);
+    auto w_scale = inputs.at(4);
+    auto w_zero_point = inputs.at(5);
+    auto y_scale = inputs.at(6);
+    auto y_zero_point = inputs.at(7);
+    Output<ngraph::Node> B = inputs.size() > 8 ? inputs.at(8) : std::make_shared<NullNode>()->output(0);
+
+    x = set_13::detail::dequantize_linear(x,
+                                          x_scale,
+                                          std::make_shared<opset6::Convert>(x_zero_point, element::f32),
+                                          1,
+                                          node)[0];
+    w = set_13::detail::dequantize_linear(w,
+                                          w_scale,
+                                          std::make_shared<opset6::Convert>(w_zero_point, element::f32),
+                                          0,  // w is (M, C/group, k...): per-channel w_scale runs over axis 0
+                                          node)[0];
+
+    if (!ngraph::op::is_null(B)) {  // bias, when present, is converted and multiplied by x_scale * w_scale
+        B = std::make_shared<opset6::Multiply>(std::make_shared<opset6::Convert>(B, x_scale.get_element_type()),
+                                               std::make_shared<opset6::Multiply>(x_scale, w_scale))
+                ->output(0);
+    }
+
+    auto result = detail::conv(node, x, w, B)[0];  // regular Conv importer on the dequantized tensors
+
+    result = op::detail::make_fake_quantize(y_scale, y_zero_point, result);  // quantize to the output type
+
+    return {result};
+}
+
+}  // namespace set_1
+
+}  // namespace op
+
+}  // namespace onnx_import
+
+}  // namespace ngraph
diff --git a/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.hpp b/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.hpp
new file mode 100644
index 00000000000000..a25cf374174f66
--- /dev/null
+++ b/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.hpp
@@ -0,0 +1,31 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// Declares the importer for the ONNX QLinearConv operator,
+// which ops_bridge.cpp registers for opset 1.
+
+#pragma once
+
+#include "ngraph/node.hpp"
+#include "onnx_import/core/node.hpp"
+
+namespace ngraph {
+namespace onnx_import {
+namespace op {
+namespace set_1 {
+/// \brief Performs ONNX QLinearConv operation.
+///
+/// \param node   The ONNX node object representing this operation.
+///
+/// \return The vector containing Ngraph nodes producing output of ONNX quantized
+///         convolution operation.
+OutputVector qlinear_conv(const Node& node);
+
+}  // namespace set_1
+
+}  // namespace op
+
+}  // namespace onnx_import
+
+}  // namespace ngraph
diff --git a/ngraph/frontend/onnx/frontend/src/op/quantize_linear.cpp b/ngraph/frontend/onnx/frontend/src/op/quantize_linear.cpp
index b8466c59a2584e..b83e43c77941d4 100644
--- a/ngraph/frontend/onnx/frontend/src/op/quantize_linear.cpp
+++ b/ngraph/frontend/onnx/frontend/src/op/quantize_linear.cpp
@@ -94,7 +94,7 @@ std::tuple<std::shared_ptr<ngraph::Node>, std::shared_ptr<ngraph::Node>> get_inp
 
     return std::make_tuple(input_low, input_high);
 }
-
+}  // namespace
 std::shared_ptr<ngraph::Node> make_fake_quantize(const Output<ngraph::Node>& y_scale,
                                                  const Output<ngraph::Node>& y_zero_point,
                                                  const Output<ngraph::Node>& data) {
@@ -116,7 +116,6 @@ std::shared_ptr<ngraph::Node> make_fake_quantize(const Output<ngraph::Node>& y_s
         std::make_shared<default_opset::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels),
         destination_type);
 }
-}  // namespace
 }  // namespace detail
 
 namespace set_1 {
@@ -135,11 +134,13 @@ OutputVector quantize_linear(const Node& node) {
 }  // namespace set_1
 
 namespace set_13 {
-OutputVector quantize_linear(const Node& node) {
-    OutputVector inputs{node.get_ng_inputs()};
-    auto x = inputs.at(0);
-    auto y_scale = inputs.at(1);
-    auto y_zero_point = detail::get_zero_point(inputs);
+namespace detail {
+OutputVector quantize_linear(Output<ngraph::Node> x,
+                             Output<ngraph::Node> y_scale,
+                             Output<ngraph::Node> y_zero_point,
+                             int64_t axis,
+                             Node node) {
+    namespace detail = ngraph::onnx_import::op::detail;
 
     x = detail::validate_data(node, x);
     detail::validate_zero_point_type(node, y_zero_point);
@@ -147,7 +148,6 @@ OutputVector quantize_linear(const Node& node) {
 
     const auto& x_shape = x.get_partial_shape();
 
-    int64_t axis{node.get_attribute_value<int64_t>("axis", 1)};
     axis = normalize_axis(node.get_description(), axis, x_shape.rank());
 
     const auto& y_scale_shape = y_scale.get_partial_shape();
@@ -185,7 +185,22 @@ OutputVector quantize_linear(const Node& node) {
 
     return {detail::make_fake_quantize(y_scale, y_zero_point, x)};
 }
+}  // namespace detail
+
+OutputVector quantize_linear(const Node& node) {
+    const OutputVector inputs{node.get_ng_inputs()};
 
+    NGRAPH_CHECK(2 <= inputs.size() && inputs.size() <= 3,
+                 "The QuantizeLinear op expects 2 required and one optional "
+                 "input. Got: ",
+                 inputs.size());
+
+    const auto x = inputs[0];
+    auto scale = inputs[1];
+    auto zero_point = op::detail::get_zero_point(inputs);  // op::detail, not the set_13::detail opened above
+
+    return detail::quantize_linear(x, scale, zero_point, node.get_attribute_value<int64_t>("axis", 1), node);
+}
 }  // namespace set_13
 
 }  // namespace op
diff --git a/ngraph/frontend/onnx/frontend/src/op/quantize_linear.hpp b/ngraph/frontend/onnx/frontend/src/op/quantize_linear.hpp
index bc049d55d4a029..531d9ac494d656 100644
--- a/ngraph/frontend/onnx/frontend/src/op/quantize_linear.hpp
+++ b/ngraph/frontend/onnx/frontend/src/op/quantize_linear.hpp
@@ -10,12 +10,18 @@
 namespace ngraph {
 namespace onnx_import {
 namespace op {
+namespace detail {
+std::shared_ptr<ngraph::Node> make_fake_quantize(const Output<ngraph::Node>& y_scale,
+                                                 const Output<ngraph::Node>& y_zero_point,
+                                                 const Output<ngraph::Node>& data);
+}
 namespace set_1 {
 OutputVector quantize_linear(const Node& node);
 
 }  // namespace set_1
 
 namespace set_13 {
+
 OutputVector quantize_linear(const Node& node);
 
 }  // namespace set_13
diff --git a/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp b/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp
index ed759c1b5eaf63..f4110b1c5d932b 100644
--- a/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp
+++ b/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp
@@ -91,10 +91,6 @@
 #include "op/not.hpp"
 #include "op/onehot.hpp"
 #include "op/or.hpp"
-#include "op/pad.hpp"
-#include "op/pow.hpp"
-#include "op/prelu.hpp"
-// #include "op/quant_conv.hpp"
 #include "op/org.openvinotoolkit/deformable_conv_2d.hpp"
 #include "op/org.openvinotoolkit/detection_output.hpp"
 #include "op/org.openvinotoolkit/experimental_detectron/detection_output.hpp"
@@ -107,6 +103,10 @@
 #include "op/org.openvinotoolkit/normalize.hpp"
 #include "op/org.openvinotoolkit/prior_box.hpp"
 #include "op/org.openvinotoolkit/swish.hpp"
+#include "op/pad.hpp"
+#include "op/pow.hpp"
+#include "op/prelu.hpp"
+#include "op/qlinear_conv.hpp"
 #include "op/quantize_linear.hpp"
 #include "op/random_uniform.hpp"
 #include "op/random_uniform_like.hpp"
@@ -368,7 +368,7 @@ OperatorsBridge::OperatorsBridge() {
     REGISTER_OPERATOR("Pad", 11, pad);
     REGISTER_OPERATOR("Pow", 1, pow);
     REGISTER_OPERATOR("PRelu", 1, prelu);
-    // REGISTER_OPERATOR("QLinearConv", 1, quant_conv);
+    REGISTER_OPERATOR("QLinearConv", 1, qlinear_conv);
     REGISTER_OPERATOR("QuantizeLinear", 1, quantize_linear);
     REGISTER_OPERATOR("QuantizeLinear", 13, quantize_linear);
     REGISTER_OPERATOR("Range", 1, range);
diff --git a/ngraph/test/files/onnx/qlinearconv3d/x.bin b/ngraph/test/files/onnx/qlinearconv3d/x.bin
deleted file mode 100644
index 2cfb4e9b24ffb3c4dc88dbfe8792102247ae792d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 64
zcmV-G0Kfl&4)itjckE9Iid{ZJo>8#jcA$0kMXWLxDvJP;o%s76aAS(T7LU5el)jW%
W5Qf~pMIr^9|HOtd>D+;9+^l>8$ROwd

diff --git a/ngraph/test/files/onnx/qlinearconv3d/y.bin b/ngraph/test/files/onnx/qlinearconv3d/y.bin
deleted file mode 100644
index 4ac0510ba7f864c0322054e29453aec3ae84b29a..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 64
ccmZo@zyg~8H#9IrBP7I=8XC?WLGY1S01J>G=>Px#

diff --git a/ngraph/test/models/onnx/quantization/quant_conv_linear_onnx_example.prototxt b/ngraph/test/models/onnx/quantization/quant_conv_linear_onnx_example.prototxt
new file mode 100644
index 00000000000000..94cf76c2b95775
--- /dev/null
+++ b/ngraph/test/models/onnx/quantization/quant_conv_linear_onnx_example.prototxt
@@ -0,0 +1,152 @@
+ir_version: 5
+producer_name: "onnx-examples"
+graph {
+  node {
+    input: "x"
+    input: "x_scale"
+    input: "x_zero_point"
+    input: "w"
+    input: "w_scale"
+    input: "w_zero_point"
+    input: "y_scale"
+    input: "y_zero_point"
+    output: "y"
+    op_type: "QLinearConv"
+  }
+  name: "test_qlinearconv"
+  input {
+    name: "x"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 7
+          }
+          dim {
+            dim_value: 7
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "x_scale"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "x_zero_point"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "w"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "w_scale"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "w_zero_point"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "y_scale"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "y_zero_point"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+        }
+      }
+    }
+  }
+  output {
+    name: "y"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 7
+          }
+          dim {
+            dim_value: 7
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 11
+}
diff --git a/ngraph/test/onnx/onnx_import_quant.in.cpp b/ngraph/test/onnx/onnx_import_quant.in.cpp
index 96faa74393439e..cfb3a918785797 100644
--- a/ngraph/test/onnx/onnx_import_quant.in.cpp
+++ b/ngraph/test/onnx/onnx_import_quant.in.cpp
@@ -264,20 +264,32 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_dequantize_linear_1d_zero_scale_uint8_ne
 NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear) {
     auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/quant_conv_lin.onnx"));
 
+    auto test_case = test::TestCase<TestEngine>(function);
+
+    // don't change style for better readability
+    // clang-format off
     std::vector<std::vector<std::uint8_t>> inputs;
-    inputs.emplace_back(std::vector<std::uint8_t>{
-        1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
-        28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
-        55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81});
-
-    std::vector<std::vector<std::int8_t>> expected_output{std::vector<std::int8_t>{
-        2,  3,  3,  3,  4,  4,  4,  5,  2,  4,  6,  7,  8,  8,  9,  9,  10, 3,  8,  11, 12, 13, 13, 14, 14, 15, 5,
-        11, 16, 17, 18, 18, 19, 19, 20, 7,  14, 22, 22, 23, 23, 24, 24, 25, 8,  18, 27, 27, 28, 28, 29, 29, 30, 10,
-        21, 32, 32, 33, 33, 34, 34, 35, 12, 24, 37, 37, 38, 38, 39, 40, 40, 13, 17, 26, 27, 27, 27, 28, 28, 28, 9}};
-
-    std::vector<std::vector<std::int8_t>> outputs{
-        execute<std::uint8_t, std::int8_t>(function, inputs, "${BACKEND_NAME}")};
-    EXPECT_TRUE(test::all_close(expected_output.front(), outputs.front()));
+    test_case.add_input(std::vector<uint8_t>{ 1,  2,  3,  4,  5,  6,  7,  8,  9,
+                                             10, 11, 12, 13, 14, 15, 16, 17, 18,
+                                             19, 20, 21, 22, 23, 24, 25, 26, 27,
+                                             28, 29, 30, 31, 32, 33, 34, 35, 36,
+                                             37, 38, 39, 40, 41, 42, 43, 44, 45,
+                                             46, 47, 48, 49, 50, 51, 52, 53, 54,
+                                             55, 56, 57, 58, 59, 60, 61, 62, 63,
+                                             64, 65, 66, 67, 68, 69, 70, 71, 72,
+                                             73, 74, 75, 76, 77, 78, 79, 80, 81});
+
+    test_case.add_expected_output<uint8_t>({1, 1, 9, 9}, std::vector<uint8_t>{ 2,  3,  3,  3,  4,  4,  4,  5,  2,
+                                                                               4,  6,  7,  8,  8,  9,  9, 10,  3,
+                                                                               8, 11, 12, 13, 13, 14, 14, 15,  5,
+                                                                              11, 16, 17, 18, 18, 19, 19, 20,  7,
+                                                                              14, 22, 22, 23, 23, 24, 24, 25,  8,
+                                                                              18, 27, 27, 28, 28, 29, 29, 30, 10,
+                                                                              21, 32, 32, 33, 33, 34, 34, 35, 12,
+                                                                              24, 37, 37, 38, 38, 39, 40, 40, 13,
+                                                                              17, 26, 27, 27, 27, 28, 28, 28, 9});
+    // clang-format on
+    test_case.run();
 }
 
 NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear_2d) {
@@ -303,7 +315,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear_3d) {
 
     auto test_case = test::TestCase<TestEngine>(function);
 
-    test_case.add_input_from_file<uint8_t>(TEST_FILES, "onnx/qlinearconv3d/x.bin");
+    // don't change style for better readability
+    // clang-format off
+    test_case.add_input(std::vector<uint8_t>{130,  14, 244,  53,
+                                             244, 119, 236,  79,
+                                               9, 138,  93,  62,
+                                              66, 158,  81, 176,
+
+                                             225, 118, 160, 117,
+                                             246,  69, 172,  50,
+                                              23,  42, 139,  0,
+                                             146, 157, 248, 251,
+
+                                              30, 112,  99, 138,
+                                             190,  22, 143, 186,
+                                             199, 148, 190, 148,
+                                              89,  16, 134, 220,
+
+                                             191,  69,  34,   5,
+                                             156, 255, 196, 134,
+                                              49, 233, 220, 129,
+                                             107, 220, 172, 124});  // x
     test_case.add_input(std::vector<float>{0.00389225385151803f});  // x_scale
     test_case.add_input(std::vector<uint8_t>{127});                 // x_zero_point
     test_case.add_input(std::vector<uint8_t>{255});                 // w
@@ -312,7 +344,61 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear_3d) {
     test_case.add_input(std::vector<float>{0.0011764180380851f});   // y_scale
     test_case.add_input(std::vector<uint8_t>{128});                 // y_zero_point
 
-    test_case.add_expected_output_from_file<uint8_t>({1, 1, 4, 4, 4}, TEST_FILES, "onnx/qlinearconv3d/y.bin");
+    test_case.add_expected_output<uint8_t>({1, 1, 4, 4, 4},
+                                           {128, 128, 128, 128,
+                                            128, 128, 128, 128,
+                                            128, 128, 128, 128,
+                                            128, 128, 128, 128,
+
+                                            128, 128, 128, 128,
+                                            128, 131, 255, 128,
+                                            128,   0,  91, 128,
+                                            128, 128, 128, 128,
+
+                                            128, 128, 128, 128,
+                                            128,  23,  98, 128,
+                                            128, 206, 196, 128,
+                                            128, 128, 128, 128,
+
+                                            128, 128, 128, 128,
+                                            128, 128, 128, 128,
+                                            128, 128, 128, 128,
+                                            128, 128, 128, 128});
+    // clang-format on
+    test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear_onnx_example) {
+    auto function = onnx_import::import_onnx_model(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/quant_conv_linear_onnx_example.onnx"));
+
+    auto test_case = test::TestCase<TestEngine>(function);
+
+    // Keep the hand-aligned 7x7 layout below for readability; clang-format must not reflow it.
+    // clang-format off
+    test_case.add_input(std::vector<uint8_t>{255, 174, 162,  25, 203, 168,  58,
+                                              15,  59, 237,  95, 129,   0,  64,
+                                              56, 242, 153, 221, 168,  12, 166,
+                                             232, 178, 186, 195, 237, 162, 237,
+                                             188,  39, 124,  77,  80, 102,  43,
+                                             127, 230,  21,  83,  41,  40, 134,
+                                             255, 154,  92, 141,  42, 148, 247});  // x
+    test_case.add_input(std::vector<float>{0.00369204697f});                       // x_scale
+    test_case.add_input(std::vector<uint8_t>{132});                                // x_zero_point
+    test_case.add_input(std::vector<uint8_t>{0});                                  // w
+    test_case.add_input(std::vector<float>{0.00172794575f});                       // w_scale
+    test_case.add_input(std::vector<uint8_t>{255});                                // w_zero_point
+    test_case.add_input(std::vector<float>{0.00162681262f});                       // y_scale
+    test_case.add_input(std::vector<uint8_t>{123});                                // y_zero_point
+
+    test_case.add_expected_output<uint8_t>({1, 1, 7, 7}, std::vector<uint8_t>{  0,  81,  93, 230,  52,  87, 197,
+                                                                              240, 196,  18, 160, 126, 255, 191,
+                                                                              199,  13, 102,  34,  87, 243,  89,
+                                                                               23,  77,  69,  60,  18,  93,  18,
+                                                                               67, 216, 131, 178, 175, 153, 212,
+                                                                              128,  25, 234, 172, 214, 215, 121,
+                                                                                0, 101, 163, 114, 213, 107,   8});
+    // clang-format on
     test_case.run();
 }
 
diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest
index c32732c3c2639c..8c7fad49569dda 100644
--- a/ngraph/test/runtime/ie/unit_test.manifest
+++ b/ngraph/test/runtime/ie/unit_test.manifest
@@ -130,7 +130,6 @@ onnx_model_scatterND_param_i64_indices
 IE_CPU.onnx_constant_sparse_tensor_int64_3x4
 IE_CPU.onnx_constant_sparse_tensor_uint64_3x4
 
-
 # TopK Incorrect input data/index values precision
 onnx_model_argmax_int32
 onnx_model_argmin_int32
@@ -241,6 +240,9 @@ onnx_size_op_single
 onnx_size_op_graph_end
 onnx_size_op_graph_middle
 
+# /openvino/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp:747
+# Output blob byte size is not equal network output byte size (64!=216)." thrown in the test body.
+onnx_model_quant_conv_linear_3d
 
 #-------------------------------------------------------------------------------
 #
@@ -643,11 +645,6 @@ gemm_broadcast_axes_1_input_C
 scale_shift_no_broadcast
 scale_shift
 
-# Detected op not belonging to opset1!
-onnx_model_quant_conv_linear
-onnx_model_quant_conv_linear_2d
-onnx_model_quant_conv_linear_3d
-
 # Cannot cast ngraph node Dot to CNNLayer!
 dot_4d_5d_multi_axis
 dot_4d_5d_multi_axis_more
diff --git a/ngraph/test/runtime/interpreter/evaluates_map.cpp b/ngraph/test/runtime/interpreter/evaluates_map.cpp
index 90adbe1245783f..781c252fa2a5af 100644
--- a/ngraph/test/runtime/interpreter/evaluates_map.cpp
+++ b/ngraph/test/runtime/interpreter/evaluates_map.cpp
@@ -35,7 +35,6 @@
 #include <ngraph/runtime/reference/experimental_detectron_topk_rois.hpp>
 #include <ngraph/runtime/reference/experimental_detectron_proposal_single_image.hpp>
 #include <ngraph/runtime/reference/extract_image_patches.hpp>
-#include <ngraph/runtime/reference/fake_quantize.hpp>
 #include <ngraph/runtime/reference/fft.hpp>
 #include <ngraph/runtime/reference/gather.hpp>
 #include <ngraph/runtime/reference/gather_elements.hpp>
@@ -2435,28 +2434,6 @@ namespace
         return true;
     }
 
-    template <element::Type_t ET>
-    bool evaluate(const shared_ptr<op::v0::FakeQuantize>& op,
-                  const HostTensorVector& outputs,
-                  const HostTensorVector& inputs)
-    {
-        using T = typename element_type_traits<ET>::value_type;
-        runtime::reference::fake_quantize<T>(inputs[0]->get_data_ptr<const T>(),
-                                             inputs[1]->get_data_ptr<const T>(),
-                                             inputs[2]->get_data_ptr<const T>(),
-                                             inputs[3]->get_data_ptr<const T>(),
-                                             inputs[4]->get_data_ptr<const T>(),
-                                             outputs[0]->get_data_ptr<T>(),
-                                             op->get_input_shape(0),
-                                             op->get_input_shape(1),
-                                             op->get_input_shape(2),
-                                             op->get_input_shape(3),
-                                             op->get_input_shape(4),
-                                             op->get_levels(),
-                                             op->get_auto_broadcast());
-        return true;
-    }
-
     template <element::Type_t ET>
     bool evaluate(const shared_ptr<op::v0::NormalizeL2>& op,
                   const HostTensorVector& outputs,
diff --git a/ngraph/test/runtime/interpreter/unit_test.manifest b/ngraph/test/runtime/interpreter/unit_test.manifest
index ec77901f320283..880872f4491cf2 100644
--- a/ngraph/test/runtime/interpreter/unit_test.manifest
+++ b/ngraph/test/runtime/interpreter/unit_test.manifest
@@ -1,4 +1,3 @@
-INTERPRETER.onnx_model_quant_conv_linear
 INTERPRETER.onnx_top_k_opset_10
 
 # Temporarily disabled:
@@ -80,8 +79,6 @@ INTERPRETER.onnx_model_dequantize_linear_1d_zero_scale_uint8
 INTERPRETER.onnx_model_dequantize_linear_1d_zero_scale_int8
 INTERPRETER.onnx_model_dequantize_linear_1d_zero_scale_int8_4d
 INTERPRETER.onnx_model_dequantize_linear_1d_zero_scale_uint8_negative_axis
-INTERPRETER.onnx_model_quant_conv_linear_2d
-INTERPRETER.onnx_model_quant_conv_linear_3d
 INTERPRETER.onnx_model_conv_integer
 INTERPRETER.onnx_model_conv_integer_zero_point_zero
 INTERPRETER.onnx_model_conv_integer_no_zero_point
diff --git a/runtime/bindings/python/tests/__init__.py b/runtime/bindings/python/tests/__init__.py
index 929eb41658b8da..4f07f874900229 100644
--- a/runtime/bindings/python/tests/__init__.py
+++ b/runtime/bindings/python/tests/__init__.py
@@ -25,8 +25,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True):
 skip_segfault = pytest.mark.skip(reason="Segmentation fault error")
 xfail_issue_33488 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:"
                                       "MaxUnpool")
-xfail_issue_33535 = xfail_test(reason="nGraph does not support the following ONNX operations:"
-                                      "DynamicQuantizeLinear")
 xfail_issue_33538 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:"
                                       "Scan")
 skip_issue_38084 = pytest.mark.skip(reason="Aborted (core dumped) Assertion "
@@ -72,8 +70,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True):
 xfail_issue_38722 = xfail_test(reason="RuntimeError: While validating ONNX nodes MatMulInteger"
                                       "and QLinearMatMul"
                                       "Input0 scale and input0 zero point shape must be same and 1")
-xfail_issue_38723 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:"
-                                      "QLinearConv")
 xfail_issue_38724 = xfail_test(reason="RuntimeError: While validating ONNX node '<Node(Resize): Y>':"
                                       "tf_crop_and_resize - this type of coordinate transformation mode"
                                       "is not supported. Choose one of the following modes:"
@@ -100,8 +96,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True):
 xfail_issue_44958 = xfail_test(reason="Expected: Unsupported dynamic op: Interpolate")
 xfail_issue_44965 = xfail_test(reason="Expected: RuntimeError: value info has no element")
 xfail_issue_44968 = xfail_test(reason="Expected: Unsupported dynamic op: Squeeze")
-xfail_issue_44976 = xfail_test(reason="Expected: RuntimeError: Quantize layer with name:"
-                                      "FakeQuantize_xxx has non const input on 1 port")
 xfail_issue_46762 = xfail_test(reason="Incorrect result of Minimum op if uint data type is used")
 xfail_issue_47323 = xfail_test(reason="RuntimeError: The plugin does not support FP64")
 xfail_issue_47337 = xfail_test(reason="RuntimeError: Unsupported dynamic ops: v1::OneHot")
diff --git a/runtime/bindings/python/tests/test_ngraph/test_ops_fused.py b/runtime/bindings/python/tests/test_ngraph/test_ops_fused.py
index d96f870f604bc0..6db4a5f29c4e77 100644
--- a/runtime/bindings/python/tests/test_ngraph/test_ops_fused.py
+++ b/runtime/bindings/python/tests/test_ngraph/test_ops_fused.py
@@ -6,7 +6,7 @@
 
 import ngraph as ng
 from tests.runtime import get_runtime
-from tests import (xfail_issue_36486, xfail_issue_44976)
+from tests import xfail_issue_36486
 
 
 def test_elu_operator_with_scalar_and_array():
@@ -40,7 +40,6 @@ def test_elu_operator_with_scalar():
     assert np.allclose(result, expected)
 
 
-@xfail_issue_44976
 def test_fake_quantize():
     runtime = get_runtime()
 
diff --git a/runtime/bindings/python/tests/test_onnx/test_backend.py b/runtime/bindings/python/tests/test_onnx/test_backend.py
index 367e9a04dd55f3..fb5ca82b46e83e 100644
--- a/runtime/bindings/python/tests/test_onnx/test_backend.py
+++ b/runtime/bindings/python/tests/test_onnx/test_backend.py
@@ -8,7 +8,6 @@
     BACKEND_NAME,
     skip_rng_tests,
     xfail_issue_33488,
-    xfail_issue_33535,
     xfail_issue_33538,
     xfail_issue_33581,
     xfail_issue_33589,
@@ -26,7 +25,6 @@
     xfail_issue_38710,
     xfail_issue_38713,
     xfail_issue_38722,
-    xfail_issue_38723,
     xfail_issue_38724,
     xfail_issue_38732,
     xfail_issue_38734,
@@ -45,7 +43,6 @@
     xfail_issue_44958,
     xfail_issue_44965,
     xfail_issue_44968,
-    xfail_issue_44976,
     xfail_issue_45180,
     xfail_issue_45344,
     xfail_issue_46762,
@@ -200,7 +197,12 @@ def expect_fail(test_case_path, xfail):  # type: (str) -> None
         "OnnxBackendNodeModelTest.test_argmax_negative_axis_keepdims_random_select_last_index_cpu",
         "OnnxBackendNodeModelTest.test_argmin_negative_axis_keepdims_random_select_last_index_cpu",
     ),
-    (xfail_issue_38091, "OnnxBackendNodeModelTest.test_gather_negative_indices_cpu"),
+    (
+        xfail_issue_38091,
+        "OnnxBackendNodeModelTest.test_gather_negative_indices_cpu",
+        "OnnxBackendNodeModelTest.test_dynamicquantizelinear_cpu",
+        "OnnxBackendNodeModelTest.test_dynamicquantizelinear_expanded_cpu",
+    ),
     (
         xfail_issue_52463,
         "OnnxBackendPyTorchOperatorModelTest.test_operator_add_size1_singleton_broadcast_cpu",
@@ -337,19 +339,12 @@ def expect_fail(test_case_path, xfail):  # type: (str) -> None
         "OnnxBackendNodeModelTest.test_isinf_negative_cpu",
         "OnnxBackendNodeModelTest.test_isinf_cpu",
     ),
-    (
-        xfail_issue_33535,
-        "OnnxBackendNodeModelTest.test_dynamicquantizelinear_min_adjusted_cpu",
-        "OnnxBackendNodeModelTest.test_dynamicquantizelinear_cpu",
-        "OnnxBackendNodeModelTest.test_dynamicquantizelinear_max_adjusted_cpu",
-    ),
     (
         xfail_issue_38722,
         "OnnxBackendNodeModelTest.test_matmulinteger_cpu",
         "OnnxBackendNodeModelTest.test_qlinearmatmul_2D_cpu",
         "OnnxBackendNodeModelTest.test_qlinearmatmul_3D_cpu",
     ),
-    (xfail_issue_38723, "OnnxBackendNodeModelTest.test_qlinearconv_cpu"),
     (xfail_issue_38724, "OnnxBackendNodeModelTest.test_resize_tf_crop_and_resize_cpu"),
     (
         xfail_issue_33606,
@@ -450,14 +445,6 @@ def expect_fail(test_case_path, xfail):  # type: (str) -> None
         "OnnxBackendNodeModelTest.test_squeeze_cpu",
         "OnnxBackendNodeModelTest.test_squeeze_negative_axes_cpu",
     ),
-    (
-        xfail_issue_44976,
-        "OnnxBackendNodeModelTest.test_quantizelinear_axis_cpu",
-        "OnnxBackendNodeModelTest.test_dynamicquantizelinear_min_adjusted_expanded_cpu",
-        "OnnxBackendNodeModelTest.test_dynamicquantizelinear_expanded_cpu",
-        "OnnxBackendNodeModelTest.test_dynamicquantizelinear_max_adjusted_expanded_cpu",
-        "OnnxBackendNodeModelTest.test_quantizelinear_cpu",
-    ),
     (
         xfail_issue_33593,
         "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_strides_cpu",