From d275a15f4b42d8febf2b125c064f6d15ae4929e7 Mon Sep 17 00:00:00 2001
From: mandrono
Date: Tue, 22 Dec 2020 15:12:44 +0300
Subject: [PATCH] FakeQuantize decomposition

---
 .../src/mkldnn_plugin/mkldnn_plugin.cpp       |  55 ++-
 .../nodes/mkldnn_quantize_node.cpp            |  32 ++
 .../nodes/mkldnn_quantize_node.h              |   3 +
 .../op_conversions/fq_decomposition.hpp       |  26 ++
 .../op_conversions/fq_decomposition.cpp       |  86 +++++
 .../transformations/fq_decomposition_test.cpp | 182 ++++++++++
 .../cpu/single_layer_tests/fake_quantize.cpp  | 330 ++++++++++++++++++
 .../plugin/cpu/test_utils/cpu_test_utils.cpp  |   2 +
 ngraph/python/tests/__init__.py               |   2 -
 .../tests/test_ngraph/test_ops_fused.py       |   4 +-
 ngraph/python/tests/test_onnx/test_backend.py |  10 +-
 11 files changed, 717 insertions(+), 15 deletions(-)
 create mode 100644 inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp
 create mode 100644 inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp
 create mode 100644 inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp
 create mode 100644 inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp

diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
index 3045037185e492..d6740e33ad0f34 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -57,6 +57,8 @@
 #include
 #include
 #include
+#include <transformations/op_conversions/fq_decomposition.hpp>
+#include <transformations/utils/utils.hpp>
 #include
 #include

@@ -226,13 +228,17 @@ static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf)
         transformer.transform(nGraphFunc);
     }

+    bool keep_constant_inputs = ::ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc);
+
     ngraph::pass::Manager legacyManager;
+
+    legacyManager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
     legacyManager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
     legacyManager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
     // not legacy actually, but it should be the last transformation in the transformation pipeline
     legacyManager.register_pass<ngraph::pass::UnrollTensorIterator>();
-
     auto legacyPassConfig = legacyManager.get_pass_config();
+
     legacyPassConfig->set_callback([](const_node_ptr &node) -> bool {
         if (auto mul_op = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
             auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(mul_op->get_input_node_shared_ptr(0));
@@ -247,15 +253,58 @@ static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf)
         return false;
     });

-    legacyManager.get_pass_config()->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
+    legacyPassConfig->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
         // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
         return node->get_rt_info().count("UNROLL_TI") == 0;
     });
+
+    auto initAxisIdx = [](const std::shared_ptr<ngraph::Node> node) -> int {
+        int axisIdx = 0, numberOfNonUnit = 0;
+
+        for (size_t i = 0; i < node->get_shape().size(); i++) {
+            if (node->get_shape()[i] > 1) {
+                axisIdx = i;
+                numberOfNonUnit++;
+            }
+        }
+        return numberOfNonUnit > 1 ? -1 : axisIdx;
+    };
+    auto isSupportedFQ = [initAxisIdx](const_node_ptr &node) {
+        std::set<int> quantizationParamsAxesIdxs;
+        std::set<size_t> quantizationParamsAxesSizes;
+        for (size_t i = 1; i < node->get_input_size(); i++) {
+            auto inNode = node->get_input_node_shared_ptr(i);
+            auto axis = initAxisIdx(inNode);
+            if (axis == -1)
+                return false;
+            if (inNode->get_shape().size() != 0 && inNode->get_shape()[axis] != 1) {
+                quantizationParamsAxesIdxs.insert(axis);
+                quantizationParamsAxesSizes.insert(inNode->get_shape()[axis]);
+            }
+        }
+        return (quantizationParamsAxesIdxs.size() <= 1 && quantizationParamsAxesSizes.size() <= 1);
+    };
+
+    legacyPassConfig->set_callback<ngraph::pass::FakeQuantizeDecomposition>([isSupportedFQ](const_node_ptr &node) -> bool {
+        if (auto fq_op = std::dynamic_pointer_cast<const ngraph::opset1::FakeQuantize>(node)) {
+            if (node->get_input_node_shared_ptr(0)->get_shape().size() > 5)
+                return false;
+            for (size_t i = 1; i < fq_op->get_input_size(); i++) {
+                if (!std::dynamic_pointer_cast<const ngraph::opset1::Constant>(fq_op->get_input_node_shared_ptr(i)) ||
+                        node->get_input_node_shared_ptr(i)->get_shape().size() > 5)
+                    return false;
+            }
+            return isSupportedFQ(fq_op);
+        }
+
+        return true;
+    });
+
     legacyManager.run_passes(nGraphFunc);

     OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork");
-    clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
+    clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork, keep_constant_inputs);

     OV_ITT_TASK_NEXT(taskChain, "ConvertIOPrecision");

diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
index 5331dc23c9dc84..c42ef3f56793a8 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
@@ -402,6 +402,38 @@ void MKLDNNQuantizeNode::initSupportedPrimitiveDescriptors() {
     }
 }

+void MKLDNNQuantizeNode::filterSupportedPrimitiveDescriptors() {
+    MKLDNNNode::filterSupportedPrimitiveDescriptors();
+    filterSupportedDescriptors();
+}
+
+void MKLDNNQuantizeNode::filterSupportedDescriptors() {
+    if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty()) {
+        if (inputMemoryFormatsFilter.size() > 1 || outputMemoryFormatsFilter.size() > 1) {
+            THROW_IE_EXCEPTION << "Incorrect number of input or output memory formats for Quantize node";
+        }
+        auto itd = descs.begin();
+        while (itd != descs.end()) {
+            bool isSuitableDesc = true;
+            if (!inputMemoryFormatsFilter.empty()) {
+                auto src_fmt = std::shared_ptr<mkldnn::quantization_forward::desc>(*itd)->data.src_desc.format;
+                if (src_fmt != inputMemoryFormatsFilter[0])
+                    isSuitableDesc = false;
+            }
+            if (!outputMemoryFormatsFilter.empty()) {
+                auto dst_fmt = std::shared_ptr<mkldnn::quantization_forward::desc>(*itd)->data.dst_desc.format;
+                if (dst_fmt != outputMemoryFormatsFilter[0])
+                    isSuitableDesc = false;
+            }
+            if (!isSuitableDesc) {
+                itd = descs.erase(itd);
+            } else {
+                itd++;
+            }
+        }
+    }
+}
+
 void MKLDNNQuantizeNode::createPrimitive() {
     if (prim)
         return;

diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h
index af68cfdd08a8b5..7ab6ab62ea1f6b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h
@@ -25,6 +25,9 @@ class MKLDNNQuantizeNode : public MKLDNNNode {
     bool created() const override;
     void execute(mkldnn::stream strm) override;

+    void filterSupportedPrimitiveDescriptors() override;
+    void filterSupportedDescriptors();
+
     size_t getAxis() const { return axis; }

     bool isBinarization() const { return quantizeAlgorithm == mkldnn::algorithm::binarization_depthwise; }

diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp
new file mode 100644
index 00000000000000..a3765cdfac9912
--- /dev/null
+++ b/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp
@@ -0,0 +1,26 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <transformations_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+class TRANSFORMATIONS_API FakeQuantizeDecomposition;
+
+} // namespace pass
+} // namespace ngraph
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief FakeQuantizeDecomposition transformation decomposes a FakeQuantize layer into a sub-graph of elementwise operations
+ */
+class ngraph::pass::FakeQuantizeDecomposition: public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    FakeQuantizeDecomposition();
+};
diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp
new file mode 100644
index 00000000000000..b48b39273e1b65
--- /dev/null
+++ b/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp
@@ -0,0 +1,86 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/op_conversions/fq_decomposition.hpp"
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset5.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/rt_info.hpp>
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeDecomposition, "FakeQuantizeDecomposition", 0);
+
+ngraph::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() {
+    auto fake_quantize = ngraph::pattern::wrap_type<ngraph::opset1::FakeQuantize>();
+
+    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
+        auto &pattern_to_output = m.get_pattern_value_map();
+        auto fake_quantize_node = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(pattern_to_output.at(fake_quantize).get_node_shared_ptr());
+
+        if (fake_quantize_node == nullptr || m_transformation_callback(fake_quantize_node)) {
+            return false;
+        }
+
+        Output<Node> data{fake_quantize_node->input_value(0)};
+        Output<Node> input_low{fake_quantize_node->input_value(1)};
+        Output<Node> input_high{fake_quantize_node->input_value(2)};
+        Output<Node> output_low{fake_quantize_node->input_value(3)};
+        Output<Node> output_high{fake_quantize_node->input_value(4)};
+        auto input_type = data.get_element_type();
+
+        ngraph::NodeVector decomp_ops;
+        if (input_type != input_low.get_element_type()) {
+            input_type = input_low.get_element_type();
+            data = std::make_shared<ngraph::opset1::Convert>(data, input_type);
+            decomp_ops.push_back(data.get_node_shared_ptr());
+        }
+
+        auto max = std::make_shared<ngraph::opset1::Maximum>(data, input_low);
+        auto min = std::make_shared<ngraph::opset1::Minimum>(max, input_high);
+        decomp_ops.push_back(max);
+        decomp_ops.push_back(min);
+
+        auto levels_minus_one = std::make_shared<ngraph::opset1::Constant>(input_type, Shape{}, fake_quantize_node->get_levels() - 1);
+        decomp_ops.push_back(levels_minus_one);
+        // input scale and shift
+        auto subInHighLow = std::make_shared<ngraph::opset1::Subtract>(input_high, input_low);
+        auto isc = std::make_shared<ngraph::opset1::Divide>(levels_minus_one, subInHighLow);
+        auto ish = std::make_shared<ngraph::opset1::Multiply>(input_low, isc);
+        decomp_ops.push_back(subInHighLow);
+        decomp_ops.push_back(isc);
+        decomp_ops.push_back(ish);
+
+        auto after_isc_apply = std::make_shared<ngraph::opset1::Multiply>(min, isc);
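+        // Note: multiplying by isc = (levels - 1) / (input_high - input_low) and then
+        // subtracting ish = input_low * isc maps the clipped range [input_low, input_high]
+        // onto [0, levels - 1], so the Round below snaps values to the integer level grid.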
+        auto after_ish_apply = std::make_shared<ngraph::opset1::Subtract>(after_isc_apply, ish);
+        decomp_ops.push_back(after_isc_apply);
+        decomp_ops.push_back(after_ish_apply);
+
+        auto round = std::make_shared<ngraph::opset5::Round>(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN);
+        decomp_ops.push_back(round);
+
+        // output scale and shift
+        auto subOutHighLow = std::make_shared<ngraph::opset1::Subtract>(output_high, output_low);
+        auto osc = std::make_shared<ngraph::opset1::Divide>(subOutHighLow, levels_minus_one);
+        decomp_ops.push_back(subOutHighLow);
+        decomp_ops.push_back(osc);
+
+        auto after_osc_apply = std::make_shared<ngraph::opset1::Multiply>(round, osc);
+        std::shared_ptr<Node> result = std::make_shared<ngraph::opset1::Add>(after_osc_apply, output_low);
+        decomp_ops.push_back(after_osc_apply);
+        decomp_ops.push_back(result);
+
+        if (result->get_output_element_type(0) != fake_quantize_node->get_output_element_type(0)) {
+            result = std::make_shared<ngraph::opset1::Convert>(result, fake_quantize_node->get_output_element_type(0));
+            decomp_ops.push_back(result);
+        }
+
+        result->set_friendly_name(m.get_match_root()->get_friendly_name());
+        ngraph::copy_runtime_info(fake_quantize_node, decomp_ops);
+        ngraph::replace_node(m.get_match_root(), result);
+        return true;
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(fake_quantize, "FakeQuantizeDecomposition");
+    register_matcher(m, callback);
+}
diff --git a/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp
new file mode 100644
index 00000000000000..e105fb5b9c403c
--- /dev/null
+++ b/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp
@@ -0,0 +1,182 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset5.hpp>
+#include <ngraph/pass/manager.hpp>
+#include <transformations/init_node_info.hpp>
+#include <transformations/op_conversions/fq_decomposition.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "common_test_utils/ngraph_test_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+using namespace testing;
+
+using FakeQuantizeDecompositionParamsSet = std::tuple<ngraph::Shape,  // data shape
+                                                      ngraph::Shape,  // input_low shape
+                                                      ngraph::Shape,  // input_high shape
+                                                      ngraph::Shape,  // output_low shape
+                                                      ngraph::Shape,  // output_high shape
+                                                      bool>;          // ranges are constants
+
+class FakeQuantizeDecompositionTest : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<FakeQuantizeDecompositionParamsSet> {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<FakeQuantizeDecompositionParamsSet> obj) {
+        ngraph::Shape data_shape, il_shape, ih_shape, ol_shape, oh_shape;
+        bool is_const;
+        std::tie(data_shape, il_shape, ih_shape, ol_shape, oh_shape, is_const) = obj.param;
+
+        std::ostringstream result;
+        result << "DATA=" << CommonTestUtils::vec2str(data_shape) << "_";
+        result << "IL=" << CommonTestUtils::vec2str(il_shape) << "_";
+        result << "IH=" << CommonTestUtils::vec2str(ih_shape) << "_";
+        result << "OL=" << CommonTestUtils::vec2str(ol_shape) << "_";
+        result << "OH=" << CommonTestUtils::vec2str(oh_shape) << "_";
+        std::string rangeType = is_const ? "CONST" : "PARAMETER";
"CONST" : "PARAMETR"; + result << "RANGES_TYPE=" << rangeType; + return result.str(); + } + +protected: + void SetUp() { + ngraph::Shape data_shape, il_shape, ih_shape, ol_shape, oh_shape; + bool is_const; + std::tie(data_shape, il_shape, ih_shape, ol_shape, oh_shape, is_const) = this->GetParam(); + + std::shared_ptr f(nullptr), f_ref(nullptr); + const size_t levels = 256; + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape(data_shape)); + ngraph::ParameterVector params; + params.push_back(data); + ngraph::Output il, ih, ol, oh; + if (is_const) { + il = std::make_shared(ngraph::element::f32, il_shape); + ih = std::make_shared(ngraph::element::f32, ih_shape); + ol = std::make_shared(ngraph::element::f32, ol_shape); + oh = std::make_shared(ngraph::element::f32, oh_shape); + + } else { + auto il_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(il_shape)); + params.push_back(il_params); + il = il_params; + auto ih_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(ih_shape)); + params.push_back(ih_params); + ih = ih_params; + auto ol_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(ol_shape)); + params.push_back(ol_params); + ol = ol_params; + auto oh_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(oh_shape)); + oh = oh_params; + params.push_back(oh_params); + } + auto fq = std::make_shared(data, il, ih, ol, oh, levels); + f = std::make_shared(ngraph::NodeVector{fq}, params); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape(data_shape)); + ngraph::ParameterVector params; + params.push_back(data); + ngraph::Output il, ih, ol, oh; + if (is_const) { + il = std::make_shared(ngraph::element::f32, il_shape); + ih = std::make_shared(ngraph::element::f32, ih_shape); + ol = std::make_shared(ngraph::element::f32, ol_shape); + oh = std::make_shared(ngraph::element::f32, oh_shape); + } else { + auto il_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(il_shape)); + params.push_back(il_params); + il = il_params; + auto ih_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(ih_shape)); + params.push_back(ih_params); + ih = ih_params; + auto ol_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(ol_shape)); + params.push_back(ol_params); + ol = ol_params; + auto oh_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(oh_shape)); + oh = oh_params; + params.push_back(oh_params); + } + + auto max = std::make_shared(data, il); + auto min = std::make_shared(max, ih); + + auto levels_minus_one = std::make_shared(ngraph::element::f32, ngraph::Shape{}, levels - 1); + + auto subInHighLow = std::make_shared(ih, il); + auto isc = std::make_shared(levels_minus_one, subInHighLow); + auto ish = std::make_shared(il, isc); + + auto after_isc_apply = std::make_shared(min, isc); + auto after_ish_apply = std::make_shared(after_isc_apply, ish); + + auto round = std::make_shared(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN); + + auto subOutHighLow = std::make_shared(oh, ol); + auto osc = std::make_shared(subOutHighLow, levels_minus_one); + + auto after_osc_apply = std::make_shared(round, osc); + auto after_out_low_add = std::make_shared(after_osc_apply, ol); + + f_ref = std::make_shared(ngraph::NodeVector{after_out_low_add}, params); + } + + auto res = 
compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + } +}; + +TEST_P(FakeQuantizeDecompositionTest, CompareFunctions) {} + +const std::vector isConst = {true, false}; + +INSTANTIATE_TEST_CASE_P(SimpleFakeQuantizeDecomposition, FakeQuantizeDecompositionTest, + Combine(Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{1, 3, 1, 1}), + Values(ngraph::Shape{1, 3, 1, 1}), + Values(ngraph::Shape{1, 1, 1, 1}), + Values(ngraph::Shape{1, 1, 1, 1}), + ValuesIn(isConst)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(BroadcastFakeQuantizeDecomposition, FakeQuantizeDecompositionTest, + Combine(Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{1, 1, 4, 5}), + Values(ngraph::Shape{1, 1, 4, 5}), + Values(ngraph::Shape{1, 1, 1, 1}), + Values(ngraph::Shape{1, 1, 1, 1}), + ValuesIn(isConst)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(ElementwiseFakeQuantizeDecomposition, FakeQuantizeDecompositionTest, + Combine(Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{2, 3, 4, 5}), + ValuesIn(isConst)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(FakeQuantizeDecomposition_6D, FakeQuantizeDecompositionTest, + Combine(Values(ngraph::Shape{2, 3, 4, 5, 6, 7}), + Values(ngraph::Shape{1, 1, 1, 1, 1, 1}), + Values(ngraph::Shape{1, 1, 1, 1, 1, 1}), + Values(ngraph::Shape{1, 1, 1, 5, 6, 7}), + Values(ngraph::Shape{1, 1, 1, 5, 6, 7}), + ValuesIn(isConst)), + FakeQuantizeDecompositionTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp new file mode 100644 index 00000000000000..7c33545b022b4b --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp @@ -0,0 +1,330 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +using namespace InferenceEngine; +using namespace ngraph; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using fqSpecificParams = std::tuple, // input low + std::vector, // input high + std::vector, // output low + std::vector, // output high + std::vector, // 'ranges' inputs shapes + size_t>; // levels + +using fqLayerTestParamsSet = std::tuple; + +class FakeQuantizeLayerCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + fqSpecificParams fqParams; + SizeVector inDataShape; + Precision inPrec; + helpers::InputLayerType rangesType; + bool shouldBeDecomposed; + CPUSpecificParams cpuParams; + std::tie(fqParams, inDataShape, inPrec, rangesType, shouldBeDecomposed, cpuParams) = obj.param; + + int64_t inDataLowBounds, inDataHighBounds; + std::vector inputLow, inputHigh, outputLow, outputHigh; + std::vector inRangesShapes; + size_t levels; + std::tie(inDataLowBounds, inDataHighBounds, inputLow, inputHigh, outputLow, outputHigh, inRangesShapes, levels) = fqParams; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inDataShape) << "_"; + result << "inPrec=" << inPrec.name() << "_"; + result << "RANGES_TYPE=" 
<< rangesType << "_"; + + std::string rs = ""; + for (size_t i = 0; i < inRangesShapes.size(); i++) { + rs += CommonTestUtils::vec2str(inRangesShapes[i]) + "_"; + } + result << "RS=" << rs; + result << "LOW_BOUNDS=" << inDataLowBounds << "_"; + result << "HIGH_BOUNDS=" << inDataHighBounds << "_"; + result << "IL=" << CommonTestUtils::vec2str(inputLow) << "_"; + result << "IH=" << CommonTestUtils::vec2str(inputHigh) << "_"; + result << "OL=" << CommonTestUtils::vec2str(outputLow) << "_"; + result << "OH=" << CommonTestUtils::vec2str(outputHigh) << "_"; + result << "LEVELS=" << levels; + + result << CPUTestsBase::getTestCaseName(cpuParams); + + return result.str(); + } + + void Infer() override { + inferRequest = executableNetwork.CreateInferRequest(); + inputs.clear(); + + const InputsDataMap &inDataMap = cnnNetwork.getInputsInfo(); + auto input = inDataMap.begin(); + + Blob::Ptr blob = FuncTestUtils::createAndFillBlob(input->second->getTensorDesc(), inDataHighBounds - inDataLowBounds, inDataLowBounds); + inferRequest.SetBlob(input->second->name(), blob); + inputs.push_back(blob); + input++; + + for (size_t it = 1; it < inDataMap.size(); it++) { + blob = fillRanges(rangesBounds[it - 1], input->second->getTensorDesc()); + inferRequest.SetBlob(input->second->name(), blob); + inputs.push_back(blob); + input++; + } + inferRequest.Infer(); + } + +protected: + std::string layerName; + + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + fqSpecificParams fqParams; + SizeVector inDataShape; + Precision inPrec; + helpers::InputLayerType rangesType; + bool shouldBeDecomposed; + CPUSpecificParams cpuParams; + std::tie(fqParams, inDataShape, inPrec, rangesType, shouldBeDecomposed, cpuParams) = this->GetParam(); + + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + + std::vector inRangesShapes; + size_t levels; + rangesBounds.resize(RANGES_INPUT_NUMBER); + std::tie(inDataLowBounds, inDataHighBounds, rangesBounds[0], rangesBounds[1], rangesBounds[2], rangesBounds[3], inRangesShapes, levels) = fqParams; + + ParameterVector params; + auto ngInPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); + if (rangesType == helpers::InputLayerType::PARAMETER) { + inRangesShapes.insert(inRangesShapes.begin(), inDataShape); + params = builder::makeParams(ngInPrec, inRangesShapes); + } else { + params = builder::makeParams(ngInPrec, {inDataShape}); + } + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); + + std::shared_ptr fq; + if (rangesType == helpers::InputLayerType::PARAMETER) { + fq = std::make_shared(paramOuts[0], paramOuts[1], paramOuts[2], paramOuts[3], paramOuts[4], levels); + } else { + auto il = builder::makeConstant(ngInPrec, inRangesShapes[0], rangesBounds[0], rangesBounds[0].empty()); + auto ih = builder::makeConstant(ngInPrec, inRangesShapes[1], rangesBounds[1], rangesBounds[1].empty()); + auto ol = builder::makeConstant(ngInPrec, inRangesShapes[2], rangesBounds[2], rangesBounds[2].empty()); + auto oh = builder::makeConstant(ngInPrec, inRangesShapes[3], rangesBounds[3], rangesBounds[3].empty()); + fq = std::make_shared(paramOuts[0], il, ih, ol, oh, levels); + } + + layerName = shouldBeDecomposed ? 
"" : "Quantize"; + fq->get_rt_info() = getCPUInfo(); + + function = std::make_shared(fq, params, "FakeQuantizeCPU"); + } + +private: + Blob::Ptr fillRanges(std::vector data, const TensorDesc &td) { + if (data.empty()) { + return FuncTestUtils::createAndFillBlob(td); + } else { + if (data.size() == 1) { + data.resize(std::accumulate(td.getDims().begin(), td.getDims().end(), (size_t)1, std::multiplies())); + std::fill(data.begin() + 1, data.end(), data.front()); + } + return FuncTestUtils::createAndFillBlobWithFloatArray(td, data.data(), data.size()); + } + } + + const size_t RANGES_INPUT_NUMBER = 4; + + int64_t inDataLowBounds, inDataHighBounds; + std::vector> rangesBounds; +}; + +TEST_P(FakeQuantizeLayerCPUTest, CompareWithRefs) { + Run(); + + CheckCPUImpl(executableNetwork, layerName); +} + +const std::vector dataShapes = { + {4, 5, 6, 7}, + {3, 4, 5, 6, 7}, + {2, 3, 4, 5, 6, 7}, +}; + +const std::vector> rangesShapes = { + {{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}, + {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}}, + {{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}, + {{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}} +}; + +const std::vector levels = {16, 255, 256}; + +std::vector rangesTypes = { + helpers::InputLayerType::CONSTANT, + helpers::InputLayerType::PARAMETER, +}; + +const std::vector outputLow{5.0f}, outputHigh{25.0f}; + +int64_t dataLowBounds{-10}, dataHighBounds{10}; + + + +namespace fqImpl { + +const std::vector inputLow{0.0f}, inputHigh{5.0f}; + +std::vector filterCPUInfoForDevice(std::vector CPUParams) { + std::vector resCPUParams; + const int selectedTypeIndex = 3; + + for (auto param : CPUParams) { + auto selectedTypeStr = std::get(param); + + if (selectedTypeStr.find("jit") != std::string::npos && !with_cpu_x86_sse42()) + continue; + if (selectedTypeStr.find("sse42") != std::string::npos && !with_cpu_x86_sse42()) + continue; + if (selectedTypeStr.find("avx2") != std::string::npos && !with_cpu_x86_avx2()) + continue; + if (selectedTypeStr.find("avx512") != std::string::npos && !with_cpu_x86_avx512f()) + continue; + + resCPUParams.push_back(param); + } + + return resCPUParams; +} + +std::vector memForm4D = { + CPUSpecificParams({nchw}, {nchw}, {"jit_sse42"}, {"jit_sse42_FP32"}), + CPUSpecificParams({nhwc}, {nhwc}, {"jit_sse42"}, {"jit_sse42_FP32"}), + CPUSpecificParams({nChw8c}, {nChw8c}, {"jit_sse42"}, {"jit_sse42_FP32"}), + CPUSpecificParams({nchw}, {nchw}, {"jit_avx2"}, {"jit_avx2_FP32"}), + CPUSpecificParams({nhwc}, {nhwc}, {"jit_avx2"}, {"jit_avx2_FP32"}), + CPUSpecificParams({nChw8c}, {nChw8c}, {"jit_avx2"}, {"jit_avx2_FP32"}), + CPUSpecificParams({nchw}, {nchw}, {"jit_avx512"}, {"jit_avx512_FP32"}), + CPUSpecificParams({nhwc}, {nhwc}, {"jit_avx512"}, {"jit_avx512_FP32"}), + CPUSpecificParams({nChw16c}, {nChw16c}, {"jit_avx512"}, {"jit_avx512_FP32"}) +}; + +const std::vector> rangesShapes4D = { + {{1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}}, + {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}} +}; + +const auto specificParams4D = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(inputLow), + ::testing::Values(inputHigh), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes4D), + ::testing::ValuesIn(levels)); +const auto testParams4D = ::testing::Combine(specificParams4D, + ::testing::Values(SizeVector{4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + 
::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(false), + ::testing::ValuesIn(filterCPUInfoForDevice(memForm4D))); +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_4D, FakeQuantizeLayerCPUTest, testParams4D, FakeQuantizeLayerCPUTest::getTestCaseName); + + +std::vector memForm5D = { + CPUSpecificParams({ncdhw}, {ncdhw}, {"jit_sse42"}, {"jit_sse42_FP32"}), + CPUSpecificParams({ndhwc}, {ndhwc}, {"jit_sse42"}, {"jit_sse42_FP32"}), + CPUSpecificParams({ncdhw}, {ncdhw}, {"jit_avx2"}, {"jit_avx2_FP32"}), + CPUSpecificParams({ndhwc}, {ndhwc}, {"jit_avx2"}, {"jit_avx2_FP32"}), + CPUSpecificParams({ncdhw}, {ncdhw}, {"jit_avx512"}, {"jit_avx512_FP32"}), + CPUSpecificParams({ndhwc}, {ndhwc}, {"jit_avx512"}, {"jit_avx512_FP32"}) +}; + +const std::vector> rangesShapes5D = { + {{1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}} +}; + +const auto specificParams5D = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(inputLow), + ::testing::Values(inputHigh), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes5D), + ::testing::ValuesIn(levels)); +const auto testParams5D = ::testing::Combine(specificParams5D, + ::testing::Values(SizeVector{3, 4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(false), + ::testing::ValuesIn(filterCPUInfoForDevice(memForm5D))); + +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_5D, FakeQuantizeLayerCPUTest, testParams5D, FakeQuantizeLayerCPUTest::getTestCaseName); + +} // namespace fqImpl + + +namespace fqDecompPositveRanges { + +const std::vector inputLow{0.0f}, inputHigh{5.0f}; +const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(inputLow), + ::testing::Values(inputHigh), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes), + ::testing::ValuesIn(levels)); +const auto testParams = ::testing::Combine(specificParams, + ::testing::ValuesIn(dataShapes), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(rangesTypes), + ::testing::Values(true), + ::testing::Values(CPUSpecificParams{})); + +INSTANTIATE_TEST_CASE_P(smoke_FQDecompos_PositveRanges, FakeQuantizeLayerCPUTest, testParams, FakeQuantizeLayerCPUTest::getTestCaseName); + +} // namespace fqDecompPositveRanges + + +namespace fqDecompNegativeRanges { + +const std::vector inputLow{-5.0f}, inputHigh{0.0f}; +const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(inputLow), + ::testing::Values(inputHigh), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes), + ::testing::ValuesIn(levels)); +const auto testParams = ::testing::Combine(specificParams, + ::testing::ValuesIn(dataShapes), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(rangesTypes), + ::testing::Values(true), + ::testing::Values(CPUSpecificParams{})); + +INSTANTIATE_TEST_CASE_P(smoke_FQDecompos_NegativeRanges, FakeQuantizeLayerCPUTest, testParams, FakeQuantizeLayerCPUTest::getTestCaseName); + +} // namespace fqDecompNegativeRanges + +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git 
a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp index fb99ff842e1e87..9ee6cf2818785c 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp @@ -66,6 +66,8 @@ std::string CPUTestsBase::impls2str(const std::vector &priority) { } void CPUTestsBase::CheckCPUImpl(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const { + if (nodeType.empty()) return; + IE_SUPPRESS_DEPRECATED_START ASSERT_TRUE(!selectedType.empty()) << "Node type is not defined."; bool isNodeFound = false; diff --git a/ngraph/python/tests/__init__.py b/ngraph/python/tests/__init__.py index 4e3b2b7ffadaf6..0b8bcefede4635 100644 --- a/ngraph/python/tests/__init__.py +++ b/ngraph/python/tests/__init__.py @@ -181,8 +181,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_44967 = xfail_test(reason="E RuntimeError: unsupported element type: BFLOAT16") xfail_issue_44968 = xfail_test(reason="E Unsupported dynamic op: Squeeze") xfail_issue_44970 = xfail_test(reason="Assertion error") -xfail_issue_44976 = xfail_test(reason="E RuntimeError: Quantize layer with name:" - "FakeQuantize_xxx has non const input on 1 port") # Model MSFT issues: xfail_issue_37957 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" diff --git a/ngraph/python/tests/test_ngraph/test_ops_fused.py b/ngraph/python/tests/test_ngraph/test_ops_fused.py index f7e37805a1fa9d..bfb8ab4838368b 100644 --- a/ngraph/python/tests/test_ngraph/test_ops_fused.py +++ b/ngraph/python/tests/test_ngraph/test_ops_fused.py @@ -22,8 +22,7 @@ xfail_issue_34327, xfail_issue_36485, xfail_issue_36486, - xfail_issue_36487, - xfail_issue_44976) + xfail_issue_36487) @xfail_issue_40957 @@ -58,7 +57,6 @@ def test_elu_operator_with_scalar(): assert np.allclose(result, expected) -@xfail_issue_44976 def test_fake_quantize(): runtime = get_runtime() diff --git a/ngraph/python/tests/test_onnx/test_backend.py b/ngraph/python/tests/test_onnx/test_backend.py index 5c708c78ba9c12..4503ef7417fba2 100644 --- a/ngraph/python/tests/test_onnx/test_backend.py +++ b/ngraph/python/tests/test_onnx/test_backend.py @@ -89,8 +89,7 @@ xfail_issue_44958, xfail_issue_44965, xfail_issue_44967, - xfail_issue_44968, - xfail_issue_44976) + xfail_issue_44968) def expect_fail(test_case_path, xfail): # type: (str) -> None @@ -196,8 +195,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None (xfail_issue_38086, "OnnxBackendNodeModelTest.test_dynamicquantizelinear_min_adjusted_expanded_cpu", "OnnxBackendNodeModelTest.test_dynamicquantizelinear_expanded_cpu", - "OnnxBackendNodeModelTest.test_dynamicquantizelinear_max_adjusted_expanded_cpu", - "OnnxBackendNodeModelTest.test_quantizelinear_cpu"), + "OnnxBackendNodeModelTest.test_dynamicquantizelinear_max_adjusted_expanded_cpu"), (xfail_issue_38087, "OnnxBackendNodeModelTest.test_convtranspose_1d_cpu"), (xfail_issue_40957, @@ -708,9 +706,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_cast_FLOAT_to_BFLOAT16_cpu",), (xfail_issue_44968, "OnnxBackendNodeModelTest.test_squeeze_cpu", - "OnnxBackendNodeModelTest.test_squeeze_negative_axes_cpu",), - (xfail_issue_44976, - "OnnxBackendNodeModelTest.test_quantizelinear_axis_cpu",) + "OnnxBackendNodeModelTest.test_squeeze_negative_axes_cpu",) ] for test_group in 
tests_expected_to_fail:
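
Reviewer note: for readability, the sub-graph that FakeQuantizeDecomposition emits computes, per element, the arithmetic sketched below. This is a minimal scalar model I wrote for this review; the function name and the standalone main() are illustrative, not part of the patch.

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Scalar model of the decomposed FakeQuantize: clamp to the input range,
    // rescale to [0, levels - 1], round half to even, rescale to the output range.
    float fake_quantize_ref(float x, float il, float ih, float ol, float oh, int levels) {
        const float clipped = std::min(std::max(x, il), ih);
        const float isc = (levels - 1) / (ih - il);           // input scale
        const float ish = il * isc;                           // input shift
        const float q = std::nearbyint(clipped * isc - ish);  // HALF_TO_EVEN under the default FP rounding mode
        const float osc = (oh - ol) / (levels - 1);           // output scale
        return q * osc + ol;                                  // output shift by output_low
    }

    int main() {
        // 256 levels, input range [0, 5], output range [5, 25]:
        // 2.6 is scaled to 132.6, rounded to level 133, and mapped to ~15.43.
        std::printf("%f\n", fake_quantize_ref(2.6f, 0.0f, 5.0f, 5.0f, 25.0f, 256));
        return 0;
    }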
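
Reviewer note: the plugin-side callback keeps FakeQuantize as a single MKLDNN Quantize node only when all range inputs broadcast along at most one common axis; otherwise the pass decomposes it. Below is my self-contained restatement of the initAxisIdx/isSupportedFQ check over plain shape vectors, not code from the patch.

    #include <cstddef>
    #include <set>
    #include <vector>

    // Index of the only non-unit dimension, or -1 if more than one dimension is non-unit.
    int non_unit_axis(const std::vector<size_t>& shape) {
        int axis = 0, non_unit = 0;
        for (size_t i = 0; i < shape.size(); i++) {
            if (shape[i] > 1) { axis = static_cast<int>(i); non_unit++; }
        }
        return non_unit > 1 ? -1 : axis;
    }

    // All per-channel range inputs must agree on one broadcast axis and one size along it.
    bool is_supported_fq(const std::vector<std::vector<size_t>>& range_shapes) {
        std::set<int> axes;
        std::set<size_t> sizes;
        for (const auto& s : range_shapes) {
            const int axis = non_unit_axis(s);
            if (axis == -1) return false;  // e.g. {1, 3, 4, 1}: two non-unit dims
            if (!s.empty() && s[axis] != 1) {
                axes.insert(axis);
                sizes.insert(s[axis]);
            }
        }
        return axes.size() <= 1 && sizes.size() <= 1;
    }

    // {1, 3, 1, 1} used for all four ranges -> supported (single channel axis);
    // mixing {1, 3, 1, 1} with {1, 1, 4, 1} -> unsupported, so FQ is decomposed.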
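
Reviewer note: outside the MKLDNN pipeline, the new pass can be run through the standard ngraph pass manager. A minimal usage sketch (my own, hedged: the ConstantFolding step is a suggestion for folding the constant range chains, not something this patch mandates):

    #include <memory>

    #include <ngraph/function.hpp>
    #include <ngraph/pass/constant_folding.hpp>
    #include <ngraph/pass/manager.hpp>
    #include <transformations/op_conversions/fq_decomposition.hpp>

    void decompose_fake_quantize(std::shared_ptr<ngraph::Function> f) {
        ngraph::pass::Manager manager;
        manager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
        // Fold the constant sub-expressions produced from constant input ranges.
        manager.register_pass<ngraph::pass::ConstantFolding>();
        manager.run_passes(f);
    }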