From d275a15f4b42d8febf2b125c064f6d15ae4929e7 Mon Sep 17 00:00:00 2001
From: mandrono
Date: Tue, 22 Dec 2020 15:12:44 +0300
Subject: [PATCH] FakeQuantize decomposition

---
 .../src/mkldnn_plugin/mkldnn_plugin.cpp       |  55 ++-
 .../nodes/mkldnn_quantize_node.cpp            |  32 ++
 .../nodes/mkldnn_quantize_node.h              |   3 +
 .../op_conversions/fq_decomposition.hpp       |  26 ++
 .../op_conversions/fq_decomposition.cpp       |  86 +++++
 .../transformations/fq_decomposition_test.cpp | 182 ++++++++++
 .../cpu/single_layer_tests/fake_quantize.cpp  | 330 ++++++++++++++++++
 .../plugin/cpu/test_utils/cpu_test_utils.cpp  |   2 +
 ngraph/python/tests/__init__.py               |   2 -
 .../tests/test_ngraph/test_ops_fused.py       |   4 +-
 ngraph/python/tests/test_onnx/test_backend.py |  10 +-
 11 files changed, 717 insertions(+), 15 deletions(-)
 create mode 100644 inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp
 create mode 100644 inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp
 create mode 100644 inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp
 create mode 100644 inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp

diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
index 3045037185e492..d6740e33ad0f34 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -57,6 +57,8 @@
 #include
 #include
 #include
+#include <transformations/op_conversions/fq_decomposition.hpp>
+#include <transformations/utils/utils.hpp>
 #include
 #include

@@ -226,13 +228,17 @@ static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf)
         transformer.transform(nGraphFunc);
     }

+    bool keep_constant_inputs = ::ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc);
+
     ngraph::pass::Manager legacyManager;
+
+    legacyManager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
     legacyManager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
     legacyManager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
     // not legacy actually, but it should be the last transformation in the transformation pipeline
     legacyManager.register_pass<ngraph::pass::UnrollTensorIterator>();
-
     auto legacyPassConfig = legacyManager.get_pass_config();
+
     legacyPassConfig->set_callback([](const_node_ptr &node) -> bool {
         if (auto mul_op = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
             auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(mul_op->get_input_node_shared_ptr(0));
@@ -247,15 +253,58 @@ static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf)
         return false;
     });

-    legacyManager.get_pass_config()->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
+    legacyPassConfig->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
         // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
         return node->get_rt_info().count("UNROLL_TI") == 0;
     });
+
+    auto initAxisIdx = [](const std::shared_ptr<ngraph::Node> node) -> int {
+        int axisIdx = 0, numberOfNonUnit = 0;
+
+        for (size_t i = 0; i < node->get_shape().size(); i++) {
+            if (node->get_shape()[i] > 1) {
+                axisIdx = i;
+                numberOfNonUnit++;
+            }
+        }
+        return numberOfNonUnit > 1 ? -1 : axisIdx;
+    };
+    auto isSupportedFQ = [initAxisIdx](const_node_ptr &node) {
+        std::set<int> quantizationParamsAxesIdxs;
+        std::set<size_t> quantizationParamsAxesSizes;
+        for (size_t i = 1; i < node->get_input_size(); i++) {
+            auto inNode = node->get_input_node_shared_ptr(i);
+            auto axis = initAxisIdx(inNode);
+            if (axis == -1)
+                return false;
+            if (inNode->get_shape().size() != 0 && inNode->get_shape()[axis] != 1) {
+                quantizationParamsAxesIdxs.insert(axis);
+                quantizationParamsAxesSizes.insert(inNode->get_shape()[axis]);
+            }
+        }
+        return (quantizationParamsAxesIdxs.size() <= 1 && quantizationParamsAxesSizes.size() <= 1);
+    };
+
+    legacyPassConfig->set_callback<ngraph::pass::FakeQuantizeDecomposition>([isSupportedFQ](const_node_ptr &node) -> bool {
+        if (auto fq_op = std::dynamic_pointer_cast<const ngraph::opset1::FakeQuantize>(node)) {
+            if (node->get_input_node_shared_ptr(0)->get_shape().size() > 5)
+                return false;
+            for (size_t i = 1; i < fq_op->get_input_size(); i++) {
+                if (!std::dynamic_pointer_cast<const ngraph::opset1::Constant>(fq_op->get_input_node_shared_ptr(i)) ||
+                        node->get_input_node_shared_ptr(i)->get_shape().size() > 5)
+                    return false;
+            }
+            return isSupportedFQ(fq_op);
+        }
+
+        return true;
+    });
+
     legacyManager.run_passes(nGraphFunc);

     OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork");
-    clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
+    clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork, keep_constant_inputs);

     OV_ITT_TASK_NEXT(taskChain, "ConvertIOPrecision");

diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
index 5331dc23c9dc84..c42ef3f56793a8 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
@@ -402,6 +402,38 @@ void MKLDNNQuantizeNode::initSupportedPrimitiveDescriptors() {
     }
 }

+void MKLDNNQuantizeNode::filterSupportedPrimitiveDescriptors() {
+    MKLDNNNode::filterSupportedPrimitiveDescriptors();
+    filterSupportedDescriptors();
+}
+
+void MKLDNNQuantizeNode::filterSupportedDescriptors() {
+    if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty()) {
+        if (inputMemoryFormatsFilter.size() > 1 || outputMemoryFormatsFilter.size() > 1) {
+            THROW_IE_EXCEPTION << "Incorrect number of input or output memory formats for Quantize node";
+        }
+        auto itd = descs.begin();
+        while (itd != descs.end()) {
+            bool isSuitableDesc = true;
+            if (!inputMemoryFormatsFilter.empty()) {
+                auto src_fmt = std::shared_ptr<mkldnn::quantization_forward::desc>(*itd)->data.src_desc.format;
+                if (src_fmt != inputMemoryFormatsFilter[0])
+                    isSuitableDesc = false;
+            }
+            if (!outputMemoryFormatsFilter.empty()) {
+                auto dst_fmt = std::shared_ptr<mkldnn::quantization_forward::desc>(*itd)->data.dst_desc.format;
+                if (dst_fmt != outputMemoryFormatsFilter[0])
+                    isSuitableDesc = false;
+            }
+            if (!isSuitableDesc) {
+                itd = descs.erase(itd);
+            } else {
+                itd++;
+            }
+        }
+    }
+}
+
 void MKLDNNQuantizeNode::createPrimitive() {
     if (prim)
         return;

diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h
index af68cfdd08a8b5..7ab6ab62ea1f6b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h
@@ -25,6 +25,9 @@ class MKLDNNQuantizeNode : public MKLDNNNode {
     bool created() const override;
     void execute(mkldnn::stream strm) override;

+    void filterSupportedPrimitiveDescriptors() override;
+    void filterSupportedDescriptors();
+
     size_t getAxis() const { return axis; }

     bool isBinarization() const { return quantizeAlgorithm == mkldnn::algorithm::binarization_depthwise; }

diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp
new file mode 100644
index 00000000000000..a3765cdfac9912
--- /dev/null
+++ b/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp
@@ -0,0 +1,26 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <transformations_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+class TRANSFORMATIONS_API FakeQuantizeDecomposition;
+
+} // namespace pass
+} // namespace ngraph
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief FakeQuantizeDecomposition transformation decomposes a FakeQuantize layer into a sub-graph of elementwise operations
+ */
+class ngraph::pass::FakeQuantizeDecomposition: public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    FakeQuantizeDecomposition();
+};
diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp
new file mode 100644
index 00000000000000..b48b39273e1b65
--- /dev/null
+++ b/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp
@@ -0,0 +1,86 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/op_conversions/fq_decomposition.hpp"
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset5.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/rt_info.hpp>
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeDecomposition, "FakeQuantizeDecomposition", 0);
+
+ngraph::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() {
+    auto fake_quantize = ngraph::pattern::wrap_type<ngraph::opset1::FakeQuantize>();
+
+    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
+        auto &pattern_to_output = m.get_pattern_value_map();
+        auto fake_quantize_node = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(pattern_to_output.at(fake_quantize).get_node_shared_ptr());
+
+        if (fake_quantize_node == nullptr || m_transformation_callback(fake_quantize_node)) {
+            return false;
+        }
+
+        Output<Node> data{fake_quantize_node->input_value(0)};
+        Output<Node> input_low{fake_quantize_node->input_value(1)};
+        Output<Node> input_high{fake_quantize_node->input_value(2)};
+        Output<Node> output_low{fake_quantize_node->input_value(3)};
+        Output<Node> output_high{fake_quantize_node->input_value(4)};
+        auto input_type = data.get_element_type();
+
+        ngraph::NodeVector decomp_ops;
+        if (input_type != input_low.get_element_type()) {
+            input_type = input_low.get_element_type();
+            data = std::make_shared<ngraph::opset1::Convert>(data, input_type);
+            decomp_ops.push_back(data.get_node_shared_ptr());
+        }
+
+        auto max = std::make_shared<ngraph::opset1::Maximum>(data, input_low);
+        auto min = std::make_shared<ngraph::opset1::Minimum>(max, input_high);
+        decomp_ops.push_back(max);
+        decomp_ops.push_back(min);
+
+        auto levels_minus_one = std::make_shared<ngraph::opset1::Constant>(input_type, Shape{}, fake_quantize_node->get_levels() - 1);
+        decomp_ops.push_back(levels_minus_one);
+        // input scale and shift
+        auto subInHighLow = std::make_shared<ngraph::opset1::Subtract>(input_high, input_low);
+        auto isc = std::make_shared<ngraph::opset1::Divide>(levels_minus_one, subInHighLow);
+        auto ish = std::make_shared<ngraph::opset1::Multiply>(input_low, isc);
+        decomp_ops.push_back(subInHighLow);
+        decomp_ops.push_back(isc);
+        decomp_ops.push_back(ish);
+
+        auto after_isc_apply = std::make_shared<ngraph::opset1::Multiply>(min, isc);
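+        // Note: multiplying by isc = (levels - 1) / (input_high - input_low) and then
+        // subtracting ish = input_low * isc maps the clipped range [input_low, input_high]
+        // onto [0, levels - 1], so the Round below snaps values to the integer level grid.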
+        auto after_ish_apply = std::make_shared<ngraph::opset1::Subtract>(after_isc_apply, ish);
+        decomp_ops.push_back(after_isc_apply);
+        decomp_ops.push_back(after_ish_apply);
+
+        auto round = std::make_shared<ngraph::opset5::Round>(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN);
+        decomp_ops.push_back(round);
+
+        // output scale and shift
+        auto subOutHighLow = std::make_shared<ngraph::opset1::Subtract>(output_high, output_low);
+        auto osc = std::make_shared<ngraph::opset1::Divide>(subOutHighLow, levels_minus_one);
+        decomp_ops.push_back(subOutHighLow);
+        decomp_ops.push_back(osc);
+
+        auto after_osc_apply = std::make_shared<ngraph::opset1::Multiply>(round, osc);
+        std::shared_ptr<Node> result = std::make_shared<ngraph::opset1::Add>(after_osc_apply, output_low);
+        decomp_ops.push_back(after_osc_apply);
+        decomp_ops.push_back(result);
+
+        if (result->get_output_element_type(0) != fake_quantize_node->get_output_element_type(0)) {
+            result = std::make_shared<ngraph::opset1::Convert>(result, fake_quantize_node->get_output_element_type(0));
+            decomp_ops.push_back(result);
+        }
+
+        result->set_friendly_name(m.get_match_root()->get_friendly_name());
+        ngraph::copy_runtime_info(fake_quantize_node, decomp_ops);
+        ngraph::replace_node(m.get_match_root(), result);
+        return true;
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(fake_quantize, "FakeQuantizeDecomposition");
+    register_matcher(m, callback);
+}
diff --git a/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp
new file mode 100644
index 00000000000000..e105fb5b9c403c
--- /dev/null
+++ b/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp
@@ -0,0 +1,182 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset5.hpp>
+#include <ngraph/pass/manager.hpp>
+#include <transformations/init_node_info.hpp>
+#include <transformations/op_conversions/fq_decomposition.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "common_test_utils/ngraph_test_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+using namespace testing;
+
+using FakeQuantizeDecompositionParamsSet = std::tuple<ngraph::Shape,  // data shape
+                                                      ngraph::Shape,  // input_low shape
+                                                      ngraph::Shape,  // input_high shape
+                                                      ngraph::Shape,  // output_low shape
+                                                      ngraph::Shape,  // output_high shape
+                                                      bool>;          // ranges are constants
+
+class FakeQuantizeDecompositionTest : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<FakeQuantizeDecompositionParamsSet> {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<FakeQuantizeDecompositionParamsSet> obj) {
+        ngraph::Shape data_shape, il_shape, ih_shape, ol_shape, oh_shape;
+        bool is_const;
+        std::tie(data_shape, il_shape, ih_shape, ol_shape, oh_shape, is_const) = obj.param;
+
+        std::ostringstream result;
+        result << "DATA=" << CommonTestUtils::vec2str(data_shape) << "_";
+        result << "IL=" << CommonTestUtils::vec2str(il_shape) << "_";
+        result << "IH=" << CommonTestUtils::vec2str(ih_shape) << "_";
+        result << "OL=" << CommonTestUtils::vec2str(ol_shape) << "_";
+        result << "OH=" << CommonTestUtils::vec2str(oh_shape) << "_";
+        std::string rangeType = is_const ? "CONST" : "PARAMETER";
"CONST" : "PARAMETR"; + result << "RANGES_TYPE=" << rangeType; + return result.str(); + } + +protected: + void SetUp() { + ngraph::Shape data_shape, il_shape, ih_shape, ol_shape, oh_shape; + bool is_const; + std::tie(data_shape, il_shape, ih_shape, ol_shape, oh_shape, is_const) = this->GetParam(); + + std::shared_ptr f(nullptr), f_ref(nullptr); + const size_t levels = 256; + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape(data_shape)); + ngraph::ParameterVector params; + params.push_back(data); + ngraph::Output il, ih, ol, oh; + if (is_const) { + il = std::make_shared(ngraph::element::f32, il_shape); + ih = std::make_shared(ngraph::element::f32, ih_shape); + ol = std::make_shared(ngraph::element::f32, ol_shape); + oh = std::make_shared(ngraph::element::f32, oh_shape); + + } else { + auto il_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(il_shape)); + params.push_back(il_params); + il = il_params; + auto ih_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(ih_shape)); + params.push_back(ih_params); + ih = ih_params; + auto ol_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(ol_shape)); + params.push_back(ol_params); + ol = ol_params; + auto oh_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(oh_shape)); + oh = oh_params; + params.push_back(oh_params); + } + auto fq = std::make_shared(data, il, ih, ol, oh, levels); + f = std::make_shared(ngraph::NodeVector{fq}, params); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape(data_shape)); + ngraph::ParameterVector params; + params.push_back(data); + ngraph::Output il, ih, ol, oh; + if (is_const) { + il = std::make_shared(ngraph::element::f32, il_shape); + ih = std::make_shared(ngraph::element::f32, ih_shape); + ol = std::make_shared(ngraph::element::f32, ol_shape); + oh = std::make_shared(ngraph::element::f32, oh_shape); + } else { + auto il_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(il_shape)); + params.push_back(il_params); + il = il_params; + auto ih_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(ih_shape)); + params.push_back(ih_params); + ih = ih_params; + auto ol_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(ol_shape)); + params.push_back(ol_params); + ol = ol_params; + auto oh_params = std::make_shared(ngraph::element::f32, ngraph::PartialShape(oh_shape)); + oh = oh_params; + params.push_back(oh_params); + } + + auto max = std::make_shared(data, il); + auto min = std::make_shared(max, ih); + + auto levels_minus_one = std::make_shared(ngraph::element::f32, ngraph::Shape{}, levels - 1); + + auto subInHighLow = std::make_shared(ih, il); + auto isc = std::make_shared(levels_minus_one, subInHighLow); + auto ish = std::make_shared(il, isc); + + auto after_isc_apply = std::make_shared(min, isc); + auto after_ish_apply = std::make_shared(after_isc_apply, ish); + + auto round = std::make_shared(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN); + + auto subOutHighLow = std::make_shared(oh, ol); + auto osc = std::make_shared(subOutHighLow, levels_minus_one); + + auto after_osc_apply = std::make_shared(round, osc); + auto after_out_low_add = std::make_shared(after_osc_apply, ol); + + f_ref = std::make_shared(ngraph::NodeVector{after_out_low_add}, params); + } + + auto res = 
compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + } +}; + +TEST_P(FakeQuantizeDecompositionTest, CompareFunctions) {} + +const std::vector isConst = {true, false}; + +INSTANTIATE_TEST_CASE_P(SimpleFakeQuantizeDecomposition, FakeQuantizeDecompositionTest, + Combine(Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{1, 3, 1, 1}), + Values(ngraph::Shape{1, 3, 1, 1}), + Values(ngraph::Shape{1, 1, 1, 1}), + Values(ngraph::Shape{1, 1, 1, 1}), + ValuesIn(isConst)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(BroadcastFakeQuantizeDecomposition, FakeQuantizeDecompositionTest, + Combine(Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{1, 1, 4, 5}), + Values(ngraph::Shape{1, 1, 4, 5}), + Values(ngraph::Shape{1, 1, 1, 1}), + Values(ngraph::Shape{1, 1, 1, 1}), + ValuesIn(isConst)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(ElementwiseFakeQuantizeDecomposition, FakeQuantizeDecompositionTest, + Combine(Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{2, 3, 4, 5}), + Values(ngraph::Shape{2, 3, 4, 5}), + ValuesIn(isConst)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(FakeQuantizeDecomposition_6D, FakeQuantizeDecompositionTest, + Combine(Values(ngraph::Shape{2, 3, 4, 5, 6, 7}), + Values(ngraph::Shape{1, 1, 1, 1, 1, 1}), + Values(ngraph::Shape{1, 1, 1, 1, 1, 1}), + Values(ngraph::Shape{1, 1, 1, 5, 6, 7}), + Values(ngraph::Shape{1, 1, 1, 5, 6, 7}), + ValuesIn(isConst)), + FakeQuantizeDecompositionTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp new file mode 100644 index 00000000000000..7c33545b022b4b --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp @@ -0,0 +1,330 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +using namespace InferenceEngine; +using namespace ngraph; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using fqSpecificParams = std::tuple, // input low + std::vector, // input high + std::vector, // output low + std::vector, // output high + std::vector, // 'ranges' inputs shapes + size_t>; // levels + +using fqLayerTestParamsSet = std::tuple; + +class FakeQuantizeLayerCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + fqSpecificParams fqParams; + SizeVector inDataShape; + Precision inPrec; + helpers::InputLayerType rangesType; + bool shouldBeDecomposed; + CPUSpecificParams cpuParams; + std::tie(fqParams, inDataShape, inPrec, rangesType, shouldBeDecomposed, cpuParams) = obj.param; + + int64_t inDataLowBounds, inDataHighBounds; + std::vector inputLow, inputHigh, outputLow, outputHigh; + std::vector inRangesShapes; + size_t levels; + std::tie(inDataLowBounds, inDataHighBounds, inputLow, inputHigh, outputLow, outputHigh, inRangesShapes, levels) = fqParams; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inDataShape) << "_"; + result << "inPrec=" << inPrec.name() << "_"; + result << "RANGES_TYPE=" 
<< rangesType << "_"; + + std::string rs = ""; + for (size_t i = 0; i < inRangesShapes.size(); i++) { + rs += CommonTestUtils::vec2str(inRangesShapes[i]) + "_"; + } + result << "RS=" << rs; + result << "LOW_BOUNDS=" << inDataLowBounds << "_"; + result << "HIGH_BOUNDS=" << inDataHighBounds << "_"; + result << "IL=" << CommonTestUtils::vec2str(inputLow) << "_"; + result << "IH=" << CommonTestUtils::vec2str(inputHigh) << "_"; + result << "OL=" << CommonTestUtils::vec2str(outputLow) << "_"; + result << "OH=" << CommonTestUtils::vec2str(outputHigh) << "_"; + result << "LEVELS=" << levels; + + result << CPUTestsBase::getTestCaseName(cpuParams); + + return result.str(); + } + + void Infer() override { + inferRequest = executableNetwork.CreateInferRequest(); + inputs.clear(); + + const InputsDataMap &inDataMap = cnnNetwork.getInputsInfo(); + auto input = inDataMap.begin(); + + Blob::Ptr blob = FuncTestUtils::createAndFillBlob(input->second->getTensorDesc(), inDataHighBounds - inDataLowBounds, inDataLowBounds); + inferRequest.SetBlob(input->second->name(), blob); + inputs.push_back(blob); + input++; + + for (size_t it = 1; it < inDataMap.size(); it++) { + blob = fillRanges(rangesBounds[it - 1], input->second->getTensorDesc()); + inferRequest.SetBlob(input->second->name(), blob); + inputs.push_back(blob); + input++; + } + inferRequest.Infer(); + } + +protected: + std::string layerName; + + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + fqSpecificParams fqParams; + SizeVector inDataShape; + Precision inPrec; + helpers::InputLayerType rangesType; + bool shouldBeDecomposed; + CPUSpecificParams cpuParams; + std::tie(fqParams, inDataShape, inPrec, rangesType, shouldBeDecomposed, cpuParams) = this->GetParam(); + + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + + std::vector inRangesShapes; + size_t levels; + rangesBounds.resize(RANGES_INPUT_NUMBER); + std::tie(inDataLowBounds, inDataHighBounds, rangesBounds[0], rangesBounds[1], rangesBounds[2], rangesBounds[3], inRangesShapes, levels) = fqParams; + + ParameterVector params; + auto ngInPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); + if (rangesType == helpers::InputLayerType::PARAMETER) { + inRangesShapes.insert(inRangesShapes.begin(), inDataShape); + params = builder::makeParams(ngInPrec, inRangesShapes); + } else { + params = builder::makeParams(ngInPrec, {inDataShape}); + } + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); + + std::shared_ptr fq; + if (rangesType == helpers::InputLayerType::PARAMETER) { + fq = std::make_shared(paramOuts[0], paramOuts[1], paramOuts[2], paramOuts[3], paramOuts[4], levels); + } else { + auto il = builder::makeConstant(ngInPrec, inRangesShapes[0], rangesBounds[0], rangesBounds[0].empty()); + auto ih = builder::makeConstant(ngInPrec, inRangesShapes[1], rangesBounds[1], rangesBounds[1].empty()); + auto ol = builder::makeConstant(ngInPrec, inRangesShapes[2], rangesBounds[2], rangesBounds[2].empty()); + auto oh = builder::makeConstant(ngInPrec, inRangesShapes[3], rangesBounds[3], rangesBounds[3].empty()); + fq = std::make_shared(paramOuts[0], il, ih, ol, oh, levels); + } + + layerName = shouldBeDecomposed ? 
"" : "Quantize"; + fq->get_rt_info() = getCPUInfo(); + + function = std::make_shared(fq, params, "FakeQuantizeCPU"); + } + +private: + Blob::Ptr fillRanges(std::vector data, const TensorDesc &td) { + if (data.empty()) { + return FuncTestUtils::createAndFillBlob(td); + } else { + if (data.size() == 1) { + data.resize(std::accumulate(td.getDims().begin(), td.getDims().end(), (size_t)1, std::multiplies())); + std::fill(data.begin() + 1, data.end(), data.front()); + } + return FuncTestUtils::createAndFillBlobWithFloatArray(td, data.data(), data.size()); + } + } + + const size_t RANGES_INPUT_NUMBER = 4; + + int64_t inDataLowBounds, inDataHighBounds; + std::vector> rangesBounds; +}; + +TEST_P(FakeQuantizeLayerCPUTest, CompareWithRefs) { + Run(); + + CheckCPUImpl(executableNetwork, layerName); +} + +const std::vector dataShapes = { + {4, 5, 6, 7}, + {3, 4, 5, 6, 7}, + {2, 3, 4, 5, 6, 7}, +}; + +const std::vector> rangesShapes = { + {{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}, + {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}}, + {{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}, + {{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}} +}; + +const std::vector levels = {16, 255, 256}; + +std::vector rangesTypes = { + helpers::InputLayerType::CONSTANT, + helpers::InputLayerType::PARAMETER, +}; + +const std::vector outputLow{5.0f}, outputHigh{25.0f}; + +int64_t dataLowBounds{-10}, dataHighBounds{10}; + + + +namespace fqImpl { + +const std::vector inputLow{0.0f}, inputHigh{5.0f}; + +std::vector filterCPUInfoForDevice(std::vector CPUParams) { + std::vector resCPUParams; + const int selectedTypeIndex = 3; + + for (auto param : CPUParams) { + auto selectedTypeStr = std::get(param); + + if (selectedTypeStr.find("jit") != std::string::npos && !with_cpu_x86_sse42()) + continue; + if (selectedTypeStr.find("sse42") != std::string::npos && !with_cpu_x86_sse42()) + continue; + if (selectedTypeStr.find("avx2") != std::string::npos && !with_cpu_x86_avx2()) + continue; + if (selectedTypeStr.find("avx512") != std::string::npos && !with_cpu_x86_avx512f()) + continue; + + resCPUParams.push_back(param); + } + + return resCPUParams; +} + +std::vector memForm4D = { + CPUSpecificParams({nchw}, {nchw}, {"jit_sse42"}, {"jit_sse42_FP32"}), + CPUSpecificParams({nhwc}, {nhwc}, {"jit_sse42"}, {"jit_sse42_FP32"}), + CPUSpecificParams({nChw8c}, {nChw8c}, {"jit_sse42"}, {"jit_sse42_FP32"}), + CPUSpecificParams({nchw}, {nchw}, {"jit_avx2"}, {"jit_avx2_FP32"}), + CPUSpecificParams({nhwc}, {nhwc}, {"jit_avx2"}, {"jit_avx2_FP32"}), + CPUSpecificParams({nChw8c}, {nChw8c}, {"jit_avx2"}, {"jit_avx2_FP32"}), + CPUSpecificParams({nchw}, {nchw}, {"jit_avx512"}, {"jit_avx512_FP32"}), + CPUSpecificParams({nhwc}, {nhwc}, {"jit_avx512"}, {"jit_avx512_FP32"}), + CPUSpecificParams({nChw16c}, {nChw16c}, {"jit_avx512"}, {"jit_avx512_FP32"}) +}; + +const std::vector> rangesShapes4D = { + {{1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}}, + {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}} +}; + +const auto specificParams4D = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(inputLow), + ::testing::Values(inputHigh), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes4D), + ::testing::ValuesIn(levels)); +const auto testParams4D = ::testing::Combine(specificParams4D, + ::testing::Values(SizeVector{4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + 
::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(false), + ::testing::ValuesIn(filterCPUInfoForDevice(memForm4D))); +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_4D, FakeQuantizeLayerCPUTest, testParams4D, FakeQuantizeLayerCPUTest::getTestCaseName); + + +std::vector memForm5D = { + CPUSpecificParams({ncdhw}, {ncdhw}, {"jit_sse42"}, {"jit_sse42_FP32"}), + CPUSpecificParams({ndhwc}, {ndhwc}, {"jit_sse42"}, {"jit_sse42_FP32"}), + CPUSpecificParams({ncdhw}, {ncdhw}, {"jit_avx2"}, {"jit_avx2_FP32"}), + CPUSpecificParams({ndhwc}, {ndhwc}, {"jit_avx2"}, {"jit_avx2_FP32"}), + CPUSpecificParams({ncdhw}, {ncdhw}, {"jit_avx512"}, {"jit_avx512_FP32"}), + CPUSpecificParams({ndhwc}, {ndhwc}, {"jit_avx512"}, {"jit_avx512_FP32"}) +}; + +const std::vector> rangesShapes5D = { + {{1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}} +}; + +const auto specificParams5D = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(inputLow), + ::testing::Values(inputHigh), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes5D), + ::testing::ValuesIn(levels)); +const auto testParams5D = ::testing::Combine(specificParams5D, + ::testing::Values(SizeVector{3, 4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(false), + ::testing::ValuesIn(filterCPUInfoForDevice(memForm5D))); + +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_5D, FakeQuantizeLayerCPUTest, testParams5D, FakeQuantizeLayerCPUTest::getTestCaseName); + +} // namespace fqImpl + + +namespace fqDecompPositveRanges { + +const std::vector inputLow{0.0f}, inputHigh{5.0f}; +const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(inputLow), + ::testing::Values(inputHigh), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes), + ::testing::ValuesIn(levels)); +const auto testParams = ::testing::Combine(specificParams, + ::testing::ValuesIn(dataShapes), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(rangesTypes), + ::testing::Values(true), + ::testing::Values(CPUSpecificParams{})); + +INSTANTIATE_TEST_CASE_P(smoke_FQDecompos_PositveRanges, FakeQuantizeLayerCPUTest, testParams, FakeQuantizeLayerCPUTest::getTestCaseName); + +} // namespace fqDecompPositveRanges + + +namespace fqDecompNegativeRanges { + +const std::vector inputLow{-5.0f}, inputHigh{0.0f}; +const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(inputLow), + ::testing::Values(inputHigh), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes), + ::testing::ValuesIn(levels)); +const auto testParams = ::testing::Combine(specificParams, + ::testing::ValuesIn(dataShapes), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(rangesTypes), + ::testing::Values(true), + ::testing::Values(CPUSpecificParams{})); + +INSTANTIATE_TEST_CASE_P(smoke_FQDecompos_NegativeRanges, FakeQuantizeLayerCPUTest, testParams, FakeQuantizeLayerCPUTest::getTestCaseName); + +} // namespace fqDecompNegativeRanges + +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git 
a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp index fb99ff842e1e87..9ee6cf2818785c 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp @@ -66,6 +66,8 @@ std::string CPUTestsBase::impls2str(const std::vector &priority) { } void CPUTestsBase::CheckCPUImpl(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const { + if (nodeType.empty()) return; + IE_SUPPRESS_DEPRECATED_START ASSERT_TRUE(!selectedType.empty()) << "Node type is not defined."; bool isNodeFound = false; diff --git a/ngraph/python/tests/__init__.py b/ngraph/python/tests/__init__.py index 4e3b2b7ffadaf6..0b8bcefede4635 100644 --- a/ngraph/python/tests/__init__.py +++ b/ngraph/python/tests/__init__.py @@ -181,8 +181,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_44967 = xfail_test(reason="E RuntimeError: unsupported element type: BFLOAT16") xfail_issue_44968 = xfail_test(reason="E Unsupported dynamic op: Squeeze") xfail_issue_44970 = xfail_test(reason="Assertion error") -xfail_issue_44976 = xfail_test(reason="E RuntimeError: Quantize layer with name:" - "FakeQuantize_xxx has non const input on 1 port") # Model MSFT issues: xfail_issue_37957 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" diff --git a/ngraph/python/tests/test_ngraph/test_ops_fused.py b/ngraph/python/tests/test_ngraph/test_ops_fused.py index f7e37805a1fa9d..bfb8ab4838368b 100644 --- a/ngraph/python/tests/test_ngraph/test_ops_fused.py +++ b/ngraph/python/tests/test_ngraph/test_ops_fused.py @@ -22,8 +22,7 @@ xfail_issue_34327, xfail_issue_36485, xfail_issue_36486, - xfail_issue_36487, - xfail_issue_44976) + xfail_issue_36487) @xfail_issue_40957 @@ -58,7 +57,6 @@ def test_elu_operator_with_scalar(): assert np.allclose(result, expected) -@xfail_issue_44976 def test_fake_quantize(): runtime = get_runtime() diff --git a/ngraph/python/tests/test_onnx/test_backend.py b/ngraph/python/tests/test_onnx/test_backend.py index 5c708c78ba9c12..4503ef7417fba2 100644 --- a/ngraph/python/tests/test_onnx/test_backend.py +++ b/ngraph/python/tests/test_onnx/test_backend.py @@ -89,8 +89,7 @@ xfail_issue_44958, xfail_issue_44965, xfail_issue_44967, - xfail_issue_44968, - xfail_issue_44976) + xfail_issue_44968) def expect_fail(test_case_path, xfail): # type: (str) -> None @@ -196,8 +195,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None (xfail_issue_38086, "OnnxBackendNodeModelTest.test_dynamicquantizelinear_min_adjusted_expanded_cpu", "OnnxBackendNodeModelTest.test_dynamicquantizelinear_expanded_cpu", - "OnnxBackendNodeModelTest.test_dynamicquantizelinear_max_adjusted_expanded_cpu", - "OnnxBackendNodeModelTest.test_quantizelinear_cpu"), + "OnnxBackendNodeModelTest.test_dynamicquantizelinear_max_adjusted_expanded_cpu"), (xfail_issue_38087, "OnnxBackendNodeModelTest.test_convtranspose_1d_cpu"), (xfail_issue_40957, @@ -708,9 +706,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_cast_FLOAT_to_BFLOAT16_cpu",), (xfail_issue_44968, "OnnxBackendNodeModelTest.test_squeeze_cpu", - "OnnxBackendNodeModelTest.test_squeeze_negative_axes_cpu",), - (xfail_issue_44976, - "OnnxBackendNodeModelTest.test_quantizelinear_axis_cpu",) + "OnnxBackendNodeModelTest.test_squeeze_negative_axes_cpu",) ] for test_group in 
tests_expected_to_fail:
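
Reviewer note: for readability, the sub-graph that FakeQuantizeDecomposition emits computes, per element, the arithmetic sketched below. This is a minimal scalar model I wrote for this review; the function name and the standalone main() are illustrative, not part of the patch.

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Scalar model of the decomposed FakeQuantize: clamp to the input range,
    // rescale to [0, levels - 1], round half to even, rescale to the output range.
    float fake_quantize_ref(float x, float il, float ih, float ol, float oh, int levels) {
        const float clipped = std::min(std::max(x, il), ih);
        const float isc = (levels - 1) / (ih - il);           // input scale
        const float ish = il * isc;                           // input shift
        const float q = std::nearbyint(clipped * isc - ish);  // HALF_TO_EVEN under the default FP rounding mode
        const float osc = (oh - ol) / (levels - 1);           // output scale
        return q * osc + ol;                                  // output shift by output_low
    }

    int main() {
        // 256 levels, input range [0, 5], output range [5, 25]:
        // 2.6 is scaled to 132.6, rounded to level 133, and mapped to ~15.43.
        std::printf("%f\n", fake_quantize_ref(2.6f, 0.0f, 5.0f, 5.0f, 25.0f, 256));
        return 0;
    }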
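
Reviewer note: the plugin-side callback keeps FakeQuantize as a single MKLDNN Quantize node only when all range inputs broadcast along at most one common axis; otherwise the pass decomposes it. Below is my self-contained restatement of the initAxisIdx/isSupportedFQ check over plain shape vectors, not code from the patch.

    #include <cstddef>
    #include <set>
    #include <vector>

    // Index of the only non-unit dimension, or -1 if more than one dimension is non-unit.
    int non_unit_axis(const std::vector<size_t>& shape) {
        int axis = 0, non_unit = 0;
        for (size_t i = 0; i < shape.size(); i++) {
            if (shape[i] > 1) { axis = static_cast<int>(i); non_unit++; }
        }
        return non_unit > 1 ? -1 : axis;
    }

    // All per-channel range inputs must agree on one broadcast axis and one size along it.
    bool is_supported_fq(const std::vector<std::vector<size_t>>& range_shapes) {
        std::set<int> axes;
        std::set<size_t> sizes;
        for (const auto& s : range_shapes) {
            const int axis = non_unit_axis(s);
            if (axis == -1) return false;  // e.g. {1, 3, 4, 1}: two non-unit dims
            if (!s.empty() && s[axis] != 1) {
                axes.insert(axis);
                sizes.insert(s[axis]);
            }
        }
        return axes.size() <= 1 && sizes.size() <= 1;
    }

    // {1, 3, 1, 1} used for all four ranges -> supported (single channel axis);
    // mixing {1, 3, 1, 1} with {1, 1, 4, 1} -> unsupported, so FQ is decomposed.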
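
Reviewer note: outside the MKLDNN pipeline, the new pass can be run through the standard ngraph pass manager. A minimal usage sketch (my own, hedged: the ConstantFolding step is a suggestion for folding the constant range chains, not something this patch mandates):

    #include <memory>

    #include <ngraph/function.hpp>
    #include <ngraph/pass/constant_folding.hpp>
    #include <ngraph/pass/manager.hpp>
    #include <transformations/op_conversions/fq_decomposition.hpp>

    void decompose_fake_quantize(std::shared_ptr<ngraph::Function> f) {
        ngraph::pass::Manager manager;
        manager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
        // Fold the constant sub-expressions produced from constant input ranges.
        manager.register_pass<ngraph::pass::ConstantFolding>();
        manager.run_passes(f);
    }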