From d4df3d7a8eb8a7799d65e28c0761ed8b0460c3af Mon Sep 17 00:00:00 2001 From: mandrono Date: Tue, 22 Dec 2020 15:12:44 +0300 Subject: [PATCH] FakeQuantize decomposition --- .../src/mkldnn_plugin/mkldnn_plugin.cpp | 20 +- .../nodes/mkldnn_quantize_node.cpp | 31 +- .../nodes/mkldnn_quantize_node.h | 4 +- .../op_conversions/fq_decomposition.hpp | 47 +++ .../op_conversions/fq_decomposition.cpp | 124 ++++++++ .../transformations/fq_decomposition_test.cpp | 249 +++++++++++++++ .../cpu/single_layer_tests/fake_quantize.cpp | 288 ++++++++++++++++++ .../plugin/cpu/test_utils/cpu_test_utils.cpp | 2 + .../runtime/reference/fake_quantize.hpp | 4 +- 9 files changed, 762 insertions(+), 7 deletions(-) create mode 100644 inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp create mode 100644 inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp create mode 100644 inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 3200bfb81e6371..606b181d50fc2f 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -57,6 +57,8 @@ #include #include #include +#include +#include #include #include @@ -71,6 +73,8 @@ # include # include +#include "nodes/mkldnn_quantize_node.h" + #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) #if defined(_WIN32) || defined(WIN32) #include @@ -227,13 +231,22 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { transformer.transform(nGraphFunc); } + bool has_fake_quantize = ::ngraph::op::util::has_op_with_type(nGraphFunc); + ngraph::pass::Manager legacyManager; + + legacyManager.register_pass(); legacyManager.register_pass(); legacyManager.register_pass(ngraph::element::i64, ngraph::element::i32); // not legacy actually, but it should be the last transformation in the transformation pipeline legacyManager.register_pass(); auto legacyPassConfig = legacyManager.get_pass_config(); + + legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { + return !MKLDNNQuantizeNode::isNeedToDecompose(node); + }); + legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { if (auto mul_op = std::dynamic_pointer_cast(node)) { auto add_op = std::dynamic_pointer_cast(mul_op->get_input_node_shared_ptr(0)); @@ -248,15 +261,16 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { return false; }); - legacyManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { + legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation return node->get_rt_info().count("UNROLL_TI") == 0; }); + legacyManager.run_passes(nGraphFunc); OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork"); - clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork)); + clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork, has_fake_quantize)); OV_ITT_TASK_NEXT(taskChain, "ConvertIOPrecision"); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp index 8725b38adb0be5..3215bfc8749e13 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp @@ -18,6 +18,8 @@ #include #include "ie_parallel.hpp" +#include + // Quantization ranges validation is switched off by default in order to avoid regressions on user side // #define VALIDATE_QUANTIZATION_RANGES @@ -1029,7 +1031,7 @@ void MKLDNNQuantizeNode::init() { float ih = inputHighData[isInputHighBroadcasted ? 0 : i]; #if defined(VALIDATE_QUANTIZATION_RANGES) - if ((il == ih && levels != 2) || std::isnan(il) || std::isnan(ih) || std::isinf(il) || std::isinf(ih)) { + if ((il == ih && levels != 2) || il > ih || std::isnan(il) || std::isnan(ih) || std::isinf(il) || std::isinf(ih)) { THROW_IE_EXCEPTION << "Quantize layer with name '" << getName() << "' has invalid input quantize ranges: " << "inputLow = " << il << ", inputHigh = " << ih; } @@ -1578,6 +1580,33 @@ void MKLDNNQuantizeNode::appendPostOps(mkldnn::post_ops& ops) { isPostOpDataInitialized = true; } +bool MKLDNNQuantizeNode::isNeedToDecompose(const std::shared_ptr& node) { + if (const auto fq = std::dynamic_pointer_cast(node)) { + for (size_t i = 0; i < fq->get_input_size(); i++) { + if (fq->get_input_shape(i).size() > 5) + return true; + } + + for (size_t i = 1; i < fq->get_input_size(); i++) { + size_t count_not_unit_axis = 0; + auto shape = fq->get_input_shape(i); + + if (ngraph::shape_size(shape) != 1) { + size_t not_unit_axis = 0; + for (size_t i = 0; i < shape.size(); i++) { + if (shape[i] > 1) { + not_unit_axis = i; + count_not_unit_axis++; + } + } + if (count_not_unit_axis > 1 || not_unit_axis > 1) + return true; + } + } + } + return false; +} + bool MKLDNNQuantizeNode::created() const { return getType() == Quantize; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h index e3fff7f72ff20c..234fd103d8ae56 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -113,6 +113,8 @@ class MKLDNNQuantizeNode : public MKLDNNNode { void appendPostOps(mkldnn::post_ops& ops) override; + static bool isNeedToDecompose(const std::shared_ptr& node); + private: void init() override; std::vector getDataFormats() const; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp new file mode 100644 index 00000000000000..cb545cba8ef27e --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/fq_decomposition.hpp @@ -0,0 +1,47 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API FakeQuantizeDecomposition; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief FakeQuantizeDecomposition transformation decomposes FakeQuantize layer. + * + * Expression from specification: + * if x <= min(input_low, input_high): + * output = output_low + * elif x > max(input_low, input_high): + * output = output_high + * else: + * output = round((x - input_low) / (input_high - input_low) * (levels-1)) / (levels-1) * (output_high - output_low) + output_low + * + * expand brackets into round: + * round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) + * div on (levels-1) and mult on (output_high - output_low) => mult on (output_high - output_low) / (levels-1) + * + * => + * round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1) + output_low + * + * This transformation doesn't support following cases: + * 1. At least one 'range' input is not Constant + * 2. At least one 'input_low' input value greater or equal than 'input_high' input value + * + */ + +class ngraph::pass::FakeQuantizeDecomposition: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + FakeQuantizeDecomposition(); +}; diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp new file mode 100644 index 00000000000000..ab4e91e1b324f7 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/fq_decomposition.cpp @@ -0,0 +1,124 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include "transformations/op_conversions/fq_decomposition.hpp" + +#include +#include +#include +#include +#include + +#include + +NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeDecomposition, "FakeQuantizeDecomposition", 0); + +bool isValidRangesInputs(const std::shared_ptr &fq) { + auto il = fq->input_value(1); + auto ih = fq->input_value(2); + auto greater_equal = std::make_shared(il, ih); + + ngraph::OutputVector result(1); + if (!greater_equal->constant_fold(result, greater_equal->input_values())) + return false; + + auto res_node = std::dynamic_pointer_cast(result[0].get_node_shared_ptr()); + + const std::vector comp_result = res_node->cast_vector(); + + return !std::any_of(comp_result.begin(), comp_result.end(), [](const bool value) { return value; }); +} + +ngraph::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { + MATCHER_SCOPE(FakeQuantizeDecomposition); + auto data = ngraph::pattern::any_input(); + auto il = ngraph::pattern::wrap_type(); + auto ih = ngraph::pattern::wrap_type(); + auto ol = ngraph::pattern::wrap_type(); + auto oh = ngraph::pattern::wrap_type(); + auto fake_quantize = ngraph::pattern::wrap_type({data, il, ih, ol, oh}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + auto &pattern_to_output = m.get_pattern_value_map(); + const auto fake_quantize_node = std::dynamic_pointer_cast(pattern_to_output.at(fake_quantize).get_node_shared_ptr()); + + if (fake_quantize_node == nullptr || transformation_callback(fake_quantize_node) || !isValidRangesInputs(fake_quantize_node)) { + return false; + } + + Output data{fake_quantize_node->input_value(0)}; + const Output input_low{fake_quantize_node->input_value(1)}; + const Output input_high{fake_quantize_node->input_value(2)}; + const Output output_low{fake_quantize_node->input_value(3)}; + const Output output_high{fake_quantize_node->input_value(4)}; + auto input_type = data.get_element_type(); + + ngraph::NodeVector decomp_ops; + if (input_type != input_low.get_element_type()) { + input_type = input_low.get_element_type(); + data = std::make_shared(data, input_type); + decomp_ops.push_back(data.get_node_shared_ptr()); + } + + // if we set input_low or input_high in formula we got output = output_low and output = output_high respectively + // so we just clamp x + const auto max = std::make_shared(data, input_low); + const auto min = std::make_shared(max, input_high); + decomp_ops.push_back(max); + decomp_ops.push_back(min); + + // (levels-1) + const auto levels_minus_one = std::make_shared(input_type, Shape{}, fake_quantize_node->get_levels() - 1); + decomp_ops.push_back(levels_minus_one); + // (input_high - input_low) + const auto subInHighLow = std::make_shared(input_high, input_low); + // (levels-1) / (input_high - input_low) + const auto isc = std::make_shared(levels_minus_one, subInHighLow); + // input_low * (levels-1) / (input_high - input_low) + const auto ish = std::make_shared(input_low, isc); + decomp_ops.push_back(subInHighLow); + decomp_ops.push_back(isc); + decomp_ops.push_back(ish); + + // x * (levels-1) / (input_high - input_low) + const auto after_isc_apply = std::make_shared(min, isc); + // x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low) + const auto after_ish_apply = std::make_shared(after_isc_apply, ish); + decomp_ops.push_back(after_isc_apply); + decomp_ops.push_back(after_ish_apply); + + // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) + const auto round = std::make_shared(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN); + decomp_ops.push_back(round); + + // (output_high - output_low) + const auto sub_out_high_low = std::make_shared(output_high, output_low); + // (output_high - output_low) / (levels-1) + const auto osc = std::make_shared(sub_out_high_low, levels_minus_one); + decomp_ops.push_back(sub_out_high_low); + decomp_ops.push_back(osc); + + // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1) + const auto after_osc_apply = std::make_shared(round, osc); + // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1) + + // output_low + std::shared_ptr result = std::make_shared(after_osc_apply, output_low); + decomp_ops.push_back(after_osc_apply); + decomp_ops.push_back(result); + + if (result->get_output_element_type(0) != fake_quantize_node->get_output_element_type(0)) { + result = std::make_shared(result, fake_quantize_node->get_output_element_type(0)); + decomp_ops.push_back(result); + } + + result->set_friendly_name(m.get_match_root()->get_friendly_name()); + ngraph::copy_runtime_info(fake_quantize_node, decomp_ops); + ngraph::replace_node(m.get_match_root(), result); + return true; + }; + + auto m = std::make_shared(fake_quantize, matcher_name); + register_matcher(m, callback); +} diff --git a/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp new file mode 100644 index 00000000000000..25e2bf481e4666 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/fq_decomposition_test.cpp @@ -0,0 +1,249 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "common_test_utils/common_utils.hpp" + +using FakeQuantizeDecompositionBasicParams = std::tuple; + +using FakeQuantizeDecompositionParamsSet = std::tuple, // il and ih values + bool // should be decompos +>; + +class FakeQuantizeDecompositionTest : public CommonTestUtils::TestsCommon, public ::testing::WithParamInterface { +public: + static std::string getTestCaseName(::testing::TestParamInfo obj) { + FakeQuantizeDecompositionBasicParams basic_params; + std::pair input_ranges_values; + bool should_be_decompos; + std::tie(basic_params, input_ranges_values, should_be_decompos) = obj.param; + + ngraph::Shape data_shape, il_shape, ih_shape, ol_shape, oh_shape; + ngraph::element::Type_t data_prec, ranges_prec; + size_t levels; + std::tie(data_prec, data_shape, ranges_prec, il_shape, ih_shape, ol_shape, oh_shape, levels) = basic_params; + + std::ostringstream result; + result << "DATA=" << CommonTestUtils::vec2str(data_shape) << "_"; + result << "DATA_PRC=" << ngraph::element::Type(data_prec) << "_"; + result << "IL=" << CommonTestUtils::vec2str(il_shape) << "_" << input_ranges_values.first << "_"; + result << "IH=" << CommonTestUtils::vec2str(ih_shape) << "_" << input_ranges_values.second << "_"; + result << "OL=" << CommonTestUtils::vec2str(ol_shape) << "_"; + result << "OH=" << CommonTestUtils::vec2str(oh_shape) << "_"; + result << "RANGES_PRC=" << ngraph::element::Type(ranges_prec) << "_"; + result << "LEVELS=" << levels; + return result.str(); + } + +protected: + void SetUp() { + FakeQuantizeDecompositionBasicParams basic_params; + std::pair input_ranges_values; + bool should_be_decompos; + std::tie(basic_params, input_ranges_values, should_be_decompos) = this->GetParam(); + + ngraph::Shape data_shape, il_shape, ih_shape, ol_shape, oh_shape; + ngraph::element::Type_t data_prec, ranges_prec; + size_t levels; + std::tie(data_prec, data_shape, ranges_prec, il_shape, ih_shape, ol_shape, oh_shape, levels) = basic_params; + + bool need_convert = data_prec != ranges_prec; + + std::shared_ptr f(nullptr), f_ref(nullptr); + { + const auto data = std::make_shared(data_prec, ngraph::PartialShape(data_shape)); + const auto il = std::make_shared(ranges_prec, il_shape, input_ranges_values.first); + const auto ih = std::make_shared(ranges_prec, ih_shape, input_ranges_values.second); + const auto ol = std::make_shared(ranges_prec, ol_shape); + const auto oh = std::make_shared(ranges_prec, oh_shape); + + const auto fq = std::make_shared(data, il, ih, ol, oh, levels); + f = std::make_shared(ngraph::NodeVector{fq}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto input_data = std::make_shared(data_prec, ngraph::PartialShape(data_shape)); + ngraph::ParameterVector params; + params.push_back(input_data); + std::shared_ptr data = input_data; + const auto il = std::make_shared(ranges_prec, il_shape, input_ranges_values.first); + const auto ih = std::make_shared(ranges_prec, ih_shape, input_ranges_values.second); + const auto ol = std::make_shared(ranges_prec, ol_shape); + const auto oh = std::make_shared(ranges_prec, oh_shape); + + if (should_be_decompos) { + if (need_convert) { + data = std::make_shared(data, ranges_prec); + } + + const auto max = std::make_shared(data, il); + const auto min = std::make_shared(max, ih); + + const auto levels_minus_one = std::make_shared(ranges_prec, ngraph::Shape{}, levels - 1); + + const auto sub_in_high_low = std::make_shared(ih, il); + const auto isc = std::make_shared(levels_minus_one, sub_in_high_low); + const auto ish = std::make_shared(il, isc); + + const auto after_isc_apply = std::make_shared(min, isc); + const auto after_ish_apply = std::make_shared(after_isc_apply, ish); + + const auto round = std::make_shared(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN); + + const auto sub_out_high_low = std::make_shared(oh, ol); + const auto osc = std::make_shared(sub_out_high_low, levels_minus_one); + + const auto after_osc_apply = std::make_shared(round, osc); + const auto after_out_low_add = std::make_shared(after_osc_apply, ol); + std::shared_ptr result = after_out_low_add; + + if (need_convert) { + result = std::make_shared(result, data_prec); + } + + f_ref = std::make_shared(ngraph::NodeVector{result}, params); + } else { + const auto fq = std::make_shared(data, il, ih, ol, oh, levels); + f_ref = std::make_shared(ngraph::NodeVector{fq}, params); + } + } + + const auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + } +}; + +TEST_P(FakeQuantizeDecompositionTest, CompareFunctions) {} + +const std::vector precisions = {ngraph::element::Type_t::f16, ngraph::element::Type_t::f32}; + +const std::vector levels = {16, 255, 256}; + +const std::vector> input_ranges_supported = { + {-10.0f, 10.f} +}; + +const auto simple_fq_basic = ::testing::Combine(::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{1, 3, 1, 1}), + ::testing::Values(ngraph::Shape{1, 3, 1, 1}), + ::testing::Values(ngraph::Shape{1, 3, 1, 1}), + ::testing::Values(ngraph::Shape{1, 3, 1, 1}), + ::testing::ValuesIn(levels)); + +const auto broadcast_fq_basic = ::testing::Combine(::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{1, 3, 4, 1}), + ::testing::Values(ngraph::Shape{1, 1, 4, 5}), + ::testing::Values(ngraph::Shape{1, 1, 1, 1}), + ::testing::Values(ngraph::Shape{1, 1, 1, 1}), + ::testing::ValuesIn(levels)); + +const auto elementwise_fq_basic = ::testing::Combine(::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::Values(ngraph::Shape{2, 3, 4, 1}), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::Values(ngraph::Shape{2, 3, 4, 5}), + ::testing::ValuesIn(levels)); + +const auto broadcast_6D_fq_basic = ::testing::Combine(::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 5, 6, 7}), + ::testing::ValuesIn(precisions), + ::testing::Values(ngraph::Shape{2, 3, 4, 1, 1, 1}), + ::testing::Values(ngraph::Shape{1, 3, 4, 5, 1, 1}), + ::testing::Values(ngraph::Shape{1, 1, 1, 5, 6, 7}), + ::testing::Values(ngraph::Shape{1, 1, 1, 5, 6, 7}), + ::testing::ValuesIn(levels)); + +INSTANTIATE_TEST_CASE_P(SimpleFakeQuantize_Decomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + simple_fq_basic, + ::testing::ValuesIn(input_ranges_supported), + ::testing::Values(true)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(BroadcastFakeQuantize_Decomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + broadcast_fq_basic, + ::testing::ValuesIn(input_ranges_supported), + ::testing::Values(true)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(ElementwiseFakeQuantize_Decomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + elementwise_fq_basic, + ::testing::ValuesIn(input_ranges_supported), + ::testing::Values(true)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(FakeQuantize6D_Decomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + broadcast_6D_fq_basic, + ::testing::ValuesIn(input_ranges_supported), + ::testing::Values(true)), + FakeQuantizeDecompositionTest::getTestCaseName); + +const std::vector> input_ranges_unsupported = { + {10.0f, -10.f}, + {5.0f, 5.0f}, + {-5.0f, -5.0f} +}; + +INSTANTIATE_TEST_CASE_P(SimpleFakeQuantize_NoDecomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + simple_fq_basic, + ::testing::ValuesIn(input_ranges_unsupported), + ::testing::Values(false)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(BroadcastFakeQuantize_NoDecomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + broadcast_fq_basic, + ::testing::ValuesIn(input_ranges_unsupported), + ::testing::Values(false)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(ElementwiseFakeQuantize_NoDecomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + elementwise_fq_basic, + ::testing::ValuesIn(input_ranges_unsupported), + ::testing::Values(false)), + FakeQuantizeDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(FakeQuantize6D_NoDecomposition, FakeQuantizeDecompositionTest, + ::testing::Combine( + broadcast_6D_fq_basic, + ::testing::ValuesIn(input_ranges_unsupported), + ::testing::Values(false)), + FakeQuantizeDecompositionTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp new file mode 100644 index 00000000000000..5ca327ff39d08f --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp @@ -0,0 +1,288 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +using namespace InferenceEngine; +using namespace ngraph; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using fqSpecificParams = std::tuple, // output low + std::vector, // output high + std::vector, // 'range' inputs shapes + size_t>; // levels + +using fqLayerTestParamsSet = std::tuple, std::vector>, // il and ih values + bool, // should be decomposed + CPUSpecificParams>; + +class FakeQuantizeLayerCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + fqSpecificParams fqParams; + SizeVector inDataShape; + Precision inPrec; + std::pair, std::vector> inputRangesValues; + bool shouldBeDecomposed; + CPUSpecificParams cpuParams; + std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = obj.param; + + int64_t inDataLowBounds, inDataHighBounds; + std::vector inputLow, inputHigh, outputLow, outputHigh; + std::vector inRangesShapes; + size_t levels; + inputLow = inputRangesValues.first; + inputHigh = inputRangesValues.second; + std::tie(inDataLowBounds, inDataHighBounds, outputLow, outputHigh, inRangesShapes, levels) = fqParams; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inDataShape) << "_"; + result << "inPrec=" << inPrec.name() << "_"; + + std::string rs = ""; + for (size_t i = 0; i < inRangesShapes.size(); i++) { + rs += CommonTestUtils::vec2str(inRangesShapes[i]) + "_"; + } + result << "RS=" << rs; + result << "LOW_BOUNDS=" << inDataLowBounds << "_"; + result << "HIGH_BOUNDS=" << inDataHighBounds << "_"; + result << "IL=" << CommonTestUtils::vec2str(inputLow) << "_"; + result << "IH=" << CommonTestUtils::vec2str(inputHigh) << "_"; + result << "OL=" << CommonTestUtils::vec2str(outputLow) << "_"; + result << "OH=" << CommonTestUtils::vec2str(outputHigh) << "_"; + result << "LEVELS=" << levels; + + result << CPUTestsBase::getTestCaseName(cpuParams); + + return result.str(); + } + + void Infer() override { + inferRequest = executableNetwork.CreateInferRequest(); + inputs.clear(); + + const InputsDataMap &inDataMap = cnnNetwork.getInputsInfo(); + auto input = inDataMap.begin(); + + Blob::Ptr blob = FuncTestUtils::createAndFillBlob(input->second->getTensorDesc(), inDataHighBounds - inDataLowBounds, inDataLowBounds); + inferRequest.SetBlob(input->second->name(), blob); + inputs.push_back(blob); + + inferRequest.Infer(); + } + +protected: + std::string layerName; + + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + fqSpecificParams fqParams; + SizeVector inDataShape; + Precision inPrec; + std::pair, std::vector> inputRangesValues; + bool shouldBeDecomposed; + CPUSpecificParams cpuParams; + std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = this->GetParam(); + + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + + std::vector inRangesShapes; + size_t levels; + std::vector> rangesBounds(RANGES_INPUT_NUMBER); + rangesBounds[0] = inputRangesValues.first; + rangesBounds[1] = inputRangesValues.second; + std::tie(inDataLowBounds, inDataHighBounds, rangesBounds[2], rangesBounds[3], inRangesShapes, levels) = fqParams; + + auto ngInPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); + ParameterVector params = builder::makeParams(ngInPrec, {inDataShape}); + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); + + auto il = builder::makeConstant(ngInPrec, inRangesShapes[0], rangesBounds[0], rangesBounds[0].empty()); + auto ih = builder::makeConstant(ngInPrec, inRangesShapes[1], rangesBounds[1], rangesBounds[1].empty()); + auto ol = builder::makeConstant(ngInPrec, inRangesShapes[2], rangesBounds[2], rangesBounds[2].empty()); + auto oh = builder::makeConstant(ngInPrec, inRangesShapes[3], rangesBounds[3], rangesBounds[3].empty()); + auto fq = std::make_shared(paramOuts[0], il, ih, ol, oh, levels); + + layerName = shouldBeDecomposed ? "" : "Quantize"; + + if (selectedType.empty()) { + selectedType = getPrimitiveType() + "_" + inPrec.name(); + } + + fq->get_rt_info() = getCPUInfo(); + + function = std::make_shared(fq, params, "FakeQuantizeCPU"); + } + +private: + const size_t RANGES_INPUT_NUMBER = 4; + + int64_t inDataLowBounds, inDataHighBounds; +}; + +TEST_P(FakeQuantizeLayerCPUTest, CompareWithRefs) { + Run(); + + CheckPluginRelatedResults(executableNetwork, layerName); +} + + +const std::vector levels = {16, 255, 256}; + +int64_t dataLowBounds{-10}, dataHighBounds{10}; + +const std::vector, std::vector>> input_ranges = { + {{0.0f}, {5.f}}, + {{-10.0f}, {-5.f}} +}; + +const std::vector outputLow{5.0f}, outputHigh{25.0f}; + +namespace fqImpl { + +std::vector memForm4D_jit = { + CPUSpecificParams({nchw}, {nchw}, {}, {}), + CPUSpecificParams({nhwc}, {nhwc}, {}, {}), + CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}) +}; + +const std::vector> rangesShapes4D_jit = { + {{1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}}, + {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}} +}; + +const auto specificParams4D_jit = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes4D_jit), + ::testing::ValuesIn(levels)); +const auto testParams4D_jit = ::testing::Combine(specificParams4D_jit, + ::testing::Values(SizeVector{4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(input_ranges), + ::testing::Values(false), + ::testing::ValuesIn(filterCPUSpecificParams(memForm4D_jit))); +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_4D_jit, FakeQuantizeLayerCPUTest, testParams4D_jit, FakeQuantizeLayerCPUTest::getTestCaseName); + + +std::vector memForm4D_ref = { + CPUSpecificParams({nchw}, {nchw}, {"ref_FP32"}, {"ref_FP32"}) +}; + +const std::vector> rangesShapes4D_ref = { + {{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}} +}; + +const auto specificParams4D_ref = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes4D_ref), + ::testing::ValuesIn(levels)); +const auto testParams4D_ref = ::testing::Combine(specificParams4D_ref, + ::testing::Values(SizeVector{4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(input_ranges), + ::testing::Values(false), + ::testing::ValuesIn(memForm4D_ref)); +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_4D_ref, FakeQuantizeLayerCPUTest, testParams4D_ref, FakeQuantizeLayerCPUTest::getTestCaseName); + + +std::vector memForm5D_jit = { + CPUSpecificParams({ncdhw}, {ncdhw}, {}, {}), + CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}), + CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {}) +}; + +const std::vector> rangesShapes5D_jit = { + {{1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}} +}; + +const auto specificParams5D_jit = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes5D_jit), + ::testing::ValuesIn(levels)); +const auto testParams5D_jit = ::testing::Combine(specificParams5D_jit, + ::testing::Values(SizeVector{3, 4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(input_ranges), + ::testing::Values(false), + ::testing::ValuesIn(filterCPUSpecificParams(memForm5D_jit))); + +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_5D_jit, FakeQuantizeLayerCPUTest, testParams5D_jit, FakeQuantizeLayerCPUTest::getTestCaseName); + + +std::vector memForm5D_ref = { + CPUSpecificParams({ncdhw}, {ncdhw}, {"ref_FP32"}, {"ref_FP32"}) +}; + +const std::vector> rangesShapes5D_ref = { + {{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}} +}; + +const auto specificParams5D_ref = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes5D_ref), + ::testing::ValuesIn(levels)); +const auto testParams5D_ref = ::testing::Combine(specificParams5D_ref, + ::testing::Values(SizeVector{3, 4, 5, 6, 7}), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(input_ranges), + ::testing::Values(false), + ::testing::ValuesIn(memForm5D_ref)); + +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_5D_ref, FakeQuantizeLayerCPUTest, testParams5D_ref, FakeQuantizeLayerCPUTest::getTestCaseName); + +} // namespace fqImpl + +const std::vector dataShapes = { + {4, 5, 6, 7}, + {3, 4, 5, 6, 7}, + {2, 3, 4, 5, 6, 7}, +}; + +const std::vector> rangesShapes = { + {{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}, + {{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}, + {{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}, + {{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}, + {{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}} +}; + +namespace fqDecompos { + +const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds), + ::testing::Values(dataHighBounds), + ::testing::Values(outputLow), + ::testing::Values(outputHigh), + ::testing::ValuesIn(rangesShapes), + ::testing::ValuesIn(levels)); +const auto testParams = ::testing::Combine(specificParams, + ::testing::ValuesIn(dataShapes), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(input_ranges), + ::testing::Values(true), + ::testing::Values(CPUSpecificParams{})); + +INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_Decompos, FakeQuantizeLayerCPUTest, testParams, FakeQuantizeLayerCPUTest::getTestCaseName); + +} // namespace fqDecompos + +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp index eae5f1fade7bd1..755c95a7b488e0 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp @@ -77,6 +77,8 @@ std::string CPUTestsBase::impls2str(const std::vector &priority) { } void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const { + if (nodeType.empty()) return; + ASSERT_TRUE(!selectedType.empty()) << "Node type is not defined."; bool isNodeFound = false; InferenceEngine::CNNNetwork execGraphInfo = execNet.GetExecGraphInfo(); diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/fake_quantize.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/fake_quantize.hpp index 3353b6299272e5..f9174f8f9c11e7 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/fake_quantize.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/fake_quantize.hpp @@ -223,11 +223,11 @@ namespace ngraph out_high, i, out_high_offsets); - if (arg[i] <= in_low_val) + if (arg[i] <= std::min(in_low_val, in_high_val)) { out[i] = out_low_val; } - else if (arg[i] > in_high_val) + else if (arg[i] > std::max(in_low_val, in_high_val)) { out[i] = out_high_val; }