From 80b074bf2175727317fa0b082c36ee40eddce5e5 Mon Sep 17 00:00:00 2001 From: Nikita Demashov <65417008+NikDemoShow@users.noreply.github.com> Date: Wed, 15 Sep 2021 17:15:57 +0300 Subject: [PATCH] [LPT] MoveFakeQuantize (#6723) * add move_fake_quantize_for_concat_transformation, mfk and mfk_function * fix relu_transformation.cpp * backup * add change * add cpu test * [LPT] MoveFakeQuantizeTransformation: fixes * get InferenceEngine::NotImplemented * fix ieFuncTests * try without new cpu_test * fix cpuFuncTests and ieFuncTests * fix tests * fix lin * add cpu test * fix link and matcher in move_fake_quantize.cpp * update matcher * add gpu test * naming fix * move_fake_quantize.cpp add set_fr_name for new_concat * naming new fq fix * fix NetworkHelper::copyInfo naming * concat.cpp naming fix * gpu tests fix * rm network_helper changes * rm extra output * resolve conversations * resolve other conversations * add multi inputs for concat * fix lin * fix move_fake_qunatize naming * rm maxpool from mfk_function * mkldnn update * fix style * rm extra change * fix concat matcher * rm mkldnn_plugin changes * fix conversations * fix interval * fix and add isQuantizedStatic, add attribute and negative tests * add negative plugin tests * fix style: Co-authored-by: Edward Shogulin --- .../low_precision/move_fake_quantize.hpp | 25 ++ .../src/concat.cpp | 8 +- .../src/low_precision.cpp | 2 + .../src/move_fake_quantize.cpp | 107 +++++ .../move_fake_quantize_transformation.cpp | 364 ++++++++++++++++++ .../move_fake_quantize_transformation.cpp | 86 +++++ .../move_fake_quantize_transformation.cpp | 86 +++++ .../move_fake_quantize_transformation.hpp | 57 +++ .../move_fake_quantize_transformation.cpp | 77 ++++ .../move_fake_quantize_function.hpp | 41 ++ .../src/move_fake_quantize_function.cpp | 107 +++++ 11 files changed, 957 insertions(+), 3 deletions(-) create mode 100644 inference-engine/src/low_precision_transformations/include/low_precision/move_fake_quantize.hpp create mode 100644 inference-engine/src/low_precision_transformations/src/move_fake_quantize.cpp create mode 100644 inference-engine/tests/functional/inference_engine/lp_transformations/move_fake_quantize_transformation.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/move_fake_quantize_transformation.hpp create mode 100644 inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/move_fake_quantize_transformation.cpp create mode 100644 inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/move_fake_quantize_function.hpp create mode 100644 inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/move_fake_quantize_function.cpp diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/move_fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/move_fake_quantize.hpp new file mode 100644 index 00000000000000..4e0e8054e554e5 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/move_fake_quantize.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + 
+#include +#include +#include "low_precision/layer_transformation.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API MoveFakeQuantize : public LayerTransformation { +public: + NGRAPH_RTTI_DECLARATION; + MoveFakeQuantize(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; + bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; +}; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp index da040d1f897be0..8df69b6fb215bd 100644 --- a/inference-engine/src/low_precision_transformations/src/concat.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat.cpp @@ -138,6 +138,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat const auto convert = convertNodes[0]->clone_with_new_inputs({ newConcat }); NetworkHelper::copyInfo({ concat, convert }, convert); + convert->set_friendly_name(concat->get_friendly_name() + "/DequantizationConvert"); lastDequantization = convert; } @@ -150,6 +151,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat ngraph::pass::low_precision::fold(subtractNodes, 1))); NetworkHelper::copyInfo({ concat, subtract }, subtract); + subtract->set_friendly_name(concat->get_friendly_name() + "/DequantizationSubtract"); lastDequantization = subtract; } @@ -163,6 +165,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat layerDequantizations[0].multiply->get_output_element_type(0)); NetworkHelper::copyInfo({ concat, multiply }, multiply); + multiply->set_friendly_name(concat->get_friendly_name() + "/DequantizationMultyply"); lastDequantization = multiply; } @@ -325,13 +328,12 @@ bool ConcatTransformation::isQuantizedStatic(const std::shared_ptr& return false; } - const auto axis = concat->get_axis(); const auto outputRank = concat->get_output_partial_shape(0).rank(); - if (axis < 0 && outputRank.is_dynamic()) { + if (outputRank.is_dynamic()) { return false; } - const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, outputRank); + const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), concat->get_axis(), outputRank); return normalizedAxis == 1ul; } diff --git a/inference-engine/src/low_precision_transformations/src/low_precision.cpp b/inference-engine/src/low_precision_transformations/src/low_precision.cpp index ca34f0e8c776e2..936ba903ce0718 100644 --- a/inference-engine/src/low_precision_transformations/src/low_precision.cpp +++ b/inference-engine/src/low_precision_transformations/src/low_precision.cpp @@ -66,6 +66,7 @@ #include "low_precision/transpose.hpp" #include "low_precision/unsqueeze.hpp" #include "low_precision/variadic_split.hpp" +#include "low_precision/move_fake_quantize.hpp" // cleanup transformations #include "low_precision/convert.hpp" @@ -197,6 +198,7 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_function(std::shared_ptr< prerequisites->add_matcher(supportedTypes); prerequisites->add_matcher(supportedTypes); prerequisites->add_matcher(); + prerequisites->add_matcher(); manager.register_pass(); diff --git a/inference-engine/src/low_precision_transformations/src/move_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/move_fake_quantize.cpp new file mode 
100644 index 00000000000000..7192282c2d2b36 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/move_fake_quantize.cpp @@ -0,0 +1,107 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/move_fake_quantize.hpp" + +#include +#include + +#include +#include +#include +#include + +#include "low_precision/concat.hpp" +#include "low_precision/network_helper.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MoveFakeQuantize, "MoveFakeQuantize", 0); + +MoveFakeQuantize::MoveFakeQuantize(const Params& params) : LayerTransformation(params) { + const auto concat = ngraph::pattern::wrap_type(pattern::consumers_count(1)); + const auto operation = ngraph::pattern::wrap_type({ concat }); + const auto input_low = ngraph::pattern::wrap_type(); + const auto input_high = ngraph::pattern::wrap_type(); + const auto output_low = ngraph::pattern::wrap_type(); + const auto output_high = ngraph::pattern::wrap_type(); + const auto fq_with_operation = ngraph::pattern::wrap_type({ operation, + input_low, + input_high, + output_low, + output_high}); + const auto fq = ngraph::pattern::wrap_type({ concat, + input_low, + input_high, + output_low, + output_high }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + + return transform(*context, m); + }; + + auto m = std::make_shared( + std::make_shared(OutputVector{fq, fq_with_operation}), + "MoveFakeQuantize"); + this->register_matcher(m, callback); +} + +bool MoveFakeQuantize::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { + auto fq = m.get_match_root(); + auto operation = fq->get_input_node_shared_ptr(0); + std::shared_ptr concat; + bool only_concat = true; + std::string fq_original_name = fq->get_friendly_name(), operation_original_name; + if (is_type(operation)) { + concat = operation; + } else { + operation_original_name = operation->get_friendly_name(); + concat = operation->get_input_node_shared_ptr(0); + only_concat = false; + } + if (!ConcatTransformation::isQuantizedStatic(concat)) { + return false; + } + std::vector> fqs; + size_t input_size = concat->get_input_size(); + for (size_t i{ 0 }; i < input_size; ++i) { + std::shared_ptr fq_input; + if (only_concat) { + fq_input = concat->get_input_node_shared_ptr(i); + } else { + auto input = concat->get_input_node_shared_ptr(i); + fq_input = operation->clone_with_new_inputs({ input }); + fq_input->set_friendly_name(operation_original_name + "_" + std::to_string(i + 1)); + } + auto newFq = fq->clone_with_new_inputs({ fq_input, + fq->get_input_node_shared_ptr(1), + fq->get_input_node_shared_ptr(2), + fq->get_input_node_shared_ptr(3), + fq->get_input_node_shared_ptr(4) }); + newFq->set_friendly_name(fq_original_name + "_" + std::to_string(i + 1)); + fqs.push_back(newFq); + } + ngraph::copy_runtime_info(fq, fqs); + auto newConcat = concat->clone_with_new_inputs(ngraph::OutputVector(fqs.begin(), fqs.end())); + newConcat->set_friendly_name(concat->get_friendly_name()); + replace_node(fq, newConcat); + NetworkHelper::copyInfo(concat, newConcat); + updateOutput(context, newConcat, fq); + return true; +} + +bool MoveFakeQuantize::isPrecisionPreserved(std::shared_ptr layer) const noexcept { + return true; +} + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git 
a/inference-engine/tests/functional/inference_engine/lp_transformations/move_fake_quantize_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/move_fake_quantize_transformation.cpp new file mode 100644 index 00000000000000..76474731b69814 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/move_fake_quantize_transformation.cpp @@ -0,0 +1,364 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +#include + +#include "low_precision/move_fake_quantize.hpp" +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "lpt_ngraph_functions/move_fake_quantize_function.hpp" +#include "lpt_ngraph_functions/common/builders.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" +#include "lpt_ngraph_functions/relu_function.hpp" +#include "simple_low_precision_transformer.hpp" + +using namespace testing; +using namespace ngraph; +using namespace ngraph::pass; + +namespace { + +class MoveFakeQuantizeTransformationActualValues { +public: + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1; + ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1; + ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1; + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2; + ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2; + ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2; + std::string operation; + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter; + ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter; +}; + +inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationActualValues& values) { + return out << "_" << + values.fakeQuantizeBefore1 << "_" << + values.convertBefore1.outPrecision << "_" << + values.dequantizationBefore1 << "_" << + values.fakeQuantizeBefore2 << "_" << + values.convertBefore2.outPrecision << "_" << + values.dequantizationBefore2 << "_" << + values.operation << "_" << + values.fakeQuantizeAfter << "_" << + values.convertAfter.outPrecision << "_" << + values.dequantizationAfter; +} + +class MoveFakeQuantizeTransformationResultValues { +public: + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1; + ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1; + ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1; + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2; + ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2; + ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2; + std::string operation; + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter; + ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter; + ngraph::element::Type precisionAfterOperation; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfterNotFQ; +}; + +inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationResultValues& values) { + return out << "_" << + 
values.fakeQuantizeBefore1 << "_" << + values.convertBefore1.outPrecision << "_" << + values.dequantizationBefore1 << "_" << + values.fakeQuantizeBefore2 << "_" << + values.convertBefore2.outPrecision << "_" << + values.dequantizationBefore2 << "_" << + values.operation << "_" << + values.fakeQuantizeAfter << "_" << + values.convertAfter << "_" << + values.dequantizationAfter << "_" << + values.dequantizationAfterNotFQ; +} + +class MoveFakeQuantizeTransformationTestValues { +public: + MoveFakeQuantizeTransformationTestValues() = default; + MoveFakeQuantizeTransformationTestValues( + const TestTransformationParams& params, + const bool multiChannels, + const std::int64_t axis, + const MoveFakeQuantizeTransformationActualValues& actual, + const MoveFakeQuantizeTransformationResultValues& result, + const bool addNotPrecisionPreservedOperation = false, + const bool checkIntervalsAlignmentAttributes = true) : + params(params), + multiChannels(multiChannels), + axis(axis), + actual(actual), + result(result) {} + + TestTransformationParams params; + bool multiChannels; + std::int64_t axis; + MoveFakeQuantizeTransformationActualValues actual; + MoveFakeQuantizeTransformationResultValues result; + // add not precision preserved operation to set output precision for FakeQuantize + // don't set to 'true' by default to keep test cases with tested operation as output +}; + +inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationTestValues& values) { + return out << "_" << values.multiChannels << "_" << values.actual << "_" << values.result; +} + +typedef std::tuple < + ngraph::element::Type, + ngraph::PartialShape, + MoveFakeQuantizeTransformationTestValues +> MoveFakeQuantizeTransformationParams; + +class MoveFakeQuantizeTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const ngraph::element::Type precision = std::get<0>(GetParam()); + const ngraph::PartialShape shape = std::get<1>(GetParam()); + MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(GetParam()); + + // dequantization output precision depends on input precision + // to avoid huge amount of tests cases let's define dequantization output precision as input precision + if (!testValues.actual.dequantizationBefore1.multiply.empty()) { + testValues.actual.dequantizationBefore1.multiply.outPrecision = precision; + } + if (!testValues.actual.dequantizationBefore2.multiply.empty()) { + testValues.actual.dequantizationBefore2.multiply.outPrecision = precision; + } + + IntervalsAlignmentSharedValue::Interval interval{ -1.28f, 2.55f }; + + actualFunction = ngraph::builder::subgraph::MoveFakeQuantize::get( + precision, + shape, + testValues.actual.fakeQuantizeBefore1, + testValues.actual.convertBefore1, + testValues.actual.dequantizationBefore1, + testValues.actual.fakeQuantizeBefore2, + testValues.actual.convertBefore2, + testValues.actual.dequantizationBefore2, + testValues.actual.operation, + testValues.actual.fakeQuantizeAfter, + testValues.actual.convertAfter, + testValues.actual.dequantizationAfter, + { + ngraph::builder::subgraph::make_shared_attribute_ptr(true), + ngraph::builder::subgraph::make_shared_attribute_ptr(interval, 256), + ngraph::builder::subgraph::make_shared_attribute_ptr(false) + }, + ngraph::element::undefined, + {}, + testValues.axis); + auto supportedPrecisionsOnActivation = std::vector({ + ngraph::pass::low_precision::OperationPrecisionRestriction::create({{0, testValues.params.precisionsOnActivations}}) + }); 
+ + auto quantizationRestrictions = testValues.multiChannels ? + std::vector() : + std::vector({ + ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create() + }); + + const auto params = TestTransformationParams::toParams(testValues.params); + ov::pass::Manager manager; + manager.register_pass(params); + manager.run_passes(actualFunction); + // dequantization output precision depends on input precision + // to avoid huge amount of tests cases let's define dequantization output precision as input precision + if (!testValues.result.dequantizationAfter.multiply.empty()) { + testValues.result.dequantizationAfter.multiply.outPrecision = precision; + } + + if (!testValues.params.updatePrecisions && + (precision == ngraph::element::f32) && + !testValues.result.dequantizationAfter.convert.empty()) { + testValues.result.dequantizationAfter.convert = {}; + } + + referenceFunction = ngraph::builder::subgraph::MoveFakeQuantize::get( + precision, + shape, + testValues.result.fakeQuantizeBefore1, + testValues.result.convertBefore1, + testValues.result.dequantizationBefore1, + testValues.result.fakeQuantizeBefore2, + testValues.result.convertBefore2, + testValues.result.dequantizationBefore2, + testValues.result.operation, + testValues.result.fakeQuantizeAfter, + testValues.result.convertAfter, + testValues.result.dequantizationAfter, + { + ngraph::builder::subgraph::make_shared_attribute_ptr(true), + ngraph::builder::subgraph::make_shared_attribute_ptr(interval, 256), + ngraph::builder::subgraph::make_shared_attribute_ptr(false) + }, + testValues.result.precisionAfterOperation, + {}, + testValues.axis); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + const ngraph::element::Type precision = std::get<0>(obj.param); + const ngraph::PartialShape shape = std::get<1>(obj.param); + const MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(obj.param); + + std::ostringstream result; + result << + LayerTransformation::getTestCaseNameByParams(precision, shape, testValues.params) << "_" << + (testValues.multiChannels ? 
"multiChannels_" : "notMultiChannels_") << + "axis_" << testValues.axis << "_" << + testValues.actual << "_" << + testValues.result << "_"; + return result.str(); + } +}; + +TEST_P(MoveFakeQuantizeTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, true, true, true, true); + ASSERT_TRUE(res.first) << res.second; + + const auto actualFakeQuantizes = LayerTransformation::get(actualFunction); + ASSERT_TRUE(checkIfOutputAttributesSharedValuesAreTheSame>(actualFakeQuantizes)) << + "PrecisionsAttribute are not the same"; +} + +const std::vector precisions = { + ngraph::element::f32, + ngraph::element::f16 +}; + +namespace testValues1 { +const std::vector shapes = { + { 1, 3, 9, 9 }, + { 4, 3, 9, 9 }, + { Dimension::dynamic(), 3, Dimension::dynamic(), Dimension::dynamic() } +}; +const std::vector testValues = { + // U8: concat + { + LayerTransformation::createParamsU8I8(), + false, + 1, + { + {}, + {}, + {}, + {}, + {}, + {}, + "", + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {}, + {} + }, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {}, + {}, + "", + {}, + {}, + {}, + }, + false, + false + }, + { + LayerTransformation::createParamsU8I8(), + false, + 1, + { + {}, + {}, + {}, + {}, + {}, + {}, + "relu", + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {}, + {} + }, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {}, + {}, + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {}, + {}, + "relu", + {}, + {}, + {}, + }, + false, + false + }, + { + LayerTransformation::createParamsU8I8(), + false, + 0, + { + {}, + {}, + {}, + {}, + {}, + {}, + "", + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {}, + {} + }, + { + {}, + {}, + {}, + {}, + {}, + {}, + "", + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {}, + {} + }, + false, + false + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_LPT, + MoveFakeQuantizeTransformation, + ::testing::Combine( + ::testing::ValuesIn(precisions), + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(testValues)), + MoveFakeQuantizeTransformation::getTestCaseName); +} // namespace testValues1 +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp new file mode 100644 index 00000000000000..09ac0e229a1330 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp @@ -0,0 +1,86 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/move_fake_quantize_transformation.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +const std::vector netPrecisions = { + ngraph::element::f32, + //ngraph::element::f16 +}; + +const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true) +}; + +const std::vector params = { + // without operation + { + {}, + {}, + {}, + {}, + {}, + {}, + "", + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {}, + {}, + "Concatenation", + "U8", + 1, + }, + // with ReLU operation + { + {}, + {}, + {}, + {}, + {}, + {}, + 
"relu", + { 256ul, {}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }}, + {}, + {}, + "Concatenation", + "U8", + 1 + }, + // negative axis + { + {}, + {}, + {}, + {}, + {}, + {}, + "", + {256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f}}, + {}, + {}, + "Concatenation", + "FP32", + 0 + } +}; + +const std::vector shapes = { + { 1, 3, 16, 16 }, + { 4, 3, 16, 16 } +}; + +INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(shapes), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(params)), + MoveFakeQuantizeTransformation::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp new file mode 100644 index 00000000000000..86b44f3b248127 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp @@ -0,0 +1,86 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/move_fake_quantize_transformation.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +const std::vector netPrecisions = { + ngraph::element::f32, + ngraph::element::f16 +}; + + const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams(), + }; + + const std::vector params = { + // without operation + { + {}, + {}, + {}, + {}, + {}, + {}, + "", + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {}, + {}, + "Concat", + "U8", + 1, + }, + // with ReLU operation + { + {}, + {}, + {}, + {}, + {}, + {}, + "relu", + { 256ul, {}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }}, + {}, + {}, + "Concat", + "U8", + 1 + }, + // negative axis + { + {}, + {}, + {}, + {}, + {}, + {}, + "", + {256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f}}, + {}, + {}, + "Concat", + "FP32", + 0 + } + }; + + const std::vector shapes = { + { 1, 3, 16, 16 }, + { 4, 3, 16, 16 } + }; + + INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(shapes), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(params)), + MoveFakeQuantizeTransformation::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/move_fake_quantize_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/move_fake_quantize_transformation.hpp new file mode 100644 index 00000000000000..e53eef8b048ee1 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/move_fake_quantize_transformation.hpp @@ -0,0 +1,57 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp" 
+ +#include "low_precision/move_fake_quantize.hpp" + +#include "lpt_ngraph_functions/move_fake_quantize_function.hpp" + +namespace LayerTestsDefinitions { + +class MoveFakeQuantizeTransformationParam { +public: + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1; + ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1; + ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1; + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2; + ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2; + ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2; + std::string operation; + ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter; + ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter; + std::string layerName; + std::string expectedKernelType; + std::int64_t axis; +}; + +typedef std::tuple < + ngraph::element::Type, + ngraph::Shape, + std::string, + ngraph::pass::low_precision::LayerTransformation::Params, + MoveFakeQuantizeTransformationParam +> MoveFakeQuantizeTransformationParams; + +class MoveFakeQuantizeTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; + + void Run() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/move_fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/move_fake_quantize_transformation.cpp new file mode 100644 index 00000000000000..d92181e34c2542 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/move_fake_quantize_transformation.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision_transformations/move_fake_quantize_transformation.hpp" + +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "lpt_ngraph_functions/move_fake_quantize_function.hpp" + +namespace LayerTestsDefinitions { + +std::string MoveFakeQuantizeTransformation::getTestCaseName(testing::TestParamInfo obj) { + ngraph::element::Type netPrecision; + ngraph::PartialShape inputShape; + std::string targetDevice; + ngraph::pass::low_precision::LayerTransformation::Params params; + MoveFakeQuantizeTransformationParam param; + std::tie(netPrecision, inputShape, targetDevice, params, param) = obj.param; + + std::ostringstream result; + result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) << + param.operation << param.fakeQuantizeAfter; + return result.str(); +} + +void MoveFakeQuantizeTransformation::SetUp() { + ngraph::element::Type netPrecision; + ngraph::PartialShape inputShape; + ngraph::pass::low_precision::LayerTransformation::Params params; + MoveFakeQuantizeTransformationParam param; + std::tie(netPrecision, inputShape, targetDevice, params, param) = this->GetParam(); + + function = ngraph::builder::subgraph::MoveFakeQuantize::get( + netPrecision, + inputShape, + param.fakeQuantizeBefore1, + param.convertBefore1, + 
param.dequantizationBefore1, + param.fakeQuantizeBefore2, + param.convertBefore2, + param.dequantizationBefore2, + param.operation, + param.fakeQuantizeAfter, + param.convertAfter, + param.dequantizationAfter, + {}, + {}, + {}, + param.axis); +} + +void MoveFakeQuantizeTransformation::Run() { + LayerTestsCommon::Run(); + + const auto params = std::get<4>(GetParam()); + const auto actualPrecision = getRuntimePrecisionByType(params.layerName); + auto expectedPrecision = params.expectedKernelType; + if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) { + expectedPrecision = "FP16"; + } + EXPECT_EQ(actualPrecision, expectedPrecision); +} + +TEST_P(MoveFakeQuantizeTransformation, CompareWithRefImpl) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/move_fake_quantize_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/move_fake_quantize_function.hpp new file mode 100644 index 00000000000000..9640845c8be6d1 --- /dev/null +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/move_fake_quantize_function.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include "low_precision/layer_transformation.hpp" +#include "common/fake_quantize_on_data.hpp" +#include "common/dequantization_operations.hpp" + +namespace ngraph { +namespace builder { +namespace subgraph { + +class MoveFakeQuantize { +public: + static std::shared_ptr get( + const ngraph::element::Type inputPrecision, + const ngraph::PartialShape& inputShape, + const FakeQuantizeOnDataWithConstant& fqOnData1, + const DequantizationOperations::Convert& convert1, + const DequantizationOperations& dequantization1, + const FakeQuantizeOnDataWithConstant& fqOnData2, + const DequantizationOperations::Convert& convert2, + const DequantizationOperations& dequantization2, + const std::string& operation, + const FakeQuantizeOnDataWithConstant& fqOnData3, + const DequantizationOperations::Convert& convert3, + const DequantizationOperations& dequantization3, + const std::vector>& concatAttributes, + const ngraph::element::Type precisionAfterOperation, + const DequantizationOperations& dequantizationAfter, + const std::int64_t& axis); +}; + +} // namespace subgraph +} // namespace builder +} // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/move_fake_quantize_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/move_fake_quantize_function.cpp new file mode 100644 index 00000000000000..c53ddd3f6df813 --- /dev/null +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/move_fake_quantize_function.cpp @@ -0,0 +1,107 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "lpt_ngraph_functions/move_fake_quantize_function.hpp" +#include + +#include +#include "ngraph_ops/type_relaxed.hpp" +#include "low_precision/network_helper.hpp" + +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" +#include "lpt_ngraph_functions/common/dequantization_operations.hpp" +#include "lpt_ngraph_functions/common/builders.hpp" + +namespace ngraph { +namespace builder { +namespace subgraph { + +using namespace ngraph::pass; + +std::shared_ptr MoveFakeQuantize::get( + const ngraph::element::Type inputPrecision, + const 
ngraph::PartialShape& inputShape,
+    const FakeQuantizeOnDataWithConstant& fqOnData1,
+    const DequantizationOperations::Convert& convert1,
+    const DequantizationOperations& dequantization1,
+    const FakeQuantizeOnDataWithConstant& fqOnData2,
+    const DequantizationOperations::Convert& convert2,
+    const DequantizationOperations& dequantization2,
+    const std::string& operation,
+    const FakeQuantizeOnDataWithConstant& fqOnData3,
+    const DequantizationOperations::Convert& convert3,
+    const DequantizationOperations& dequantization3,
+    const std::vector>& concatAttributes,
+    const ngraph::element::Type precisionAfterOperation,
+    const DequantizationOperations& dequantizationAfter,
+    const std::int64_t& axis) {
+
+    const auto input1 = std::make_shared(inputPrecision, inputShape);
+    input1->set_friendly_name("input1");
+
+    const auto input2 = std::make_shared(inputPrecision, inputShape);
+    input2->set_friendly_name("input2");
+    std::shared_ptr parent1 = input1, parent2 = input2;
+    if (!fqOnData1.empty()) {
+        if (operation == "relu") {
+            auto relu1 = std::make_shared(input1->output(0));
+            parent1 = makeFakeQuantize(relu1, inputPrecision, fqOnData1);
+        } else {
+            parent1 = makeFakeQuantize(input1, inputPrecision, fqOnData1);
+        }
+        parent1->set_friendly_name("concat_fq1");
+        if (!convert1.empty()) {
+            parent1 = std::make_shared(parent1, convert1.outPrecision);
+        }
+        if (!dequantization1.empty()) {
+            parent1 = makeDequantization(parent1, dequantization1);
+        }
+    }
+    if (!fqOnData2.empty()) {
+        if (operation == "relu") {
+            auto relu2 = std::make_shared(input2->output(0));
+            parent2 = makeFakeQuantize(relu2, inputPrecision, fqOnData2);
+        } else {
+            parent2 = makeFakeQuantize(input2, inputPrecision, fqOnData2);
+        }
+        parent2->set_friendly_name("concat_fq2");
+        if (!convert2.empty()) {
+            parent2 = std::make_shared(parent2, convert2.outPrecision);
+        }
+        if (!dequantization2.empty()) {
+            parent2 = makeDequantization(parent2, dequantization2);
+        }
+    }
+    const std::shared_ptr concat = std::make_shared(ngraph::OutputVector{ parent1, parent2 }, axis);
+    concat->set_friendly_name("concat");
+    std::shared_ptr parent = concat;
+    if (!dequantizationAfter.empty()) {
+        const auto lastDequantization = makeDequantization(concat, dequantizationAfter);
+        lastDequantization->set_friendly_name("multiply");
+        parent = lastDequantization;
+    }
+    addAttributes({ parent }, concatAttributes);
+    if (!fqOnData3.empty()) {
+        std::shared_ptr fq;
+        if (operation == "relu") {
+            auto relu = std::make_shared(concat->output(0));
+            fq = makeFakeQuantize(relu, inputPrecision, fqOnData3);
+        } else {
+            fq = makeFakeQuantize(concat, inputPrecision, fqOnData3);
+        }
+        fq->set_friendly_name("fakeQuantizeAfter");
+        parent = fq;
+    }
+    parent->set_friendly_name("output");
+    ngraph::ResultVector results{ std::make_shared(parent) };
+    std::shared_ptr function = std::make_shared(
+        results,
+        ngraph::ParameterVector{ input1, input2 },
+        "MoveFakeQuantize");
+    return function;
+}
+
+} // namespace subgraph
+} // namespace builder
+} // namespace ngraph
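
Illustrative usage (not part of the patch): the sketch below builds the exact pattern the new matcher targets, a two-input Concat over axis 1 followed by a per-tensor FakeQuantize, and runs MoveFakeQuantize through a plain pass manager, mirroring how the lp_transformations unit test above drives the pass. The input shape, the 0..2.55 intervals, the helper name buildAndMoveFakeQuantize and the ngraph header paths are assumptions chosen for the example, not taken from the patch; after run_passes each Concat input is expected to end up with its own FakeQuantize copy (suffixed "_1"/"_2") and the original FakeQuantize after the Concat is removed.

#include <memory>

#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pass/manager.hpp>

#include "low_precision/move_fake_quantize.hpp"

std::shared_ptr<ngraph::Function> buildAndMoveFakeQuantize() {
    using namespace ngraph;

    // Two inputs concatenated along the channel axis: the only axis
    // ConcatTransformation::isQuantizedStatic() accepts (normalized axis == 1).
    const auto input1 = std::make_shared<opset1::Parameter>(element::f32, Shape{ 1, 3, 16, 16 });
    const auto input2 = std::make_shared<opset1::Parameter>(element::f32, Shape{ 1, 3, 16, 16 });
    const auto concat = std::make_shared<opset1::Concat>(OutputVector{ input1, input2 }, 1);

    // Per-tensor FakeQuantize placed after the Concat, i.e. the "fq" branch of the matcher.
    const auto scalar = [](const float value) {
        return opset1::Constant::create(element::f32, Shape{}, std::vector<float>{ value });
    };
    const auto fq = std::make_shared<opset1::FakeQuantize>(
        concat, scalar(0.f), scalar(2.55f), scalar(0.f), scalar(2.55f), 256ul);

    const ResultVector results{ std::make_shared<opset1::Result>(fq) };
    auto function = std::make_shared<Function>(results, ParameterVector{ input1, input2 }, "ConcatWithFQ");

    // Register the transformation on a bare pass manager (here with default Params),
    // as the unit test above does. Afterwards the FakeQuantize is cloned onto each
    // Concat input and the Concat is rebuilt on top of those clones.
    pass::Manager manager;
    manager.register_pass<pass::low_precision::MoveFakeQuantize>();
    manager.run_passes(function);
    return function;
}

Note that the low_precision.cpp hunk registers MoveFakeQuantize among the prerequisite matchers, so in the full LowPrecision pipeline this rewrite happens before the main concat and quantization transformations run, which is what lets them see already-quantized Concat inputs.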