diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp new file mode 100644 index 00000000000000..0e98ea3eeec8f9 --- /dev/null +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include "low_precision/lpt_visibility.hpp" +#include "low_precision/rt_info/attribute_parameters.hpp" +#include "low_precision/rt_info/shared_value_attribute.hpp" + +namespace ngraph { +/** + * @ingroup ie_transformation_common_api + * @brief SkipCleanupAttribute marks an operation which has to be skipped by cleanup transformations. + */ +class LP_TRANSFORMATIONS_API SkipCleanupAttribute : public SharedAttribute { +public: + OPENVINO_RTTI("LowPrecision::SkipCleanup", "", ov::RuntimeAttribute, 0); + SkipCleanupAttribute(const bool skip); + + static ov::Any create(const std::shared_ptr& node, const bool skip); + // visualize shared attributes details in VisualizeTree pass + std::string to_string() const override; +}; +} // namespace ngraph diff --git a/src/common/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp b/src/common/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp index 6bb211acf924fd..6283ef836547ee 100644 --- a/src/common/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp +++ b/src/common/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp @@ -9,6 +9,8 @@ #include "low_precision/rt_info/intervals_alignment_attribute.hpp" #include "low_precision/fake_quantize.hpp" #include "low_precision/network_helper.hpp" +#include "low_precision/rt_info/skip_cleanup_attribute.hpp" + namespace ngraph { namespace pass { 
@@ -112,6 +114,13 @@ bool FuseMultiplyToFakeQuantizeTransformation::canBeTransformed(const Transforma return false; } + auto skip = getAttribute(fq); + if (!skip.empty()) { + if (skip.as().value()) { + return false; + } + } + return true; } diff --git a/src/common/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp b/src/common/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp index 16b94ae9f31f2f..c1bb1801a0a05f 100644 --- a/src/common/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp +++ b/src/common/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp @@ -8,6 +8,7 @@ #include #include "low_precision/fake_quantize.hpp" #include "low_precision/network_helper.hpp" +#include "low_precision/rt_info/skip_cleanup_attribute.hpp" namespace ngraph { namespace pass { @@ -116,6 +117,12 @@ bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const Transforma if (fq->get_output_target_inputs(0).size() != 1) { return false; } + auto skip = getAttribute(fq); + if (!skip.empty()) { + if (skip.as().value()) { + return false; + } + } return true; } diff --git a/src/common/low_precision_transformations/src/low_precision.cpp b/src/common/low_precision_transformations/src/low_precision.cpp index a70fa5d6572264..f584f2a72c49a1 100644 --- a/src/common/low_precision_transformations/src/low_precision.cpp +++ b/src/common/low_precision_transformations/src/low_precision.cpp @@ -240,9 +240,9 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_model(const std::shared_p std::shared_ptr cleanup = manager.register_pass(); cleanup->add_matcher(params); - /* cleanup->add_matcher(params); + cleanup->add_matcher(params); cleanup->add_matcher(params); - cleanup->add_matcher(params);*/ + cleanup->add_matcher(params); // WA: precision restrictions for groupConv must be propagated to MultiplyToGroupConvolution transformation cleanup->add_matcher( params, diff --git 
a/src/common/low_precision_transformations/src/lstm.cpp b/src/common/low_precision_transformations/src/lstm.cpp index 0e4dcd0a64d3ca..d12c7005e31463 100644 --- a/src/common/low_precision_transformations/src/lstm.cpp +++ b/src/common/low_precision_transformations/src/lstm.cpp @@ -15,6 +15,7 @@ #include "low_precision/concat.hpp" #include "low_precision/network_helper.hpp" +#include "../include/low_precision/rt_info/skip_cleanup_attribute.hpp" namespace ngraph { namespace pass { @@ -80,6 +81,8 @@ LSTM::LSTM(const Params& params) : LayerTransformation(params) { const auto dequantization_without_subtract_squeeze = ngraph::pattern::wrap_type( {dequantization_multiply_without_subtract_X, squeeze_constant}); const auto lstm_cell = ngraph::pattern::wrap_type( + {fq_X, fq_H, C, fq_W, fq_R, B}); + const auto lstm_cell_squeeze = ngraph::pattern::wrap_type( {squeeze, fq_H, C, fq_W, fq_R, B}); const auto lstm_cell_with_dequantizations = ngraph::pattern::wrap_type( {dequantization_squeeze, dequantization_multiply_H, C, fq_W, fq_R, B}); @@ -96,8 +99,9 @@ LSTM::LSTM(const Params& params) : LayerTransformation(params) { }; auto m = std::make_shared( - std::make_shared( - OutputVector{lstm_cell, lstm_cell_with_dequantizations, lstm_cell_with_dequantizations_without_subtract}), + std::make_shared(OutputVector{lstm_cell, + lstm_cell_squeeze, lstm_cell_with_dequantizations, + lstm_cell_with_dequantizations_without_subtract}), "LSTM"); this->register_matcher(m, callback); } @@ -114,6 +118,7 @@ bool LSTM::transform(TransformationContext& context, ngraph::pattern::Matcher& m for (size_t parentIndex = 0ul; parentIndex < lstm->get_input_size(); parentIndex++) { auto fq = lstm->get_input_node_shared_ptr(parentIndex); if (is_type(fq)) { + SkipCleanupAttribute::create(fq, true); auto fq_parent = fq->get_input_node_shared_ptr(0); if (is_type(fq_parent)) { auto fq_node = as_type_ptr(fq); @@ -130,12 +135,12 @@ bool LSTM::transform(TransformationContext& context, ngraph::pattern::Matcher& m 
dataPrecision.hasZeroPoint, updatePrecisions); std::shared_ptr new_fq = std::get<0>(QDQ); - std::shared_ptr dequantize = std::get<1>(QDQ); - this->register_new_node(new_fq); - if (dequantize == nullptr || new_fq == nullptr) { + std::shared_ptr deq_multiply = std::get<1>(QDQ); + if (deq_multiply == nullptr || new_fq == nullptr) { return false; } - updateOutput(context, dequantize, new_fq); + this->register_new_node(new_fq); + updateOutput(context, deq_multiply, new_fq); } else { continue; } diff --git a/src/common/low_precision_transformations/src/rt_info/skip_cleanup_attribute.cpp b/src/common/low_precision_transformations/src/rt_info/skip_cleanup_attribute.cpp new file mode 100644 index 00000000000000..bc05e369012eb7 --- /dev/null +++ b/src/common/low_precision_transformations/src/rt_info/skip_cleanup_attribute.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/skip_cleanup_attribute.hpp" + +#include +#include +#include +#include +#include + +#include +#include "low_precision/network_helper.hpp" +#include "low_precision/layer_transformation.hpp" + +using namespace ngraph; +using namespace ov; + +SkipCleanupAttribute::SkipCleanupAttribute(const bool skip) + : + SharedAttribute(skip) { +} + +ov::Any SkipCleanupAttribute::create( + const std::shared_ptr& node, + const bool skip) { + auto& rt = node->get_rt_info(); + return (rt[SkipCleanupAttribute::get_type_info_static()] = SkipCleanupAttribute(skip)); +} + +std::string SkipCleanupAttribute::to_string() const { + std::stringstream ss; + ss << "SkipCleanup: {"; + attribute ? 
ss << "True" : ss << "False"; + ss << "}"; + return ss.str(); +} diff --git a/src/tests/functional/inference_engine/lp_transformations/lstm_transformation.cpp b/src/tests/functional/inference_engine/lp_transformations/lstm_transformation.cpp index d4237cb17312ae..5dc016a03b0a61 100644 --- a/src/tests/functional/inference_engine/lp_transformations/lstm_transformation.cpp +++ b/src/tests/functional/inference_engine/lp_transformations/lstm_transformation.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -49,16 +51,16 @@ class LSTMTransformationTestValues { public: LSTMTransformationTestValues() = default; LSTMTransformationTestValues(const TestTransformationParams& params, - bool multiChannels, const bool bias, + const bool squeeze, const LSTMFunction::LSTMType type, const LSTMTransformationValues& actual, const LSTMTransformationValues& result, const bool addNotPrecisionPreservedOperation = false, const bool checkIntervalsAlignmentAttributes = true) : params(params), - multiChannels(multiChannels), bias(bias), + squeeze(squeeze), type(type), actual(actual), result(result), @@ -66,8 +68,8 @@ class LSTMTransformationTestValues { checkIntervalsAlignmentAttributes(checkIntervalsAlignmentAttributes) {} TestTransformationParams params; - bool multiChannels; bool bias; + bool squeeze; LSTMFunction::LSTMType type; LSTMTransformationValues actual; LSTMTransformationValues result; @@ -78,7 +80,7 @@ class LSTMTransformationTestValues { }; inline std::ostream& operator<<(std::ostream& out, const LSTMTransformationTestValues& values) { - return out << "_" << values.multiChannels << "_" << values.actual << "_" << values.result; + return out << "_" << values.bias << "_" << values.actual << "_" << values.result; } typedef std::tuple, LSTMTransformationTestValues> @@ -95,6 +97,7 @@ class LSTMTransformation : public LayerTransformation, public testing::WithParam shapes, testValues.type, testValues.bias, + testValues.squeeze, 
testValues.actual.fakeQuantizes, testValues.actual.converts, testValues.actual.dequantizations, @@ -107,8 +110,14 @@ class LSTMTransformation : public LayerTransformation, public testing::WithParam SimpleLowPrecisionTransformer transformer; transformer.commonGraphRewrite->add_matcher(params); transformer.commonGraphRewrite->add_matcher(params); - transformer.transform(actualFunction); + transformer.commonGraphRewrite->add_matcher(params); + transformer.commonGraphRewrite->add_matcher(params); + transformer.commonGraphRewrite->add_matcher(params); + transformer.commonGraphRewrite->add_matcher(params); + transformer.transform(actualFunction); + ngraph::pass::VisualizeTree("C:\\Users\\ndemasho\\rep\\Visual\\test.transform.dot") + .run_on_function(actualFunction); // dequantization output precision depends on input precision // to avoid huge amount of tests cases let's define dequantization output precision as input precision if (!testValues.result.dequantizationAfter.multiply.empty()) { @@ -126,6 +135,7 @@ class LSTMTransformation : public LayerTransformation, public testing::WithParam shapes, testValues.type, testValues.bias, + testValues.squeeze, testValues.result.fakeQuantizes, {{}, {}, {}}, testValues.result.dequantizations, @@ -142,6 +152,7 @@ class LSTMTransformation : public LayerTransformation, public testing::WithParam std::ostringstream result; result << LayerTransformation::getTestCaseNameByParams(precision, shapes[0], testValues.params) << "_" << (testValues.bias ? "with_bias_" : "without_bias_") + << (testValues.squeeze ? 
"with_squeeze_" : "without_squeeze_") << "_" << testValues.actual << "_" << testValues.result << "_"; return result.str(); } @@ -176,38 +187,129 @@ namespace testValues1 { const std::vector> shapes = {{{1, 1, 16}, {1, 128}, {1, 128}}}; const std::vector testValues = { - // LSTM Cell without bias + // LSTM Cell without squeeze on X {LayerTransformation::createParamsU8I8(), false, - false, + true, LSTMFunction::LSTMType::Cell, - {{{256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, - {255ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, - {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}}, - {{}, {}, {}}, - {{}, {}, {}}}, - {{{256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8}, {}, {}}, - {}, - {{{element::f32}, {}, {0.01f}}, {{element::f32}, {}, {0.01f}}, {{element::f32}, {}, {0.01f}}}}, - true}, + { + { + {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {255ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}} + }, + {{}, {}, {}}, + {{}, {}, {}} + }, + { + { + {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8}, + {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8}, + {} + }, + {}, + { + {{element::f32}, {}, {0.01f}}, + {{element::f32}, {}, {0.01f}}, + {{element::f32}, {}, {0.01f}}} + }, + true}, // LSTM Cell with bias {LayerTransformation::createParamsU8I8(), - false, + true, + true, + LSTMFunction::LSTMType::Cell, + { + { + {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {255ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}} + }, + {{}, {}, {}}, + {{}, {}, {}} + }, + { + { + {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8}, + {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8}, + {} + }, + {}, + { + {{element::f32}, {}, {0.01f}}, + {{element::f32}, {}, {0.01f}}, + {{element::f32}, {}, {0.01f}} + } + }, + true}, + // LSTM Cell multi channels + /*{LayerTransformation::createParamsU8I8(), true, LSTMFunction::LSTMType::Cell, { { + { + 256ul, + {{1, 1, 3}, 
{1, 1, 3}, {1, 1, 3}, {1, 1, 3}}, + {0.f, 0.f, 0.f}, {2.55f, 2.55f, 2.55f}, {0.f, 0.f, 0.f}, {2.55f / 1.f, 2.55f / 2.f, 2.55f / 3.f} + }, {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, {255ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, - {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}} }, {{}, {}, {}}, {{}, {}, {}} }, { { + { + 256ul, + {{1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}}, + {0.f, 0.f, 0.f}, {2.55f, 2.55f, 2.55f}, {0.f, 0.f, 0.f}, {255.f, 255.f, 255.f}, + ngraph::element::u8 + }, + {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8}, + {} + }, + {}, + { + {{element::f32}, {}, {{ 0.01f / 1.f, 0.01f / 2.f, 0.01f / 3.f, 0.005f / 1.f, 0.005f / 2.f, 0.005f / 3.f }}}, + {{element::f32}, {}, {{ 0.01f / 1.f, 0.01f / 2.f, 0.01f / 3.f, 0.005f / 1.f, 0.005f / 2.f, 0.005f / 3.f }}}, + {{element::f32}, {}, {{ 0.01f / 1.f, 0.01f / 2.f, 0.01f / 3.f, 0.005f / 1.f, 0.005f / 2.f, 0.005f / 3.f }}}, + } + }, + true}*/ +}; +INSTANTIATE_TEST_SUITE_P( + smoke_LPT, + LSTMTransformation, + ::testing::Combine( + ::testing::ValuesIn(precisions), + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(testValues)), + LSTMTransformation::getTestCaseName); +} // namespace testValues1 +namespace testValues2 { +const std::vector> shapes = {{{1, 16}, {1, 128}, {1, 128}}}; + +const std::vector testValues = { + // LSTM Cell + /*{LayerTransformation::createParamsU8I8(), + false, + false, + LSTMFunction::LSTMType::Cell, + { + { + {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + {255ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}} + }, + {{}, {}, {}}, + {{}, {}, {}} + }, + { + { + {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8}, {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8}, - {}, {} }, {}, @@ -217,7 +319,7 @@ const std::vector testValues = { {{element::f32}, {}, {0.01f}} } }, - true} + true}*/ }; INSTANTIATE_TEST_SUITE_P( smoke_LPT, @@ -227,5 +329,5 @@ INSTANTIATE_TEST_SUITE_P( ::testing::ValuesIn(shapes), 
::testing::ValuesIn(testValues)), LSTMTransformation::getTestCaseName); -} // namespace testValues1 -} // namespace \ No newline at end of file +} // namespace testValues2 +} // namespace \ No newline at end of file diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/lstm_function.hpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/lstm_function.hpp index d669b421dd02e3..2c2ea3e84e6034 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/lstm_function.hpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/lstm_function.hpp @@ -24,6 +24,7 @@ class LSTMFunction { const std::vector& inputShapes, const LSTMType type, const bool bias, + const bool squeeze, const std::vector& fqOnDatas, const std::vector& converts, const std::vector& dequantizations, diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/lstm_function.cpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/lstm_function.cpp index f05e09858b4944..e54cc86385ae02 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/lstm_function.cpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/lstm_function.cpp @@ -28,6 +28,7 @@ std::shared_ptr LSTMFunction::get( const std::vector& inputShapes, const LSTMType type, const bool bias, + const bool squeeze, const std::vector& fqOnDatas, const std::vector& converts, const std::vector& dequantizations, @@ -43,7 +44,7 @@ std::shared_ptr LSTMFunction::get( converts[0], dequantizations[0]); std::shared_ptr squeeze_X; - if (type == LSTMType::Cell) { + if (squeeze) { auto squeeze_pattern = ngraph::opset5::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1}); squeeze_X = std::make_shared(parent_X, squeeze_pattern); squeeze_X->set_friendly_name("squeeze_X"); @@ -53,47 +54,57 @@ std::shared_ptr LSTMFunction::get( std::shared_ptr parent_H = makeQuantizationAndDequantization(H, inputPrecision, H->get_friendly_name(), - 
fqOnDatas[0], - converts[0], - dequantizations[0]); + fqOnDatas[1], + converts[1], + dequantizations[1]); auto C = std::make_shared(inputPrecision, inputShapes[2]); C->set_friendly_name("C"); auto w_val = std::vector(512 * 16, 0); auto r_val = std::vector(512 * 128, 0); - auto W = ngraph::opset5::Constant::create(fqOnDatas[1].empty() ? ngraph::element::u8 : inputPrecision, + auto W = ngraph::opset5::Constant::create(fqOnDatas[2].empty() ? ngraph::element::u8 : inputPrecision, type == LSTMType::Cell ? ngraph::Shape{512, 16} : ngraph::Shape{1, 512, 16}, w_val); std::shared_ptr parent_W = makeQuantizationAndDequantization(W, inputPrecision, W->get_friendly_name(), - fqOnDatas[1], - converts[1], - dequantizations[1]); - auto R = ngraph::opset5::Constant::create(fqOnDatas[1].empty() ? ngraph::element::u8 : inputPrecision, + fqOnDatas[2], + converts[2], + dequantizations[2]); + auto R = ngraph::opset5::Constant::create(fqOnDatas[2].empty() ? ngraph::element::u8 : inputPrecision, type == LSTMType::Cell ? ngraph::Shape{512, 128} : ngraph::Shape{1, 512, 128}, r_val); std::shared_ptr parent_R = makeQuantizationAndDequantization(R, inputPrecision, R->get_friendly_name(), - fqOnDatas[1], - converts[1], - dequantizations[1]); + fqOnDatas[2], + converts[2], + dequantizations[2]); auto b_val = std::vector(512, 0); auto B = ngraph::opset5::Constant::create(inputPrecision, type == LSTMType::Cell ? ngraph::Shape{512} : ngraph::Shape{1, 512}, b_val); std::shared_ptr lstm; - if (bias) { - if (type == LSTMType::Cell) { - lstm = std::make_shared(squeeze_X, parent_H, C, parent_W, parent_R, B, 128); - lstm->set_friendly_name("lstm_cell"); + if (type == LSTMType::Cell) { + if (bias) { + lstm = std::make_shared(squeeze ? squeeze_X : parent_X, + parent_H, + C, + parent_W, + parent_R, + B, + 128); } else { - + lstm = std::make_shared(squeeze ? 
squeeze_X : parent_X, + parent_H, + C, + parent_W, + parent_R, + 128); } - } else { - lstm = std::make_shared(squeeze_X, parent_H, C, parent_W, parent_R, 128); lstm->set_friendly_name("lstm_cell"); + } else { + // } auto& rtInfo = lstm->get_rt_info();