diff --git a/src/common/low_precision_transformations/src/convolution.cpp b/src/common/low_precision_transformations/src/convolution.cpp index 46831f0586c5cf..4bd2dd31f24534 100644 --- a/src/common/low_precision_transformations/src/convolution.cpp +++ b/src/common/low_precision_transformations/src/convolution.cpp @@ -237,8 +237,15 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph Shape newScaleShape = newScalePShape.to_shape(); if (!newScaleShape.empty()) { - // that's all we need: [C, 1, 1, 1] => [C, 1, 1] - newScaleShape.pop_back(); + const auto input_shape = convolution->get_input_partial_shape(0); + const auto diff = newScaleShape.size() - input_shape.size(); + OPENVINO_ASSERT( + newScaleShape.empty() || ((0 <= diff) && (diff <= 2ull)), + "unexpected shape size on weights"); + + for (size_t i = 0; i <= diff; ++i) { + newScaleShape.pop_back(); + } } if (reshapeFromWeights != nullptr) { @@ -282,7 +289,12 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph const size_t weightsRankValue = weightsPShape.rank().get_length(); Shape zeroPointShape(weightsRankValue, 1ul); + // output channel or group zeroPointShape[0] = static_cast(weightsPShape[0].get_length()); + if ((reshapeFromWeights == nullptr) && (weightsRankValue == 5ull)) { + // output channel + zeroPointShape[1] = static_cast(weightsPShape[1].get_length()); + } auto zeroPointConstant = fold( subtractFromWeights->input_value(1), diff --git a/src/common/low_precision_transformations/src/pull_reshape_through_dequantization.cpp b/src/common/low_precision_transformations/src/pull_reshape_through_dequantization.cpp index 6cdb2ee9c43f03..911f0eccac1f28 100644 --- a/src/common/low_precision_transformations/src/pull_reshape_through_dequantization.cpp +++ b/src/common/low_precision_transformations/src/pull_reshape_through_dequantization.cpp @@ -114,11 +114,6 @@ ngraph::pass::low_precision::PullReshapeThroughDequantization::PullReshapeThroug const auto& 
opsMap = m.get_pattern_value_map(); auto reshape = opsMap.at(reshapeWrapper).get_node_shared_ptr(); - auto child = reshape->get_output_target_inputs(0).begin()->get_node(); - if (ov::is_type(child)) { - return false; - } - while (reshape != nullptr) { const auto parent = reshape->get_input_node_shared_ptr(0); if (ov::is_type(parent) || ov::is_type(parent)) { diff --git a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp index 1cfe4bb51d3ac7..1837f21635235c 100644 --- a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp +++ b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp @@ -230,16 +230,16 @@ bool WeightableLayerTransformation::isQuantizedStatic(const std::shared_ptrget_input_node_shared_ptr(1); - if (!ov::is_type(reshape)) { - return false; - } + std::shared_ptr parent = ov::is_type(reshape) ? + reshape->get_input_node_shared_ptr(0) : + reshape; - if (ov::is_type(reshape->get_input_node_shared_ptr(0))) { - const std::shared_ptr fq = ov::as_type_ptr(reshape->get_input_node_shared_ptr(0)); + const auto fq = ov::as_type_ptr(parent); + if (fq != nullptr) { return NetworkHelper::isQuantizeSupported(fq); } - dequantizationOnWeights = NetworkHelper::getDequantization(reshape, defaultPrecisions, 0); + dequantizationOnWeights = NetworkHelper::getDequantization(parent, defaultPrecisions, 0, true); } else if (ov::is_type(layer->get_input_node_shared_ptr(1))) { const std::shared_ptr fq = ov::as_type_ptr(layer->get_input_node_shared_ptr(1)); return NetworkHelper::isQuantizeSupported(fq); diff --git a/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp b/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp index b20d211d651adc..d148c370dfd2f8 100644 --- a/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp +++ 
b/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp @@ -48,6 +48,7 @@ class GroupConvolutionTestValues { TestTransformationParams params; size_t group; int groupCalculationDimention; + bool addReshape; Actual actual; Expected expected; }; @@ -76,7 +77,8 @@ class GroupConvolutionTransformation : public LayerTransformation, testValues.actual.dequantizationOnWeights, ngraph::element::f32, {}, - ngraph::element::f32); + ngraph::element::f32, + testValues.addReshape); SimpleLowPrecisionTransformer transform; transform.add( @@ -101,7 +103,8 @@ class GroupConvolutionTransformation : public LayerTransformation, testValues.expected.dequantizationOnWeights, testValues.expected.precisionAfterOperation, testValues.expected.dequantizationAfter, - testValues.expected.precisionAfterDequantization); + testValues.expected.precisionAfterDequantization, + testValues.addReshape); } static std::string getTestCaseName(testing::TestParamInfo obj) { @@ -113,7 +116,9 @@ class GroupConvolutionTransformation : public LayerTransformation, result << toString(testValues.params) << "_" << inputShape << "_" << outputShape << "_" << testValues.group << "_" << testValues.groupCalculationDimention << "_" << testValues.actual.precisionBeforeDequantization << "_" << testValues.actual.dequantization << "_" - << "_weights_" << testValues.actual.weights->get_element_type() << "_" + << "_add_reshape:" << testValues.addReshape << "_" + << "_weights_type:" << testValues.actual.weights->get_element_type() << "_" + << "_weights_shape:" << testValues.actual.weights->get_shape() << "_" << "{ " << testValues.actual.weights->cast_vector()[0] << " }_" << testValues.actual.fakeQuantizeOnWeights << "_"; return result.str(); @@ -128,333 +133,520 @@ TEST_P(GroupConvolutionTransformation, CompareFunctions) { ASSERT_TRUE(LayerTransformation::allNamesAreUnique(actualFunction)) << "Not all names are unique"; } +// clang-format off namespace testValues1 { + const std::vector> 
shapesForGroupConv = { {{1, 6, 224, 224}, {1, 24, 218, 218}}, - {{-1, -1, -1, -1}, {-1, -1, -1, -1}}}; + {{-1, -1, -1, -1}, {-1, -1, -1, -1}} +}; const std::vector testValuesGroupConv = { // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - 3ul, - 0, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, 
{1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + 3ul, + 0, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - 3ul, - 1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + 3ul, + 1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + 
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(false), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(false), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + ngraph::element::f32, + {} + } + }, + // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), - 3ul, - -1, - // ActualValues - {ngraph::element::f32, 
- {{}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::f32, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {{}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, + // group convolution, per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, - {}, - {{// 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - // 0.0004 = 0.04 (on data) * 
0.01 (on weights) - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - // 0.0008 = 0.08 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f}, - ngraph::element::f32, - {1, 24, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } + }, + } + }, + // group convolution, per-channel quantization with the same values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - { - ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - }, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, 
- }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, + } + }, + // group convolution, without zero point, without convert - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), - {}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), + {}, + {}, + ngraph::element::f32, + {} + } + }, + // group convolution, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{element::f32}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {}, - 
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{element::f32}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {ngraph::element::f32, {}, {0.01f}}}, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {}, - ngraph::element::f32, - {{}, - {}, - {{// 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - // 0.0004 = 0.04 (on data) * 0.01 (on weights) - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - // 0.0008 = 0.08 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f}, - ngraph::element::f32, - {1, 24, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, + 
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + {ngraph::element::f32, {}, {0.01f}} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + {}, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } + }, + } + }, // per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, - {{255}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true}, - {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {ngraph::element::f32, {{127}, ngraph::element::f32, {}, true, 1, ngraph::element::i8, true}, {0.01f}}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {std::vector(6ul, 255.f), ngraph::element::f32, {1, 6, 1, 1}, false, 1, ngraph::element::u8}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {{}, - {std::vector(24ul, 127.f), - ngraph::element::f32, - {24, 1, 1, 1}, - false, - 1, - ngraph::element::i8, - false, - {{ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding()}}}, - {}}, - ngraph::element::f32, - {{}, - {}, - {{// 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - // 0.0004 = 
0.04 (on data) * 0.01 (on weights) - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - // 0.0008 = 0.08 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f}, - ngraph::element::f32, - {1, 24, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + {{255}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true}, + {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + { + ngraph::element::f32, + {{127}, ngraph::element::f32, {}, true, 1, ngraph::element::i8, true}, + {0.01f} + } + }, + // ExpectedValues + { + ngraph::element::u8, + { + {}, + {std::vector(6ul, 255.f), ngraph::element::f32, {1, 6, 1, 1}, false, 1, ngraph::element::u8}, + {} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + { + {}, + { + std::vector(24ul, 127.f), + ngraph::element::f32, + {24, 1, 1, 1}, + false, + 1, + ngraph::element::i8, + false, + {{ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding()}} + }, + {} + }, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } + }, + } + }, + + // per-channel quantization with different values, without zero point, no reshape - 5D weights + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + 
false, + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + {{255}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true}, + {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{3,8,2,7,7}, std::vector{2.f}), + {}, + { + ngraph::element::f32, + {{127}, ngraph::element::f32, {}, true, 1, ngraph::element::i8, true}, + {0.01f} + } + }, + // ExpectedValues + { + ngraph::element::u8, + { + {}, + {std::vector(6ul, 255.f), ngraph::element::f32, {1, 6, 1, 1}, false, 1, ngraph::element::u8}, + {} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{3,8,2,7,7}, std::vector{2.f}), + {}, + { + {}, + { + std::vector(24ul, 127.f), + ngraph::element::f32, + {3, 8, 1, 1, 1}, + false, + 1, + ngraph::element::i8, + false, + {{ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding()}} + }, + {} + }, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } + }, + } + }, }; INSTANTIATE_TEST_SUITE_P(smoke_LPT, @@ -472,146 +664,201 @@ const std::vector> shapesF const std::vector testValuesForDepthWiseConv = { // depth-wise convolution, per-tensor quantization, with zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // 
ExpectedValues - {ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // depth-wise convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), - 3ul, - -1, - // ActualValues - {ngraph::element::f32, - {{}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {{}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + 
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // depth-wise convolution, per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, - {}, - {{ - 0.0002f, - 0.0002f, // 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0004f, - 0.0004f, // 0.0004 = 0.04 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f // 0.0008 = 0.08 (on data) * 0.01 (on weights) - }, - ngraph::element::f32, - {1, 6, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + { + {}, + {}, + { + { + 0.0002f, + 0.0002f, // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f // 0.0008 = 0.08 (on data) * 0.01 (on weights) + }, + ngraph::element::f32, + {1, 6, 1, 1} + } + }, + } + }, + // depth-wise convolution, per-tensor quantization with 
the same values, without zero point - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, + } + }, + // depth-wise convolution, without zero point, without convert - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), - {}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + 
{} + }, + // ExpectedValues + { + ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), + {}, + {}, + ngraph::element::f32, + {} + } + }, + // depth-wise convolution, without zero point - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::u8, - {{element::f32}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{element::f32}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // without dequantization operations - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::f32, - {}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {}, + 
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + ngraph::element::f32, + {} + } + }, }; INSTANTIATE_TEST_SUITE_P(smoke_LPT, @@ -623,27 +870,35 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, namespace testValues3 { const std::vector> shapesWithDynamicChannel = { - {PartialShape::dynamic(), PartialShape::dynamic()}}; + {PartialShape::dynamic(), PartialShape::dynamic()} +}; const std::vector testValuesWithDynamicChannel = { // depth-wise convolution, per-tensor quantization, with zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + ngraph::element::f32, + {} + } + }, }; 
INSTANTIATE_TEST_SUITE_P(smoke_LPT, @@ -652,3 +907,4 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, ::testing::ValuesIn(testValuesWithDynamicChannel)), GroupConvolutionTransformation::getTestCaseName); } // namespace testValues3 +// clang-format on diff --git a/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp b/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp index 14e8f4361ceb22..7f331e00ac0dcb 100644 --- a/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp +++ b/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp @@ -133,11 +133,17 @@ TEST_P(PullReshapeThroughDequantizationTransformation, CompareFunctions) { ASSERT_TRUE(res.first) << res.second; } -const std::vector inputShapes = {ngraph::Shape({1, 960, 7, 7}), ngraph::Shape({4, 960, 7, 7})}; +// clang-format off + +const std::vector inputShapes = { + ngraph::Shape({1, 960, 7, 7}), + ngraph::Shape({4, 960, 7, 7}) +}; const std::vector> dequantizationOnWeightElementwiseConstantShapes = { - {ngraph::Shape({1, 960}), ngraph::Shape({960, 1, 1, 1})}, - {ngraph::Shape({9, 960}), ngraph::Shape({960, 1, 3, 3})}}; + {ngraph::Shape({1, 960}), ngraph::Shape({960, 1, 1, 1, 1})}, + {ngraph::Shape({9, 960}), ngraph::Shape({960, 1, 1, 3, 3})} +}; const std::vector multiplyShapes = {ngraph::Shape({1, 1, 960, 1})}; @@ -193,37 +199,51 @@ const std::vector testValues = { // \ / // Multiply // - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {9, 960}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false}, - {{0.03f}, element::f32, {/* from parameter
*/}, false}}, - {{3, 3, 960, 1}}, - {{2}, element::f32, {/* from parameter: multiplyShapes */}, false}, - {{2, 3, 0, 1}}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {960, 1, 3, 3}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false}, - {{0.06f}, element::f32, {/* from parameter */}, false}}, - {}, - {}, - {}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {9, 960}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false }, + { {0.03f}, element::f32, {/* from parameter */}, false } + }, + { {3, 3, 960, 1} }, + { {2}, element::f32, {/* from parameter: multiplyShapes */}, false }, + { {2, 3, 0, 1} }, + { {960, 1, 1, 3, 3} }, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {960, 1, 1, 3, 3}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false }, + { {0.06f}, element::f32, {/* from parameter */}, false } + }, + {}, + {}, + {}, + {}, + ngraph::element::f32, + {} + } + }, // Subtract with Convert + Constant // Actual: @@ -276,37 +296,54 @@ const std::vector testValues = { // \ / // Multiply // - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), 
- // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {9, 960}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true}, - {{0.03f}, element::f32, {/* from parameter */}, false}}, - {{3, 3, 960, 1}}, - {{2}, element::f32, {/* from parameter: multiplyShapes */}, false}, - {{2, 3, 0, 1}}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {960, 1, 3, 3}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true}, - {{0.06f}, element::f32, {/* from parameter */}, false}}, - {}, - {}, - {}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}}}; + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {9, 960}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true }, + { {0.03f}, element::f32, {/* from parameter */}, false } + }, + { {3, 3, 960, 1} }, + { {2}, element::f32, {/* from parameter: multiplyShapes */}, false }, + { {2, 3, 0, 1} }, + { {960, 1, 1, 3, 3} }, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {960, 1, 1, 
3, 3}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true }, + { {0.06f}, element::f32, {/* from parameter */}, false } + }, + {}, + {}, + {}, + {}, + ngraph::element::f32, + {} + } + } +}; + +// clang-format on INSTANTIATE_TEST_SUITE_P(smoke_LPT, PullReshapeThroughDequantizationTransformation, diff --git a/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp b/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp index 8b5d9a0b6447b8..a6b8efa5172ee4 100644 --- a/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp +++ b/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp @@ -126,11 +126,16 @@ TEST_P(PullTransposeThroughDequantizationTransformation, CompareFunctions) { ASSERT_TRUE(res.first) << res.second; } -const std::vector inputShapes = {ngraph::Shape({1, 960, 7, 7}), ngraph::Shape({4, 960, 7, 7})}; +// clang-format off + +const std::vector inputShapes = { + ngraph::Shape({1, 960, 7, 7}), + ngraph::Shape({4, 960, 7, 7}) +}; const std::vector> dequantizationOnWeightElementwiseConstantShapes = { - {ngraph::Shape({}), ngraph::Shape({1, 1, 1, 1})}, - {ngraph::Shape({1}), ngraph::Shape({1, 1, 1, 1})}}; + {ngraph::Shape({}), ngraph::Shape({1, 1, 1, 1, 1})}, + {ngraph::Shape({1}), ngraph::Shape({1, 1, 1, 1, 1})}}; const std::vector testValues = { // Actual: @@ -178,37 +183,54 @@ const std::vector testValues = { // \ / // Multiply // - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {3, 3, 960, 1}}, - {{ngraph::element::f32, false}, - {{127.f},
element::f32, {/* from parameter */}, false}, - {{0.03f}, element::f32, {/* from parameter */}, false}}, - {}, // reshape1 - {}, // multiply - {{2, 3, 0, 1}}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {960, 1, 3, 3}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false}, - {{0.03f}, element::f32, {/* from parameter */}, false}}, - {}, - {}, - {}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}}}; + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, + {{0.02f}, element::f32, {}, false} + }, + {std::vector{2.f}, ngraph::element::i8, {3, 3, 960, 1}}, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {/* from parameter */}, false}, + {{0.03f}, element::f32, {/* from parameter */}, false} + }, + {}, // reshape1 + {}, // multiply + {{2, 3, 0, 1}}, + {{960, 1, 1, 3, 3}}, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, + {{0.02f}, element::f32, {}, false} + }, + {std::vector{2.f}, ngraph::element::i8, {960, 1, 1, 3, 3}}, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {/* from parameter */}, false}, + {{0.03f}, element::f32, {/* from parameter */}, false} + }, + {}, + {}, + {}, + {}, + ngraph::element::f32, + {} + } + } +}; + +// clang-format on INSTANTIATE_TEST_SUITE_P(smoke_LPT, PullTransposeThroughDequantizationTransformation, diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp index c8d8473ad1468e..bc058ef87c7add 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp @@ -11,6 +11,8 @@ using namespace LayerTestsDefinitions; namespace { +// clang-format off + const std::vector netPrecisions = { ngraph::element::f32, // ngraph::element::f16 @@ -370,6 +372,66 @@ const std::vector true, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert Constant Constant + // |U8 |U8 |I8 |I8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + // + // Transformed: + // + // FQ Constant Constant + // \U8 /U8 / I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + { {126.f, 127.f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false, 1ul, ngraph::element::i8, true }, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "FP32", + true, + }, + // Actual: // // FQ @@ -427,6 +489,63 @@ 
const std::vector false, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert + // |U8 |U8 + // | | + // Convert Convert Constant + // \FP32 /FP32 \U8 + // \ / \ + // Subtract Constant Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution + // + // Transformed: + // + // FQ Constant + // \U8 /U8 + // \ / + // Subtract + // \FP32 + // \ Constant + // \ /I8 + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + {}, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "U8", + false, + }, + // Actual: // // FQ @@ -500,4 +619,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, GroupConvolutionQDqTransformation, ::testing::ValuesIn(trasformationParamValues), ::testing::ValuesIn(params)), GroupConvolutionQDqTransformation::getTestCaseName); + +// clang-format on } // namespace diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp index 0794065e13e5f7..946554d0f7f2ea 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp @@ -11,6 +11,8 @@ using namespace 
LayerTestsDefinitions; namespace { +// clang-format off + const std::vector netPrecisions = { ngraph::element::f32, // ngraph::element::f16 @@ -370,6 +372,66 @@ const std::vector true, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert Constant Constant + // |U8 |U8 |I8 |I8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + // + // Transformed: + // + // FQ Constant Constant + // \U8 /U8 / I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + { {126.f, 127.f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false, 1ul, ngraph::element::i8, true }, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "FP32", + true, + }, + // Actual: // // FQ @@ -427,6 +489,63 @@ const std::vector false, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert + // |U8 |U8 + // | | + // Convert Convert Constant + // \FP32 /FP32 \U8 + // \ / \ + // Subtract Constant Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution + // + // Transformed: + // + // FQ Constant + // \U8 /U8 + // \ / + // Subtract + // \FP32 + // \ Constant + // \ /I8 + // \ / + // 
GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + {}, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "U8", + false, + }, + // Actual: // // FQ @@ -500,4 +619,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, GroupConvolutionQDqTransformation, ::testing::ValuesIn(trasformationParamValues), ::testing::ValuesIn(params)), GroupConvolutionQDqTransformation::getTestCaseName); + +// clang-format on } // namespace diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp index e90c32eb00bf46..20101a88d57745 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp @@ -49,7 +49,8 @@ class GroupConvolutionFunction { const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, const ngraph::element::Type precisionAfterOperation, const ngraph::builder::subgraph::DequantizationOperations& dequantizationAfter, - const ngraph::element::Type precisionAfterDequantization); + const ngraph::element::Type precisionAfterDequantization, + const bool addReshape); }; } // namespace subgraph diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp 
b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp index 6af36be45295cd..953e52326de7c7 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp @@ -31,7 +31,8 @@ std::shared_ptr createWeightsOriginal( const size_t kernelSize, const std::vector& weightsValues, const FakeQuantizeOnWeights& fakeQuantizeOnWeights, - const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights) { + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, + const bool addReshape = true) { std::shared_ptr weights; if (fakeQuantizeOnWeights.empty() && dequantizationOnWeights.empty()) { weights = ngraph::opset1::Constant::create( @@ -46,9 +47,13 @@ std::shared_ptr createWeightsOriginal( const size_t inputChannelsPerGroup = inputChannelsCount / groupCount; weights = ngraph::opset1::Constant::create( precision, - rankLength == 3 ? - ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize } : - ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize, kernelSize }, + addReshape ? + (rankLength == 3 ? + ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize } : + ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize, kernelSize }) : + (rankLength == 3 ? + ngraph::Shape{ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, kernelSize } : + ngraph::Shape{ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, kernelSize, kernelSize }), weightsValues.size() == 1ul ? std::vector( rankLength == 3 ? @@ -75,24 +80,26 @@ std::shared_ptr createWeightsOriginal( weights = ngraph::builder::subgraph::makeDequantization(weights, dequantizationOnWeights); } - weights = std::make_shared( - weights, - ngraph::opset1::Constant::create( - element::i64, - Shape{ static_cast(rankLength) + 1ul }, - rankLength == 3 ? 
- std::vector { - calculatedDimention == 0 ? -1 : static_cast(groupCount), - calculatedDimention == 1 ? -1 : static_cast(outputChannelsCount / groupCount), - static_cast(inputChannelsPerGroup), - static_cast(kernelSize) } : - std::vector { - calculatedDimention == 0 ? -1 : static_cast(groupCount), - calculatedDimention == 1 ? -1 : static_cast(outputChannelsCount / groupCount), - static_cast(inputChannelsPerGroup), - static_cast(kernelSize), - static_cast(kernelSize) }), - true); + if (addReshape) { + weights = std::make_shared( + weights, + ngraph::opset1::Constant::create( + element::i64, + Shape{ static_cast(rankLength) + 1ul }, + rankLength == 3 ? + std::vector { + calculatedDimention == 0 ? -1 : static_cast(groupCount), + calculatedDimention == 1 ? -1 : static_cast(outputChannelsCount / groupCount), + static_cast(inputChannelsPerGroup), + static_cast(kernelSize) } : + std::vector { + calculatedDimention == 0 ? -1 : static_cast(groupCount), + calculatedDimention == 1 ? -1 : static_cast(outputChannelsCount / groupCount), + static_cast(inputChannelsPerGroup), + static_cast(kernelSize), + static_cast(kernelSize) }), + true); + } } return weights; @@ -253,7 +260,8 @@ std::shared_ptr GroupConvolutionFunction::get( const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, const ngraph::element::Type precisionAfterOperation, const ngraph::builder::subgraph::DequantizationOperations& dequantizationAfter, - const ngraph::element::Type precisionAfterDequantization) { + const ngraph::element::Type precisionAfterDequantization, + const bool addReshape) { const auto rankLength = inputShape.rank().is_dynamic() ? 
4 : inputShape.rank().get_length(); OPENVINO_ASSERT(rankLength == 3 || rankLength == 4, "not supported input shape rank: ", rankLength); @@ -269,9 +277,6 @@ std::shared_ptr GroupConvolutionFunction::get( const size_t outputChannelsInGroup = outputChannelsCount / groupCount; const size_t weightsSize = weightsConst->cast_vector().size(); - if ((weightsSize != 1ul) && (weightsSize != (inputChannelsCount * outputChannelsCount))) { - throw std::runtime_error("unexpected actual weights values size"); - } std::shared_ptr weights; if (fakeQuantizeOnWeights.empty() && dequantizationOnWeights.empty()) { @@ -293,7 +298,8 @@ std::shared_ptr GroupConvolutionFunction::get( kernelSize, weightsConst->cast_vector(), fakeQuantizeOnWeights, - dequantizationOnWeights); + dequantizationOnWeights, + addReshape); } auto convolutionOriginal = ngraph::opset1::GroupConvolution(