diff --git a/src/common/snippets/src/pass/fc_tokenization.cpp b/src/common/snippets/src/pass/fc_tokenization.cpp index 3291f451b58497..3acafd1de5a1f8 100644 --- a/src/common/snippets/src/pass/fc_tokenization.cpp +++ b/src/common/snippets/src/pass/fc_tokenization.cpp @@ -4,10 +4,8 @@ #include "snippets/pass/fc_tokenization.hpp" -#include "openvino/pass/graph_rewrite.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "snippets/itt.hpp" -#include "snippets/op/subgraph.hpp" #include "snippets/utils/tokenization_utils.hpp" ov::snippets::pass::TokenizeFCSnippets::TokenizeFCSnippets(const SnippetsTokenization::Config& config) { diff --git a/src/common/snippets/src/pass/tokenization.cpp b/src/common/snippets/src/pass/tokenization.cpp index 600db8cbde0fce..e472caa1ed215f 100644 --- a/src/common/snippets/src/pass/tokenization.cpp +++ b/src/common/snippets/src/pass/tokenization.cpp @@ -82,8 +82,9 @@ bool SnippetsTokenization::run_on_model(const std::shared_ptr& m) { manager.register_pass(); manager.register_pass(); - // This pass mustn't be registered in GraphRewrite with other tokenization passes - // since it changes the nodes after the matched root node + // This pass mustn't be registered in GraphRewrite with other tokenization passes because of 2 reasons: + // 1. It has higher priority than other tokenization passes + // 2. It changes the nodes after the matched root node manager.register_pass(m_config); auto tokenization_passes = manager.register_pass(); diff --git a/src/common/snippets/src/utils/tokenization_utils.cpp b/src/common/snippets/src/utils/tokenization_utils.cpp index f8bfa8ab29a3bd..700b282f86f4d4 100644 --- a/src/common/snippets/src/utils/tokenization_utils.cpp +++ b/src/common/snippets/src/utils/tokenization_utils.cpp @@ -65,7 +65,8 @@ bool tokenize_node(const std::shared_ptr& node, const SnippetsTokeniza op::update_out_tensor_name(subgraph); }; - auto abort_with_strategy = [&](const std::string& message_reset, const std::string& message_abort = "") { + auto abort = [&](const std::string& message) { + remark(3) << message << std::endl; create_single_node_subgraph(node); return true; }; @@ -203,7 +204,7 @@ bool tokenize_node(const std::shared_ptr& node, const SnippetsTokeniza // todo: In principle, we can still attach the node to the subgraph if cyclic dependency is introduced during ternary merge. // Need to support. if (cyclicDependencyIsIntoduced(to_replace_with, currentTopoBounds)) - return abort_with_strategy("Attempt to perform recurrent merge for cyclic-dependent subgraphs. Aborting."); + return abort("Attempt to perform recurrent merge for cyclic-dependent subgraphs. Aborting."); for (const auto& output : internal_consumers) { for (auto consumer : output.get_target_inputs()) { auto other_body = clones[subgraph->get_input_node_shared_ptr(i)]; @@ -260,7 +261,7 @@ bool tokenize_node(const std::shared_ptr& node, const SnippetsTokeniza } if (!ov::is_type(grandparent)) { - return abort_with_strategy("Convert supports only as Input and as Result of subgraph. Aborting"); + return abort("Convert supports only as Input and as Result of subgraph. Aborting"); } } // Result op has a single input @@ -288,7 +289,7 @@ bool tokenize_node(const std::shared_ptr& node, const SnippetsTokeniza fusedNames += node->get_friendly_name(); num_result_children += get_num_result_children(node); if (num_result_children > 1) - return abort_with_strategy("New subgraph is created since too many Result children are detected"); + return abort("New subgraph is created since too many Result children are detected"); auto body_node = node->copy_with_new_inputs(internal_inputs); body_node->set_friendly_name(node->get_friendly_name()); @@ -380,10 +381,7 @@ bool tokenize_node(const std::shared_ptr& node, const SnippetsTokeniza const std::string message_reset = "new subgraph is created. Impossible to schedule subgraph with " + std::to_string(body_parameters.size()) + " inputs, " + std::to_string(body_results.size()) + " outputs and " + std::to_string(hidden_data_count) + " non-scalar constants and " + std::to_string(unique_buffer_count) + "buffers."; - const std::string message_abort = "failed to continue subgraph. Impossible to schedule subgraph with " + - std::to_string(body_parameters.size()) + " inputs, " + std::to_string(body_results.size()) + " outputs and " + - std::to_string(hidden_data_count) + " non-scalar constants and " + std::to_string(unique_buffer_count) + "buffers."; - return abort_with_strategy(message_reset, message_abort); + return abort(message_reset); } auto body = op::create_body(node->get_friendly_name(), body_results, body_parameters); @@ -402,7 +400,7 @@ bool tokenize_node(const std::shared_ptr& node, const SnippetsTokeniza } if (outputs_are_not_broadcastable(subgraph)) - return abort_with_strategy("New subgraph is created due to outputs of a subgraph not broadcastable."); + return abort("New subgraph is created due to outputs of a subgraph not broadcastable."); for (size_t i = 0; i < subgraph->get_output_size(); ++i) { for (auto target_input : subgraph_result_inputs[i]) { diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fully_connected.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fully_connected.cpp new file mode 100644 index 00000000000000..572621e6644fde --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fully_connected.cpp @@ -0,0 +1,173 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/matmul.hpp" + +#include "common_test_utils/test_constants.hpp" +#include "openvino/runtime/system_conf.hpp" + +namespace ov { +namespace test { +namespace snippets { +namespace { +static inline std::vector> quantized_precisions() { + std::vector> prc = {}; + // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms + if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) { + prc.emplace_back(std::vector{element::i8, element::i8}); + prc.emplace_back(std::vector{element::u8, element::i8}); + } + return prc; +} + +static inline std::vector> precisions(bool only_fp32 = true) { + std::vector> prc = { + {element::f32, element::f32}, + }; +// Note: TPP doesn't support low precisions yet +#ifndef SNIPPETS_LIBXSMM_TPP + if (!only_fp32) { + auto quant = quantized_precisions(); + std::copy(quant.begin(), quant.end(), std::back_inserter(prc)); + // In Snippets MatMul BF16 is supported only on bf16/AMX platforms + if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) { + prc.emplace_back(std::vector{element::bf16, element::bf16}); + } + } +#endif + return prc; +} + +std::vector> fc_input_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, + {{}, {{2500, 256}}} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnected, MatMul, + ::testing::Combine( + ::testing::ValuesIn(fc_input_shapes), + ::testing::ValuesIn(precisions(false)), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // MatMul + ::testing::Values(1), // Tokenized MatMul + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedFQ, MatMulFQ, + ::testing::Combine( + ::testing::ValuesIn(fc_input_shapes), + ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // MatMul; + ::testing::Values(1), // Tokenized MatMul + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedEltwiseChain, MatMulEltwiseChain, + ::testing::Combine( + ::testing::ValuesIn(fc_input_shapes), + ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // MatMul + ::testing::Values(1), // Tokenized MatMul + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +std::vector> fc_cascade_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}, {2, 1, 32, 2500}}}, + {PartialShape{}, {{2500, 128}}}, + {PartialShape{}, {{128, 64}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedEltwiseChainCascade, MatMulEltwiseChainCascade, + ::testing::Combine( + ::testing::ValuesIn(fc_cascade_shapes), + ::testing::ValuesIn(precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +std::vector> fc_transpose_b_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, + {{}, {{256, 2500}}} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedTransposeB, MatMulTransposeB, + ::testing::Combine( + ::testing::ValuesIn(fc_transpose_b_shapes), + ::testing::ValuesIn(precisions(false)), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // MatMul + ::testing::Values(1), // Tokenized MatMul + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + + +std::vector> fc_bias_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, + {{}, {{2500, 256}}}, + {PartialShape{-1, -1, -1, 256}, {{1, 1, 32, 256}, {1, 1, 80, 256}}} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedBias, MatMulBias, + ::testing::Combine( + ::testing::ValuesIn(fc_bias_shapes), + ::testing::ValuesIn(precisions(false)), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // Subgraph; + ::testing::Values(1), // Tokenized MatMul+Bias + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedBiasQuantized, MatMulBiasQuantized, + ::testing::Combine( + ::testing::ValuesIn(fc_bias_shapes), + ::testing::ValuesIn(quantized_precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // Subgraph + ::testing::Values(1), // Tokenized MatMul+Bias + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +std::vector> fc_quantized_shapes{ + { + {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, + {{}, {{2500, 256}}}, + {{}, {{256, 64}}} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedsQuantized, MatMulsQuantized, + ::testing::Combine( + ::testing::ValuesIn(fc_quantized_shapes), + ::testing::ValuesIn(quantized_precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // Reshape on weights is folded => only 1 Subgraph remains + ::testing::Values(1), // Tokenized [MatMul+FQ+Matmul] + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedsQuantizedSoftmax, MatMulsQuantizedSoftmax, + ::testing::Combine( + ::testing::ValuesIn(fc_quantized_shapes), + ::testing::ValuesIn(quantized_precisions()), + ::testing::Values(MatMulType::FullyConnected), + ::testing::Values(1), // Reshape on weights is folded => only 1 Subgraph remains + ::testing::Values(1), // Tokenized [MatMul+FQ+Matmul] + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MatMul::getTestCaseName); +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp index d95b7e6341e7e4..b0e8d58da2f0b2 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp @@ -149,61 +149,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulEltwiseChainCascade, MatMulEltwise ::testing::Values(ov::test::utils::DEVICE_CPU)), MatMul::getTestCaseName); -std::vector> fc_input_shapes{ - { - {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, - {{}, {{2500, 256}}} - }, -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnected, MatMul, - ::testing::Combine( - ::testing::ValuesIn(fc_input_shapes), - ::testing::ValuesIn(precisions(false)), - ::testing::Values(MatMulType::FullyConnected), - ::testing::Values(1), // MatMul - ::testing::Values(1), // Tokenized MatMul - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MatMul::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedFQ, MatMulFQ, - ::testing::Combine( - ::testing::ValuesIn(fc_input_shapes), - ::testing::ValuesIn(precisions()), - ::testing::Values(MatMulType::FullyConnected), - ::testing::Values(1), // MatMul; - ::testing::Values(1), // Tokenized MatMul - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MatMul::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedEltwiseChain, MatMulEltwiseChain, - ::testing::Combine( - ::testing::ValuesIn(fc_input_shapes), - ::testing::ValuesIn(precisions()), - ::testing::Values(MatMulType::FullyConnected), - ::testing::Values(1), // MatMul - ::testing::Values(1), // Tokenized MatMul - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MatMul::getTestCaseName); - -std::vector> fc_cascade_shapes{ - { - {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}, {2, 1, 32, 2500}}}, - {PartialShape{}, {{2500, 128}}}, - {PartialShape{}, {{128, 64}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedEltwiseChainCascade, MatMulEltwiseChainCascade, - ::testing::Combine( - ::testing::ValuesIn(fc_cascade_shapes), - ::testing::ValuesIn(precisions()), - ::testing::Values(MatMulType::FullyConnected), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MatMul::getTestCaseName); - const auto& transpose_b_shapes = STATIC_SHAPES( {{3, 3, 64, 64}, {3, 3, 64, 64}}, {{1, 1, 32, 128}, {1, 1, 64, 128}}, @@ -223,23 +168,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulTransposeB, MatMulTransposeB, ::testing::Values(ov::test::utils::DEVICE_CPU)), MatMul::getTestCaseName); -std::vector> fc_transpose_b_shapes{ - { - {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, - {{}, {{256, 2500}}} - }, -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedTransposeB, MatMulTransposeB, - ::testing::Combine( - ::testing::ValuesIn(fc_transpose_b_shapes), - ::testing::ValuesIn(precisions(false)), - ::testing::Values(MatMulType::FullyConnected), - ::testing::Values(1), // MatMul - ::testing::Values(1), // Tokenized MatMul - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MatMul::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBias, MatMulBias, ::testing::Combine( ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 1, 43, 49}, {1, 1, 69, 49}}, @@ -251,24 +179,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBias, MatMulBias, ::testing::Values(ov::test::utils::DEVICE_CPU)), MatMul::getTestCaseName); -std::vector> fc_bias_shapes{ - { - {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, - {{}, {{2500, 256}}}, - {PartialShape{-1, -1, -1, 256}, {{1, 1, 32, 256}, {1, 1, 80, 256}}} - }, -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedBias, MatMulBias, - ::testing::Combine( - ::testing::ValuesIn(fc_bias_shapes), - ::testing::ValuesIn(precisions(false)), - ::testing::Values(MatMulType::FullyConnected), - ::testing::Values(1), // Subgraph; - ::testing::Values(1), // Tokenized MatMul+Bias - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MatMul::getTestCaseName); - std::vector> input_shapes_dynamic_bias{ { {PartialShape{-1, -1, -1, -1}, {{1, 2, 69, 43}, {1, 2, 95, 1023}, {1, 2, 69, 43}}}, @@ -302,16 +212,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized ::testing::Values(ov::test::utils::DEVICE_CPU)), MatMul::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedBiasQuantized, MatMulBiasQuantized, - ::testing::Combine( - ::testing::ValuesIn(fc_bias_shapes), - ::testing::ValuesIn(quantized_precisions()), - ::testing::Values(MatMulType::FullyConnected), - ::testing::Values(1), // Subgraph - ::testing::Values(1), // Tokenized MatMul+Bias - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MatMul::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized, ::testing::Combine( ::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), @@ -322,24 +222,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized, ::testing::Values(ov::test::utils::DEVICE_CPU)), MatMul::getTestCaseName); -std::vector> fcs_shapes{ - { - {PartialShape{-1, -1, -1, 2500}, {{2, 1, 32, 2500}, {1, 3, 80, 2500}}}, - {{}, {{2500, 256}}}, - {{}, {{256, 64}}} - }, -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedsQuantized, MatMulsQuantized, - ::testing::Combine( - ::testing::ValuesIn(fcs_shapes), - ::testing::ValuesIn(quantized_precisions()), - ::testing::Values(MatMulType::FullyConnected), - ::testing::Values(1), // Reshape on weights is folded => only 1 Subgraph remains - ::testing::Values(1), // Tokenized [MatMul+FQ+Matmul] - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MatMul::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantizedSoftmax, MatMulsQuantizedSoftmax, ::testing::Combine( ::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), @@ -349,17 +231,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantizedSoftmax, MatMulsQuantize ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ] ::testing::Values(ov::test::utils::DEVICE_CPU)), MatMul::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnectedsQuantizedSoftmax, MatMulsQuantizedSoftmax, - ::testing::Combine( - ::testing::ValuesIn(fcs_shapes), - ::testing::ValuesIn(quantized_precisions()), - ::testing::Values(MatMulType::FullyConnected), - ::testing::Values(1), // Reshape on weights is folded => only 1 Subgraph remains - ::testing::Values(1), // Tokenized [MatMul+FQ+Matmul] - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MatMul::getTestCaseName); - } // namespace } // namespace snippets } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp index 0011bb65622d5e..c05087283305e4 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp @@ -81,19 +81,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnected, TransposeMatMul, ::testing::Values(1), // Tokenized MatMul + FusedTranspose ::testing::Values(ov::test::utils::DEVICE_CPU)), TransposeMatMul::getTestCaseName); - -// TODO: FuseTransposeToBrgemm supports fusing only if Transpose is before Parameter in cases when Transpose is on input at the moment -// When we support the branch Parameter->FQ->Transpose->MatMul[0th input], uncomment this test case please -// INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulFQ, TransposeMatMulFQ, -// ::testing::Combine( -// ::testing::ValuesIn(transpose_input_shapes), -// ::testing::Values(0), // Transpose on 0th Matmul input -// ::testing::Values(ov::element::i8), -// ::testing::Values(MatMulType::MatMul), -// ::testing::Values(1), // MatMul -// ::testing::Values(1), // Tokenized MatMul + FusedTranspose -// ::testing::Values(ov::test::utils::DEVICE_CPU)), -// TransposeMatMulFQ::getTestCaseName); } // namespace transpose_zero_input namespace transpose_first_input { @@ -181,24 +168,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnected, TransposeMatMul, ::testing::ValuesIn(fc_transpose_input_shapes), ::testing::Values(2), // Transpose on Matmul output ::testing::ValuesIn(precisions(true)), - ::testing::Values(MatMulType::MatMul), + ::testing::Values(MatMulType::FullyConnected), ::testing::Values(1), // MatMul ::testing::Values(1), // Tokenized MatMul + FusedTranspose ::testing::Values(ov::test::utils::DEVICE_CPU)), TransposeMatMul::getTestCaseName); - -// TODO: At the moment we doesn't support the branch MatMul[output]->Transpose->FQ. -// When we add support, uncomment this test case please -// INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulFQ, TransposeMatMulFQ, -// ::testing::Combine( -// ::testing::ValuesIn(transpose_input_shapes), -// ::testing::Values(2), // Transpose on Matmul output -// ::testing::Values(ov::element::i8), -// ::testing::Values(MatMulType::MatMul), -// ::testing::Values(1), // MatMul -// ::testing::Values(1), // Tokenized MatMul + FusedTranspose -// ::testing::Values(ov::test::utils::DEVICE_CPU)), -// TransposeMatMulFQ::getTestCaseName); } // namespace transpose_output namespace explicit_transpose { diff --git a/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp b/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp index 5d19991764476d..282c1241555cc9 100644 --- a/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp @@ -24,8 +24,7 @@ typedef std::tuple < std::string // target device > CheckBroadcastParams; -class CheckBroadcast : public testing::WithParamInterface, - virtual public SnippetsTestsCommon { +class CheckBroadcast : public testing::WithParamInterface, virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/eltwise_two_results.hpp b/src/tests/functional/plugin/shared/include/snippets/eltwise_two_results.hpp index dfc258e3b20229..b28bd300b9908c 100644 --- a/src/tests/functional/plugin/shared/include/snippets/eltwise_two_results.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/eltwise_two_results.hpp @@ -19,7 +19,7 @@ typedef std::tuple< > EltwiseTwoResultsParams; class EltwiseTwoResults : public testing::WithParamInterface, - virtual public SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); @@ -27,7 +27,6 @@ class EltwiseTwoResults : public testing::WithParamInterface GroupNormalizationParams; class GroupNormalization : public testing::WithParamInterface, - virtual public SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/matmul.hpp b/src/tests/functional/plugin/shared/include/snippets/matmul.hpp index 16c309e9a71aac..c592b3900d1b84 100644 --- a/src/tests/functional/plugin/shared/include/snippets/matmul.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/matmul.hpp @@ -26,7 +26,7 @@ class MatMulBase : public SnippetsTestsCommon { * @brief Erases shapes with the given indices from inputDynamicShapes and targetStaticShapes */ void filter_shape_info(const std::set& idces_to_remove); - virtual void init_subgraph(const std::vector& types) = 0; + virtual std::shared_ptr get_builder(const std::vector& types) = 0; MatMulType matmul_type; }; @@ -38,47 +38,47 @@ class MatMul : public testing::WithParamInterface& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulTransposeB : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulFQ : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulBias : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulBiasQuantized : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulsQuantized : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulsQuantizedSoftmax : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulEltwiseChain : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class MatMulEltwiseChainCascade : public MatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; } // namespace snippets diff --git a/src/tests/functional/plugin/shared/include/snippets/three_inputs_eltwise.hpp b/src/tests/functional/plugin/shared/include/snippets/three_inputs_eltwise.hpp index b87562474fb7e3..a14162460d4f39 100644 --- a/src/tests/functional/plugin/shared/include/snippets/three_inputs_eltwise.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/three_inputs_eltwise.hpp @@ -20,7 +20,7 @@ typedef std::tuple< > ThreeInputsEltwiseParams; class ThreeInputsEltwise : public testing::WithParamInterface, - virtual public SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/transpose.hpp b/src/tests/functional/plugin/shared/include/snippets/transpose.hpp index 7fb26fa2f38649..f0f1f254ee37d0 100644 --- a/src/tests/functional/plugin/shared/include/snippets/transpose.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/transpose.hpp @@ -27,7 +27,7 @@ typedef std::tuple< > TransposeMulParams; class Transpose : public testing::WithParamInterface, - virtual public SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); @@ -36,7 +36,7 @@ class Transpose : public testing::WithParamInterface, - virtual public SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/src/tests/functional/plugin/shared/include/snippets/transpose_matmul.hpp b/src/tests/functional/plugin/shared/include/snippets/transpose_matmul.hpp index e03aa50395ebcd..07960cb422d5e4 100644 --- a/src/tests/functional/plugin/shared/include/snippets/transpose_matmul.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/transpose_matmul.hpp @@ -28,24 +28,24 @@ class TransposeMatMul : public testing::WithParamInterface& types) override; + std::shared_ptr get_builder(const std::vector& types) override; size_t transpose_position; }; class TransposeMatMulFQ : public TransposeMatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class ExplicitTransposeMatMul : public TransposeMatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; class ExplicitTransposeMatMulBias : public TransposeMatMul { protected: - void init_subgraph(const std::vector& types) override; + std::shared_ptr get_builder(const std::vector& types) override; }; } // namespace snippets diff --git a/src/tests/functional/plugin/shared/include/snippets/two_inputs_and_outputs.hpp b/src/tests/functional/plugin/shared/include/snippets/two_inputs_and_outputs.hpp index 95487603cfc8e8..b39b48e2819a92 100644 --- a/src/tests/functional/plugin/shared/include/snippets/two_inputs_and_outputs.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/two_inputs_and_outputs.hpp @@ -18,7 +18,7 @@ typedef std::tuple< > TwoInputsAndOutputsParams; class TwoInputsAndOutputs : public testing::WithParamInterface, - virtual public SnippetsTestsCommon { + virtual public SnippetsTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj); @@ -26,7 +26,6 @@ class TwoInputsAndOutputs : public testing::WithParamInterfaceGetParam(); init_input_shapes(input_shapes); - init_subgraph(elem_types); + const auto builder = get_builder(elem_types); + function = builder->getOriginal(); + filter_shape_info(builder->get_constant_input_idces()); if (!configuration.count("SNIPPETS_MODE")) { configuration.insert({"SNIPPETS_MODE", "IGNORE_CALLBACK"}); } } -void MatMul::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulFunction(inputDynamicShapes, types, matmul_type); - function = f.getOriginal(); - filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr MatMul::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); } -void MatMulTransposeB::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulFunction(inputDynamicShapes, types, matmul_type, true); - function = f.getOriginal(); - filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr MatMulTransposeB::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type, true); } -void MatMulFQ::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::FQMatMulFunction(inputDynamicShapes, matmul_type); - function = f.getOriginal(); - filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr MatMulFQ::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, matmul_type); } -void MatMulBias::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulBiasFunction(inputDynamicShapes, types, matmul_type); - function = f.getOriginal(); - filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr MatMulBias::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); } -void MatMulBiasQuantized::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulBiasQuantizedFunction(inputDynamicShapes, types, matmul_type); - function = f.getOriginal(); - filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr MatMulBiasQuantized::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); } -void MatMulsQuantized::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulsQuantizedFunction(inputDynamicShapes, types, matmul_type); - function = f.getOriginal(); - filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr MatMulsQuantized::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); } -void MatMulsQuantizedSoftmax::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulsQuantizedSoftmaxFunction(inputDynamicShapes, types, matmul_type); - function = f.getOriginal(); - filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr MatMulsQuantizedSoftmax::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); } -void MatMulEltwiseChain::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulEltwiseChainFunction(inputDynamicShapes, types, matmul_type); - function = f.getOriginal(); - filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr MatMulEltwiseChain::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); } -void MatMulEltwiseChainCascade::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::MatMulEltwiseChainCascadeFunction(inputDynamicShapes, types, matmul_type); - function = f.getOriginal(); - filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr MatMulEltwiseChainCascade::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type); } TEST_P(MatMul, CompareWithRefImpl) { diff --git a/src/tests/functional/plugin/shared/src/snippets/transpose_matmul.cpp b/src/tests/functional/plugin/shared/src/snippets/transpose_matmul.cpp index ec6be824813c0b..d0a594a881c6ed 100644 --- a/src/tests/functional/plugin/shared/src/snippets/transpose_matmul.cpp +++ b/src/tests/functional/plugin/shared/src/snippets/transpose_matmul.cpp @@ -41,34 +41,28 @@ void TransposeMatMul::SetUp() { std::tie(input_shapes, transpose_position, elem_types, matmul_type, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam(); init_input_shapes(input_shapes); - init_subgraph(elem_types); + const auto builder = get_builder(elem_types); + function = builder->getOriginal(); + filter_shape_info(builder->get_constant_input_idces()); if (!configuration.count("SNIPPETS_MODE")) { configuration.insert({"SNIPPETS_MODE", "IGNORE_CALLBACK"}); } } -void TransposeMatMul::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::Transpose0213MatMulFunction(inputDynamicShapes, types, matmul_type, transpose_position); - function = f.getOriginal(); - MatMulBase::filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr TransposeMatMul::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, types, matmul_type, transpose_position); } -void TransposeMatMulFQ::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::FQMatMulFunction(inputDynamicShapes, matmul_type, transpose_position); - function = f.getOriginal(); - MatMulBase::filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr TransposeMatMulFQ::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes, matmul_type, transpose_position); } -void ExplicitTransposeMatMul::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::TransposeMatMulFunction(inputDynamicShapes); - function = f.getOriginal(); - MatMulBase::filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr ExplicitTransposeMatMul::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes); } -void ExplicitTransposeMatMulBias::init_subgraph(const std::vector& types) { - auto f = ov::test::snippets::TransposeMatMulBiasFunction(inputDynamicShapes); - function = f.getOriginal(); - MatMulBase::filter_shape_info(f.get_constant_input_idces()); +std::shared_ptr ExplicitTransposeMatMulBias::get_builder(const std::vector& types) { + return std::make_shared(inputDynamicShapes); } TEST_P(TransposeMatMul, CompareWithRefImpl) { diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_matmul.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_matmul.cpp index a61d9c9e40ec0d..2a7442cf255fc5 100644 --- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_matmul.cpp +++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_matmul.cpp @@ -13,15 +13,10 @@ namespace ov { namespace test { namespace snippets { namespace { -constexpr float i8_fq_il = -35.0172004; -constexpr float i8_fq_ih = 34.7436294; -constexpr float u8_fq_il = 0; -constexpr float u8_fq_ih = 0.820726; - -std::shared_ptr build_mm_second_input(const ov::element::Type& precision, - const ov::PartialShape& shape, - MatMulType type, - ov::ParameterVector& params) { +std::shared_ptr make_matmul_b_input(const ov::element::Type& precision, + const ov::PartialShape& shape, + MatMulType type, + ov::ParameterVector& params) { std::shared_ptr result; switch (type) { case MatMulType::FullyConnected: @@ -32,9 +27,19 @@ std::shared_ptr build_mm_second_input(const ov::element::Type& precisi return param; } default: - OPENVINO_THROW("Unexpected MatMulType is passed in build_mm_second_input"); + OPENVINO_THROW("Unexpected MatMulType is passed in make_matmul_b_input"); } } + +std::shared_ptr make_fake_quantize(const ov::Output& in, bool signed_interval) { + static const float i8_fq_il = -35.0172004; + static const float i8_fq_ih = 34.7436294; + static const float u8_fq_il = 0; + static const float u8_fq_ih = 0.820726; + const auto low = signed_interval ? i8_fq_il : u8_fq_il; + const auto high = signed_interval ? i8_fq_ih : u8_fq_ih; + return ov::test::utils::make_fake_quantize(in, ov::element::f32, 256, {1}, {low}, {high}, {low}, {high}); +} } // namespace std::ostream &operator<<(std::ostream& os, MatMulType type) { @@ -65,14 +70,14 @@ MatMulFunctionBase::MatMulFunctionBase(const std::vector& inputSha void MatMulFunctionBase::validate_function(const std::shared_ptr &f) const { OPENVINO_ASSERT(f != nullptr, "The test requires Model to be defined"); - const auto shape_size = input_shapes.size(); + const auto count_of_shapes = input_shapes.size(); const auto idces_to_remove = get_constant_input_idces(); - OPENVINO_ASSERT(std::all_of(idces_to_remove.begin(), idces_to_remove.end(), [&shape_size](size_t x) { return x < shape_size; }), + OPENVINO_ASSERT(std::all_of(idces_to_remove.begin(), idces_to_remove.end(), [&count_of_shapes](size_t x) { return x < count_of_shapes; }), "constant_input_idces must be less than input shapes size"); std::vector shapes_to_check; for (size_t i = 0; i < input_shapes.size(); ++i) { - if (idces_to_remove.find(i) == idces_to_remove.end()) + if (idces_to_remove.count(i) == 0) shapes_to_check.push_back(input_shapes[i]); } SnippetsFunctionBase::validate_params_shape(shapes_to_check, f->get_parameters()); @@ -81,7 +86,7 @@ void MatMulFunctionBase::validate_function(const std::shared_ptr &f) cons std::shared_ptr MatMulFunction::initOriginal() const { auto data0 = std::make_shared(precisions[0], input_shapes[0]); ov::ParameterVector params{data0}; - auto data1 = build_mm_second_input(precisions[1], input_shapes[1], matmul_type, params); + auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params); std::shared_ptr matmul; if (precisions[1] == ov::element::i8) { matmul = std::make_shared>( @@ -98,7 +103,7 @@ std::shared_ptr MatMulFunction::initOriginal() const { std::shared_ptr MatMulFunction::initReference() const { auto data0 = std::make_shared(precisions[0], input_shapes[0]); ov::ParameterVector params{data0}; - auto data1 = build_mm_second_input(precisions[1], input_shapes[1], matmul_type, params); + auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params); auto indata0 = std::make_shared(precisions[0], data0->get_output_partial_shape(0)); auto indata1 = std::make_shared(precisions[1], data1->get_output_partial_shape(0)); std::shared_ptr matmul; @@ -122,17 +127,12 @@ std::shared_ptr FQMatMulFunction::initOriginal() const { auto data0 = std::make_shared(precision, input_shapes[0]); ParameterVector params{data0}; - auto ih = std::make_shared(ov::element::f32, ov::Shape{1}, std::vector{i8_fq_ih}); - auto il = std::make_shared(ov::element::f32, ov::Shape{1}, std::vector{i8_fq_il}); - auto oh = std::make_shared(ov::element::f32, ov::Shape{1}, std::vector{i8_fq_ih}); - auto ol = std::make_shared(ov::element::f32, ov::Shape{1}, std::vector{i8_fq_il}); - auto fq = std::make_shared(data0, il, ih, ol, oh, 256); - std::shared_ptr in0 = fq; + auto in0 = make_fake_quantize(data0, true); if (pos == 0) { in0 = std::make_shared(in0, const_order); } - auto data1 = build_mm_second_input(ov::element::i8, input_shapes[1], matmul_type, params); + auto data1 = make_matmul_b_input(ov::element::i8, input_shapes[1], matmul_type, params); auto convert = std::make_shared(data1, ov::element::f32); auto deq_mul = std::make_shared(ov::element::f32, ov::Shape{1}, std::vector{0.00499185826}); auto mul = std::make_shared(convert, deq_mul); @@ -150,7 +150,7 @@ std::shared_ptr FQMatMulFunction::initOriginal() const { std::shared_ptr MatMulBiasFunction::initOriginal() const { auto data0 = std::make_shared(precision, input_shapes[0]); ParameterVector params{data0}; - auto data1 = build_mm_second_input(precision, input_shapes[1], matmul_type, params); + auto data1 = make_matmul_b_input(precision, input_shapes[1], matmul_type, params); auto data2 = std::make_shared(precision, input_shapes[2]); params.push_back(data2); @@ -170,7 +170,7 @@ std::shared_ptr MatMulBiasFunction::initOriginal() const { std::shared_ptr MatMulBiasQuantizedFunction::initOriginal() const { auto data0 = std::make_shared(precisions[0], input_shapes[0]); ParameterVector params{data0}; - auto data1 = build_mm_second_input(precisions[1], input_shapes[1], matmul_type, params); + auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params); auto data2 = std::make_shared(precision, input_shapes[2]); params.push_back(data2); @@ -179,22 +179,22 @@ std::shared_ptr MatMulBiasQuantizedFunction::initOriginal() const { std::vector{element::f32}, ov::op::TemporaryReplaceOutputType(data0, element::f32).get(), ov::op::TemporaryReplaceOutputType(data1, element::f32).get()); - auto fq2 = ov::test::utils::make_fake_quantize(matmul, ov::element::f32, 256, {1}, {i8_fq_il}, {i8_fq_ih}, {i8_fq_il}, {i8_fq_ih}); + auto fq2 = make_fake_quantize(matmul, true); auto bias = std::make_shared(fq2, data2); return std::make_shared(NodeVector{bias}, params); } std::shared_ptr MatMulsQuantizedFunction::initOriginal() const { auto data0 = std::make_shared(precisions[0], input_shapes[0]); ParameterVector params{data0}; - auto data1 = build_mm_second_input(precisions[1], input_shapes[1], matmul_type, params); - auto data2 = build_mm_second_input(precision, input_shapes[2], matmul_type, params); + auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params); + auto data2 = make_matmul_b_input(precision, input_shapes[2], matmul_type, params); auto matmul0 = std::make_shared>( std::vector{ov::element::f32, element::f32}, std::vector{element::f32}, ov::op::TemporaryReplaceOutputType(data0, element::f32).get(), ov::op::TemporaryReplaceOutputType(data1, element::f32).get()); - auto fq0 = ov::test::utils::make_fake_quantize(matmul0, ov::element::f32, 256, {1}, {u8_fq_il}, {u8_fq_ih}, {u8_fq_il}, {u8_fq_ih}); - auto fq2 = ov::test::utils::make_fake_quantize(data2, ov::element::f32, 256, {1}, {i8_fq_il}, {i8_fq_ih}, {i8_fq_il}, {i8_fq_ih}); + auto fq0 = make_fake_quantize(matmul0, false); + auto fq2 = make_fake_quantize(data2, true); auto new_shape = std::make_shared(ov::element::u64, ov::Shape{4}, std::vector{1, 1, input_shapes[2].get_shape()[0], input_shapes[2].get_shape()[1]}); auto reshape = std::make_shared(fq2, new_shape, false); @@ -203,20 +203,13 @@ std::shared_ptr MatMulsQuantizedFunction::initOriginal() const { std::vector{element::f32}, ov::op::TemporaryReplaceOutputType(fq0, element::f32).get(), ov::op::TemporaryReplaceOutputType(reshape, element::f32).get()); - auto fq3 = ov::test::utils::make_fake_quantize(matmul1, - ov::element::f32, - 256, - {1}, - {i8_fq_il}, - {i8_fq_ih}, - {i8_fq_il}, - {i8_fq_ih}); + auto fq3 = make_fake_quantize(matmul1, true); return std::make_shared(NodeVector{fq3}, params); } std::shared_ptr Transpose0213MatMulFunction::initOriginal() const { auto data0 = std::make_shared(precisions[0], input_shapes[0]); ParameterVector params{data0}; - auto data1 = build_mm_second_input(precisions[1], input_shapes[1], matmul_type, params); + auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params); auto const_order = std::make_shared(ov::element::i32, Shape {4}, std::vector{0, 2, 1, 3}); std::shared_ptr result; switch (transpose_position) { @@ -295,16 +288,16 @@ std::shared_ptr TransposeMulMatMulBiasFunction::initOriginal() const std::shared_ptr MatMulsQuantizedSoftmaxFunction::initOriginal() const { auto data0 = std::make_shared(precisions[0], input_shapes[0]); ParameterVector params{data0}; - auto data1 = build_mm_second_input(precisions[1], input_shapes[1], matmul_type, params); - auto data2 = build_mm_second_input(precision, input_shapes[2], matmul_type, params); + auto data1 = make_matmul_b_input(precisions[1], input_shapes[1], matmul_type, params); + auto data2 = make_matmul_b_input(precision, input_shapes[2], matmul_type, params); auto matmul0 = std::make_shared>( std::vector{ov::element::f32, element::f32}, std::vector{element::f32}, ov::op::TemporaryReplaceOutputType(data0, element::f32).get(), ov::op::TemporaryReplaceOutputType(data1, element::f32).get()); auto softmax = std::make_shared(matmul0, -1); - auto fq0 = ov::test::utils::make_fake_quantize(softmax, ov::element::f32, 256, {1}, {u8_fq_il}, {u8_fq_ih}, {u8_fq_il}, {u8_fq_ih}); - auto fq2 = ov::test::utils::make_fake_quantize(data2, ov::element::f32, 256, {1}, {i8_fq_il}, {i8_fq_ih}, {i8_fq_il}, {i8_fq_ih}); + auto fq0 = make_fake_quantize(softmax, false); + auto fq2 = make_fake_quantize(data2, true); auto new_shape = std::make_shared(ov::element::u64, ov::Shape{4}, std::vector{1, 1, input_shapes[2].get_shape()[0], input_shapes[2].get_shape()[1]}); auto reshape = std::make_shared(fq2, new_shape, false); @@ -313,21 +306,14 @@ std::shared_ptr MatMulsQuantizedSoftmaxFunction::initOriginal() const std::vector{element::f32}, ov::op::TemporaryReplaceOutputType(fq0, element::f32).get(), ov::op::TemporaryReplaceOutputType(reshape, element::f32).get()); - auto fq3 = ov::test::utils::make_fake_quantize(matmul1, - ov::element::f32, - 256, - {1}, - {i8_fq_il}, - {i8_fq_ih}, - {i8_fq_il}, - {i8_fq_ih}); + auto fq3 = make_fake_quantize(matmul1, true); return std::make_shared(NodeVector{fq3}, params); } std::shared_ptr MatMulEltwiseChainFunction::initOriginal() const { auto data0 = std::make_shared(precision, input_shapes[0]); ParameterVector params{data0}; - auto data1 = build_mm_second_input(precision, input_shapes[1], matmul_type, params); + auto data1 = make_matmul_b_input(precision, input_shapes[1], matmul_type, params); const auto matmul = std::make_shared>( std::vector{element::f32, element::f32}, @@ -352,8 +338,8 @@ std::shared_ptr MatMulEltwiseChainFunction::initOriginal() const { std::shared_ptr MatMulEltwiseChainCascadeFunction::initOriginal() const { auto data0 = std::make_shared(precision, input_shapes[0]); ParameterVector params{data0}; - auto data1 = build_mm_second_input(precision, input_shapes[1], matmul_type, params); - auto data2 = build_mm_second_input(precision, input_shapes[2], matmul_type, params); + auto data1 = make_matmul_b_input(precision, input_shapes[1], matmul_type, params); + auto data2 = make_matmul_b_input(precision, input_shapes[2], matmul_type, params); const auto matmul1 = std::make_shared>( std::vector{element::f32, element::f32},